scalable-pypeline 2.1.0__py2.py3-none-any.whl → 2.1.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pypeline/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2.1.0"
1
+ __version__ = "2.1.2"
pypeline/dramatiq.py CHANGED
@@ -34,6 +34,9 @@ from pypeline.constants import (
34
34
  )
35
35
  from pypeline.pipelines.middleware.parallel_pipeline_middleware import ParallelPipeline
36
36
  from pypeline.pipelines.middleware.pypeline_middleware import PypelineMiddleware
37
+ from pypeline.pipelines.middleware.deduplication_middleware import (
38
+ DeduplicationMiddleware,
39
+ )
37
40
  from pypeline.utils.config_utils import (
38
41
  retrieve_latest_schedule_config,
39
42
  get_service_config_for_worker,
@@ -72,6 +75,7 @@ def configure_default_broker(broker: Broker = None):
72
75
  rabbit_broker.add_middleware(PypelineMiddleware(redis_url=REDIS_URL))
73
76
  rabbit_broker.add_middleware(CurrentMessage())
74
77
  register_actors_for_workers(rabbit_broker)
78
+ rabbit_broker.add_middleware(DeduplicationMiddleware(redis_url=REDIS_URL))
75
79
  set_broker(rabbit_broker)
76
80
 
77
81
 
pypeline/pipeline_settings_schema.py CHANGED
@@ -32,11 +32,39 @@ def create_pipeline_settings_schema(pipeline_settings_schema_data):
32
32
  "float": fields.Float,
33
33
  "boolean": fields.Boolean,
34
34
  "datetime": fields.DateTime,
35
+ "array": fields.List,
36
+ "object": fields.Nested,
35
37
  }.get(data_type)
36
38
 
37
39
  if not field_type:
38
40
  raise ValidationError(f"Unsupported dataType `{data_type}` for `{key}`.")
39
41
 
42
+ # Handle array type
43
+ if data_type == "array":
44
+ element_type = config.get("elementType")
45
+ if not element_type:
46
+ raise ValidationError(f"`elementType` is required for array `{key}`.")
47
+ field_args["cls_or_instance"] = {
48
+ "string": fields.String,
49
+ "int": fields.Integer,
50
+ "float": fields.Float,
51
+ "boolean": fields.Boolean,
52
+ "datetime": fields.DateTime,
53
+ }.get(element_type)
54
+ if not field_args["cls_or_instance"]:
55
+ raise ValidationError(
56
+ f"Unsupported elementType `{element_type}` for array `{key}`."
57
+ )
58
+
59
+ # Handle object type
60
+ if data_type == "object":
61
+ properties = config.get("properties")
62
+ if not properties:
63
+ raise ValidationError(f"`properties` is required for object `{key}`.")
64
+ # Recursively create a schema for the nested object
65
+ nested_schema = create_pipeline_settings_schema({"properties": properties})
66
+ field_args["schema"] = nested_schema
67
+
40
68
  # Handle range validation for numeric fields
41
69
  if data_type in ["int", "float"]:
42
70
  if "minimum" in config or "maximum" in config:
@@ -64,6 +92,97 @@ def create_pipeline_settings_schema(pipeline_settings_schema_data):
64
92
  return DynamicPipelineSettingsSchema()
65
93
 
66
94
 
95
+ def create_pipeline_settings_schema(pipeline_settings_schema_data):
96
+ """
97
+ Dynamically create a schema to validate user data based on settings.
98
+
99
+ Args:
100
+ pipeline_settings_schema_data (dict): The settings schema data containing
101
+ field configurations.
102
+
103
+ Returns:
104
+ Schema: A dynamically created schema class for validating user data.
105
+ """
106
+
107
+ # Dictionary to store dynamically generated fields
108
+ schema_fields = {}
109
+
110
+ for key, config in pipeline_settings_schema_data["properties"].items():
111
+ data_type = config.get("dataType")
112
+ input_type = config.get("inputType")
113
+ field_args = {}
114
+
115
+ # Map dataType to Marshmallow field type
116
+ field_type = {
117
+ "string": fields.String,
118
+ "int": fields.Integer,
119
+ "float": fields.Float,
120
+ "boolean": fields.Boolean,
121
+ "datetime": fields.DateTime,
122
+ "array": fields.List,
123
+ "object": fields.Nested,
124
+ }.get(data_type)
125
+
126
+ if not field_type:
127
+ raise ValidationError(f"Unsupported dataType `{data_type}` for `{key}`.")
128
+
129
+ # Handle array type
130
+ if data_type == "array":
131
+ element_type = config.get("elementType")
132
+ if not element_type:
133
+ raise ValidationError(f"`elementType` is required for array `{key}`.")
134
+ field_args["cls_or_instance"] = {
135
+ "string": fields.String,
136
+ "int": fields.Integer,
137
+ "float": fields.Float,
138
+ "boolean": fields.Boolean,
139
+ "datetime": fields.DateTime,
140
+ }.get(element_type)
141
+ if not field_args["cls_or_instance"]:
142
+ raise ValidationError(
143
+ f"Unsupported elementType `{element_type}` for array `{key}`."
144
+ )
145
+
146
+ # Handle object type
147
+ if data_type == "object":
148
+ properties = config.get("properties")
149
+ if not properties:
150
+ raise ValidationError(f"`properties` is required for object `{key}`.")
151
+ # Recursively create a schema for the nested object
152
+ nested_schema = create_pipeline_settings_schema({"properties": properties})
153
+ # Use the nested schema as the `nested` argument for fields.Nested
154
+ field_type = fields.Nested(nested_schema)
155
+
156
+ # Handle range validation for numeric fields
157
+ if data_type in ["int", "float"]:
158
+ if "minimum" in config or "maximum" in config:
159
+ field_args["validate"] = validate.Range(
160
+ min=config.get("minimum"), max=config.get("maximum")
161
+ )
162
+
163
+ # Handle dropdown or radio input options
164
+ if input_type in ["dropdown", "radio"] and "options" in config:
165
+ allowed_values = [option["value"] for option in config["options"]]
166
+ field_args["validate"] = validate.OneOf(allowed_values)
167
+
168
+ # Mark the field as required if specified
169
+ if key in pipeline_settings_schema_data.get("required", []):
170
+ field_args["required"] = True
171
+
172
+ # Create the field and add to the schema fields dictionary
173
+ if data_type == "object":
174
+ schema_fields[key] = field_type
175
+ else:
176
+ schema_fields[key] = field_type(**field_args)
177
+
178
+ # Dynamically create a schema class with the generated fields
179
+ DynamicPipelineSettingsSchema = type(
180
+ "DynamicPipelineSettingsSchema", (Schema,), schema_fields
181
+ )
182
+
183
+ return DynamicPipelineSettingsSchema()
184
+
185
+
67
186
  class OptionSchema(Schema):
68
187
  label = fields.String(
69
188
  required=True,
@@ -103,13 +222,15 @@ def validate_min_max(data):
103
222
  class SettingSchema(Schema):
104
223
  dataType = fields.String(
105
224
  required=True,
106
- validate=validate.OneOf(["string", "int", "float", "boolean", "datetime"]),
225
+ validate=validate.OneOf(
226
+ ["string", "int", "float", "boolean", "datetime", "array", "object"]
227
+ ),
107
228
  metadata={"description": "The underlying data type of the setting"},
108
229
  )
109
230
  inputType = fields.String(
110
231
  required=True,
111
232
  validate=validate.OneOf(
112
- ["text", "dropdown", "radio", "checkbox", "searchable"]
233
+ ["text", "dropdown", "radio", "checkbox", "searchable", "custom"]
113
234
  ),
114
235
  metadata={"description": "The type of input UI element"},
115
236
  )
@@ -133,6 +254,17 @@ class SettingSchema(Schema):
133
254
  searchEndpoint = fields.String(
134
255
  metadata={"description": "Endpoint for searchable fields"}
135
256
  )
257
+ component = fields.String(
258
+ metadata={"description": "React component for custom input types"}
259
+ )
260
+ elementType = fields.String(
261
+ metadata={"description": "Element type for array data types"}
262
+ )
263
+ properties = fields.Dict(
264
+ keys=fields.String(),
265
+ values=fields.Nested(lambda: SettingSchema),
266
+ metadata={"description": "Properties for object data types"},
267
+ )
136
268
 
137
269
  class Meta:
138
270
  ordered = True
@@ -197,6 +329,57 @@ class SettingSchema(Schema):
197
329
  field_name="searchEndpoint",
198
330
  )
199
331
 
332
+ @validates_schema
333
+ def validate_custom_component(self, data, **kwargs):
334
+ """Ensure component is provided for custom input types."""
335
+ input_type = data.get("inputType")
336
+ component = data.get("component")
337
+
338
+ if input_type == "custom" and not component:
339
+ raise ValidationError(
340
+ "`component` is required for `custom` input types.",
341
+ field_name="component",
342
+ )
343
+ elif input_type != "custom" and component:
344
+ raise ValidationError(
345
+ "`component` is not allowed for non-custom input types.",
346
+ field_name="component",
347
+ )
348
+
349
+ @validates_schema
350
+ def validate_array_element_type(self, data, **kwargs):
351
+ """Ensure elementType is provided for array data types."""
352
+ data_type = data.get("dataType")
353
+ element_type = data.get("elementType")
354
+
355
+ if data_type == "array" and not element_type:
356
+ raise ValidationError(
357
+ "`elementType` is required for `array` data types.",
358
+ field_name="elementType",
359
+ )
360
+ elif data_type != "array" and element_type:
361
+ raise ValidationError(
362
+ "`elementType` is not allowed for non-array data types.",
363
+ field_name="elementType",
364
+ )
365
+
366
+ @validates_schema
367
+ def validate_object_properties(self, data, **kwargs):
368
+ """Ensure properties are provided for object data types."""
369
+ data_type = data.get("dataType")
370
+ properties = data.get("properties")
371
+
372
+ if data_type == "object" and not properties:
373
+ raise ValidationError(
374
+ "`properties` is required for `object` data types.",
375
+ field_name="properties",
376
+ )
377
+ elif data_type != "object" and properties:
378
+ raise ValidationError(
379
+ "`properties` is not allowed for non-object data types.",
380
+ field_name="properties",
381
+ )
382
+
200
383
 
201
384
  class PipelineSettingsSchema(Schema):
202
385
  properties = fields.Dict(
@@ -288,8 +471,8 @@ if __name__ == "__main__":
288
471
  "dataType": "int",
289
472
  "inputType": "text",
290
473
  "label": "Parameter 2",
291
- "minimum": 1,
292
- "maximum": -1,
474
+ "minimum": -1,
475
+ "maximum": 1,
293
476
  },
294
477
  "param3": {
295
478
  "dataType": "boolean",
@@ -322,6 +505,30 @@ if __name__ == "__main__":
322
505
  "label": "Select Pipeline",
323
506
  "searchEndpoint": "/api/pipelines",
324
507
  },
508
+ "param7": {
509
+ "dataType": "array",
510
+ "inputType": "text",
511
+ "label": "Array of Integers",
512
+ "elementType": "int",
513
+ },
514
+ "param8": {
515
+ "dataType": "object",
516
+ "inputType": "custom",
517
+ "label": "Custom Object",
518
+ "component": "CustomObjectComponent",
519
+ "properties": {
520
+ "subParam1": {
521
+ "dataType": "string",
522
+ "inputType": "text",
523
+ "label": "Sub Parameter 1",
524
+ },
525
+ "subParam2": {
526
+ "dataType": "int",
527
+ "inputType": "text",
528
+ "label": "Sub Parameter 2",
529
+ },
530
+ },
531
+ },
325
532
  },
326
533
  "required": ["param1", "param2", "param4"],
327
534
  }
pypeline/pipelines/middleware/deduplication_middleware.py ADDED
@@ -0,0 +1,91 @@
1
+ import dramatiq
2
+ import signal
3
+ from dramatiq.middleware import Middleware
4
+ from pypeline.barrier import LockingParallelBarrier
5
+ from pypeline.constants import DEFAULT_TASK_TTL
6
+ import logging
7
+
8
+ logging.basicConfig(level=logging.INFO)
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ class DeduplicationMiddleware(Middleware):
13
+ def __init__(self, redis_url="redis://localhost:6379/0"):
14
+ self.redis_url = redis_url
15
+ self.active_locks = {}
16
+
17
+ def before_process_message(self, broker, message):
18
+ task_id = message.message_id
19
+ task_key = f"dramatiq:task_counter:{task_id}"
20
+ lock_key = f"dramatiq:lock:{task_id}"
21
+ try:
22
+ # Try to acquire a lock for the task
23
+ locking_parallel_barrier = LockingParallelBarrier(
24
+ self.redis_url,
25
+ task_key=task_key,
26
+ lock_key=lock_key,
27
+ )
28
+ if (
29
+ locking_parallel_barrier.get_task_count() > 0
30
+ or not locking_parallel_barrier.acquire_lock(timeout=DEFAULT_TASK_TTL)
31
+ ):
32
+ raise dramatiq.middleware.SkipMessage(
33
+ f"Task {task_id} is already being processed."
34
+ )
35
+
36
+ locking_parallel_barrier.set_task_count(1)
37
+ # Store the lock reference in the message and track it globally
38
+ message.options["dedupe_task_key"] = task_key
39
+ message.options["dedupe_lock_key"] = lock_key
40
+ self.active_locks[lock_key] = locking_parallel_barrier
41
+ except dramatiq.middleware.SkipMessage:
42
+ raise dramatiq.middleware.SkipMessage(
43
+ f"Task {task_id} is already being processed."
44
+ )
45
+ except Exception as e:
46
+ logger.exception(e)
47
+ raise e
48
+
49
+ def after_process_message(self, broker, message, *, result=None, exception=None):
50
+ """Releases lock for the message that just finished."""
51
+ dedupe_task_key = message.options.get("dedupe_task_key", None)
52
+ dedupe_lock_key = message.options.get("dedupe_lock_key", None)
53
+ if not dedupe_lock_key or not dedupe_task_key:
54
+ logger.warning("unexpected in after_process_message: dedupe task or lock key not in message")
55
+ return
56
+ if dedupe_lock_key in self.active_locks:
57
+ try:
58
+ lock = self.active_locks[dedupe_lock_key]
59
+ lock.decrement_task_count()
60
+ lock.release_lock()
61
+ del self.active_locks[dedupe_lock_key]
62
+ except Exception as e:
63
+ logger.info(
64
+ f"Exception while trying to release lock {dedupe_lock_key}: {e}"
65
+ )
66
+ raise e
67
+ else:
68
+ lock = LockingParallelBarrier(
69
+ self.redis_url,
70
+ task_key=dedupe_task_key,
71
+ lock_key=dedupe_lock_key,
72
+ )
73
+ lock.decrement_task_count()
74
+ lock.release_lock()
75
+
76
+ def before_worker_shutdown(self, *args):
77
+ self.release_all_locks()
78
+
79
+ def before_worker_thread_shutdown(self, *args):
80
+ self.release_all_locks()
81
+
82
+ def release_all_locks(self, *args):
83
+ """Release all locks when the worker shuts down."""
84
+ for lock_key, lock in self.active_locks.items():
85
+ try:
86
+ lock.decrement_task_count()
87
+ lock.release_lock()
88
+ except Exception as e:
89
+ logger.info(f"Exception while trying to release lock {lock_key}: {e}")
90
+ raise e
91
+ self.active_locks.clear()
{scalable_pypeline-2.1.0.dist-info → scalable_pypeline-2.1.2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: scalable-pypeline
3
- Version: 2.1.0
3
+ Version: 2.1.2
4
4
  Summary: PypeLine - Python pipelines for the Real World
5
5
  Home-page: https://gitlab.com/bravos2/pypeline
6
6
  Author: Bravos Power Corporation
{scalable_pypeline-2.1.0.dist-info → scalable_pypeline-2.1.2.dist-info}/RECORD CHANGED
@@ -1,10 +1,10 @@
1
- pypeline/__init__.py,sha256=Xybt2skBZamGMNlLuOX1IG-h4uIxqUDGAO8MIGWrJac,22
1
+ pypeline/__init__.py,sha256=UiuBcRXPtXxPUBDdp0ZDvWl0U9Db1kMNfT3oAfhxqLg,22
2
2
  pypeline/barrier.py,sha256=oO964l9qOCOibweOHyNivmAvufdXOke9nz2tdgclouo,1172
3
3
  pypeline/constants.py,sha256=coiF8dMP25qIwoNYSnS7oy7hCd4-5yqPFmdPsN93Q1A,2892
4
- pypeline/dramatiq.py,sha256=LWsl0o0t5FdxewIl87ARZKrNK0ENoYJEJAEVDSNFa40,12272
4
+ pypeline/dramatiq.py,sha256=TGwXWSInpCPFtYC5C-Omc2sEk5ecpsOG8xHH_mx9WTo,12451
5
5
  pypeline/extensions.py,sha256=BzOTnXhNxap3N7uIUUh_hO6dDwx08Vc_RJDE93_K0Lo,610
6
6
  pypeline/pipeline_config_schema.py,sha256=hK2_egtg-YFx_XJDs_NyrOTGKkel7W83X-G0sic52sM,10592
7
- pypeline/pipeline_settings_schema.py,sha256=7zMXtBT_V4qom_j6JInRVPq0X1f1vQOgH33RNBaLo-o,12136
7
+ pypeline/pipeline_settings_schema.py,sha256=84AuNFYsOUpoADsjEo_n9T6Ica-c21oK_V9s15I4lCg,20212
8
8
  pypeline/pypeline_yaml.py,sha256=Og08sUKwOjq7JYPnkg-NIcGbHravYCkC5Arz22rZEtA,16981
9
9
  pypeline/schedule_config_schema.py,sha256=vtZV-5wpGcAiYcXxdBPRkrjsbR6x_9E-1PC2elrKKbE,3611
10
10
  pypeline/flask/__init__.py,sha256=AdljRh0lMiS8ExgDmgzObwVs8jW7hqQuf83Ml8kn8GQ,491
@@ -19,6 +19,7 @@ pypeline/pipelines/composition/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
19
19
  pypeline/pipelines/composition/parallel_pipeline_composition.py,sha256=pTw9Xb9h4JnV4siFc3JStm5lB-i9djUADo3Kh5K3s7g,12976
20
20
  pypeline/pipelines/composition/pypeline_composition.py,sha256=ieTuQZ8zxTtvmPEkrWFbItjGtvO3JUotXcR-Jim2mss,7204
21
21
  pypeline/pipelines/middleware/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ pypeline/pipelines/middleware/deduplication_middleware.py,sha256=IGO9Kc6NPMUaw1ytT3_ud2ITSZuh1-_WaU5_onazeh8,3556
22
23
  pypeline/pipelines/middleware/parallel_pipeline_middleware.py,sha256=kTp6niYoe2nXIiN6EGRfdpxrJyioo0GPxDkfefbGlEk,2821
23
24
  pypeline/pipelines/middleware/pypeline_middleware.py,sha256=kvt5A9OxDwpIo0PsH11Im62tH6VquUc6OFoZDw2Gxsk,8036
24
25
  pypeline/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -28,9 +29,9 @@ pypeline/utils/module_utils.py,sha256=-yEJIukDCoXnmlZVXB6Dww25tH6GdPE5SoFqv6pfdV
28
29
  pypeline/utils/pipeline_utils.py,sha256=kGP1QwCJikGC5QNRtzRXCDVewyRMpWIqERTNnxGLlSY,4795
29
30
  pypeline/utils/schema_utils.py,sha256=Fgl0y9Cuo_TZeEx_S3gaSVnLjn6467LTkjb2ek7Ms98,851
30
31
  tests/fixtures/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- scalable_pypeline-2.1.0.dist-info/LICENSE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
32
- scalable_pypeline-2.1.0.dist-info/METADATA,sha256=Oulf-ivIRF8QmuY4zeDajhjwgH_Ae7WMz_7BYO76HC0,5926
33
- scalable_pypeline-2.1.0.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
34
- scalable_pypeline-2.1.0.dist-info/entry_points.txt,sha256=uWs10ODfHSBKo2Cx_QaUjPHQTpZ3e77j9VlAdRRmMyg,119
35
- scalable_pypeline-2.1.0.dist-info/top_level.txt,sha256=C7dpkEOc_-nnsAQb28BfQknjD6XHRyS9ZrvVeoIbV7s,15
36
- scalable_pypeline-2.1.0.dist-info/RECORD,,
32
+ scalable_pypeline-2.1.2.dist-info/LICENSE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
33
+ scalable_pypeline-2.1.2.dist-info/METADATA,sha256=M9cP1_2S3B289LGum8QtlruOBUfdnH7x4IDtcM3Swpk,5926
34
+ scalable_pypeline-2.1.2.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
35
+ scalable_pypeline-2.1.2.dist-info/entry_points.txt,sha256=uWs10ODfHSBKo2Cx_QaUjPHQTpZ3e77j9VlAdRRmMyg,119
36
+ scalable_pypeline-2.1.2.dist-info/top_level.txt,sha256=C7dpkEOc_-nnsAQb28BfQknjD6XHRyS9ZrvVeoIbV7s,15
37
+ scalable_pypeline-2.1.2.dist-info/RECORD,,