FlowerPower 0.11.6.20__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. flowerpower/__init__.py +2 -6
  2. flowerpower/cfg/__init__.py +7 -14
  3. flowerpower/cfg/base.py +29 -25
  4. flowerpower/cfg/pipeline/__init__.py +8 -6
  5. flowerpower/cfg/pipeline/_schedule.py +32 -0
  6. flowerpower/cfg/pipeline/adapter.py +0 -5
  7. flowerpower/cfg/pipeline/builder.py +377 -0
  8. flowerpower/cfg/pipeline/run.py +36 -0
  9. flowerpower/cfg/project/__init__.py +11 -24
  10. flowerpower/cfg/project/adapter.py +0 -12
  11. flowerpower/cli/__init__.py +2 -21
  12. flowerpower/cli/cfg.py +0 -3
  13. flowerpower/cli/mqtt.py +0 -6
  14. flowerpower/cli/pipeline.py +22 -415
  15. flowerpower/cli/utils.py +0 -1
  16. flowerpower/flowerpower.py +345 -146
  17. flowerpower/pipeline/__init__.py +2 -0
  18. flowerpower/pipeline/base.py +21 -12
  19. flowerpower/pipeline/io.py +58 -54
  20. flowerpower/pipeline/manager.py +165 -726
  21. flowerpower/pipeline/pipeline.py +643 -0
  22. flowerpower/pipeline/registry.py +285 -18
  23. flowerpower/pipeline/visualizer.py +5 -6
  24. flowerpower/plugins/io/__init__.py +8 -0
  25. flowerpower/plugins/mqtt/__init__.py +7 -11
  26. flowerpower/settings/__init__.py +0 -2
  27. flowerpower/settings/{backend.py → _backend.py} +0 -21
  28. flowerpower/settings/logging.py +1 -1
  29. flowerpower/utils/logging.py +24 -12
  30. flowerpower/utils/misc.py +17 -256
  31. flowerpower/utils/monkey.py +1 -83
  32. flowerpower-0.21.0.dist-info/METADATA +463 -0
  33. flowerpower-0.21.0.dist-info/RECORD +44 -0
  34. flowerpower/cfg/pipeline/schedule.py +0 -74
  35. flowerpower/cfg/project/job_queue.py +0 -238
  36. flowerpower/cli/job_queue.py +0 -1061
  37. flowerpower/fs/__init__.py +0 -29
  38. flowerpower/fs/base.py +0 -662
  39. flowerpower/fs/ext.py +0 -2143
  40. flowerpower/fs/storage_options.py +0 -1420
  41. flowerpower/job_queue/__init__.py +0 -294
  42. flowerpower/job_queue/apscheduler/__init__.py +0 -11
  43. flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
  44. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
  45. flowerpower/job_queue/apscheduler/manager.py +0 -1051
  46. flowerpower/job_queue/apscheduler/setup.py +0 -554
  47. flowerpower/job_queue/apscheduler/trigger.py +0 -169
  48. flowerpower/job_queue/apscheduler/utils.py +0 -311
  49. flowerpower/job_queue/base.py +0 -413
  50. flowerpower/job_queue/rq/__init__.py +0 -10
  51. flowerpower/job_queue/rq/_trigger.py +0 -37
  52. flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -226
  53. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -231
  54. flowerpower/job_queue/rq/manager.py +0 -1582
  55. flowerpower/job_queue/rq/setup.py +0 -154
  56. flowerpower/job_queue/rq/utils.py +0 -69
  57. flowerpower/mqtt.py +0 -12
  58. flowerpower/pipeline/job_queue.py +0 -583
  59. flowerpower/pipeline/runner.py +0 -603
  60. flowerpower/plugins/io/base.py +0 -2520
  61. flowerpower/plugins/io/helpers/datetime.py +0 -298
  62. flowerpower/plugins/io/helpers/polars.py +0 -875
  63. flowerpower/plugins/io/helpers/pyarrow.py +0 -570
  64. flowerpower/plugins/io/helpers/sql.py +0 -202
  65. flowerpower/plugins/io/loader/__init__.py +0 -28
  66. flowerpower/plugins/io/loader/csv.py +0 -37
  67. flowerpower/plugins/io/loader/deltatable.py +0 -190
  68. flowerpower/plugins/io/loader/duckdb.py +0 -19
  69. flowerpower/plugins/io/loader/json.py +0 -37
  70. flowerpower/plugins/io/loader/mqtt.py +0 -159
  71. flowerpower/plugins/io/loader/mssql.py +0 -26
  72. flowerpower/plugins/io/loader/mysql.py +0 -26
  73. flowerpower/plugins/io/loader/oracle.py +0 -26
  74. flowerpower/plugins/io/loader/parquet.py +0 -35
  75. flowerpower/plugins/io/loader/postgres.py +0 -26
  76. flowerpower/plugins/io/loader/pydala.py +0 -19
  77. flowerpower/plugins/io/loader/sqlite.py +0 -23
  78. flowerpower/plugins/io/metadata.py +0 -244
  79. flowerpower/plugins/io/saver/__init__.py +0 -28
  80. flowerpower/plugins/io/saver/csv.py +0 -36
  81. flowerpower/plugins/io/saver/deltatable.py +0 -186
  82. flowerpower/plugins/io/saver/duckdb.py +0 -19
  83. flowerpower/plugins/io/saver/json.py +0 -36
  84. flowerpower/plugins/io/saver/mqtt.py +0 -28
  85. flowerpower/plugins/io/saver/mssql.py +0 -26
  86. flowerpower/plugins/io/saver/mysql.py +0 -26
  87. flowerpower/plugins/io/saver/oracle.py +0 -26
  88. flowerpower/plugins/io/saver/parquet.py +0 -36
  89. flowerpower/plugins/io/saver/postgres.py +0 -26
  90. flowerpower/plugins/io/saver/pydala.py +0 -20
  91. flowerpower/plugins/io/saver/sqlite.py +0 -24
  92. flowerpower/plugins/mqtt/cfg.py +0 -17
  93. flowerpower/plugins/mqtt/manager.py +0 -962
  94. flowerpower/settings/job_queue.py +0 -87
  95. flowerpower/utils/scheduler.py +0 -311
  96. flowerpower-0.11.6.20.dist-info/METADATA +0 -537
  97. flowerpower-0.11.6.20.dist-info/RECORD +0 -102
  98. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/WHEEL +0 -0
  99. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/entry_points.txt +0 -0
  100. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/licenses/LICENSE +0 -0
  101. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/top_level.txt +0 -0
@@ -1,413 +0,0 @@
1
- """
2
- Base scheduler interface for FlowerPower.
3
-
4
- This module defines the abstract base classes for scheduling operations
5
- that can be implemented by different backend providers (APScheduler, RQ, etc.).
6
- """
7
-
8
- import abc
9
- import importlib
10
- import os
11
- import posixpath
12
- import sys
13
- import urllib.parse
14
- from dataclasses import dataclass, field
15
- from enum import Enum
16
- from pathlib import Path
17
- from typing import Any, TypeVar
18
-
19
- if importlib.util.find_spec("sqlalchemy"):
20
- from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
21
- else:
22
- create_async_engine = None
23
- AsyncEngine = TypeVar("AsyncEngine")
24
-
25
- from ..cfg import ProjectConfig
26
- from ..fs import AbstractFileSystem, get_filesystem
27
- # from ..utils.misc import update_config_from_dict
28
- from ..settings import BACKEND_PROPERTIES, CACHE_DIR, CONFIG_DIR, PIPELINES_DIR
29
-
30
-
31
- class BackendType(str, Enum):
32
- POSTGRESQL = "postgresql"
33
- MYSQL = "mysql"
34
- SQLITE = "sqlite"
35
- MONGODB = "mongodb"
36
- MQTT = "mqtt"
37
- REDIS = "redis"
38
- NATS_KV = "nats_kv"
39
- MEMORY = "memory"
40
-
41
- @property
42
- def properties(self):
43
- return BACKEND_PROPERTIES[self.value]
44
-
45
- @property
46
- def uri_prefix(self) -> str:
47
- return self.properties.get("uri_prefix", "")
48
-
49
- @property
50
- def default_port(self):
51
- return self.properties.get("default_port")
52
-
53
- @property
54
- def default_host(self) -> str:
55
- return self.properties.get("default_host", "")
56
-
57
- @property
58
- def default_username(self) -> str:
59
- return self.properties.get("default_username", "")
60
-
61
- @property
62
- def default_password(self) -> str:
63
- return self.properties.get("default_password", "")
64
-
65
- @property
66
- def default_database(self) -> str:
67
- return self.properties.get("default_database", "")
68
-
69
- @property
70
- def is_sqla_type(self) -> bool:
71
- return self.properties.get("is_sqla_type", False)
72
-
73
- @property
74
- def is_mongodb_type(self) -> bool:
75
- return self.value == "mongodb"
76
-
77
- @property
78
- def is_mqtt_type(self) -> bool:
79
- return self.value == "mqtt"
80
-
81
- @property
82
- def is_redis_type(self) -> bool:
83
- return self.value == "redis"
84
-
85
- @property
86
- def is_nats_kv_type(self) -> bool:
87
- return self.value == "nats_kv"
88
-
89
- @property
90
- def is_memory_type(self) -> bool:
91
- return self.value == "memory"
92
-
93
- @property
94
- def is_sqlite_type(self) -> bool:
95
- return self.value == "sqlite"
96
-
97
- def gen_uri(
98
- self,
99
- host: str | None = None,
100
- port: int | None = None,
101
- username: str | None = None,
102
- password: str | None = None,
103
- database: str | None = None,
104
- ssl: bool = False,
105
- ca_file: str | None = None,
106
- cert_file: str | None = None,
107
- key_file: str | None = None,
108
- verify_ssl: bool = False,
109
- ) -> str:
110
- # Handle host and port
111
- host = host or self.default_host
112
- port = port or self.default_port
113
- database = database or self.default_database
114
- username = username or self.default_username
115
- password = password or self.default_password
116
-
117
- # components: List[str] = []
118
- # Get the appropriate URI prefix based on backend type and SSL setting
119
- if self.is_redis_type:
120
- uri_prefix = "rediss://" if ssl else "redis://"
121
- elif self.is_nats_kv_type:
122
- uri_prefix = "nats+tls://" if ssl else "nats://"
123
- elif self.is_mqtt_type:
124
- uri_prefix = "mqtts://" if ssl else "mqtt://"
125
- if ssl and port == 1883:
126
- port = 8883
127
- else:
128
- uri_prefix = self.uri_prefix
129
-
130
- # Handle authentication
131
- if username and password:
132
- auth = f"{urllib.parse.quote(username)}:{urllib.parse.quote(password)}@"
133
- elif username:
134
- auth = f"{urllib.parse.quote(username)}@"
135
- elif password:
136
- auth = f":{urllib.parse.quote(password)}@"
137
- else:
138
- auth = ""
139
-
140
- port_part = f":{port}" # if port is not None else self.default_port
141
-
142
- # Special handling for SQLite and memory types
143
- if self.is_sqlite_type or self.is_memory_type:
144
- if self.is_sqlite_type:
145
- if database:
146
- return f"{uri_prefix}{database}"
147
- else:
148
- return f"{uri_prefix}"
149
- return "memory://"
150
-
151
- # Build path component
152
- database = database or self.default_database
153
- path = f"/{database}" if database else ""
154
-
155
- # Construct base URI
156
- base_uri = f"{uri_prefix}{auth}{host}{port_part}{path}"
157
-
158
- # Prepare query parameters for SSL files
159
- query_params: list[str] = []
160
-
161
- if ssl:
162
- # Always add ssl query parameter if ssl=True
163
- if self.value == "postgresql":
164
- query_params.append("ssl=verify-full" if verify_ssl else "ssl=allow")
165
- if ca_file:
166
- query_params.append(f"sslrootcert={urllib.parse.quote(ca_file)}")
167
- if cert_file:
168
- query_params.append(f"sslcert={urllib.parse.quote(cert_file)}")
169
- if key_file:
170
- query_params.append(f"sslkey={urllib.parse.quote(key_file)}")
171
- elif self.value == "mysql":
172
- query_params.append("ssl=true")
173
- if ca_file:
174
- query_params.append(f"ssl_ca={urllib.parse.quote(ca_file)}")
175
- if cert_file:
176
- query_params.append(f"ssl_cert={urllib.parse.quote(cert_file)}")
177
- if key_file:
178
- query_params.append(f"ssl_key={urllib.parse.quote(key_file)}")
179
- elif self.is_mongodb_type:
180
- query_params.append("tls=true")
181
- if ca_file:
182
- query_params.append(f"tlsCAFile={urllib.parse.quote(ca_file)}")
183
- if cert_file and key_file:
184
- query_params.append(
185
- f"tlsCertificateKeyFile={urllib.parse.quote(cert_file)}"
186
- )
187
- elif self.is_redis_type:
188
- if not verify_ssl:
189
- query_params.append("ssl_cert_reqs=none")
190
- if ca_file:
191
- query_params.append(f"ssl_ca_certs={urllib.parse.quote(ca_file)}")
192
- if cert_file:
193
- query_params.append(f"ssl_certfile={urllib.parse.quote(cert_file)}")
194
- if key_file:
195
- query_params.append(f"ssl_keyfile={urllib.parse.quote(key_file)}")
196
- elif self.is_nats_kv_type:
197
- query_params.append("tls=true")
198
- if ca_file:
199
- query_params.append(f"tls_ca_file={urllib.parse.quote(ca_file)}")
200
- if cert_file:
201
- query_params.append(
202
- f"tls_cert_file={urllib.parse.quote(cert_file)}"
203
- )
204
- if key_file:
205
- query_params.append(f"tls_key_file={urllib.parse.quote(key_file)}")
206
- elif self.is_mqtt_type:
207
- query_params.append("tls=true")
208
- if ca_file:
209
- query_params.append(f"tls_ca_file={urllib.parse.quote(ca_file)}")
210
- if cert_file:
211
- query_params.append(
212
- f"tls_cert_file={urllib.parse.quote(cert_file)}"
213
- )
214
- if key_file:
215
- query_params.append(f"tls_key_file={urllib.parse.quote(key_file)}")
216
-
217
- # Compose query string if Any params exist
218
- query_string = ""
219
- if query_params:
220
- query_string = "?" + "&".join(query_params)
221
-
222
- return f"{base_uri}{query_string}"
223
-
224
-
225
- @dataclass(slots=True)
226
- class BaseBackend:
227
- type: BackendType | str | None = None
228
- uri: str | None = None
229
- username: str | None = None
230
- password: str | None = None
231
- host: str | None = None
232
- port: int | None = None
233
- database: str | None = None
234
- ssl: bool = False
235
- ca_file: str | None = None
236
- cert_file: str | None = None
237
- key_file: str | None = None
238
- verify_ssl: bool = False
239
- _kwargs: dict = field(default_factory=dict)
240
- _sqla_engine: AsyncEngine | None = (
241
- None # SQLAlchemy async engine instance for SQL backends
242
- )
243
- _client: Any | None = None # Native client instance for non-SQL backends
244
-
245
- def __post_init__(self):
246
- if self.type is None:
247
- self.type = "memory"
248
-
249
- elif isinstance(self.type, str):
250
- try:
251
- self.type = BackendType[self.type.upper()]
252
- except KeyError:
253
- raise ValueError(
254
- f"Invalid backend type: {self.type}. Valid types: {[bt.value for bt in BackendType]}"
255
- )
256
-
257
- if not self.uri:
258
- self.uri = self.type.gen_uri(
259
- username=self.username,
260
- password=self.password,
261
- host=self.host,
262
- port=self.port,
263
- database=self.database,
264
- ssl=self.ssl,
265
- ca_file=self.ca_file,
266
- cert_file=self.cert_file,
267
- key_file=self.key_file,
268
- verify_ssl=self.verify_ssl,
269
- )
270
-
271
- # Setup is handled by backend-specific implementations
272
-
273
- @classmethod
274
- def from_dict(cls, d: dict) -> "BaseBackend":
275
- return cls(**d)
276
-
277
-
278
- class BaseTrigger(abc.ABC):
279
- """
280
- Abstract base class for schedule triggers.
281
-
282
- A trigger determines when a scheduled job should be executed.
283
- """
284
-
285
- def __init__(self, trigger_type: str):
286
- self.trigger_type = trigger_type
287
-
288
- @abc.abstractmethod
289
- def get_trigger_instance(self, **kwargs) -> Any:
290
- """
291
- Get the backend-specific trigger instance.
292
-
293
- Args:
294
- **kwargs: Keyword arguments specific to the trigger type
295
-
296
- Returns:
297
- Any: A backend-specific trigger instance
298
- """
299
- pass
300
-
301
-
302
- class BaseJobQueueManager:
303
- """
304
- Abstract base class for scheduler workers (APScheduler, RQ, etc.).
305
- Defines the required interface for all scheduler backends.
306
-
307
- Can be used as a context manager:
308
-
309
- ```python
310
- with RQManager(name="test") as manager:
311
- manager.add_job(job1)
312
- ```
313
- """
314
-
315
- def __enter__(self):
316
- """Context manager entry - returns self for use in with statement."""
317
- return self
318
-
319
- def __exit__(self, exc_type, exc_val, exc_tb):
320
- """Context manager exit - ensures workers are stopped."""
321
- if hasattr(self, "_worker_process") and self._worker_process is not None:
322
- self.stop_worker()
323
- if hasattr(self, "_worker_pool") and self._worker_pool is not None:
324
- self.stop_worker_pool()
325
- if hasattr(self, "_worker") and self._worker is not None:
326
- self.stop_worker()
327
- if hasattr(self, "_scheduler") and self._scheduler is not None:
328
- self.stop_scheduler()
329
- return False # Don't suppress exceptions
330
-
331
- def __init__(
332
- self,
333
- type: str | None = None,
334
- name: str | None = None,
335
- base_dir: str | None = None,
336
- backend: BaseBackend | None = None,
337
- storage_options: dict = None,
338
- fs: AbstractFileSystem | None = None,
339
- **kwargs,
340
- ):
341
- """
342
- Initialize the APScheduler backend.
343
-
344
- Args:
345
- name: Name of the scheduler
346
- base_dir: Base directory for the FlowerPower project
347
- backend: APSBackend instance with data store and event broker
348
- storage_options: Storage options for filesystem access
349
- fs: Filesystem to use
350
- cfg_override: Configuration overrides for the worker
351
- """
352
- self.name = name or ""
353
- self._base_dir = base_dir or str(Path.cwd())
354
- # self._storage_options = storage_options or {}
355
- self._backend = backend
356
- self._type = type
357
- self._pipelines_dir = kwargs.get("pipelines_dir", PIPELINES_DIR)
358
- self._cfg_dir = CONFIG_DIR
359
-
360
- if storage_options is not None:
361
- cached = True
362
- cache_storage = posixpath.join(
363
- posixpath.expanduser(CACHE_DIR), self._base_dir.split("://")[-1]
364
- )
365
- os.makedirs(cache_storage, exist_ok=True)
366
- else:
367
- cached = False
368
- cache_storage = None
369
- if not fs:
370
- fs = get_filesystem(
371
- self._base_dir,
372
- storage_options=storage_options,
373
- cached=cached,
374
- cache_storage=cache_storage,
375
- )
376
- self._fs = fs
377
- self._storage_options = storage_options or fs.storage_options
378
-
379
- self._add_modules_path()
380
- self._load_config()
381
-
382
- def _load_config(self) -> None:
383
- """Load the configuration.
384
-
385
- Args:
386
- cfg_updates: Configuration updates to apply
387
- """
388
- self.cfg = ProjectConfig.load(
389
- base_dir=self._base_dir, job_queue_type=self._type, fs=self._fs
390
- ).job_queue
391
-
392
- def _add_modules_path(self):
393
- """
394
- Sync the filesystem.
395
-
396
- Returns:
397
- None
398
- """
399
- if self._fs.is_cache_fs:
400
- self._fs.sync_cache()
401
- project_path = self._fs._mapper.directory
402
- modules_path = posixpath.join(project_path, self._pipelines_dir)
403
-
404
- else:
405
- # Use the base directory directly if not using cache
406
- project_path = self._fs.path
407
- modules_path = posixpath.join(project_path, self._pipelines_dir)
408
-
409
- if project_path not in sys.path:
410
- sys.path.insert(0, project_path)
411
-
412
- if modules_path not in sys.path:
413
- sys.path.insert(0, modules_path)
@@ -1,10 +0,0 @@
1
- from .manager import RQManager
2
- from .setup import RQBackend
3
- from .utils import show_jobs, show_schedules
4
-
5
- __all__ = [
6
- "RQManager",
7
- "RQBackend",
8
- "show_jobs",
9
- "show_schedules",
10
- ]
@@ -1,37 +0,0 @@
1
- from typing import Any, Dict
2
-
3
- from ..base import BaseTrigger
4
-
5
-
6
- class RQTrigger(BaseTrigger):
7
- """
8
- RQTrigger adapts trigger logic for the RQ worker backend.
9
-
10
- Inherits from BaseTrigger and provides a trigger instance
11
- in dictionary format suitable for RQ scheduling.
12
- """
13
-
14
- def __init__(self, trigger_type: str):
15
- super().__init__(trigger_type)
16
-
17
- def get_trigger_instance(self, **kwargs) -> Dict[str, Any]:
18
- """
19
- Get trigger parameters for RQ Scheduler.
20
-
21
- Args:
22
- **kwargs: Keyword arguments for the trigger
23
-
24
- Returns:
25
- Dict[str, Any]: A dictionary with trigger configuration
26
- """
27
- # RQ doesn't have specific trigger classes like APScheduler.
28
- # Instead, we'll return a dictionary with parameters that can
29
- # be used by RQSchedulerBackend to schedule jobs appropriately.
30
-
31
- result = {"type": self.trigger_type, **kwargs}
32
-
33
- # For cron triggers, handle crontab string specifically
34
- if self.trigger_type == "cron" and "crontab" in kwargs:
35
- result["crontab"] = kwargs["crontab"]
36
-
37
- return result
@@ -1,226 +0,0 @@
1
- # Monkey patch as early as possible
2
- try:
3
- from gevent import monkey
4
-
5
- monkey.patch_all()
6
- import gevent
7
- import gevent.pool
8
-
9
- GEVENT_AVAILABLE = True
10
- except ImportError:
11
- GEVENT_AVAILABLE = False
12
- raise ImportError(
13
- "Gevent is required for GeventWorker. Please install it with 'pip install gevent'."
14
- )
15
-
16
-
17
- import datetime as dt
18
-
19
- from loguru import logger
20
- from rq import worker
21
- from rq.exceptions import DequeueTimeout
22
- from rq.job import JobStatus
23
- from rq.worker import StopRequested
24
-
25
- from flowerpower.utils.logging import setup_logging
26
-
27
- # Use utcnow directly for simplicity
28
- utcnow = dt.datetime.utcnow
29
- setup_logging("INFO")
30
-
31
-
32
- class GeventWorker(worker.Worker):
33
- """
34
- A variation of the RQ Worker that uses Gevent to perform jobs concurrently
35
- within a single worker process using greenlets.
36
-
37
- Ideal for I/O bound tasks, offering very lightweight concurrency.
38
- Jobs share the same memory space within the worker process.
39
-
40
- Requires gevent to be installed and monkey-patching to be applied
41
- (done automatically when this module is imported).
42
- """
43
-
44
- def __init__(
45
- self,
46
- queues,
47
- name=None,
48
- max_greenlets=1000,
49
- default_result_ttl=500,
50
- connection=None,
51
- exc_handler=None,
52
- exception_handlers=None,
53
- default_worker_ttl=None,
54
- job_class=None,
55
- queue_class=None,
56
- log_job_description=True,
57
- job_monitoring_interval=30,
58
- disable_default_exception_handler=False,
59
- prepare_for_work=True,
60
- maintenance_interval=600,
61
- ):
62
- super().__init__(
63
- queues,
64
- name=name,
65
- default_result_ttl=default_result_ttl,
66
- connection=connection,
67
- exc_handler=exc_handler,
68
- exception_handlers=exception_handlers,
69
- default_worker_ttl=default_worker_ttl,
70
- job_class=job_class,
71
- queue_class=queue_class,
72
- log_job_description=log_job_description,
73
- job_monitoring_interval=job_monitoring_interval,
74
- disable_default_exception_handler=disable_default_exception_handler,
75
- prepare_for_work=prepare_for_work,
76
- maintenance_interval=maintenance_interval,
77
- )
78
-
79
- self.max_greenlets = max_greenlets
80
- self._pool = None
81
- self.log = logger
82
- logger.info(f"GeventWorker initialized with max_greenlets={self.max_greenlets}")
83
-
84
- def work(
85
- self,
86
- burst=False,
87
- logging_level="INFO",
88
- date_format=worker.DEFAULT_LOGGING_DATE_FORMAT,
89
- log_format=worker.DEFAULT_LOGGING_FORMAT,
90
- max_jobs=None,
91
- with_scheduler=False,
92
- ):
93
- """Starts the worker's main loop using gevent for concurrent job execution."""
94
- self._install_signal_handlers()
95
- did_perform_work = False
96
- self.register_birth()
97
- self.log.info("Worker %s: started, version %s", self.key, worker.VERSION)
98
- self.set_state(worker.WorkerStatus.STARTED)
99
-
100
- self._pool = gevent.pool.Pool(self.max_greenlets)
101
- processed_jobs = 0
102
-
103
- try:
104
- while True:
105
- if self._stop_requested or (
106
- max_jobs is not None and processed_jobs >= max_jobs
107
- ):
108
- break
109
-
110
- self.run_maintenance_tasks()
111
-
112
- # Wait for space in the greenlet pool if it's full
113
- if self._pool.full():
114
- gevent.sleep(0.1) # Yield to other greenlets
115
- continue
116
-
117
- try:
118
- result = self.dequeue_job_and_maintain_ttl(timeout=1)
119
- except DequeueTimeout:
120
- if burst:
121
- break
122
- gevent.sleep(0.1)
123
- continue
124
- except StopRequested:
125
- break
126
- except Exception:
127
- self.log.error("Error during dequeue:", exc_info=True)
128
- gevent.sleep(1)
129
- continue
130
-
131
- if result is None:
132
- if burst:
133
- did_perform_work = True
134
- break
135
- gevent.sleep(0.1)
136
- continue
137
-
138
- job, queue = result
139
- self.log.info("Processing job %s: %s", job.id, job.description)
140
-
141
- try:
142
- # Spawn job execution in the gevent pool
143
- greenlet = self._pool.spawn(self.execute_job, job, queue)
144
- # Optional: Add error callback
145
- greenlet.link_exception(
146
- lambda g: self.log.error(
147
- f"Error in greenlet for job {job.id}", exc_info=g.exception
148
- )
149
- )
150
- except Exception as e:
151
- self.log.error(f"Failed to spawn job {job.id}: {e}", exc_info=True)
152
- continue
153
-
154
- did_perform_work = True
155
- processed_jobs += 1
156
-
157
- finally:
158
- if self._pool:
159
- self.log.info("Waiting for active greenlets to complete...")
160
- self._pool.join(timeout=30) # Wait up to 30 seconds for jobs to finish
161
- self._pool.kill() # Kill any remaining greenlets
162
- self.register_death()
163
-
164
- return did_perform_work
165
-
166
- def set_job_status(self, job, status):
167
- """Sets the job status."""
168
- if job:
169
- job.set_status(status)
170
-
171
- def handle_job_success(self, job, queue, started_job_registry):
172
- """Handles job completion."""
173
- try:
174
- if started_job_registry:
175
- try:
176
- started_job_registry.remove(job)
177
- except NotImplementedError:
178
- pass
179
- job.ended_at = utcnow()
180
- job.set_status(JobStatus.FINISHED)
181
- except Exception as e:
182
- self.log.error(f"Error handling job success for {job.id}: {e}")
183
-
184
- def handle_job_failure(self, job, queue, started_job_registry, exc_info=None):
185
- """Handles job failure."""
186
- try:
187
- if started_job_registry:
188
- try:
189
- started_job_registry.remove(job)
190
- except NotImplementedError:
191
- pass
192
- job.ended_at = utcnow()
193
- job.set_status(JobStatus.FAILED)
194
- except Exception as e:
195
- self.log.error(f"Error handling job failure for {job.id}: {e}")
196
-
197
- def execute_job(self, job, queue):
198
- """Execute a job in a greenlet."""
199
- job_id = job.id if job else "unknown"
200
-
201
- try:
202
- self.set_job_status(job, JobStatus.STARTED)
203
- started_job_registry = queue.started_job_registry
204
-
205
- try:
206
- started_job_registry.add(
207
- job,
208
- self.job_monitoring_interval * 1000
209
- if self.job_monitoring_interval
210
- else -1,
211
- )
212
- except NotImplementedError:
213
- pass
214
-
215
- rv = job.perform()
216
- self.handle_job_success(
217
- job=job, queue=queue, started_job_registry=started_job_registry
218
- )
219
- return rv
220
-
221
- except Exception as e:
222
- self.log.error(f"Job {job_id} failed: {e}", exc_info=True)
223
- self.handle_job_failure(
224
- job=job, queue=queue, started_job_registry=started_job_registry
225
- )
226
- raise