flowerpower-0.11.6.20-py3-none-any.whl → flowerpower-0.21.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/__init__.py +2 -6
- flowerpower/cfg/__init__.py +7 -14
- flowerpower/cfg/base.py +29 -25
- flowerpower/cfg/pipeline/__init__.py +8 -6
- flowerpower/cfg/pipeline/_schedule.py +32 -0
- flowerpower/cfg/pipeline/adapter.py +0 -5
- flowerpower/cfg/pipeline/builder.py +377 -0
- flowerpower/cfg/pipeline/run.py +36 -0
- flowerpower/cfg/project/__init__.py +11 -24
- flowerpower/cfg/project/adapter.py +0 -12
- flowerpower/cli/__init__.py +2 -21
- flowerpower/cli/cfg.py +0 -3
- flowerpower/cli/mqtt.py +0 -6
- flowerpower/cli/pipeline.py +22 -415
- flowerpower/cli/utils.py +0 -1
- flowerpower/flowerpower.py +345 -146
- flowerpower/pipeline/__init__.py +2 -0
- flowerpower/pipeline/base.py +21 -12
- flowerpower/pipeline/io.py +58 -54
- flowerpower/pipeline/manager.py +165 -726
- flowerpower/pipeline/pipeline.py +643 -0
- flowerpower/pipeline/registry.py +285 -18
- flowerpower/pipeline/visualizer.py +5 -6
- flowerpower/plugins/io/__init__.py +8 -0
- flowerpower/plugins/mqtt/__init__.py +7 -11
- flowerpower/settings/__init__.py +0 -2
- flowerpower/settings/{backend.py → _backend.py} +0 -21
- flowerpower/settings/logging.py +1 -1
- flowerpower/utils/logging.py +24 -12
- flowerpower/utils/misc.py +17 -256
- flowerpower/utils/monkey.py +1 -83
- flowerpower-0.21.0.dist-info/METADATA +463 -0
- flowerpower-0.21.0.dist-info/RECORD +44 -0
- flowerpower/cfg/pipeline/schedule.py +0 -74
- flowerpower/cfg/project/job_queue.py +0 -238
- flowerpower/cli/job_queue.py +0 -1061
- flowerpower/fs/__init__.py +0 -29
- flowerpower/fs/base.py +0 -662
- flowerpower/fs/ext.py +0 -2143
- flowerpower/fs/storage_options.py +0 -1420
- flowerpower/job_queue/__init__.py +0 -294
- flowerpower/job_queue/apscheduler/__init__.py +0 -11
- flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
- flowerpower/job_queue/apscheduler/manager.py +0 -1051
- flowerpower/job_queue/apscheduler/setup.py +0 -554
- flowerpower/job_queue/apscheduler/trigger.py +0 -169
- flowerpower/job_queue/apscheduler/utils.py +0 -311
- flowerpower/job_queue/base.py +0 -413
- flowerpower/job_queue/rq/__init__.py +0 -10
- flowerpower/job_queue/rq/_trigger.py +0 -37
- flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -226
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -231
- flowerpower/job_queue/rq/manager.py +0 -1582
- flowerpower/job_queue/rq/setup.py +0 -154
- flowerpower/job_queue/rq/utils.py +0 -69
- flowerpower/mqtt.py +0 -12
- flowerpower/pipeline/job_queue.py +0 -583
- flowerpower/pipeline/runner.py +0 -603
- flowerpower/plugins/io/base.py +0 -2520
- flowerpower/plugins/io/helpers/datetime.py +0 -298
- flowerpower/plugins/io/helpers/polars.py +0 -875
- flowerpower/plugins/io/helpers/pyarrow.py +0 -570
- flowerpower/plugins/io/helpers/sql.py +0 -202
- flowerpower/plugins/io/loader/__init__.py +0 -28
- flowerpower/plugins/io/loader/csv.py +0 -37
- flowerpower/plugins/io/loader/deltatable.py +0 -190
- flowerpower/plugins/io/loader/duckdb.py +0 -19
- flowerpower/plugins/io/loader/json.py +0 -37
- flowerpower/plugins/io/loader/mqtt.py +0 -159
- flowerpower/plugins/io/loader/mssql.py +0 -26
- flowerpower/plugins/io/loader/mysql.py +0 -26
- flowerpower/plugins/io/loader/oracle.py +0 -26
- flowerpower/plugins/io/loader/parquet.py +0 -35
- flowerpower/plugins/io/loader/postgres.py +0 -26
- flowerpower/plugins/io/loader/pydala.py +0 -19
- flowerpower/plugins/io/loader/sqlite.py +0 -23
- flowerpower/plugins/io/metadata.py +0 -244
- flowerpower/plugins/io/saver/__init__.py +0 -28
- flowerpower/plugins/io/saver/csv.py +0 -36
- flowerpower/plugins/io/saver/deltatable.py +0 -186
- flowerpower/plugins/io/saver/duckdb.py +0 -19
- flowerpower/plugins/io/saver/json.py +0 -36
- flowerpower/plugins/io/saver/mqtt.py +0 -28
- flowerpower/plugins/io/saver/mssql.py +0 -26
- flowerpower/plugins/io/saver/mysql.py +0 -26
- flowerpower/plugins/io/saver/oracle.py +0 -26
- flowerpower/plugins/io/saver/parquet.py +0 -36
- flowerpower/plugins/io/saver/postgres.py +0 -26
- flowerpower/plugins/io/saver/pydala.py +0 -20
- flowerpower/plugins/io/saver/sqlite.py +0 -24
- flowerpower/plugins/mqtt/cfg.py +0 -17
- flowerpower/plugins/mqtt/manager.py +0 -962
- flowerpower/settings/job_queue.py +0 -87
- flowerpower/utils/scheduler.py +0 -311
- flowerpower-0.11.6.20.dist-info/METADATA +0 -537
- flowerpower-0.11.6.20.dist-info/RECORD +0 -102
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/top_level.txt +0 -0
flowerpower/job_queue/base.py
DELETED
@@ -1,413 +0,0 @@
-"""
-Base scheduler interface for FlowerPower.
-
-This module defines the abstract base classes for scheduling operations
-that can be implemented by different backend providers (APScheduler, RQ, etc.).
-"""
-
-import abc
-import importlib
-import os
-import posixpath
-import sys
-import urllib.parse
-from dataclasses import dataclass, field
-from enum import Enum
-from pathlib import Path
-from typing import Any, TypeVar
-
-if importlib.util.find_spec("sqlalchemy"):
-    from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
-else:
-    create_async_engine = None
-    AsyncEngine = TypeVar("AsyncEngine")
-
-from ..cfg import ProjectConfig
-from ..fs import AbstractFileSystem, get_filesystem
-# from ..utils.misc import update_config_from_dict
-from ..settings import BACKEND_PROPERTIES, CACHE_DIR, CONFIG_DIR, PIPELINES_DIR
-
-
-class BackendType(str, Enum):
-    POSTGRESQL = "postgresql"
-    MYSQL = "mysql"
-    SQLITE = "sqlite"
-    MONGODB = "mongodb"
-    MQTT = "mqtt"
-    REDIS = "redis"
-    NATS_KV = "nats_kv"
-    MEMORY = "memory"
-
-    @property
-    def properties(self):
-        return BACKEND_PROPERTIES[self.value]
-
-    @property
-    def uri_prefix(self) -> str:
-        return self.properties.get("uri_prefix", "")
-
-    @property
-    def default_port(self):
-        return self.properties.get("default_port")
-
-    @property
-    def default_host(self) -> str:
-        return self.properties.get("default_host", "")
-
-    @property
-    def default_username(self) -> str:
-        return self.properties.get("default_username", "")
-
-    @property
-    def default_password(self) -> str:
-        return self.properties.get("default_password", "")
-
-    @property
-    def default_database(self) -> str:
-        return self.properties.get("default_database", "")
-
-    @property
-    def is_sqla_type(self) -> bool:
-        return self.properties.get("is_sqla_type", False)
-
-    @property
-    def is_mongodb_type(self) -> bool:
-        return self.value == "mongodb"
-
-    @property
-    def is_mqtt_type(self) -> bool:
-        return self.value == "mqtt"
-
-    @property
-    def is_redis_type(self) -> bool:
-        return self.value == "redis"
-
-    @property
-    def is_nats_kv_type(self) -> bool:
-        return self.value == "nats_kv"
-
-    @property
-    def is_memory_type(self) -> bool:
-        return self.value == "memory"
-
-    @property
-    def is_sqlite_type(self) -> bool:
-        return self.value == "sqlite"
-
-    def gen_uri(
-        self,
-        host: str | None = None,
-        port: int | None = None,
-        username: str | None = None,
-        password: str | None = None,
-        database: str | None = None,
-        ssl: bool = False,
-        ca_file: str | None = None,
-        cert_file: str | None = None,
-        key_file: str | None = None,
-        verify_ssl: bool = False,
-    ) -> str:
-        # Handle host and port
-        host = host or self.default_host
-        port = port or self.default_port
-        database = database or self.default_database
-        username = username or self.default_username
-        password = password or self.default_password
-
-        # components: List[str] = []
-        # Get the appropriate URI prefix based on backend type and SSL setting
-        if self.is_redis_type:
-            uri_prefix = "rediss://" if ssl else "redis://"
-        elif self.is_nats_kv_type:
-            uri_prefix = "nats+tls://" if ssl else "nats://"
-        elif self.is_mqtt_type:
-            uri_prefix = "mqtts://" if ssl else "mqtt://"
-            if ssl and port == 1883:
-                port = 8883
-        else:
-            uri_prefix = self.uri_prefix
-
-        # Handle authentication
-        if username and password:
-            auth = f"{urllib.parse.quote(username)}:{urllib.parse.quote(password)}@"
-        elif username:
-            auth = f"{urllib.parse.quote(username)}@"
-        elif password:
-            auth = f":{urllib.parse.quote(password)}@"
-        else:
-            auth = ""
-
-        port_part = f":{port}" # if port is not None else self.default_port
-
-        # Special handling for SQLite and memory types
-        if self.is_sqlite_type or self.is_memory_type:
-            if self.is_sqlite_type:
-                if database:
-                    return f"{uri_prefix}{database}"
-                else:
-                    return f"{uri_prefix}"
-            return "memory://"
-
-        # Build path component
-        database = database or self.default_database
-        path = f"/{database}" if database else ""
-
-        # Construct base URI
-        base_uri = f"{uri_prefix}{auth}{host}{port_part}{path}"
-
-        # Prepare query parameters for SSL files
-        query_params: list[str] = []
-
-        if ssl:
-            # Always add ssl query parameter if ssl=True
-            if self.value == "postgresql":
-                query_params.append("ssl=verify-full" if verify_ssl else "ssl=allow")
-                if ca_file:
-                    query_params.append(f"sslrootcert={urllib.parse.quote(ca_file)}")
-                if cert_file:
-                    query_params.append(f"sslcert={urllib.parse.quote(cert_file)}")
-                if key_file:
-                    query_params.append(f"sslkey={urllib.parse.quote(key_file)}")
-            elif self.value == "mysql":
-                query_params.append("ssl=true")
-                if ca_file:
-                    query_params.append(f"ssl_ca={urllib.parse.quote(ca_file)}")
-                if cert_file:
-                    query_params.append(f"ssl_cert={urllib.parse.quote(cert_file)}")
-                if key_file:
-                    query_params.append(f"ssl_key={urllib.parse.quote(key_file)}")
-            elif self.is_mongodb_type:
-                query_params.append("tls=true")
-                if ca_file:
-                    query_params.append(f"tlsCAFile={urllib.parse.quote(ca_file)}")
-                if cert_file and key_file:
-                    query_params.append(
-                        f"tlsCertificateKeyFile={urllib.parse.quote(cert_file)}"
-                    )
-            elif self.is_redis_type:
-                if not verify_ssl:
-                    query_params.append("ssl_cert_reqs=none")
-                if ca_file:
-                    query_params.append(f"ssl_ca_certs={urllib.parse.quote(ca_file)}")
-                if cert_file:
-                    query_params.append(f"ssl_certfile={urllib.parse.quote(cert_file)}")
-                if key_file:
-                    query_params.append(f"ssl_keyfile={urllib.parse.quote(key_file)}")
-            elif self.is_nats_kv_type:
-                query_params.append("tls=true")
-                if ca_file:
-                    query_params.append(f"tls_ca_file={urllib.parse.quote(ca_file)}")
-                if cert_file:
-                    query_params.append(
-                        f"tls_cert_file={urllib.parse.quote(cert_file)}"
-                    )
-                if key_file:
-                    query_params.append(f"tls_key_file={urllib.parse.quote(key_file)}")
-            elif self.is_mqtt_type:
-                query_params.append("tls=true")
-                if ca_file:
-                    query_params.append(f"tls_ca_file={urllib.parse.quote(ca_file)}")
-                if cert_file:
-                    query_params.append(
-                        f"tls_cert_file={urllib.parse.quote(cert_file)}"
-                    )
-                if key_file:
-                    query_params.append(f"tls_key_file={urllib.parse.quote(key_file)}")
-
-        # Compose query string if Any params exist
-        query_string = ""
-        if query_params:
-            query_string = "?" + "&".join(query_params)
-
-        return f"{base_uri}{query_string}"
-
-
-@dataclass(slots=True)
-class BaseBackend:
-    type: BackendType | str | None = None
-    uri: str | None = None
-    username: str | None = None
-    password: str | None = None
-    host: str | None = None
-    port: int | None = None
-    database: str | None = None
-    ssl: bool = False
-    ca_file: str | None = None
-    cert_file: str | None = None
-    key_file: str | None = None
-    verify_ssl: bool = False
-    _kwargs: dict = field(default_factory=dict)
-    _sqla_engine: AsyncEngine | None = (
-        None # SQLAlchemy async engine instance for SQL backends
-    )
-    _client: Any | None = None # Native client instance for non-SQL backends
-
-    def __post_init__(self):
-        if self.type is None:
-            self.type = "memory"
-
-        elif isinstance(self.type, str):
-            try:
-                self.type = BackendType[self.type.upper()]
-            except KeyError:
-                raise ValueError(
-                    f"Invalid backend type: {self.type}. Valid types: {[bt.value for bt in BackendType]}"
-                )
-
-        if not self.uri:
-            self.uri = self.type.gen_uri(
-                username=self.username,
-                password=self.password,
-                host=self.host,
-                port=self.port,
-                database=self.database,
-                ssl=self.ssl,
-                ca_file=self.ca_file,
-                cert_file=self.cert_file,
-                key_file=self.key_file,
-                verify_ssl=self.verify_ssl,
-            )
-
-        # Setup is handled by backend-specific implementations
-
-    @classmethod
-    def from_dict(cls, d: dict) -> "BaseBackend":
-        return cls(**d)
-
-
-class BaseTrigger(abc.ABC):
-    """
-    Abstract base class for schedule triggers.
-
-    A trigger determines when a scheduled job should be executed.
-    """
-
-    def __init__(self, trigger_type: str):
-        self.trigger_type = trigger_type
-
-    @abc.abstractmethod
-    def get_trigger_instance(self, **kwargs) -> Any:
-        """
-        Get the backend-specific trigger instance.
-
-        Args:
-            **kwargs: Keyword arguments specific to the trigger type
-
-        Returns:
-            Any: A backend-specific trigger instance
-        """
-        pass
-
-
-class BaseJobQueueManager:
-    """
-    Abstract base class for scheduler workers (APScheduler, RQ, etc.).
-    Defines the required interface for all scheduler backends.
-
-    Can be used as a context manager:
-
-    ```python
-    with RQManager(name="test") as manager:
-        manager.add_job(job1)
-    ```
-    """
-
-    def __enter__(self):
-        """Context manager entry - returns self for use in with statement."""
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        """Context manager exit - ensures workers are stopped."""
-        if hasattr(self, "_worker_process") and self._worker_process is not None:
-            self.stop_worker()
-        if hasattr(self, "_worker_pool") and self._worker_pool is not None:
-            self.stop_worker_pool()
-        if hasattr(self, "_worker") and self._worker is not None:
-            self.stop_worker()
-        if hasattr(self, "_scheduler") and self._scheduler is not None:
-            self.stop_scheduler()
-        return False # Don't suppress exceptions
-
-    def __init__(
-        self,
-        type: str | None = None,
-        name: str | None = None,
-        base_dir: str | None = None,
-        backend: BaseBackend | None = None,
-        storage_options: dict = None,
-        fs: AbstractFileSystem | None = None,
-        **kwargs,
-    ):
-        """
-        Initialize the APScheduler backend.
-
-        Args:
-            name: Name of the scheduler
-            base_dir: Base directory for the FlowerPower project
-            backend: APSBackend instance with data store and event broker
-            storage_options: Storage options for filesystem access
-            fs: Filesystem to use
-            cfg_override: Configuration overrides for the worker
-        """
-        self.name = name or ""
-        self._base_dir = base_dir or str(Path.cwd())
-        # self._storage_options = storage_options or {}
-        self._backend = backend
-        self._type = type
-        self._pipelines_dir = kwargs.get("pipelines_dir", PIPELINES_DIR)
-        self._cfg_dir = CONFIG_DIR
-
-        if storage_options is not None:
-            cached = True
-            cache_storage = posixpath.join(
-                posixpath.expanduser(CACHE_DIR), self._base_dir.split("://")[-1]
-            )
-            os.makedirs(cache_storage, exist_ok=True)
-        else:
-            cached = False
-            cache_storage = None
-        if not fs:
-            fs = get_filesystem(
-                self._base_dir,
-                storage_options=storage_options,
-                cached=cached,
-                cache_storage=cache_storage,
-            )
-        self._fs = fs
-        self._storage_options = storage_options or fs.storage_options
-
-        self._add_modules_path()
-        self._load_config()
-
-    def _load_config(self) -> None:
-        """Load the configuration.
-
-        Args:
-            cfg_updates: Configuration updates to apply
-        """
-        self.cfg = ProjectConfig.load(
-            base_dir=self._base_dir, job_queue_type=self._type, fs=self._fs
-        ).job_queue
-
-    def _add_modules_path(self):
-        """
-        Sync the filesystem.
-
-        Returns:
-            None
-        """
-        if self._fs.is_cache_fs:
-            self._fs.sync_cache()
-            project_path = self._fs._mapper.directory
-            modules_path = posixpath.join(project_path, self._pipelines_dir)
-
-        else:
-            # Use the base directory directly if not using cache
-            project_path = self._fs.path
-            modules_path = posixpath.join(project_path, self._pipelines_dir)
-
-        if project_path not in sys.path:
-            sys.path.insert(0, project_path)
-
-        if modules_path not in sys.path:
-            sys.path.insert(0, modules_path)
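For orientation, a minimal usage sketch of the backend API removed above (0.11.x line only; `fp_user`, `db.internal`, and the commented URI shapes are illustrative, since the exact prefixes and defaults came from the old `settings.BACKEND_PROPERTIES`):

```python
# Hedged sketch of the removed 0.11.x API (flowerpower.job_queue.base).
from flowerpower.job_queue.base import BackendType, BaseBackend

backend = BaseBackend(
    type="postgresql",   # resolved to BackendType.POSTGRESQL in __post_init__
    username="fp_user",
    password="s3cr=t",   # special characters get percent-quoted by gen_uri()
    host="db.internal",
    port=5432,
    database="flowerpower",
    ssl=True,            # adds "ssl=allow" (or "ssl=verify-full" with verify_ssl=True)
)
print(backend.uri)
# e.g. <uri_prefix>fp_user:s3cr%3Dt@db.internal:5432/flowerpower?ssl=allow

# SQLite and in-memory backends short-circuit to simple URIs:
print(BackendType.SQLITE.gen_uri(database="jobs.db"))  # <sqlite prefix>jobs.db
print(BackendType.MEMORY.gen_uri())                    # memory://
```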
flowerpower/job_queue/rq/_trigger.py
DELETED
@@ -1,37 +0,0 @@
-from typing import Any, Dict
-
-from ..base import BaseTrigger
-
-
-class RQTrigger(BaseTrigger):
-    """
-    RQTrigger adapts trigger logic for the RQ worker backend.
-
-    Inherits from BaseTrigger and provides a trigger instance
-    in dictionary format suitable for RQ scheduling.
-    """
-
-    def __init__(self, trigger_type: str):
-        super().__init__(trigger_type)
-
-    def get_trigger_instance(self, **kwargs) -> Dict[str, Any]:
-        """
-        Get trigger parameters for RQ Scheduler.
-
-        Args:
-            **kwargs: Keyword arguments for the trigger
-
-        Returns:
-            Dict[str, Any]: A dictionary with trigger configuration
-        """
-        # RQ doesn't have specific trigger classes like APScheduler.
-        # Instead, we'll return a dictionary with parameters that can
-        # be used by RQSchedulerBackend to schedule jobs appropriately.
-
-        result = {"type": self.trigger_type, **kwargs}
-
-        # For cron triggers, handle crontab string specifically
-        if self.trigger_type == "cron" and "crontab" in kwargs:
-            result["crontab"] = kwargs["crontab"]
-
-        return result
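Since RQ has no trigger classes of its own, the removed `RQTrigger` simply flattened its keyword arguments into a dict. A hedged sketch (0.11.x API; the cron expression and timezone are illustrative pass-through kwargs):

```python
# Hedged sketch of the removed RQTrigger (0.11.x).
from flowerpower.job_queue.rq._trigger import RQTrigger

params = RQTrigger("cron").get_trigger_instance(
    crontab="*/15 * * * *", timezone="UTC"
)
print(params)
# {'type': 'cron', 'crontab': '*/15 * * * *', 'timezone': 'UTC'}
```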
flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py
DELETED
@@ -1,226 +0,0 @@
-# Monkey patch as early as possible
-try:
-    from gevent import monkey
-
-    monkey.patch_all()
-    import gevent
-    import gevent.pool
-
-    GEVENT_AVAILABLE = True
-except ImportError:
-    GEVENT_AVAILABLE = False
-    raise ImportError(
-        "Gevent is required for GeventWorker. Please install it with 'pip install gevent'."
-    )
-
-
-import datetime as dt
-
-from loguru import logger
-from rq import worker
-from rq.exceptions import DequeueTimeout
-from rq.job import JobStatus
-from rq.worker import StopRequested
-
-from flowerpower.utils.logging import setup_logging
-
-# Use utcnow directly for simplicity
-utcnow = dt.datetime.utcnow
-setup_logging("INFO")
-
-
-class GeventWorker(worker.Worker):
-    """
-    A variation of the RQ Worker that uses Gevent to perform jobs concurrently
-    within a single worker process using greenlets.
-
-    Ideal for I/O bound tasks, offering very lightweight concurrency.
-    Jobs share the same memory space within the worker process.
-
-    Requires gevent to be installed and monkey-patching to be applied
-    (done automatically when this module is imported).
-    """
-
-    def __init__(
-        self,
-        queues,
-        name=None,
-        max_greenlets=1000,
-        default_result_ttl=500,
-        connection=None,
-        exc_handler=None,
-        exception_handlers=None,
-        default_worker_ttl=None,
-        job_class=None,
-        queue_class=None,
-        log_job_description=True,
-        job_monitoring_interval=30,
-        disable_default_exception_handler=False,
-        prepare_for_work=True,
-        maintenance_interval=600,
-    ):
-        super().__init__(
-            queues,
-            name=name,
-            default_result_ttl=default_result_ttl,
-            connection=connection,
-            exc_handler=exc_handler,
-            exception_handlers=exception_handlers,
-            default_worker_ttl=default_worker_ttl,
-            job_class=job_class,
-            queue_class=queue_class,
-            log_job_description=log_job_description,
-            job_monitoring_interval=job_monitoring_interval,
-            disable_default_exception_handler=disable_default_exception_handler,
-            prepare_for_work=prepare_for_work,
-            maintenance_interval=maintenance_interval,
-        )
-
-        self.max_greenlets = max_greenlets
-        self._pool = None
-        self.log = logger
-        logger.info(f"GeventWorker initialized with max_greenlets={self.max_greenlets}")
-
-    def work(
-        self,
-        burst=False,
-        logging_level="INFO",
-        date_format=worker.DEFAULT_LOGGING_DATE_FORMAT,
-        log_format=worker.DEFAULT_LOGGING_FORMAT,
-        max_jobs=None,
-        with_scheduler=False,
-    ):
-        """Starts the worker's main loop using gevent for concurrent job execution."""
-        self._install_signal_handlers()
-        did_perform_work = False
-        self.register_birth()
-        self.log.info("Worker %s: started, version %s", self.key, worker.VERSION)
-        self.set_state(worker.WorkerStatus.STARTED)
-
-        self._pool = gevent.pool.Pool(self.max_greenlets)
-        processed_jobs = 0
-
-        try:
-            while True:
-                if self._stop_requested or (
-                    max_jobs is not None and processed_jobs >= max_jobs
-                ):
-                    break
-
-                self.run_maintenance_tasks()
-
-                # Wait for space in the greenlet pool if it's full
-                if self._pool.full():
-                    gevent.sleep(0.1) # Yield to other greenlets
-                    continue
-
-                try:
-                    result = self.dequeue_job_and_maintain_ttl(timeout=1)
-                except DequeueTimeout:
-                    if burst:
-                        break
-                    gevent.sleep(0.1)
-                    continue
-                except StopRequested:
-                    break
-                except Exception:
-                    self.log.error("Error during dequeue:", exc_info=True)
-                    gevent.sleep(1)
-                    continue
-
-                if result is None:
-                    if burst:
-                        did_perform_work = True
-                        break
-                    gevent.sleep(0.1)
-                    continue
-
-                job, queue = result
-                self.log.info("Processing job %s: %s", job.id, job.description)
-
-                try:
-                    # Spawn job execution in the gevent pool
-                    greenlet = self._pool.spawn(self.execute_job, job, queue)
-                    # Optional: Add error callback
-                    greenlet.link_exception(
-                        lambda g: self.log.error(
-                            f"Error in greenlet for job {job.id}", exc_info=g.exception
-                        )
-                    )
-                except Exception as e:
-                    self.log.error(f"Failed to spawn job {job.id}: {e}", exc_info=True)
-                    continue
-
-                did_perform_work = True
-                processed_jobs += 1
-
-        finally:
-            if self._pool:
-                self.log.info("Waiting for active greenlets to complete...")
-                self._pool.join(timeout=30) # Wait up to 30 seconds for jobs to finish
-                self._pool.kill() # Kill any remaining greenlets
-            self.register_death()
-
-        return did_perform_work
-
-    def set_job_status(self, job, status):
-        """Sets the job status."""
-        if job:
-            job.set_status(status)
-
-    def handle_job_success(self, job, queue, started_job_registry):
-        """Handles job completion."""
-        try:
-            if started_job_registry:
-                try:
-                    started_job_registry.remove(job)
-                except NotImplementedError:
-                    pass
-            job.ended_at = utcnow()
-            job.set_status(JobStatus.FINISHED)
-        except Exception as e:
-            self.log.error(f"Error handling job success for {job.id}: {e}")
-
-    def handle_job_failure(self, job, queue, started_job_registry, exc_info=None):
-        """Handles job failure."""
-        try:
-            if started_job_registry:
-                try:
-                    started_job_registry.remove(job)
-                except NotImplementedError:
-                    pass
-            job.ended_at = utcnow()
-            job.set_status(JobStatus.FAILED)
-        except Exception as e:
-            self.log.error(f"Error handling job failure for {job.id}: {e}")
-
-    def execute_job(self, job, queue):
-        """Execute a job in a greenlet."""
-        job_id = job.id if job else "unknown"
-
-        try:
-            self.set_job_status(job, JobStatus.STARTED)
-            started_job_registry = queue.started_job_registry
-
-            try:
-                started_job_registry.add(
-                    job,
-                    self.job_monitoring_interval * 1000
-                    if self.job_monitoring_interval
-                    else -1,
-                )
-            except NotImplementedError:
-                pass
-
-            rv = job.perform()
-            self.handle_job_success(
-                job=job, queue=queue, started_job_registry=started_job_registry
-            )
-            return rv
-
-        except Exception as e:
-            self.log.error(f"Job {job_id} failed: {e}", exc_info=True)
-            self.handle_job_failure(
-                job=job, queue=queue, started_job_registry=started_job_registry
-            )
-            raise
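For orientation, a hedged sketch of how this removed worker was typically driven (0.11.x API; assumes a local Redis instance and the standard `rq` package, and that importing the module has already applied gevent monkey patching):

```python
# Hedged sketch, not part of the diff: drive the removed GeventWorker against
# an RQ queue. Assumes Redis is reachable at localhost:6379.
from redis import Redis
from rq import Queue

from flowerpower.job_queue.rq.concurrent_workers.gevent_worker import GeventWorker

connection = Redis(host="localhost", port=6379)
queue = Queue("flowerpower", connection=connection)

# Jobs are executed concurrently in a greenlet pool of size max_greenlets;
# burst=True drains the queue once and returns instead of blocking forever.
worker = GeventWorker([queue], connection=connection, max_greenlets=200)
worker.work(burst=True)
```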