django-nativemojo 0.1.15__py3-none-any.whl → 0.1.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {django_nativemojo-0.1.15.dist-info → django_nativemojo-0.1.17.dist-info}/METADATA +3 -2
- django_nativemojo-0.1.17.dist-info/RECORD +302 -0
- mojo/__init__.py +1 -1
- mojo/apps/account/management/commands/serializer_admin.py +121 -1
- mojo/apps/account/migrations/0006_add_device_tracking_models.py +72 -0
- mojo/apps/account/migrations/0007_delete_userdevicelocation.py +16 -0
- mojo/apps/account/migrations/0008_userdevicelocation.py +33 -0
- mojo/apps/account/migrations/0009_geolocatedip_subnet.py +18 -0
- mojo/apps/account/migrations/0010_group_avatar.py +20 -0
- mojo/apps/account/migrations/0011_user_org_registereddevice_pushconfig_and_more.py +118 -0
- mojo/apps/account/migrations/0012_remove_pushconfig_apns_key_file_and_more.py +21 -0
- mojo/apps/account/migrations/0013_pushconfig_test_mode_alter_pushconfig_apns_enabled_and_more.py +28 -0
- mojo/apps/account/migrations/0014_notificationdelivery_data_payload_and_more.py +48 -0
- mojo/apps/account/models/__init__.py +2 -0
- mojo/apps/account/models/device.py +279 -0
- mojo/apps/account/models/group.py +294 -8
- mojo/apps/account/models/member.py +14 -1
- mojo/apps/account/models/push/__init__.py +4 -0
- mojo/apps/account/models/push/config.py +112 -0
- mojo/apps/account/models/push/delivery.py +93 -0
- mojo/apps/account/models/push/device.py +66 -0
- mojo/apps/account/models/push/template.py +99 -0
- mojo/apps/account/models/user.py +190 -17
- mojo/apps/account/rest/__init__.py +2 -0
- mojo/apps/account/rest/device.py +39 -0
- mojo/apps/account/rest/group.py +8 -0
- mojo/apps/account/rest/push.py +187 -0
- mojo/apps/account/rest/user.py +95 -5
- mojo/apps/account/services/__init__.py +1 -0
- mojo/apps/account/services/push.py +363 -0
- mojo/apps/aws/migrations/0001_initial.py +206 -0
- mojo/apps/aws/migrations/0002_emaildomain_can_recv_emaildomain_can_send_and_more.py +28 -0
- mojo/apps/aws/migrations/0003_mailbox_is_domain_default_mailbox_is_system_default_and_more.py +31 -0
- mojo/apps/aws/migrations/0004_s3bucket.py +39 -0
- mojo/apps/aws/migrations/0005_alter_emaildomain_region_delete_s3bucket.py +21 -0
- mojo/apps/aws/models/__init__.py +19 -0
- mojo/apps/aws/models/email_attachment.py +99 -0
- mojo/apps/aws/models/email_domain.py +218 -0
- mojo/apps/aws/models/email_template.py +132 -0
- mojo/apps/aws/models/incoming_email.py +197 -0
- mojo/apps/aws/models/mailbox.py +288 -0
- mojo/apps/aws/models/sent_message.py +175 -0
- mojo/apps/aws/rest/__init__.py +6 -0
- mojo/apps/aws/rest/email.py +33 -0
- mojo/apps/aws/rest/email_ops.py +183 -0
- mojo/apps/aws/rest/messages.py +32 -0
- mojo/apps/aws/rest/send.py +101 -0
- mojo/apps/aws/rest/sns.py +403 -0
- mojo/apps/aws/rest/templates.py +19 -0
- mojo/apps/aws/services/__init__.py +32 -0
- mojo/apps/aws/services/email.py +390 -0
- mojo/apps/aws/services/email_ops.py +548 -0
- mojo/apps/docit/__init__.py +6 -0
- mojo/apps/docit/markdown_plugins/syntax_highlight.py +25 -0
- mojo/apps/docit/markdown_plugins/toc.py +12 -0
- mojo/apps/docit/migrations/0001_initial.py +113 -0
- mojo/apps/docit/migrations/0002_alter_book_modified_by_alter_page_modified_by.py +26 -0
- mojo/apps/docit/migrations/0003_alter_book_group.py +20 -0
- mojo/apps/docit/models/__init__.py +17 -0
- mojo/apps/docit/models/asset.py +231 -0
- mojo/apps/docit/models/book.py +227 -0
- mojo/apps/docit/models/page.py +319 -0
- mojo/apps/docit/models/page_revision.py +203 -0
- mojo/apps/docit/rest/__init__.py +10 -0
- mojo/apps/docit/rest/asset.py +17 -0
- mojo/apps/docit/rest/book.py +22 -0
- mojo/apps/docit/rest/page.py +22 -0
- mojo/apps/docit/rest/page_revision.py +17 -0
- mojo/apps/docit/services/__init__.py +11 -0
- mojo/apps/docit/services/docit.py +315 -0
- mojo/apps/docit/services/markdown.py +44 -0
- mojo/apps/fileman/backends/s3.py +209 -0
- mojo/apps/fileman/models/file.py +45 -9
- mojo/apps/fileman/models/manager.py +269 -3
- mojo/apps/incident/migrations/0007_event_uid.py +18 -0
- mojo/apps/incident/migrations/0008_ticket_ticketnote.py +55 -0
- mojo/apps/incident/migrations/0009_incident_status.py +18 -0
- mojo/apps/incident/migrations/0010_event_country_code.py +18 -0
- mojo/apps/incident/migrations/0011_incident_country_code.py +18 -0
- mojo/apps/incident/migrations/0012_alter_incident_status.py +18 -0
- mojo/apps/incident/models/__init__.py +1 -0
- mojo/apps/incident/models/event.py +35 -0
- mojo/apps/incident/models/incident.py +2 -0
- mojo/apps/incident/models/ticket.py +62 -0
- mojo/apps/incident/reporter.py +21 -3
- mojo/apps/incident/rest/__init__.py +1 -0
- mojo/apps/incident/rest/ticket.py +43 -0
- mojo/apps/jobs/__init__.py +489 -0
- mojo/apps/jobs/adapters.py +24 -0
- mojo/apps/jobs/cli.py +616 -0
- mojo/apps/jobs/daemon.py +370 -0
- mojo/apps/jobs/examples/sample_jobs.py +376 -0
- mojo/apps/jobs/examples/webhook_examples.py +203 -0
- mojo/apps/jobs/handlers/__init__.py +5 -0
- mojo/apps/jobs/handlers/webhook.py +317 -0
- mojo/apps/jobs/job_engine.py +734 -0
- mojo/apps/jobs/keys.py +203 -0
- mojo/apps/jobs/local_queue.py +363 -0
- mojo/apps/jobs/management/__init__.py +3 -0
- mojo/apps/jobs/management/commands/__init__.py +3 -0
- mojo/apps/jobs/manager.py +1327 -0
- mojo/apps/jobs/migrations/0001_initial.py +97 -0
- mojo/apps/jobs/migrations/0002_alter_job_max_retries_joblog.py +39 -0
- mojo/apps/jobs/models/__init__.py +6 -0
- mojo/apps/jobs/models/job.py +441 -0
- mojo/apps/jobs/rest/__init__.py +2 -0
- mojo/apps/jobs/rest/control.py +466 -0
- mojo/apps/jobs/rest/jobs.py +421 -0
- mojo/apps/jobs/scheduler.py +571 -0
- mojo/apps/jobs/services/__init__.py +6 -0
- mojo/apps/jobs/services/job_actions.py +465 -0
- mojo/apps/jobs/settings.py +209 -0
- mojo/apps/logit/models/log.py +3 -0
- mojo/apps/metrics/__init__.py +8 -1
- mojo/apps/metrics/redis_metrics.py +198 -0
- mojo/apps/metrics/rest/__init__.py +3 -0
- mojo/apps/metrics/rest/categories.py +266 -0
- mojo/apps/metrics/rest/helpers.py +48 -0
- mojo/apps/metrics/rest/permissions.py +99 -0
- mojo/apps/metrics/rest/values.py +277 -0
- mojo/apps/metrics/utils.py +17 -0
- mojo/decorators/http.py +40 -1
- mojo/helpers/aws/__init__.py +11 -7
- mojo/helpers/aws/inbound_email.py +309 -0
- mojo/helpers/aws/kms.py +413 -0
- mojo/helpers/aws/ses_domain.py +959 -0
- mojo/helpers/crypto/__init__.py +1 -1
- mojo/helpers/crypto/utils.py +15 -0
- mojo/helpers/location/__init__.py +2 -0
- mojo/helpers/location/countries.py +262 -0
- mojo/helpers/location/geolocation.py +196 -0
- mojo/helpers/logit.py +37 -0
- mojo/helpers/redis/__init__.py +2 -0
- mojo/helpers/redis/adapter.py +606 -0
- mojo/helpers/redis/client.py +48 -0
- mojo/helpers/redis/pool.py +225 -0
- mojo/helpers/request.py +8 -0
- mojo/helpers/response.py +8 -0
- mojo/middleware/auth.py +1 -1
- mojo/middleware/cors.py +40 -0
- mojo/middleware/logging.py +131 -12
- mojo/middleware/mojo.py +5 -0
- mojo/models/rest.py +271 -57
- mojo/models/secrets.py +86 -0
- mojo/serializers/__init__.py +16 -10
- mojo/serializers/core/__init__.py +90 -0
- mojo/serializers/core/cache/__init__.py +121 -0
- mojo/serializers/core/cache/backends.py +518 -0
- mojo/serializers/core/cache/base.py +102 -0
- mojo/serializers/core/cache/disabled.py +181 -0
- mojo/serializers/core/cache/memory.py +287 -0
- mojo/serializers/core/cache/redis.py +533 -0
- mojo/serializers/core/cache/utils.py +454 -0
- mojo/serializers/{manager.py → core/manager.py} +53 -4
- mojo/serializers/core/serializer.py +475 -0
- mojo/serializers/{advanced/formats → formats}/csv.py +116 -139
- mojo/serializers/suggested_improvements.md +388 -0
- testit/client.py +1 -1
- testit/helpers.py +14 -0
- testit/runner.py +23 -6
- django_nativemojo-0.1.15.dist-info/RECORD +0 -234
- mojo/apps/notify/README.md +0 -91
- mojo/apps/notify/README_NOTIFICATIONS.md +0 -566
- mojo/apps/notify/admin.py +0 -52
- mojo/apps/notify/handlers/example_handlers.py +0 -516
- mojo/apps/notify/handlers/ses/__init__.py +0 -25
- mojo/apps/notify/handlers/ses/complaint.py +0 -25
- mojo/apps/notify/handlers/ses/message.py +0 -86
- mojo/apps/notify/management/commands/__init__.py +0 -1
- mojo/apps/notify/management/commands/process_notifications.py +0 -370
- mojo/apps/notify/mod +0 -0
- mojo/apps/notify/models/__init__.py +0 -12
- mojo/apps/notify/models/account.py +0 -128
- mojo/apps/notify/models/attachment.py +0 -24
- mojo/apps/notify/models/bounce.py +0 -68
- mojo/apps/notify/models/complaint.py +0 -40
- mojo/apps/notify/models/inbox.py +0 -113
- mojo/apps/notify/models/inbox_message.py +0 -173
- mojo/apps/notify/models/outbox.py +0 -129
- mojo/apps/notify/models/outbox_message.py +0 -288
- mojo/apps/notify/models/template.py +0 -30
- mojo/apps/notify/providers/aws.py +0 -73
- mojo/apps/notify/rest/ses.py +0 -0
- mojo/apps/notify/utils/__init__.py +0 -2
- mojo/apps/notify/utils/notifications.py +0 -404
- mojo/apps/notify/utils/parsing.py +0 -202
- mojo/apps/notify/utils/render.py +0 -144
- mojo/apps/tasks/README.md +0 -118
- mojo/apps/tasks/__init__.py +0 -44
- mojo/apps/tasks/manager.py +0 -644
- mojo/apps/tasks/rest/__init__.py +0 -2
- mojo/apps/tasks/rest/hooks.py +0 -0
- mojo/apps/tasks/rest/tasks.py +0 -76
- mojo/apps/tasks/runner.py +0 -439
- mojo/apps/tasks/task.py +0 -99
- mojo/apps/tasks/tq_handlers.py +0 -132
- mojo/helpers/crypto/__pycache__/hash.cpython-310.pyc +0 -0
- mojo/helpers/crypto/__pycache__/sign.cpython-310.pyc +0 -0
- mojo/helpers/crypto/__pycache__/utils.cpython-310.pyc +0 -0
- mojo/helpers/redis.py +0 -10
- mojo/models/meta.py +0 -262
- mojo/serializers/advanced/README.md +0 -363
- mojo/serializers/advanced/__init__.py +0 -247
- mojo/serializers/advanced/formats/__init__.py +0 -28
- mojo/serializers/advanced/formats/excel.py +0 -516
- mojo/serializers/advanced/formats/json.py +0 -239
- mojo/serializers/advanced/formats/response.py +0 -485
- mojo/serializers/advanced/serializer.py +0 -568
- mojo/serializers/optimized.py +0 -618
- {django_nativemojo-0.1.15.dist-info → django_nativemojo-0.1.17.dist-info}/LICENSE +0 -0
- {django_nativemojo-0.1.15.dist-info → django_nativemojo-0.1.17.dist-info}/NOTICE +0 -0
- {django_nativemojo-0.1.15.dist-info → django_nativemojo-0.1.17.dist-info}/WHEEL +0 -0
- /mojo/apps/{notify → aws/migrations}/__init__.py +0 -0
- /mojo/apps/{notify/handlers → docit/markdown_plugins}/__init__.py +0 -0
- /mojo/apps/{notify/management → docit/migrations}/__init__.py +0 -0
- /mojo/apps/{notify/providers → jobs/examples}/__init__.py +0 -0
- /mojo/apps/{notify/rest → jobs/migrations}/__init__.py +0 -0
- /mojo/{serializers → rest}/openapi.py +0 -0
- /mojo/serializers/{settings_example.py → examples/settings.py} +0 -0
- /mojo/{apps/notify/handlers/ses/bounce.py → serializers/formats/__init__.py} +0 -0
- /mojo/serializers/{advanced/formats → formats}/localizers.py +0 -0
@@ -0,0 +1,1327 @@
|
|
1
|
+
"""
|
2
|
+
JobManager for control and inspection of the jobs system.
|
3
|
+
|
4
|
+
Provides high-level management operations for monitoring and controlling
|
5
|
+
job runners, queues, and individual jobs.
|
6
|
+
"""
|
7
|
+
import json
|
8
|
+
import uuid
|
9
|
+
import time
|
10
|
+
from typing import Any, Dict, List, Optional, Tuple
|
11
|
+
from datetime import datetime, timedelta
|
12
|
+
|
13
|
+
from django.conf import settings
|
14
|
+
from django.utils import timezone
|
15
|
+
|
16
|
+
from mojo.helpers import logit
|
17
|
+
from .keys import JobKeys
|
18
|
+
from .adapters import get_adapter
|
19
|
+
from .models import Job, JobEvent
|
20
|
+
|
21
|
+
|
22
|
+
class JobManager:
|
23
|
+
"""
|
24
|
+
Management interface for the jobs system.
|
25
|
+
|
26
|
+
Provides methods for inspecting queue state, controlling runners,
|
27
|
+
and managing jobs.
|
28
|
+
"""
|
29
|
+
|
30
|
+
    def __init__(self):
        """Initialize the JobManager."""
        # Redis adapter shared by all queue/inspection operations.
        self.redis = get_adapter()
        # Central builder for Redis key names (queues, scheduling, runner control).
        self.keys = JobKeys()
|
34
|
+
|
35
|
+
    def get_runners(self, channel: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        Get list of active runners.

        Args:
            channel: Filter by channel (None for all runners)

        Returns:
            List of runner info dicts with keys:
            - runner_id: Runner identifier
            - channels: List of channels served
            - jobs_processed: Number of jobs completed
            - jobs_failed: Number of jobs failed
            - started: When runner started
            - last_heartbeat: Last heartbeat time
            - alive: Whether runner is considered alive
        """
        runners = []

        try:
            # Find all runner heartbeat keys
            pattern = self.keys.runner_hb('*')

            # Note: In production, use SCAN instead of KEYS for better performance
            # For now, using a simple approach
            # Full SCAN iteration: cursor returns to 0 when the keyspace walk completes.
            all_keys = []
            cursor = 0
            while True:
                cursor, keys = self.redis.get_client().scan(
                    cursor, match=pattern, count=100
                )
                all_keys.extend(keys)
                if cursor == 0:
                    break

            # Check each runner
            for key in all_keys:
                try:
                    # Get heartbeat data (raw client may return bytes keys; decode first)
                    data = self.redis.get(key.decode('utf-8') if isinstance(key, bytes) else key)
                    if not data:
                        continue

                    # Heartbeat payload is a JSON-encoded runner info dict.
                    runner_info = json.loads(data)

                    # Filter by channel if specified
                    if channel and channel not in runner_info.get('channels', []):
                        continue

                    # Check if alive (heartbeat within 3x interval)
                    last_hb = runner_info.get('last_heartbeat')
                    if last_hb:
                        last_hb_time = datetime.fromisoformat(last_hb)
                        # Normalize to aware datetime so subtraction from timezone.now() works.
                        if timezone.is_naive(last_hb_time):
                            last_hb_time = timezone.make_aware(last_hb_time)

                        age = (timezone.now() - last_hb_time).total_seconds()
                        alive = age < (getattr(settings, 'JOBS_RUNNER_HEARTBEAT_SEC', 5) * 3)
                    else:
                        # No heartbeat timestamp at all -> treat as dead.
                        alive = False

                    runner_info['alive'] = alive
                    runners.append(runner_info)

                except Exception as e:
                    # One malformed heartbeat should not hide the rest.
                    logit.warn(f"Failed to parse runner heartbeat: {e}")

        except Exception as e:
            # Best-effort: on Redis failure return whatever was collected (possibly []).
            logit.error(f"Failed to get runners: {e}")

        # Sort by runner_id for consistency
        runners.sort(key=lambda r: r.get('runner_id', ''))

        return runners
|
109
|
+
|
110
|
+
    def get_queue_state(self, channel: str) -> Dict[str, Any]:
        """
        Get queue state for a channel.

        Args:
            channel: Channel name

        Returns:
            Dict with queue statistics (Plan B):
            - queued_count: Number of messages waiting in the list queue (LLEN)
            - inflight_count: Number of in-flight messages (ZCARD of processing)
            - scheduled_count: Number of scheduled/delayed jobs (ZCARD of sched + sched_broadcast)
            - runners: Number of active runners
        """
        # Zeroed defaults so callers always get every key even on Redis failure.
        state = {
            'channel': channel,
            'queued_count': 0,
            'inflight_count': 0,
            'scheduled_count': 0,
            'runners': 0,
        }

        try:
            # Plan B counts: List + ZSET
            queue_key = self.keys.queue(channel)
            processing_key = self.keys.processing(channel)
            sched_key = self.keys.sched(channel)
            sched_b_key = self.keys.sched_broadcast(channel)
            # Exact counts ("or 0" guards against a None from the adapter on missing keys)
            state['queued_count'] = self.redis.llen(queue_key) or 0
            state['inflight_count'] = self.redis.zcard(processing_key) or 0
            state['scheduled_count'] = (self.redis.zcard(sched_key) or 0) + (self.redis.zcard(sched_b_key) or 0)
            # Active runners for this channel
            runners = self.get_runners(channel)
            state['runners'] = len([r for r in runners if r.get('alive')])
            # Add metrics (DB-derived)
            state['metrics'] = self._get_channel_metrics(channel)
        except Exception as e:
            # Swallow and log: partial/zero state is preferable to failing callers.
            logit.error(f"Failed to get queue state for {channel}: {e}")
        return state
|
150
|
+
|
151
|
+
def get_channel_health(self, channel: str) -> Dict[str, Any]:
|
152
|
+
"""
|
153
|
+
Get comprehensive health metrics for a channel.
|
154
|
+
|
155
|
+
Args:
|
156
|
+
channel: Channel name
|
157
|
+
|
158
|
+
Returns:
|
159
|
+
Dict with health status including unclaimed jobs, stuck jobs, and alerts
|
160
|
+
"""
|
161
|
+
stream_key = self.keys.stream(channel)
|
162
|
+
group_key = self.keys.group_workers(channel)
|
163
|
+
sched_key = self.keys.sched(channel)
|
164
|
+
|
165
|
+
# Get basic queue state
|
166
|
+
state = self.get_queue_state(channel)
|
167
|
+
|
168
|
+
# Calculate unclaimed (waiting to be picked up)
|
169
|
+
total_messages = state['stream_length']
|
170
|
+
pending_count = state['pending_count']
|
171
|
+
unclaimed = max(0, total_messages - pending_count)
|
172
|
+
|
173
|
+
# Find stuck jobs
|
174
|
+
stuck = self._find_stuck_jobs(channel)
|
175
|
+
|
176
|
+
# Get active runners
|
177
|
+
runners = self.get_runners(channel)
|
178
|
+
active_runners = [r for r in runners if r.get('alive')]
|
179
|
+
|
180
|
+
# Build health status
|
181
|
+
health = {
|
182
|
+
'channel': channel,
|
183
|
+
'status': 'healthy', # Will update based on checks
|
184
|
+
'messages': {
|
185
|
+
'total': total_messages,
|
186
|
+
'unclaimed': unclaimed,
|
187
|
+
'pending': pending_count,
|
188
|
+
'scheduled': state['scheduled_count'],
|
189
|
+
'stuck': len(stuck)
|
190
|
+
},
|
191
|
+
'runners': {
|
192
|
+
'active': len(active_runners),
|
193
|
+
'total': len(runners)
|
194
|
+
},
|
195
|
+
'stuck_jobs': stuck[:10], # First 10 stuck jobs
|
196
|
+
'alerts': []
|
197
|
+
}
|
198
|
+
|
199
|
+
# Health checks
|
200
|
+
if unclaimed > 100:
|
201
|
+
health['alerts'].append(f"High unclaimed count: {unclaimed}")
|
202
|
+
health['status'] = 'warning'
|
203
|
+
|
204
|
+
if unclaimed > 500:
|
205
|
+
health['status'] = 'critical'
|
206
|
+
|
207
|
+
if len(stuck) > 0:
|
208
|
+
health['alerts'].append(f"Stuck jobs detected: {len(stuck)}")
|
209
|
+
health['status'] = 'warning'
|
210
|
+
|
211
|
+
if len(stuck) > 10:
|
212
|
+
health['status'] = 'critical'
|
213
|
+
|
214
|
+
if len(active_runners) == 0 and total_messages > 0:
|
215
|
+
health['alerts'].append("No active runners for channel with pending jobs")
|
216
|
+
health['status'] = 'critical'
|
217
|
+
|
218
|
+
# Add metrics if available
|
219
|
+
if 'metrics' in state:
|
220
|
+
health['metrics'] = state['metrics']
|
221
|
+
|
222
|
+
return health
|
223
|
+
|
224
|
+
    def _find_stuck_jobs(self, channel: str, idle_threshold_ms: int = 60000) -> List[Dict]:
        """
        Plan B: Find jobs that are in-flight (processing ZSET) longer than the idle threshold.

        Args:
            channel: Channel name
            idle_threshold_ms: Consider stuck if idle longer than this (default 1 minute)

        Returns:
            List of stuck job details, each {'job_id': ..., 'idle_ms': ...}
        """
        stuck: List[Dict] = []
        try:
            # Processing ZSET scores are millisecond timestamps, so compare in ms.
            now_ms = int(time.time() * 1000)
            cutoff = now_ms - max(0, int(idle_threshold_ms))
            processing_key = self.keys.processing(channel)

            if idle_threshold_ms <= 0:
                # Return all in-flight entries (threshold disabled).
                ids = self.redis.zrangebyscore(processing_key, float("-inf"), float("inf"))
                for jid in ids:
                    # idle_ms unknown here; caller sees None for "not computed".
                    stuck.append({'job_id': jid, 'idle_ms': None})
                return stuck

            # Return entries older than cutoff
            ids = self.redis.zrangebyscore(processing_key, float("-inf"), cutoff)
            for jid in ids:
                # We don't store claim timestamp in the member by default; idle_ms calculation is approximate
                stuck.append({'job_id': jid, 'idle_ms': idle_threshold_ms})
        except Exception as e:
            # Best-effort: log and return whatever was gathered.
            logit.error(f"Failed to check stuck jobs for channel {channel}: {e}")

        return stuck
|
257
|
+
|
258
|
+
    def clear_stuck_jobs(self, channel: str, idle_threshold_ms: int = 60000) -> Dict[str, Any]:
        """
        Plan B: Clear stuck in-flight jobs from a channel by re-queueing or removing
        entries from the processing ZSET based on an idle threshold.

        Args:
            channel: Channel name to clear
            idle_threshold_ms: Consider stuck if older than this many ms (0 to clear all)

        Returns:
            Dict with results: {'cleared': int, 'details': [...], 'errors': [...]}
        """
        results = {
            'channel': channel,
            'cleared': 0,
            'details': [],
            'errors': []
        }

        try:
            # ZSET scores are millisecond timestamps (see _find_stuck_jobs).
            now_ms = int(time.time() * 1000)
            processing_key = self.keys.processing(channel)
            queue_key = self.keys.queue(channel)

            # Determine score range to clear
            if idle_threshold_ms and idle_threshold_ms > 0:
                cutoff = now_ms - int(idle_threshold_ms)
                candidates = self.redis.zrangebyscore(processing_key, float("-inf"), cutoff)
            else:
                # Threshold of 0 (or falsy) means clear every in-flight entry.
                candidates = self.redis.zrangebyscore(processing_key, float("-inf"), float("inf"))

            if not candidates:
                results['message'] = f"No in-flight jobs found in {channel} matching threshold"
                return results

            for jid in candidates:
                try:
                    # Remove from processing and requeue
                    # NOTE(review): zrem+rpush is not atomic; a crash between the two
                    # could drop the job — confirm whether the adapter offers a pipeline.
                    self.redis.zrem(processing_key, jid)
                    self.redis.rpush(queue_key, jid)
                    results['cleared'] += 1
                    results['details'].append({'job_id': jid, 'requeued': True})
                    # Write event trail (best effort)
                    try:
                        job = Job.objects.get(id=jid)
                        JobEvent.objects.create(
                            job=job,
                            channel=channel,
                            event='retry',
                            details={'reason': 'manual_clear_stuck'}
                        )
                    except Exception:
                        # Audit trail is optional; never fail the requeue for it.
                        pass
                except Exception as e:
                    results['errors'].append(f"{jid}: {e}")

            results['message'] = f"Requeued {results['cleared']} in-flight jobs from {channel}"

        except Exception as e:
            import traceback
            results['errors'].append(str(e))
            results['stack_trace'] = traceback.format_exc()
            logit.error(f"Failed to clear stuck jobs from {channel}: {e}")

        return results
|
323
|
+
|
324
|
+
def broadcast_command(self, command: str, data: Dict = None,
|
325
|
+
timeout: float = 2.0) -> List[Dict]:
|
326
|
+
"""
|
327
|
+
Send command to all runners and collect responses.
|
328
|
+
|
329
|
+
Args:
|
330
|
+
command: Command to send (status, shutdown, pause, resume)
|
331
|
+
data: Additional command data
|
332
|
+
timeout: Time to wait for responses
|
333
|
+
|
334
|
+
Returns:
|
335
|
+
List of responses from runners
|
336
|
+
"""
|
337
|
+
import uuid as uuid_module
|
338
|
+
reply_channel = f"mojo:jobs:replies:{uuid_module.uuid4().hex[:8]}"
|
339
|
+
|
340
|
+
# Subscribe to replies before sending
|
341
|
+
pubsub = self.redis.pubsub()
|
342
|
+
pubsub.subscribe(reply_channel)
|
343
|
+
|
344
|
+
# Send broadcast command
|
345
|
+
message = {
|
346
|
+
'command': command,
|
347
|
+
'data': data or {},
|
348
|
+
'reply_channel': reply_channel,
|
349
|
+
'timestamp': timezone.now().isoformat()
|
350
|
+
}
|
351
|
+
|
352
|
+
self.redis.publish("mojo:jobs:runners:broadcast", json.dumps(message))
|
353
|
+
|
354
|
+
# Collect responses
|
355
|
+
responses = []
|
356
|
+
start_time = time.time()
|
357
|
+
|
358
|
+
while time.time() - start_time < timeout:
|
359
|
+
msg = pubsub.get_message(timeout=0.1)
|
360
|
+
if msg and msg['type'] == 'message':
|
361
|
+
try:
|
362
|
+
response_data = msg['data']
|
363
|
+
if isinstance(response_data, bytes):
|
364
|
+
response_data = response_data.decode('utf-8')
|
365
|
+
response = json.loads(response_data)
|
366
|
+
responses.append(response)
|
367
|
+
except Exception as e:
|
368
|
+
logit.debug(f"Failed to parse response: {e}")
|
369
|
+
|
370
|
+
pubsub.close()
|
371
|
+
return responses
|
372
|
+
|
373
|
+
    def ping(self, runner_id: str, timeout: float = 2.0) -> bool:
        """
        Ping a runner to check if it's responsive.

        Args:
            runner_id: Runner identifier
            timeout: Maximum time to wait for response (seconds)

        Returns:
            True if runner responded, False otherwise
        """
        try:
            # Create a unique response key so concurrent pings don't collide.
            response_key = f"{self.keys.runner_ctl(runner_id)}:response:{uuid.uuid4().hex[:8]}"

            # Send ping command
            control_key = self.keys.runner_ctl(runner_id)
            message = json.dumps({
                'command': 'ping',
                'response_key': response_key
            })

            self.redis.publish(control_key, message)

            # Wait for response, polling every 100ms until the timeout elapses.
            start_time = time.time()
            while time.time() - start_time < timeout:
                # NOTE(review): assumes the adapter returns a decoded str here;
                # a raw-bytes client would never equal 'pong' — confirm adapter behavior.
                response = self.redis.get(response_key)
                if response == 'pong':
                    self.redis.delete(response_key)
                    return True
                time.sleep(0.1)

            # Timeout: clean up the response key before reporting failure.
            self.redis.delete(response_key)
            return False

        except Exception as e:
            logit.error(f"Failed to ping runner {runner_id}: {e}")
            return False
|
413
|
+
|
414
|
+
def shutdown(self, runner_id: str, graceful: bool = True) -> None:
|
415
|
+
"""
|
416
|
+
Request a runner to shutdown.
|
417
|
+
|
418
|
+
Args:
|
419
|
+
runner_id: Runner identifier
|
420
|
+
graceful: If True, wait for current job to finish
|
421
|
+
"""
|
422
|
+
try:
|
423
|
+
control_key = self.keys.runner_ctl(runner_id)
|
424
|
+
message = json.dumps({
|
425
|
+
'command': 'shutdown',
|
426
|
+
'graceful': graceful
|
427
|
+
})
|
428
|
+
|
429
|
+
self.redis.publish(control_key, message)
|
430
|
+
logit.info(f"Sent shutdown command to runner {runner_id} (graceful={graceful})")
|
431
|
+
|
432
|
+
except Exception as e:
|
433
|
+
logit.error(f"Failed to shutdown runner {runner_id}: {e}")
|
434
|
+
|
435
|
+
def broadcast(self, channel: str, func: str, payload: Dict[str, Any],
|
436
|
+
**options) -> str:
|
437
|
+
"""
|
438
|
+
Publish a broadcast job to a channel.
|
439
|
+
|
440
|
+
Args:
|
441
|
+
channel: Channel to broadcast on
|
442
|
+
func: Job function module path
|
443
|
+
payload: Job payload
|
444
|
+
**options: Additional job options
|
445
|
+
|
446
|
+
Returns:
|
447
|
+
Job ID
|
448
|
+
"""
|
449
|
+
from . import publish
|
450
|
+
|
451
|
+
return publish(
|
452
|
+
func=func,
|
453
|
+
payload=payload,
|
454
|
+
channel=channel,
|
455
|
+
broadcast=True,
|
456
|
+
**options
|
457
|
+
)
|
458
|
+
|
459
|
+
    def job_status(self, job_id: str) -> Optional[Dict[str, Any]]:
        """
        Get detailed status of a job.

        Args:
            job_id: Job identifier

        Returns:
            Job status dict or None if not found
        """
        from . import status

        # Get basic status
        job_info = status(job_id)
        if not job_info:
            return None

        # Enhance with additional info
        try:
            # Add events timeline (capped at the 20 earliest events by 'at')
            job = Job.objects.get(id=job_id)
            events = JobEvent.objects.filter(job=job).order_by('at')[:20]

            job_info['events'] = [
                {
                    'event': e.event,
                    'at': e.at.isoformat(),
                    'runner_id': e.runner_id,
                    'attempt': e.attempt,
                    'details': e.details
                }
                for e in events
            ]

            # Add queue position if pending
            if job_info['status'] == 'pending' and job.run_at:
                # Check position in scheduled queue (1-based for display)
                sched_key = self.keys.sched(job.channel)
                rank = self.redis.get_client().zrank(sched_key, job_id)
                if rank is not None:
                    job_info['queue_position'] = rank + 1

        except Exception as e:
            # Enhancement is best-effort; the basic status is still returned.
            logit.debug(f"Failed to enhance job status: {e}")

        return job_info
|
505
|
+
|
506
|
+
def cancel_job(self, job_id: str) -> bool:
|
507
|
+
"""
|
508
|
+
Cancel a job.
|
509
|
+
|
510
|
+
Args:
|
511
|
+
job_id: Job identifier
|
512
|
+
|
513
|
+
Returns:
|
514
|
+
True if cancelled, False otherwise
|
515
|
+
"""
|
516
|
+
from . import cancel
|
517
|
+
return cancel(job_id)
|
518
|
+
|
519
|
+
    def retry_job(self, job_id: str, delay: Optional[int] = None) -> bool:
        """
        Retry a failed job.

        Args:
            job_id: Job identifier
            delay: Delay in seconds before retry (default: immediate)

        Returns:
            True if retry scheduled, False otherwise
        """
        try:
            job = Job.objects.get(id=job_id)

            # Only terminal failure states are retryable.
            if job.status not in ('failed', 'canceled'):
                logit.warn(f"Cannot retry job {job_id} in status {job.status}")
                return False

            # Reset job for retry: back to pending with a clean error slate.
            job.status = 'pending'
            job.attempt = 0
            job.last_error = ''
            job.stack_trace = ''

            if delay:
                job.run_at = timezone.now() + timedelta(seconds=delay)
            else:
                # None means "run as soon as possible".
                job.run_at = None

            job.save()

            # Re-publish to Redis
            from . import publish

            # NOTE(review): declared -> bool but returns publish()'s result
            # directly (docstrings elsewhere say publish returns a job ID);
            # callers treating it as truthy are fine — confirm intent.
            return publish(
                func=job.func,
                payload=job.payload,
                channel=job.channel,
                run_at=job.run_at,
                broadcast=job.broadcast,
                max_retries=job.max_retries,
                expires_at=job.expires_at,
                max_exec_seconds=job.max_exec_seconds
            )

        except Job.DoesNotExist:
            logit.error(f"Job {job_id} not found")
            return False
        except Exception as e:
            logit.error(f"Failed to retry job {job_id}: {e}")
            return False
|
570
|
+
|
571
|
+
    def _get_channel_metrics(self, channel: str) -> Dict[str, Any]:
        """Get recent metrics for a channel.

        Computed from the Job table over the last hour:
        - jobs_per_minute: (completed + failed) / 60
        - success_rate: completed / (completed + failed) * 100
        - avg_duration_ms: mean finished_at - started_at of completed jobs
        """
        # Zeroed defaults so callers always get all three keys.
        metrics = {
            'jobs_per_minute': 0,
            'success_rate': 0,
            'avg_duration_ms': 0
        }

        try:
            # Get recent job counts from database
            now = timezone.now()
            last_hour = now - timedelta(hours=1)

            # Jobs completed in last hour
            completed = Job.objects.filter(
                channel=channel,
                status='completed',
                finished_at__gte=last_hour
            ).count()

            # Jobs failed in last hour
            failed = Job.objects.filter(
                channel=channel,
                status='failed',
                finished_at__gte=last_hour
            ).count()

            total = completed + failed
            if total > 0:
                # Hour-long window divided into 60 minutes.
                metrics['jobs_per_minute'] = round(total / 60, 2)
                metrics['success_rate'] = round(completed / total * 100, 1)

            # Average duration of recent completed jobs
            from django.db.models import Avg, F
            avg_duration = Job.objects.filter(
                channel=channel,
                status='completed',
                finished_at__gte=last_hour,
                started_at__isnull=False
            ).aggregate(
                avg_ms=Avg(F('finished_at') - F('started_at'))
            )

            # Avg of datetime differences yields a timedelta; convert to ms.
            if avg_duration['avg_ms']:
                metrics['avg_duration_ms'] = int(avg_duration['avg_ms'].total_seconds() * 1000)

        except Exception as e:
            # Metrics are advisory; never propagate DB errors to callers.
            logit.debug(f"Failed to get channel metrics: {e}")

        return metrics
|
621
|
+
|
622
|
+
def get_stats(self) -> Dict[str, Any]:
|
623
|
+
"""
|
624
|
+
Get overall system statistics.
|
625
|
+
|
626
|
+
Returns:
|
627
|
+
System-wide statistics
|
628
|
+
"""
|
629
|
+
stats = {
|
630
|
+
'channels': {},
|
631
|
+
'runners': [],
|
632
|
+
'totals': {
|
633
|
+
'pending': 0,
|
634
|
+
'queued': 0,
|
635
|
+
'inflight': 0,
|
636
|
+
'running': 0,
|
637
|
+
'completed': 0,
|
638
|
+
'failed': 0,
|
639
|
+
'scheduled': 0,
|
640
|
+
'runners_active': 0
|
641
|
+
},
|
642
|
+
'scheduler': {
|
643
|
+
'active': False,
|
644
|
+
'lock_holder': None
|
645
|
+
}
|
646
|
+
}
|
647
|
+
|
648
|
+
try:
|
649
|
+
# Get stats for each configured channel
|
650
|
+
channels = getattr(settings, 'JOBS_CHANNELS', ['default', 'email', 'webhooks', 'priority'])
|
651
|
+
for channel in channels:
|
652
|
+
state = self.get_queue_state(channel)
|
653
|
+
|
654
|
+
# Include DB running count per channel for better visibility
|
655
|
+
try:
|
656
|
+
state['db_running'] = Job.objects.filter(channel=channel, status='running').count()
|
657
|
+
except Exception:
|
658
|
+
state['db_running'] = 0
|
659
|
+
|
660
|
+
stats['channels'][channel] = state
|
661
|
+
|
662
|
+
# Aggregate totals
|
663
|
+
stats['totals']['scheduled'] += state['scheduled_count']
|
664
|
+
# queued_count = unclaimed_count; inflight_count = pending_count
|
665
|
+
queued = state.get('queued_count', state.get('unclaimed_count', max(0, state.get('stream_length', 0) - state.get('pending_count', 0))))
|
666
|
+
inflight = state.get('inflight_count', state.get('pending_count', 0))
|
667
|
+
stats['totals']['queued'] += queued
|
668
|
+
stats['totals']['inflight'] += inflight
|
669
|
+
# Keep 'pending' as alias for queued for backward compatibility
|
670
|
+
stats['totals']['pending'] += queued
|
671
|
+
|
672
|
+
# Get all runners
|
673
|
+
all_runners = self.get_runners()
|
674
|
+
stats['runners'] = all_runners
|
675
|
+
alive_runners = [r for r in all_runners if r.get('alive')]
|
676
|
+
alive_ids = [r.get('runner_id') for r in alive_runners if r.get('runner_id')]
|
677
|
+
stats['totals']['runners_active'] = len(alive_runners)
|
678
|
+
|
679
|
+
# Database totals with active vs stale running split
|
680
|
+
running_total = Job.objects.filter(status='running').count()
|
681
|
+
if alive_ids:
|
682
|
+
running_active = Job.objects.filter(status='running', runner_id__in=alive_ids).count()
|
683
|
+
else:
|
684
|
+
running_active = 0
|
685
|
+
running_stale = max(0, running_total - running_active)
|
686
|
+
|
687
|
+
stats['totals']['running'] = running_total
|
688
|
+
stats['totals']['running_active'] = running_active
|
689
|
+
stats['totals']['running_stale'] = running_stale
|
690
|
+
stats['totals']['completed'] = Job.objects.filter(status='completed').count()
|
691
|
+
stats['totals']['failed'] = Job.objects.filter(status='failed').count()
|
692
|
+
|
693
|
+
# Check scheduler lock
|
694
|
+
lock_value = self.redis.get(self.keys.scheduler_lock())
|
695
|
+
if lock_value:
|
696
|
+
stats['scheduler']['active'] = True
|
697
|
+
stats['scheduler']['lock_holder'] = lock_value
|
698
|
+
|
699
|
+
except Exception as e:
|
700
|
+
logit.error(f"Failed to get system stats: {e}")
|
701
|
+
|
702
|
+
return stats
|
703
|
+
|
704
|
+
def pause_channel(self, channel: str) -> bool:
|
705
|
+
"""
|
706
|
+
Pause a channel by setting a pause flag in Redis.
|
707
|
+
Runners and scheduler should respect this flag.
|
708
|
+
"""
|
709
|
+
try:
|
710
|
+
self.redis.set(self.keys.channel_pause(channel), '1')
|
711
|
+
logit.info(f"Paused channel {channel}")
|
712
|
+
return True
|
713
|
+
except Exception as e:
|
714
|
+
logit.error(f"Failed to pause channel {channel}: {e}")
|
715
|
+
return False
|
716
|
+
|
717
|
+
def resume_channel(self, channel: str) -> bool:
|
718
|
+
"""
|
719
|
+
Resume a channel by clearing the pause flag in Redis.
|
720
|
+
"""
|
721
|
+
try:
|
722
|
+
self.redis.delete(self.keys.channel_pause(channel))
|
723
|
+
logit.info(f"Resumed channel {channel}")
|
724
|
+
return True
|
725
|
+
except Exception as e:
|
726
|
+
logit.error(f"Failed to resume channel {channel}: {e}")
|
727
|
+
return False
|
728
|
+
|
729
|
+
def clear_channel(self, channel: str, cancel_db_pending: bool = True) -> Dict[str, Any]:
|
730
|
+
"""
|
731
|
+
Completely clear a channel’s Redis queues and optionally cancel DB-pending jobs.
|
732
|
+
|
733
|
+
Steps:
|
734
|
+
1) Pause channel
|
735
|
+
2) Delete main stream, broadcast stream, scheduled and scheduled_broadcast ZSETs
|
736
|
+
3) Optionally mark DB pending jobs as canceled
|
737
|
+
4) Resume channel
|
738
|
+
"""
|
739
|
+
result: Dict[str, Any] = {
|
740
|
+
'channel': channel,
|
741
|
+
'deleted': {},
|
742
|
+
'db_pending_canceled': 0,
|
743
|
+
'status': True,
|
744
|
+
'errors': []
|
745
|
+
}
|
746
|
+
try:
|
747
|
+
self.pause_channel(channel)
|
748
|
+
|
749
|
+
# Delete Plan B keys and legacy streams
|
750
|
+
stream_key = self.keys.stream(channel) # legacy
|
751
|
+
broadcast_key = self.keys.stream_broadcast(channel) # legacy
|
752
|
+
sched_key = self.keys.sched(channel)
|
753
|
+
sched_b_key = self.keys.sched_broadcast(channel)
|
754
|
+
queue_key = self.keys.queue(channel)
|
755
|
+
processing_key = self.keys.processing(channel)
|
756
|
+
|
757
|
+
deleted_stream = self.redis.delete(stream_key)
|
758
|
+
deleted_broadcast = self.redis.delete(broadcast_key)
|
759
|
+
deleted_sched = self.redis.delete(sched_key)
|
760
|
+
deleted_sched_b = self.redis.delete(sched_b_key)
|
761
|
+
deleted_queue = self.redis.delete(queue_key)
|
762
|
+
deleted_processing = self.redis.delete(processing_key)
|
763
|
+
|
764
|
+
result['deleted'] = {
|
765
|
+
'stream': bool(deleted_stream),
|
766
|
+
'broadcast': bool(deleted_broadcast),
|
767
|
+
'scheduled': bool(deleted_sched),
|
768
|
+
'scheduled_broadcast': bool(deleted_sched_b),
|
769
|
+
'queue': bool(deleted_queue),
|
770
|
+
'processing': bool(deleted_processing),
|
771
|
+
}
|
772
|
+
|
773
|
+
if cancel_db_pending:
|
774
|
+
try:
|
775
|
+
count = Job.objects.filter(
|
776
|
+
channel=channel,
|
777
|
+
status='pending'
|
778
|
+
).update(
|
779
|
+
status='canceled',
|
780
|
+
finished_at=timezone.now()
|
781
|
+
)
|
782
|
+
result['db_pending_canceled'] = count
|
783
|
+
except Exception as e:
|
784
|
+
result['errors'].append(f"DB cancel pending failed: {e}")
|
785
|
+
result['status'] = False
|
786
|
+
|
787
|
+
except Exception as e:
|
788
|
+
result['errors'].append(str(e))
|
789
|
+
result['status'] = False
|
790
|
+
finally:
|
791
|
+
# Always attempt to resume to avoid leaving the channel paused
|
792
|
+
self.resume_channel(channel)
|
793
|
+
|
794
|
+
return result
|
795
|
+
|
796
|
+
def requeue_db_pending(self, channel: str, limit: Optional[int] = None) -> Dict[str, Any]:
|
797
|
+
"""
|
798
|
+
Requeue DB 'pending' jobs for a channel back into Redis streams.
|
799
|
+
Useful after a clear to rebuild the stream from DB truth.
|
800
|
+
"""
|
801
|
+
try:
|
802
|
+
qs = Job.objects.filter(channel=channel, status='pending').order_by('created')
|
803
|
+
if limit is not None:
|
804
|
+
qs = qs[:int(limit)]
|
805
|
+
|
806
|
+
requeued = 0
|
807
|
+
for job in qs:
|
808
|
+
stream_key = self.keys.stream_broadcast(channel) if job.broadcast else self.keys.stream(channel)
|
809
|
+
try:
|
810
|
+
self.redis.xadd(stream_key, {
|
811
|
+
'job_id': job.id,
|
812
|
+
'func': job.func,
|
813
|
+
'created': timezone.now().isoformat()
|
814
|
+
})
|
815
|
+
try:
|
816
|
+
JobEvent.objects.create(
|
817
|
+
job=job,
|
818
|
+
channel=channel,
|
819
|
+
event='queued',
|
820
|
+
details={'requeued': True}
|
821
|
+
)
|
822
|
+
except Exception:
|
823
|
+
pass
|
824
|
+
requeued += 1
|
825
|
+
except Exception as e:
|
826
|
+
logit.warn(f"Failed to requeue job {job.id} on {channel}: {e}")
|
827
|
+
|
828
|
+
return {'status': True, 'requeued': requeued, 'channel': channel}
|
829
|
+
except Exception as e:
|
830
|
+
return {'status': False, 'error': str(e), 'channel': channel}
|
831
|
+
|
832
|
+
def purge_old_jobs(self, days_old: int, status: Optional[str] = None, dry_run: bool = False) -> Dict[str, Any]:
|
833
|
+
"""
|
834
|
+
Purge old jobs (and their events via cascade) from the database.
|
835
|
+
|
836
|
+
Args:
|
837
|
+
days_old: Delete jobs older than this many days
|
838
|
+
status: Optional status filter to narrow deletion
|
839
|
+
dry_run: If true, only count and do not delete
|
840
|
+
|
841
|
+
Returns:
|
842
|
+
dict with status and either count (dry_run) or delete details
|
843
|
+
"""
|
844
|
+
try:
|
845
|
+
cutoff = timezone.now() - timedelta(days=int(days_old))
|
846
|
+
from django.db.models import Q
|
847
|
+
query = Q(created__lt=cutoff)
|
848
|
+
if status:
|
849
|
+
query &= Q(status=status)
|
850
|
+
qs = Job.objects.filter(query)
|
851
|
+
count = qs.count()
|
852
|
+
if dry_run:
|
853
|
+
return {
|
854
|
+
'status': True,
|
855
|
+
'dry_run': True,
|
856
|
+
'count': count,
|
857
|
+
'cutoff': cutoff.isoformat(),
|
858
|
+
'status_filter': status
|
859
|
+
}
|
860
|
+
deleted, details = qs.delete()
|
861
|
+
return {
|
862
|
+
'status': True,
|
863
|
+
'deleted': deleted,
|
864
|
+
'details': details,
|
865
|
+
'cutoff': cutoff.isoformat(),
|
866
|
+
'status_filter': status
|
867
|
+
}
|
868
|
+
except Exception as e:
|
869
|
+
return {'status': False, 'error': str(e)}
|
870
|
+
|
871
|
+
def get_registered_channels(self) -> List[str]:
|
872
|
+
"""
|
873
|
+
Discover registered channels by scanning Redis for main stream keys.
|
874
|
+
Returns a sorted, de-duplicated list of channel names.
|
875
|
+
"""
|
876
|
+
channels: List[str] = []
|
877
|
+
try:
|
878
|
+
pattern = f"{self.keys.prefix}:stream:*"
|
879
|
+
client = self.redis.get_client()
|
880
|
+
cursor = 0
|
881
|
+
found = set()
|
882
|
+
while True:
|
883
|
+
cursor, keys = client.scan(cursor, match=pattern, count=200)
|
884
|
+
for k in keys or []:
|
885
|
+
key_str = k.decode('utf-8') if isinstance(k, (bytes, bytearray)) else k
|
886
|
+
parts = key_str.split(":stream:")
|
887
|
+
if len(parts) == 2 and parts[1]:
|
888
|
+
channel = parts[1]
|
889
|
+
# ignore broadcast suffix if present
|
890
|
+
if channel.endswith(":broadcast"):
|
891
|
+
channel = channel.rsplit(":broadcast", 1)[0]
|
892
|
+
if channel:
|
893
|
+
found.add(channel)
|
894
|
+
if cursor == 0:
|
895
|
+
break
|
896
|
+
channels = sorted(found)
|
897
|
+
except Exception as e:
|
898
|
+
logit.debug(f"Failed to discover channels via Redis scan: {e}")
|
899
|
+
channels = []
|
900
|
+
return channels
|
901
|
+
|
902
|
+
def get_queue_sizes(self, channels: Optional[List[str]] = None) -> Dict[str, Any]:
|
903
|
+
"""
|
904
|
+
Get current queue sizes for channels including DB status counts.
|
905
|
+
|
906
|
+
Args:
|
907
|
+
channels: Optional list of channels. Defaults to discovered streams or settings.JOBS_CHANNELS
|
908
|
+
|
909
|
+
Returns:
|
910
|
+
dict with per-channel sizes and DB status counts
|
911
|
+
"""
|
912
|
+
try:
|
913
|
+
from django.conf import settings as dj_settings
|
914
|
+
channels = channels or self.get_registered_channels() or getattr(dj_settings, 'JOBS_CHANNELS', ['default'])
|
915
|
+
sizes: Dict[str, Any] = {}
|
916
|
+
for channel in channels:
|
917
|
+
stream_key = self.keys.stream(channel)
|
918
|
+
sched_key = self.keys.sched(channel)
|
919
|
+
sched_b_key = self.keys.sched_broadcast(channel)
|
920
|
+
|
921
|
+
# Stream length
|
922
|
+
try:
|
923
|
+
info = self.redis.xinfo_stream(stream_key)
|
924
|
+
stream_len = info.get('length', 0)
|
925
|
+
except Exception:
|
926
|
+
stream_len = 0
|
927
|
+
|
928
|
+
# Scheduled counts (both ZSETs)
|
929
|
+
scheduled = (self.redis.zcard(sched_key) or 0) + (self.redis.zcard(sched_b_key) or 0)
|
930
|
+
|
931
|
+
# DB status counts
|
932
|
+
from django.db.models import Count
|
933
|
+
db_counts_qs = Job.objects.filter(channel=channel).values('status').annotate(count=Count('id'))
|
934
|
+
status_counts = {row['status']: row['count'] for row in db_counts_qs}
|
935
|
+
|
936
|
+
sizes[channel] = {
|
937
|
+
'stream': stream_len,
|
938
|
+
'scheduled': scheduled,
|
939
|
+
'db_pending': status_counts.get('pending', 0),
|
940
|
+
'db_running': status_counts.get('running', 0),
|
941
|
+
'db_completed': status_counts.get('completed', 0),
|
942
|
+
'db_failed': status_counts.get('failed', 0),
|
943
|
+
'db_canceled': status_counts.get('canceled', 0),
|
944
|
+
'db_expired': status_counts.get('expired', 0),
|
945
|
+
}
|
946
|
+
|
947
|
+
return {'status': True, 'data': sizes}
|
948
|
+
except Exception as e:
|
949
|
+
return {'status': False, 'error': str(e)}
|
950
|
+
|
951
|
+
|
952
|
+
def _jobmanager_cleanup_consumer_groups(self, channel: Optional[str] = None, destroy_empty_groups: bool = True) -> Dict[str, Any]:
    """
    Clean up Redis Stream consumer groups and consumers.

    - If *channel* is given, operates on that channel only; otherwise
      iterates discovered channels (or the settings fallback).
    - Removes consumers that have no pending (unacked) messages.
    - Optionally destroys groups left with zero consumers afterwards.

    Args:
        channel: Optional channel to restrict the cleanup to.
        destroy_empty_groups: Destroy groups that end up empty.

    Returns:
        Dict with per-channel cleanup results and any errors.
    """
    results: Dict[str, Any] = {'status': True, 'channels': {}, 'errors': []}
    try:
        # Settings import may fail outside a configured Django context.
        try:
            from django.conf import settings as dj_settings
        except Exception:
            dj_settings = None

        if channel:
            targets = [channel]
        else:
            targets = self.get_registered_channels()
            if not targets and dj_settings:
                targets = getattr(dj_settings, 'JOBS_CHANNELS', ['default'])

        client = self.redis.get_client()

        for chan in targets:
            stream_key = self.keys.stream(chan)
            chan_stats: Dict[str, Any] = {
                'stream': stream_key,
                'groups_processed': 0,
                'consumers_removed': 0,
                'groups_destroyed': 0,
                'errors': []
            }

            # Fetch groups for this stream; a missing stream means no work.
            try:
                raw_groups = client.xinfo_groups(stream_key)
            except Exception as e:
                chan_stats['errors'].append(f"xinfo_groups failed: {e}")
                results['channels'][chan] = chan_stats
                continue

            # Normalize group entries to plain dicts with decoded strings.
            groups = []
            try:
                for entry in raw_groups or []:
                    if not isinstance(entry, dict):
                        continue
                    gname = entry.get('name')
                    if isinstance(gname, bytes):
                        gname = gname.decode('utf-8')
                    last_id = entry.get('last-delivered-id')
                    if isinstance(last_id, bytes):
                        last_id = last_id.decode('utf-8')
                    groups.append({
                        'name': gname,
                        'consumers': int(entry.get('consumers', 0) or 0),
                        'pending': int(entry.get('pending', 0) or 0),
                        'last_delivered_id': last_id or ''
                    })
            except Exception as e:
                chan_stats['errors'].append(f"group normalization failed: {e}")
                results['channels'][chan] = chan_stats
                continue

            # Process each group: drop idle consumers, then maybe the group.
            for grp in groups:
                gname = grp['name']
                chan_stats['groups_processed'] += 1

                try:
                    consumer_infos = client.xinfo_consumers(stream_key, gname)
                except Exception as e:
                    chan_stats['errors'].append(f"xinfo_consumers({gname}) failed: {e}")
                    consumer_infos = []

                # Remove consumers with no pending messages.
                dropped = 0
                try:
                    for cinfo in consumer_infos or []:
                        cname = cinfo.get('name')
                        if isinstance(cname, bytes):
                            cname = cname.decode('utf-8')
                        if cname and int(cinfo.get('pending', 0) or 0) == 0:
                            try:
                                client.execute_command('XGROUP', 'DELCONSUMER', stream_key, gname, cname)
                                dropped += 1
                            except Exception as e:
                                chan_stats['errors'].append(f"DELCONSUMER {gname}/{cname} failed: {e}")
                    chan_stats['consumers_removed'] += dropped
                except Exception as e:
                    chan_stats['errors'].append(f"consumer removal loop failed for {gname}: {e}")

                # Optionally destroy the group if no consumers remain.
                if destroy_empty_groups:
                    try:
                        # Re-read group info to check remaining consumers.
                        grp_after = None
                        for rg in client.xinfo_groups(stream_key) or []:
                            nm = rg.get('name')
                            if isinstance(nm, bytes):
                                nm = nm.decode('utf-8')
                            if nm == gname:
                                grp_after = rg
                                break
                        remaining = int(grp_after.get('consumers', 0) or 0) if grp_after else 0
                        if remaining == 0:
                            try:
                                client.execute_command('XGROUP', 'DESTROY', stream_key, gname)
                                chan_stats['groups_destroyed'] += 1
                            except Exception as e:
                                chan_stats['errors'].append(f"XGROUP DESTROY {gname} failed: {e}")
                    except Exception as e:
                        chan_stats['errors'].append(f"post-clean xinfo_groups failed: {e}")

            results['channels'][chan] = chan_stats

    except Exception as e:
        results['status'] = False
        results['errors'].append(str(e))

    return results

# Attach as a method on JobManager for runtime use
JobManager.cleanup_consumer_groups = _jobmanager_cleanup_consumer_groups
|
1085
|
+
|
1086
|
+
def _jobmanager_rebuild_scheduled(self, channel: Optional[str] = None, limit: Optional[int] = None) -> Dict[str, Any]:
    """
    Rebuild the scheduled ZSETs from DB truth for pending jobs with a
    future run_at. Useful if the ZSETs were not populated during publish
    or after an outage.

    Args:
        channel: Optional channel to restrict the rebuild to.
        limit: Optional max number of jobs per channel.

    Returns:
        Dict with per-channel counts and errors.
    """
    results: Dict[str, Any] = {'status': True, 'channels': {}, 'errors': []}
    try:
        from django.utils import timezone
        now = timezone.now()

        # Determine which channels to rebuild.
        if channel:
            targets = [channel]
        else:
            targets = self.get_registered_channels()
            if not targets:
                try:
                    from django.conf import settings as dj_settings
                    targets = getattr(dj_settings, 'JOBS_CHANNELS', ['default'])
                except Exception:
                    targets = ['default']

        for chan in targets:
            chan_stats = {'scheduled_added': 0, 'broadcast_added': 0, 'errors': []}
            try:
                # Pending jobs whose run_at lies in the future, oldest first.
                future_jobs = Job.objects.filter(channel=chan, status='pending', run_at__gt=now).order_by('run_at')
                if limit is not None:
                    future_jobs = future_jobs[:int(limit)]

                zset = self.keys.sched(chan)
                zset_broadcast = self.keys.sched_broadcast(chan)

                for job in future_jobs:
                    try:
                        target_zset = zset_broadcast if job.broadcast else zset
                        # ZSET score is run_at as epoch milliseconds.
                        score = job.run_at.timestamp() * 1000.0
                        # Skip entries already present in the ZSET.
                        if self.redis.zscore(target_zset, job.id) is not None:
                            continue
                        self.redis.zadd(target_zset, {job.id: score})
                        if job.broadcast:
                            chan_stats['broadcast_added'] += 1
                        else:
                            chan_stats['scheduled_added'] += 1
                    except Exception as ie:
                        chan_stats['errors'].append(f"{job.id}: {ie}")

            except Exception as ce:
                chan_stats['errors'].append(str(ce))

            results['channels'][chan] = chan_stats

    except Exception as e:
        results['status'] = False
        results['errors'].append(str(e))

    return results

# Attach as a method on JobManager for runtime use
JobManager.rebuild_scheduled = _jobmanager_rebuild_scheduled
|
1156
|
+
|
1157
|
+
def _jobmanager_recover_stale_running(self, channel: Optional[str] = None, max_age_seconds: Optional[int] = None) -> Dict[str, Any]:
    """
    Recover stale running jobs (DB shows status='running' but no inflight
    messages in Redis). For each channel (or a specific channel), if the
    inflight count is zero, reset DB running jobs to pending and requeue
    them to the stream immediately.

    Args:
        channel: Optional channel to restrict recovery.
        max_age_seconds: Only recover jobs started before
            now - max_age_seconds (when given and positive).

    Returns:
        {'status': bool,
         'channels': {channel: {'examined': N, 'recovered': M, 'errors': [...]}},
         'errors': [...]}
    """
    results: Dict[str, Any] = {'status': True, 'channels': {}, 'errors': []}
    try:
        # Determine which channels to examine.
        if channel:
            targets = [channel]
        else:
            try:
                targets = self.get_registered_channels()
            except Exception:
                targets = []
            if not targets:
                try:
                    from django.conf import settings as dj_settings
                    targets = getattr(dj_settings, 'JOBS_CHANNELS', ['default'])
                except Exception:
                    targets = ['default']

        now = timezone.now()
        for chan in targets:
            chan_stats: Dict[str, Any] = {'examined': 0, 'recovered': 0, 'errors': []}
            try:
                # Check the channel's inflight (PEL) count first.
                state = self.get_queue_state(chan)
                inflight = int(state.get('inflight_count', 0) or 0)

                # Only recover when nothing is inflight, to avoid racing
                # work that is genuinely still running.
                if inflight > 0:
                    results['channels'][chan] = chan_stats
                    continue

                # Build the query for DB 'running' jobs, oldest first.
                from django.db.models import Q
                criteria = Q(channel=chan, status='running')
                if max_age_seconds is not None and max_age_seconds > 0:
                    cutoff = now - timedelta(seconds=int(max_age_seconds))
                    criteria &= Q(started_at__lt=cutoff)

                stale_jobs = Job.objects.filter(criteria).order_by('started_at')
                chan_stats['examined'] = stale_jobs.count()

                for job in stale_jobs:
                    try:
                        # Reset the DB record back to a clean pending state.
                        job.status = 'pending'
                        job.runner_id = None
                        job.cancel_requested = False
                        job.started_at = None
                        job.finished_at = None
                        job.last_error = job.last_error or ''
                        job.stack_trace = job.stack_trace or ''
                        job.save(update_fields=['status', 'runner_id', 'cancel_requested', 'started_at', 'finished_at', 'last_error', 'stack_trace', 'modified'])

                        # Push straight back onto the appropriate stream.
                        stream_key = self.keys.stream_broadcast(job.channel) if job.broadcast else self.keys.stream(job.channel)
                        try:
                            self.redis.xadd(stream_key, {
                                'job_id': job.id,
                                'func': job.func,
                                'recovered': now.isoformat()
                            })
                        except Exception as xe:
                            chan_stats['errors'].append(f"xadd failed for {job.id}: {xe}")

                        # Best-effort audit event; never block recovery on it.
                        try:
                            JobEvent.objects.create(
                                job=job,
                                channel=job.channel,
                                event='retry',
                                details={'reason': 'recover_stale_running'}
                            )
                        except Exception as ee:
                            chan_stats['errors'].append(f"event failed for {job.id}: {ee}")

                        chan_stats['recovered'] += 1
                    except Exception as je:
                        chan_stats['errors'].append(f"{job.id}: {je}")

            except Exception as ce:
                chan_stats['errors'].append(str(ce))

            results['channels'][chan] = chan_stats

    except Exception as e:
        results['status'] = False
        results['errors'].append(str(e))

    return results

# Attach as a method on JobManager for runtime use
JobManager.recover_stale_running = _jobmanager_recover_stale_running
|
1274
|
+
|
1275
|
+
# Module-level JobManager singleton; access it via get_manager().
_manager = None
|
1277
|
+
|
1278
|
+
|
1279
|
+
def get_manager() -> JobManager:
    """
    Return the module-level JobManager singleton, creating it lazily on
    first use.

    Returns:
        The shared JobManager instance.
    """
    global _manager
    if not _manager:
        _manager = JobManager()
    return _manager
|
1290
|
+
|
1291
|
+
|
1292
|
+
# Convenience functions for Django shell
|
1293
|
+
def clear_stuck_jobs(channel: str, idle_threshold_ms: int = 60000) -> Dict[str, Any]:
    """
    Convenience wrapper to clear stuck jobs from the Django shell.

    Usage:
        from mojo.apps.jobs.manager import clear_stuck_jobs
        result = clear_stuck_jobs('email', idle_threshold_ms=60000)
        print(result)

    Args:
        channel: Channel name to clear.
        idle_threshold_ms: Consider a job stuck after this much idle time
            (0 clears everything).

    Returns:
        Dict with results from JobManager.clear_stuck_jobs.
    """
    return get_manager().clear_stuck_jobs(channel, idle_threshold_ms=idle_threshold_ms)
|
1310
|
+
|
1311
|
+
|
1312
|
+
def get_channel_health(channel: str) -> Dict[str, Any]:
    """
    Convenience wrapper to check channel health from the Django shell.

    Usage:
        from mojo.apps.jobs.manager import get_channel_health
        health = get_channel_health('email')
        print(f"Pending: {health['messages']['pending']}")

    Args:
        channel: Channel name to check.

    Returns:
        Channel health dict from JobManager.get_channel_health.
    """
    return get_manager().get_channel_health(channel)
|