django-nativemojo 0.1.15__py3-none-any.whl → 0.1.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221)
  1. {django_nativemojo-0.1.15.dist-info → django_nativemojo-0.1.17.dist-info}/METADATA +3 -2
  2. django_nativemojo-0.1.17.dist-info/RECORD +302 -0
  3. mojo/__init__.py +1 -1
  4. mojo/apps/account/management/commands/serializer_admin.py +121 -1
  5. mojo/apps/account/migrations/0006_add_device_tracking_models.py +72 -0
  6. mojo/apps/account/migrations/0007_delete_userdevicelocation.py +16 -0
  7. mojo/apps/account/migrations/0008_userdevicelocation.py +33 -0
  8. mojo/apps/account/migrations/0009_geolocatedip_subnet.py +18 -0
  9. mojo/apps/account/migrations/0010_group_avatar.py +20 -0
  10. mojo/apps/account/migrations/0011_user_org_registereddevice_pushconfig_and_more.py +118 -0
  11. mojo/apps/account/migrations/0012_remove_pushconfig_apns_key_file_and_more.py +21 -0
  12. mojo/apps/account/migrations/0013_pushconfig_test_mode_alter_pushconfig_apns_enabled_and_more.py +28 -0
  13. mojo/apps/account/migrations/0014_notificationdelivery_data_payload_and_more.py +48 -0
  14. mojo/apps/account/models/__init__.py +2 -0
  15. mojo/apps/account/models/device.py +279 -0
  16. mojo/apps/account/models/group.py +294 -8
  17. mojo/apps/account/models/member.py +14 -1
  18. mojo/apps/account/models/push/__init__.py +4 -0
  19. mojo/apps/account/models/push/config.py +112 -0
  20. mojo/apps/account/models/push/delivery.py +93 -0
  21. mojo/apps/account/models/push/device.py +66 -0
  22. mojo/apps/account/models/push/template.py +99 -0
  23. mojo/apps/account/models/user.py +190 -17
  24. mojo/apps/account/rest/__init__.py +2 -0
  25. mojo/apps/account/rest/device.py +39 -0
  26. mojo/apps/account/rest/group.py +8 -0
  27. mojo/apps/account/rest/push.py +187 -0
  28. mojo/apps/account/rest/user.py +95 -5
  29. mojo/apps/account/services/__init__.py +1 -0
  30. mojo/apps/account/services/push.py +363 -0
  31. mojo/apps/aws/migrations/0001_initial.py +206 -0
  32. mojo/apps/aws/migrations/0002_emaildomain_can_recv_emaildomain_can_send_and_more.py +28 -0
  33. mojo/apps/aws/migrations/0003_mailbox_is_domain_default_mailbox_is_system_default_and_more.py +31 -0
  34. mojo/apps/aws/migrations/0004_s3bucket.py +39 -0
  35. mojo/apps/aws/migrations/0005_alter_emaildomain_region_delete_s3bucket.py +21 -0
  36. mojo/apps/aws/models/__init__.py +19 -0
  37. mojo/apps/aws/models/email_attachment.py +99 -0
  38. mojo/apps/aws/models/email_domain.py +218 -0
  39. mojo/apps/aws/models/email_template.py +132 -0
  40. mojo/apps/aws/models/incoming_email.py +197 -0
  41. mojo/apps/aws/models/mailbox.py +288 -0
  42. mojo/apps/aws/models/sent_message.py +175 -0
  43. mojo/apps/aws/rest/__init__.py +6 -0
  44. mojo/apps/aws/rest/email.py +33 -0
  45. mojo/apps/aws/rest/email_ops.py +183 -0
  46. mojo/apps/aws/rest/messages.py +32 -0
  47. mojo/apps/aws/rest/send.py +101 -0
  48. mojo/apps/aws/rest/sns.py +403 -0
  49. mojo/apps/aws/rest/templates.py +19 -0
  50. mojo/apps/aws/services/__init__.py +32 -0
  51. mojo/apps/aws/services/email.py +390 -0
  52. mojo/apps/aws/services/email_ops.py +548 -0
  53. mojo/apps/docit/__init__.py +6 -0
  54. mojo/apps/docit/markdown_plugins/syntax_highlight.py +25 -0
  55. mojo/apps/docit/markdown_plugins/toc.py +12 -0
  56. mojo/apps/docit/migrations/0001_initial.py +113 -0
  57. mojo/apps/docit/migrations/0002_alter_book_modified_by_alter_page_modified_by.py +26 -0
  58. mojo/apps/docit/migrations/0003_alter_book_group.py +20 -0
  59. mojo/apps/docit/models/__init__.py +17 -0
  60. mojo/apps/docit/models/asset.py +231 -0
  61. mojo/apps/docit/models/book.py +227 -0
  62. mojo/apps/docit/models/page.py +319 -0
  63. mojo/apps/docit/models/page_revision.py +203 -0
  64. mojo/apps/docit/rest/__init__.py +10 -0
  65. mojo/apps/docit/rest/asset.py +17 -0
  66. mojo/apps/docit/rest/book.py +22 -0
  67. mojo/apps/docit/rest/page.py +22 -0
  68. mojo/apps/docit/rest/page_revision.py +17 -0
  69. mojo/apps/docit/services/__init__.py +11 -0
  70. mojo/apps/docit/services/docit.py +315 -0
  71. mojo/apps/docit/services/markdown.py +44 -0
  72. mojo/apps/fileman/backends/s3.py +209 -0
  73. mojo/apps/fileman/models/file.py +45 -9
  74. mojo/apps/fileman/models/manager.py +269 -3
  75. mojo/apps/incident/migrations/0007_event_uid.py +18 -0
  76. mojo/apps/incident/migrations/0008_ticket_ticketnote.py +55 -0
  77. mojo/apps/incident/migrations/0009_incident_status.py +18 -0
  78. mojo/apps/incident/migrations/0010_event_country_code.py +18 -0
  79. mojo/apps/incident/migrations/0011_incident_country_code.py +18 -0
  80. mojo/apps/incident/migrations/0012_alter_incident_status.py +18 -0
  81. mojo/apps/incident/models/__init__.py +1 -0
  82. mojo/apps/incident/models/event.py +35 -0
  83. mojo/apps/incident/models/incident.py +2 -0
  84. mojo/apps/incident/models/ticket.py +62 -0
  85. mojo/apps/incident/reporter.py +21 -3
  86. mojo/apps/incident/rest/__init__.py +1 -0
  87. mojo/apps/incident/rest/ticket.py +43 -0
  88. mojo/apps/jobs/__init__.py +489 -0
  89. mojo/apps/jobs/adapters.py +24 -0
  90. mojo/apps/jobs/cli.py +616 -0
  91. mojo/apps/jobs/daemon.py +370 -0
  92. mojo/apps/jobs/examples/sample_jobs.py +376 -0
  93. mojo/apps/jobs/examples/webhook_examples.py +203 -0
  94. mojo/apps/jobs/handlers/__init__.py +5 -0
  95. mojo/apps/jobs/handlers/webhook.py +317 -0
  96. mojo/apps/jobs/job_engine.py +734 -0
  97. mojo/apps/jobs/keys.py +203 -0
  98. mojo/apps/jobs/local_queue.py +363 -0
  99. mojo/apps/jobs/management/__init__.py +3 -0
  100. mojo/apps/jobs/management/commands/__init__.py +3 -0
  101. mojo/apps/jobs/manager.py +1327 -0
  102. mojo/apps/jobs/migrations/0001_initial.py +97 -0
  103. mojo/apps/jobs/migrations/0002_alter_job_max_retries_joblog.py +39 -0
  104. mojo/apps/jobs/models/__init__.py +6 -0
  105. mojo/apps/jobs/models/job.py +441 -0
  106. mojo/apps/jobs/rest/__init__.py +2 -0
  107. mojo/apps/jobs/rest/control.py +466 -0
  108. mojo/apps/jobs/rest/jobs.py +421 -0
  109. mojo/apps/jobs/scheduler.py +571 -0
  110. mojo/apps/jobs/services/__init__.py +6 -0
  111. mojo/apps/jobs/services/job_actions.py +465 -0
  112. mojo/apps/jobs/settings.py +209 -0
  113. mojo/apps/logit/models/log.py +3 -0
  114. mojo/apps/metrics/__init__.py +8 -1
  115. mojo/apps/metrics/redis_metrics.py +198 -0
  116. mojo/apps/metrics/rest/__init__.py +3 -0
  117. mojo/apps/metrics/rest/categories.py +266 -0
  118. mojo/apps/metrics/rest/helpers.py +48 -0
  119. mojo/apps/metrics/rest/permissions.py +99 -0
  120. mojo/apps/metrics/rest/values.py +277 -0
  121. mojo/apps/metrics/utils.py +17 -0
  122. mojo/decorators/http.py +40 -1
  123. mojo/helpers/aws/__init__.py +11 -7
  124. mojo/helpers/aws/inbound_email.py +309 -0
  125. mojo/helpers/aws/kms.py +413 -0
  126. mojo/helpers/aws/ses_domain.py +959 -0
  127. mojo/helpers/crypto/__init__.py +1 -1
  128. mojo/helpers/crypto/utils.py +15 -0
  129. mojo/helpers/location/__init__.py +2 -0
  130. mojo/helpers/location/countries.py +262 -0
  131. mojo/helpers/location/geolocation.py +196 -0
  132. mojo/helpers/logit.py +37 -0
  133. mojo/helpers/redis/__init__.py +2 -0
  134. mojo/helpers/redis/adapter.py +606 -0
  135. mojo/helpers/redis/client.py +48 -0
  136. mojo/helpers/redis/pool.py +225 -0
  137. mojo/helpers/request.py +8 -0
  138. mojo/helpers/response.py +8 -0
  139. mojo/middleware/auth.py +1 -1
  140. mojo/middleware/cors.py +40 -0
  141. mojo/middleware/logging.py +131 -12
  142. mojo/middleware/mojo.py +5 -0
  143. mojo/models/rest.py +271 -57
  144. mojo/models/secrets.py +86 -0
  145. mojo/serializers/__init__.py +16 -10
  146. mojo/serializers/core/__init__.py +90 -0
  147. mojo/serializers/core/cache/__init__.py +121 -0
  148. mojo/serializers/core/cache/backends.py +518 -0
  149. mojo/serializers/core/cache/base.py +102 -0
  150. mojo/serializers/core/cache/disabled.py +181 -0
  151. mojo/serializers/core/cache/memory.py +287 -0
  152. mojo/serializers/core/cache/redis.py +533 -0
  153. mojo/serializers/core/cache/utils.py +454 -0
  154. mojo/serializers/{manager.py → core/manager.py} +53 -4
  155. mojo/serializers/core/serializer.py +475 -0
  156. mojo/serializers/{advanced/formats → formats}/csv.py +116 -139
  157. mojo/serializers/suggested_improvements.md +388 -0
  158. testit/client.py +1 -1
  159. testit/helpers.py +14 -0
  160. testit/runner.py +23 -6
  161. django_nativemojo-0.1.15.dist-info/RECORD +0 -234
  162. mojo/apps/notify/README.md +0 -91
  163. mojo/apps/notify/README_NOTIFICATIONS.md +0 -566
  164. mojo/apps/notify/admin.py +0 -52
  165. mojo/apps/notify/handlers/example_handlers.py +0 -516
  166. mojo/apps/notify/handlers/ses/__init__.py +0 -25
  167. mojo/apps/notify/handlers/ses/complaint.py +0 -25
  168. mojo/apps/notify/handlers/ses/message.py +0 -86
  169. mojo/apps/notify/management/commands/__init__.py +0 -1
  170. mojo/apps/notify/management/commands/process_notifications.py +0 -370
  171. mojo/apps/notify/mod +0 -0
  172. mojo/apps/notify/models/__init__.py +0 -12
  173. mojo/apps/notify/models/account.py +0 -128
  174. mojo/apps/notify/models/attachment.py +0 -24
  175. mojo/apps/notify/models/bounce.py +0 -68
  176. mojo/apps/notify/models/complaint.py +0 -40
  177. mojo/apps/notify/models/inbox.py +0 -113
  178. mojo/apps/notify/models/inbox_message.py +0 -173
  179. mojo/apps/notify/models/outbox.py +0 -129
  180. mojo/apps/notify/models/outbox_message.py +0 -288
  181. mojo/apps/notify/models/template.py +0 -30
  182. mojo/apps/notify/providers/aws.py +0 -73
  183. mojo/apps/notify/rest/ses.py +0 -0
  184. mojo/apps/notify/utils/__init__.py +0 -2
  185. mojo/apps/notify/utils/notifications.py +0 -404
  186. mojo/apps/notify/utils/parsing.py +0 -202
  187. mojo/apps/notify/utils/render.py +0 -144
  188. mojo/apps/tasks/README.md +0 -118
  189. mojo/apps/tasks/__init__.py +0 -44
  190. mojo/apps/tasks/manager.py +0 -644
  191. mojo/apps/tasks/rest/__init__.py +0 -2
  192. mojo/apps/tasks/rest/hooks.py +0 -0
  193. mojo/apps/tasks/rest/tasks.py +0 -76
  194. mojo/apps/tasks/runner.py +0 -439
  195. mojo/apps/tasks/task.py +0 -99
  196. mojo/apps/tasks/tq_handlers.py +0 -132
  197. mojo/helpers/crypto/__pycache__/hash.cpython-310.pyc +0 -0
  198. mojo/helpers/crypto/__pycache__/sign.cpython-310.pyc +0 -0
  199. mojo/helpers/crypto/__pycache__/utils.cpython-310.pyc +0 -0
  200. mojo/helpers/redis.py +0 -10
  201. mojo/models/meta.py +0 -262
  202. mojo/serializers/advanced/README.md +0 -363
  203. mojo/serializers/advanced/__init__.py +0 -247
  204. mojo/serializers/advanced/formats/__init__.py +0 -28
  205. mojo/serializers/advanced/formats/excel.py +0 -516
  206. mojo/serializers/advanced/formats/json.py +0 -239
  207. mojo/serializers/advanced/formats/response.py +0 -485
  208. mojo/serializers/advanced/serializer.py +0 -568
  209. mojo/serializers/optimized.py +0 -618
  210. {django_nativemojo-0.1.15.dist-info → django_nativemojo-0.1.17.dist-info}/LICENSE +0 -0
  211. {django_nativemojo-0.1.15.dist-info → django_nativemojo-0.1.17.dist-info}/NOTICE +0 -0
  212. {django_nativemojo-0.1.15.dist-info → django_nativemojo-0.1.17.dist-info}/WHEEL +0 -0
  213. /mojo/apps/{notify → aws/migrations}/__init__.py +0 -0
  214. /mojo/apps/{notify/handlers → docit/markdown_plugins}/__init__.py +0 -0
  215. /mojo/apps/{notify/management → docit/migrations}/__init__.py +0 -0
  216. /mojo/apps/{notify/providers → jobs/examples}/__init__.py +0 -0
  217. /mojo/apps/{notify/rest → jobs/migrations}/__init__.py +0 -0
  218. /mojo/{serializers → rest}/openapi.py +0 -0
  219. /mojo/serializers/{settings_example.py → examples/settings.py} +0 -0
  220. /mojo/{apps/notify/handlers/ses/bounce.py → serializers/formats/__init__.py} +0 -0
  221. /mojo/serializers/{advanced/formats → formats}/localizers.py +0 -0
mojo/apps/jobs/job_engine.py
@@ -0,0 +1,734 @@
+"""
+JobEngine - The runner daemon for executing jobs.
+
+Plan B engine: consumes jobs from Redis Lists (per-channel queues),
+tracks in-flight jobs in a ZSET with visibility timeout, and executes
+registered handlers.
+"""
+import sys
+import signal
+import socket
+import time
+import json
+import threading
+import random
+import traceback
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Set, Tuple
+
+from django.db import close_old_connections
+
+from mojo.helpers.settings import settings
+from mojo.helpers import logit
+from .keys import JobKeys
+from .adapters import get_adapter
+from .models import Job, JobEvent
+import concurrent.futures
+import importlib
+from threading import Lock, Semaphore
+from typing import Callable
+
+from mojo.apps import metrics
+from mojo.helpers import dates
+
+logger = logit.get_logger("jobs", "jobs.log", debug=True)
+
+
+JOBS_ENGINE_CLAIM_BATCH = settings.get('JOBS_ENGINE_CLAIM_BATCH', 5)
+JOBS_CHANNELS = settings.get('JOBS_CHANNELS', ['default'])
+JOBS_ENGINE_MAX_WORKERS = settings.get('JOBS_ENGINE_MAX_WORKERS', 10)
+JOBS_ENGINE_CLAIM_BUFFER = settings.get('JOBS_ENGINE_CLAIM_BUFFER', 2)
+JOBS_RUNNER_HEARTBEAT_SEC = settings.get('JOBS_RUNNER_HEARTBEAT_SEC', 5)
+JOBS_VISIBILITY_TIMEOUT_MS = settings.get('JOBS_VISIBILITY_TIMEOUT_MS', 30000)
+
+
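
The JOBS_* knobs above are resolved through `settings.get` with the defaults shown, so a deployment only overrides what it needs. A minimal sketch, assuming `mojo.helpers.settings` falls back to the host project's Django settings (the values below simply restate the module defaults):

```python
# settings.py -- hypothetical overrides for the job engine
JOBS_CHANNELS = ["default", "priority"]  # channels this engine consumes
JOBS_ENGINE_MAX_WORKERS = 10             # thread-pool size per engine
JOBS_ENGINE_CLAIM_BUFFER = 2             # may hold max_workers * buffer claimed jobs
JOBS_RUNNER_HEARTBEAT_SEC = 5            # heartbeat/visibility-touch interval (seconds)
JOBS_VISIBILITY_TIMEOUT_MS = 30000       # reaper requeues jobs idle longer than this
```
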
+def load_job_function(func_path: str) -> Callable:
+    """
+    Dynamically import a job function.
+    Example: 'mojo.apps.account.jobs.send_invite'
+    """
+    try:
+        module_path, func_name = func_path.rsplit('.', 1)
+        module = importlib.import_module(module_path)
+        return getattr(module, func_name)
+    except (ImportError, AttributeError, ValueError) as e:
+        raise ImportError(f"Cannot load job function '{func_path}': {e}")
+
+
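
`load_job_function` resolves the dotted path stored on a job into a plain callable that takes the `Job` row, so handlers need no registration step. A minimal sketch of a handler matching the docstring's example path (the module body and the use of `job.metadata` are illustrative, not part of this diff):

```python
# mojo/apps/account/jobs.py -- hypothetical handler module
def send_invite(job):
    """Executed in a JobEngine worker thread; `job` is the Job model instance."""
    email = job.metadata.get("email")      # assumes the payload rides in job.metadata
    # ... deliver the invite here ...
    job.metadata["delivered"] = True       # execute_job() persists 'metadata' on success

# What the engine does at execution time:
# func = load_job_function("mojo.apps.account.jobs.send_invite")
# func(job)
```
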
+class JobEngine:
+    """
+    Job execution engine that runs as a daemon process.
+
+    Plan B: Consumes jobs from Redis List queues and executes handlers dynamically
+    with support for retries, cancellation, and parallel execution. Tracks in-flight
+    jobs in a ZSET to enable crash recovery via a reaper.
+    """
+
+    def __init__(self, channels: Optional[List[str]] = None,
+                 runner_id: Optional[str] = None,
+                 max_workers: Optional[int] = None):
+        """
+        Initialize the job engine.
+
+        Args:
+            channels: List of channels to consume from (default: from settings.JOBS_CHANNELS)
+            runner_id: Unique runner identifier (auto-generated if not provided)
+            max_workers: Maximum thread pool workers (default from settings)
+        """
+        self.channels = channels or JOBS_CHANNELS
+        self.runner_id = runner_id or self._generate_runner_id()
+        self.redis = get_adapter()
+        self.keys = JobKeys()
+
+        # Thread pool configuration
+        self.max_workers = max_workers or JOBS_ENGINE_MAX_WORKERS
+        self.executor = concurrent.futures.ThreadPoolExecutor(
+            max_workers=self.max_workers,
+            thread_name_prefix=f"JobWorker-{self.runner_id}"
+        )
+
+        # Track active jobs
+        self.active_jobs = {}
+        self.active_lock = Lock()
+
+        # Limit claimed jobs
+        claim_buffer = JOBS_ENGINE_CLAIM_BUFFER
+        self.max_claimed = self.max_workers * claim_buffer
+        self.claim_semaphore = Semaphore(self.max_claimed)
+
+        # Control flags
+        self.running = False
+        self.is_initialized = False
+        self.stop_event = threading.Event()
+
+        # Heartbeat thread
+        self.heartbeat_thread = None
+        self.heartbeat_interval = JOBS_RUNNER_HEARTBEAT_SEC
+
+        # Control channel listener
+        self.control_thread = None
+
+        # Stats
+        self.jobs_processed = 0
+        self.jobs_failed = 0
+        self.start_time = None
+
+        logger.info(f"JobEngine initialized: runner_id={self.runner_id}, "
+                    f"channels={self.channels}")
+
+    def _generate_runner_id(self) -> str:
+        """Generate a consistent runner ID based on hostname and channels."""
+        hostname = socket.gethostname()
+        # Clean hostname for use in ID (remove dots, make lowercase)
+        clean_hostname = hostname.lower().replace('.', '-').replace('_', '-')
+
+        # # Create a consistent suffix based on channels served
+        # channels_hash = hash(tuple(sorted(self.channels))) % 10000
+
+        return f"{clean_hostname}-engine"
+
+    def initialize(self):
+        if self.is_initialized:
+            logger.warning("JobEngine already initialized")
+            return
+        self.is_initialized = True
+
+        logger.info(f"Initializing JobEngine {self.runner_id}")
+        self.running = True
+        self.start_time = dates.utcnow()
+        self.stop_event.clear()
+
+        # Start heartbeat thread
+        self._start_heartbeat()
+
+        # Start control listener thread
+        self._start_control_listener()
+
+        # Register signal handlers
+        self._setup_signal_handlers()
+
+    def start(self):
+        """
+        Start the job engine.
+
+        Initializes the heartbeat, control listener, and signal handlers, then begins processing.
+        """
+        if self.running:
+            logger.warning("JobEngine already running")
+            return
+
+        self.initialize()
+
+        # Main processing loop
+        try:
+            self._main_loop()
+        except KeyboardInterrupt:
+            logger.info("JobEngine interrupted by user")
+        except Exception as e:
+            logger.error(f"JobEngine crashed: {e}")
+            raise
+        finally:
+            self.stop()
+
+    def stop(self, timeout: float = 30.0):
+        """
+        Stop the job engine gracefully.
+
+        Args:
+            timeout: Maximum time to wait for clean shutdown
+        """
+        if self.running:
+            logger.info(f"Stopping JobEngine {self.runner_id}...")
+            self.running = False
+            self.stop_event.set()
+            # Wait for active jobs
+            with self.active_lock:
+                active = list(self.active_jobs.values())
+            if active:
+                logger.info(f"Waiting for {len(active)} active jobs...")
+                futures = [j['future'] for j in active]
+                concurrent.futures.wait(futures, timeout=timeout / 2)
+            # Shutdown executor
+            self.executor.shutdown(wait=True)
+
+            # Stop heartbeat
+            if self.heartbeat_thread and self.heartbeat_thread.is_alive():
+                self.heartbeat_thread.join(timeout=5.0)
+
+            # Stop control listener
+            if self.control_thread and self.control_thread.is_alive():
+                self.control_thread.join(timeout=5.0)
+
+            # Clean up Redis keys
+            try:
+                self.redis.delete(self.keys.runner_hb(self.runner_id))
+            except Exception as e:
+                logger.warning(f"Failed to clean up runner keys: {e}")
+
+            logger.info(f"JobEngine {self.runner_id} stopped. "
+                        f"Processed: {self.jobs_processed}, Failed: {self.jobs_failed}")
+
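
Taken together, `initialize`/`start`/`stop` give the engine a conventional daemon lifecycle. A minimal sketch of a worker entry point (file name and channel list are illustrative):

```python
# run_engine.py -- hypothetical worker entry point
from mojo.apps.jobs.job_engine import JobEngine

if __name__ == "__main__":
    engine = JobEngine(channels=["priority", "default"], max_workers=4)
    # start() runs initialize() (heartbeat, reaper, control listener, signal
    # handlers) and then blocks in the claim/execute loop; SIGTERM, SIGINT,
    # or a 'shutdown' control message all funnel into the graceful stop() above.
    engine.start()
```
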
+    def _cleanup_consumer_groups(self):
+        """
+        Clean up consumer group registrations on shutdown.
+        This prevents accumulation of dead consumers.
+        """
+        logger.info(f"Cleaning up consumer registrations for {self.runner_id}")
+
+        for channel in self.channels:
+            try:
+                stream_key = self.keys.stream(channel)
+                group_key = self.keys.group_workers(channel)
+                broadcast_stream = self.keys.stream_broadcast(channel)
+                runner_group = self.keys.group_runner(channel, self.runner_id)
+
+                client = self.redis.get_client()
+
+                # For main stream: reclaim and ACK any pending jobs before deletion
+                try:
+                    pending_info = client.execute_command(
+                        'XPENDING', stream_key, group_key, '-', '+', '100', self.runner_id
+                    )
+
+                    if pending_info:
+                        message_ids = [msg[0] for msg in pending_info]
+                        if message_ids:
+                            # Reclaim and immediately ACK to clear them
+                            try:
+                                claimed = client.execute_command(
+                                    'XCLAIM', stream_key, group_key, self.runner_id,
+                                    '0', *message_ids
+                                )
+                                if claimed:
+                                    client.execute_command('XACK', stream_key, group_key, *message_ids)
+                                    logger.info(f"Cleared {len(message_ids)} pending jobs during cleanup for {channel}")
+                            except Exception as e:
+                                logger.warning(f"Failed to clear pending jobs during cleanup: {e}")
+
+                except Exception as e:
+                    logger.debug(f"No pending jobs to clean for {channel}: {e}")
+
+                # Delete consumer from main group
+                try:
+                    client.execute_command('XGROUP', 'DELCONSUMER', stream_key, group_key, self.runner_id)
+                    logger.debug(f"Removed consumer {self.runner_id} from group {group_key}")
+                except Exception as e:
+                    logger.debug(f"Consumer {self.runner_id} was not in group {group_key}: {e}")
+
+                # Delete consumer from broadcast group
+                try:
+                    client.execute_command('XGROUP', 'DELCONSUMER', broadcast_stream, runner_group, self.runner_id)
+                    logger.debug(f"Removed consumer {self.runner_id} from broadcast group {runner_group}")
+                except Exception as e:
+                    logger.debug(f"Consumer {self.runner_id} was not in broadcast group {runner_group}: {e}")
+
+            except Exception as e:
+                logger.warning(f"Failed to cleanup consumer groups for {channel}: {e}")
+
+    def _setup_consumer_groups(self):
+        """No-op in Plan B (List + ZSET)."""
+        logger.info("Plan B mode: no consumer groups to set up.")
+
+    def _setup_signal_handlers(self):
+        """Register signal handlers for graceful shutdown."""
+        def handle_signal(signum, frame):
+            logger.info(f"Received signal {signum}, initiating graceful shutdown")
+            self.stop()
+            sys.exit(0)
+
+        signal.signal(signal.SIGTERM, handle_signal)
+        signal.signal(signal.SIGINT, handle_signal)
+
+    def _start_heartbeat(self):
+        """Start the heartbeat and reaper threads."""
+        self.heartbeat_thread = threading.Thread(
+            target=self._heartbeat_loop,
+            name=f"Heartbeat-{self.runner_id}",
+            daemon=True
+        )
+        self.heartbeat_thread.start()
+        # Reaper thread for visibility timeout
+        self.reaper_thread = threading.Thread(
+            target=self._reaper_loop,
+            name=f"Reaper-{self.runner_id}",
+            daemon=True
+        )
+        self.reaper_thread.start()
+
+    def _heartbeat_loop(self):
+        """Heartbeat thread main loop."""
+        hb_key = self.keys.runner_hb(self.runner_id)
+
+        while self.running and not self.stop_event.is_set():
+            try:
+                # Update heartbeat with TTL
+                self.redis.set(hb_key, json.dumps({
+                    'runner_id': self.runner_id,
+                    'hostname': socket.gethostname(),
+                    'channels': self.channels,
+                    'jobs_processed': self.jobs_processed,
+                    'jobs_failed': self.jobs_failed,
+                    'started': self.start_time.isoformat(),
+                    'last_heartbeat': dates.utcnow().isoformat()
+                }), ex=self.heartbeat_interval * 3)  # TTL = 3x interval
+
+                # Touch visibility timeout for active jobs to prevent premature reaping
+                try:
+                    now_ms = int(time.time() * 1000)
+                    # Snapshot active jobs to minimize lock hold time
+                    with self.active_lock:
+                        active_snapshot = [(jid, meta.get('channel')) for jid, meta in self.active_jobs.items()]
+                    for jid, ch in active_snapshot:
+                        if not ch:
+                            continue
+                        # Update in-flight ZSET score to extend visibility timeout
+                        self.redis.zadd(self.keys.processing(ch), {jid: now_ms})
+                except Exception as te:
+                    logger.debug(f"Heartbeat touch failed: {te}")
+
+            except Exception as e:
+                logger.warning(f"Heartbeat update failed: {e}")
+
+            # Sleep with periodic wake for stop check
+            for _ in range(self.heartbeat_interval):
+                if self.stop_event.is_set():
+                    break
+                time.sleep(1)
+
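
Because each heartbeat is a JSON blob whose TTL is three intervals, liveness monitoring reduces to a single GET: a missing key means the runner has gone quiet. A sketch with plain redis-py; the actual key string comes from `JobKeys.runner_hb`, which this diff does not show, so the key below is hypothetical:

```python
import json
import redis

r = redis.Redis()
raw = r.get("mojo:jobs:runner:my-host-engine:hb")  # hypothetical runner_hb key
if raw is None:
    print("runner missed 3+ heartbeats (key expired); presumed dead")
else:
    hb = json.loads(raw)
    print(hb["runner_id"], hb["jobs_processed"], hb["last_heartbeat"])
```
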
+    def _start_control_listener(self):
+        """Start the control channel listener thread."""
+        self.control_thread = threading.Thread(
+            target=self._control_loop,
+            name=f"Control-{self.runner_id}",
+            daemon=True
+        )
+        self.control_thread.start()
+
+    def _control_loop(self):
+        """Control channel listener loop."""
+        control_key = self.keys.runner_ctl(self.runner_id)
+        broadcast_key = "mojo:jobs:runners:broadcast"
+        pubsub = self.redis.pubsub()
+        # Listen to runner-specific control and global broadcast control
+        pubsub.subscribe(control_key, broadcast_key)
+
+        try:
+            while self.running and not self.stop_event.is_set():
+                message = pubsub.get_message(timeout=5.0)
+                if message and message.get('type') == 'message':
+                    self._handle_control_message(message.get('data'), message.get('channel'))
+        finally:
+            pubsub.close()
+
+    def _handle_control_message(self, data: bytes, channel: Optional[str] = None):
+        """Handle a control channel message or broadcast command."""
+        try:
+            message = json.loads(data.decode('utf-8'))
+            command = message.get('command')
+
+            if command == 'ping':
+                # Respond with pong (direct control)
+                response_key = message.get('response_key')
+                if response_key:
+                    self.redis.set(response_key, 'pong', ex=5)
+                    logger.info("Responded to ping from control channel")
+
+            elif command == 'status':
+                # Broadcast status reply
+                reply_channel = message.get('reply_channel')
+                if reply_channel:
+                    reply = {
+                        'runner_id': self.runner_id,
+                        'channels': self.channels,
+                        'jobs_processed': self.jobs_processed,
+                        'jobs_failed': self.jobs_failed,
+                        'started': self.start_time.isoformat() if self.start_time else None,
+                        'timestamp': dates.utcnow().isoformat(),
+                    }
+                    try:
+                        self.redis.publish(reply_channel, json.dumps(reply))
+                    except Exception as e:
+                        logger.warning(f"Failed to publish status reply: {e}")
+
+            elif command == 'shutdown':
+                logger.info("Received shutdown command from control channel/broadcast")
+                self.stop()
+
+            else:
+                logger.warning(f"Unknown control command: {command}")
+
+        except Exception as e:
+            logger.error(f"Failed to handle control message: {e}")
+
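
Because the broadcast channel name is the literal `mojo:jobs:runners:broadcast`, any process with Redis access can drive the control plane. A minimal sketch that asks every live runner for status and prints the replies (the reply channel name is arbitrary):

```python
import json
import redis

r = redis.Redis()
p = r.pubsub()
p.subscribe("jobs-status-replies")  # arbitrary reply channel

r.publish(
    "mojo:jobs:runners:broadcast",
    json.dumps({"command": "status", "reply_channel": "jobs-status-replies"}),
)

# Each engine publishes one JSON status reply; drain for a couple of seconds.
msg = p.get_message(timeout=2.0)
while msg:
    if msg.get("type") == "message":
        print(json.loads(msg["data"]))
    msg = p.get_message(timeout=2.0)
```
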
+    def _main_loop(self):
+        """Main processing loop - claims jobs from List queues based on capacity."""
+        logger.info(f"JobEngine {self.runner_id} entering main loop (Plan B)")
+
+        while self.running and not self.stop_event.is_set():
+            try:
+                # Check available capacity
+                with self.active_lock:
+                    active_count = len(self.active_jobs)
+
+                if active_count >= self.max_claimed:
+                    time.sleep(0.1)
+                    continue
+
+                # Compose BRPOP order (priority first)
+                channels_ordered = list(self.channels)
+                if 'priority' in channels_ordered:
+                    channels_ordered = ['priority'] + [c for c in channels_ordered if c != 'priority']
+                queue_keys = [self.keys.queue(ch) for ch in channels_ordered]
+
+                # Claim one job at a time to avoid over-claiming
+                popped = self.redis.brpop(queue_keys, timeout=1)
+                if not popped:
+                    continue
+
+                queue_key, job_id = popped
+                # Determine channel from key
+                channel = queue_key.split(':')[-1]
+
+                # Track in-flight (visibility)
+                try:
+                    self.redis.zadd(self.keys.processing(channel), {job_id: int(time.time() * 1000)})
+                except Exception as e:
+                    logger.warning(f"Failed to add job {job_id} to processing ZSET: {e}")
+
+                # Submit to thread pool
+                future = self.executor.submit(
+                    self.execute_job,
+                    channel, job_id
+                )
+
+                with self.active_lock:
+                    self.active_jobs[job_id] = {
+                        'future': future,
+                        'started': dates.utcnow(),
+                        'channel': channel
+                    }
+
+                future.add_done_callback(lambda f, jid=job_id: self._job_completed(jid))
+
+            except Exception as e:
+                logger.error(f"Error in main loop: {e}")
+                time.sleep(0.5)
+
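
The priority handling leans entirely on BRPOP's multi-key semantics: Redis checks the listed keys in order and pops from the first non-empty one, so the 'priority' queue always drains first. A standalone sketch (queue key names hypothetical, following the `...:<channel>` suffix the loop parses):

```python
import redis

r = redis.Redis(decode_responses=True)
r.rpush("mojo:jobs:queue:default", "job-1")
r.rpush("mojo:jobs:queue:priority", "job-2")

# Keys are scanned left to right, so 'priority' wins even though
# 'default' was populated first.
key, job_id = r.brpop(["mojo:jobs:queue:priority", "mojo:jobs:queue:default"], timeout=1)
print(key.split(":")[-1], job_id)  # -> priority job-2
```
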
+    def claim_jobs_by_channel(self, channel: str, count: int) -> List[Tuple[str, str, str]]:
+        """Plan B: not used. Kept for compatibility."""
+        return []
+
+    def claim_jobs(self, count: int) -> List[Tuple[str, str, str]]:
+        """
+        Claim up to 'count' jobs from Redis streams.
+
+        Args:
+            count: Maximum number of jobs to claim
+
+        Returns:
+            List of (stream_key, msg_id, job_id) tuples
+        """
+        claimed = []
+        # Prioritize 'priority' channel first if present
+        channels_ordered = list(self.channels)
+        if 'priority' in channels_ordered:
+            channels_ordered = ['priority'] + [c for c in channels_ordered if c != 'priority']
+        for channel in channels_ordered:
+            if len(claimed) >= count:
+                break
+            channel_messages = self.claim_jobs_by_channel(channel, count - len(claimed))
+            claimed.extend(channel_messages)
+        return claimed
+
+    def _ack_message(self, stream_key: str, msg_id: str):
+        """Plan B: not used. Kept for compatibility."""
+        return
+
+    def execute_job(self, channel: str, job_id: str):
+        """Execute job and handle all state updates (Plan B)."""
+        job = None
+        try:
+            # Load job from database
+            close_old_connections()
+            job = Job.objects.select_for_update().get(id=job_id)
+        except Exception as e:
+            logger.error(f"Failed to load job {job_id}: {e}")
+            # Remove from processing to avoid leak
+            try:
+                self.redis.zrem(self.keys.processing(channel), job_id)
+            except Exception:
+                pass
+            return
+
+        try:
+            # Check if already processed or canceled
+            if job.status in ('completed', 'canceled'):
+                # Already finished; remove from processing if present
+                try:
+                    self.redis.zrem(self.keys.processing(channel), job_id)
+                except Exception:
+                    pass
+                return
+
+            # Check expiration
+            if job.is_expired:
+                job.status = 'expired'
+                job.finished_at = dates.utcnow()
+                job.save(update_fields=['status', 'finished_at'])
+
+                # Event: expired
+                try:
+                    JobEvent.objects.create(
+                        job=job,
+                        channel=job.channel,
+                        event='expired',
+                        runner_id=self.runner_id,
+                        attempt=job.attempt,
+                        details={'reason': 'job_expired_before_execution'}
+                    )
+                except Exception:
+                    pass
+
+                # Remove from processing after DB update
+                try:
+                    self.redis.zrem(self.keys.processing(channel), job_id)
+                except Exception:
+                    pass
+                metrics.record("jobs.expired")
+                return
+
+            # Mark as running
+            job.status = 'running'
+            job.started_at = dates.utcnow()
+            job.runner_id = self.runner_id
+            job.attempt += 1
+            job.save(update_fields=['status', 'started_at', 'runner_id', 'attempt'])
+
+            # Event: running
+            try:
+                JobEvent.objects.create(
+                    job=job,
+                    channel=job.channel,
+                    event='running',
+                    runner_id=self.runner_id,
+                    attempt=job.attempt,
+                    details={'queue': self.keys.queue(channel)}
+                )
+            except Exception:
+                pass
+
+            # Load and execute function
+            func = load_job_function(job.func)
+            func(job)
+
+            # Mark complete
+            job.status = 'completed'
+            job.finished_at = dates.utcnow()
+            job.save(update_fields=['status', 'finished_at', 'metadata'])
+            logger.info(f"Job {job.id} completed")
+            # Event: completed
+            try:
+                JobEvent.objects.create(
+                    job=job,
+                    channel=job.channel,
+                    event='completed',
+                    runner_id=self.runner_id,
+                    attempt=job.attempt,
+                    details={}
+                )
+            except Exception:
+                pass
+
+            # Remove from processing after DB update
+            try:
+                self.redis.zrem(self.keys.processing(channel), job_id)
+            except Exception:
+                pass
+
+            # Metrics
+            metrics.record("jobs.completed", count=1)
+            metrics.record(f"jobs.channel.{job.channel}.completed", count=1)
+            metrics.record("jobs.duration_ms", count=job.duration_ms)
+
+        except Exception as e:
+            try:
+                if job:
+                    job.add_log(f"Failed to complete job: {e}", kind="error")
+            except Exception:
+                pass
+            self._handle_job_failure(job_id, channel, e)
+
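
Every transition above also writes a best-effort `JobEvent` row (running, completed, expired here, plus retry and failed in the failure path), so each job carries a queryable audit trail. A minimal ORM sketch using only the fields this file populates:

```python
from mojo.apps.jobs.models import Job, JobEvent

job = Job.objects.get(id=42)  # any job id
for ev in JobEvent.objects.filter(job=job).order_by("id"):
    print(ev.event, ev.runner_id, ev.attempt, ev.details)
```
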
+    def _handle_job_failure(self, job_id: str, channel: str, error: Exception):
+        """Handle job failure with retries (Plan B)."""
+        try:
+            job = Job.objects.select_for_update().get(id=job_id)
+
+            # Record error
+            job.last_error = str(error)
+            job.stack_trace = traceback.format_exc()
+
+            # Check retry eligibility
+            if job.attempt < job.max_retries:
+                # Calculate backoff with jitter
+                backoff = min(
+                    job.backoff_base ** job.attempt,
+                    job.backoff_max_sec
+                )
+                jitter = backoff * (0.8 + random.random() * 0.4)
+
+                # Schedule retry
+                job.run_at = dates.utcnow() + timedelta(seconds=jitter)
+                job.status = 'pending'
+                job.save(update_fields=[
+                    'status', 'run_at', 'last_error', 'stack_trace'
+                ])
+
+                # Event: retry scheduled
+                try:
+                    JobEvent.objects.create(
+                        job=job,
+                        channel=job.channel,
+                        event='retry',
+                        runner_id=self.runner_id,
+                        attempt=job.attempt,
+                        details={'reason': 'failure', 'next_run_at': job.run_at.isoformat()}
+                    )
+                except Exception:
+                    pass
+
+                # Add to scheduled ZSET (route by broadcast)
+                score = job.run_at.timestamp() * 1000
+                target_zset = self.keys.sched_broadcast(job.channel) if job.broadcast else self.keys.sched(job.channel)
+                self.redis.zadd(target_zset, {job_id: score})
+
+                metrics.record("jobs.retried")
+            else:
+                # Max retries exceeded
+                job.status = 'failed'
+                job.finished_at = dates.utcnow()
+                job.save(update_fields=[
+                    'status', 'finished_at', 'last_error', 'stack_trace'
+                ])
+
+                # Event: failed
+                try:
+                    JobEvent.objects.create(
+                        job=job,
+                        channel=job.channel,
+                        event='failed',
+                        runner_id=self.runner_id,
+                        attempt=job.attempt,
+                        details={'error': job.last_error}
+                    )
+                except Exception:
+                    pass
+
+                metrics.record("jobs.failed")
+                metrics.record(f"jobs.channel.{job.channel}.failed")
+
+            # Always remove from processing to prevent leaks
+            try:
+                self.redis.zrem(self.keys.processing(channel), job_id)
+            except Exception:
+                pass
+
+        except Exception as e:
+            logger.error(f"Failed to handle job failure: {e}")
+
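
The retry delay is exponential with a cap and roughly ±20% jitter: `min(backoff_base ** attempt, backoff_max_sec)` scaled by a uniform factor in [0.8, 1.2). Worked through with illustrative values (the real defaults live on the Job model, which this diff does not include):

```python
import random

def retry_delay(attempt, backoff_base=2, backoff_max_sec=300):
    # Mirrors _handle_job_failure: exponential, capped, then jittered.
    backoff = min(backoff_base ** attempt, backoff_max_sec)
    return backoff * (0.8 + random.random() * 0.4)

for attempt in (1, 2, 5, 10):
    print(attempt, round(retry_delay(attempt), 1))
# attempt 1 -> 1.6..2.4s, 2 -> 3.2..4.8s, 5 -> 25.6..38.4s, 10 -> capped: 240..360s
```
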
+    def _job_completed(self, job_id: str):
+        """Callback when job future completes."""
+        with self.active_lock:
+            self.active_jobs.pop(job_id, None)
+            self.jobs_processed += 1
+
+    def _reaper_loop(self):
+        """Requeue stale in-flight jobs based on visibility timeout (Plan B)."""
+        while self.running and not self.stop_event.is_set():
+            try:
+                now_ms = int(time.time() * 1000)
+                cutoff = now_ms - JOBS_VISIBILITY_TIMEOUT_MS
+                for ch in self.channels:
+                    # Acquire short-lived lock to avoid duplicate requeues across engines
+                    acquired = False
+                    try:
+                        acquired = self.redis.set(self.keys.reaper_lock(ch), self.runner_id, nx=True, px=2000)
+                    except Exception as le:
+                        logger.debug(f"Reaper lock error for {ch}: {le}")
+                        acquired = False
+                    if not acquired:
+                        # Another engine is handling this channel right now
+                        continue
+                    # Fetch stale entries: claimed earlier than cutoff
+                    try:
+                        stale_ids = self.redis.zrangebyscore(self.keys.processing(ch), float("-inf"), cutoff, limit=100)
+                    except Exception as e:
+                        logger.debug(f"Reaper fetch failed for {ch}: {e}")
+                        stale_ids = []
+                    for jid in stale_ids:
+                        try:
+                            # Remove from processing and requeue
+                            self.redis.zrem(self.keys.processing(ch), jid)
+                            self.redis.rpush(self.keys.queue(ch), jid)
+                            # Add event trail (best effort)
+                            try:
+                                job = Job.objects.get(id=jid)
+                                JobEvent.objects.create(
+                                    job=job,
+                                    channel=ch,
+                                    event='retry',
+                                    runner_id=self.runner_id,
+                                    attempt=job.attempt,
+                                    details={'reason': 'reaper_timeout'}
+                                )
+                            except Exception:
+                                pass
+                            logger.info(f"Reaper requeued stale job {jid} on {ch}")
+                        except Exception as e:
+                            logger.warning(f"Reaper failed to requeue {jid} on {ch}: {e}")
+            except Exception as e:
+                logger.warning(f"Reaper loop error: {e}")
+            # Sleep a bit before next pass
+            for _ in range(5):
+                if self.stop_event.is_set():
+                    break
+                time.sleep(1)
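
The crash-recovery contract is thus just a ZSET of claim timestamps plus a periodic cutoff scan. A standalone sketch of the same pattern in plain redis-py (key names hypothetical; note that stock redis-py spells the range limit as `start`/`num`, where the adapter above accepts `limit`):

```python
import time
import redis

r = redis.Redis(decode_responses=True)
PROCESSING = "mojo:jobs:processing:default"  # hypothetical per-channel ZSET
QUEUE = "mojo:jobs:queue:default"            # hypothetical channel queue
VISIBILITY_MS = 30000

# Claim: record when the job went in flight (ms score, as in _main_loop).
r.zadd(PROCESSING, {"job-42": int(time.time() * 1000)})

# Reap: anything claimed before the cutoff is presumed orphaned.
cutoff = int(time.time() * 1000) - VISIBILITY_MS
for job_id in r.zrangebyscore(PROCESSING, "-inf", cutoff, start=0, num=100):
    r.zrem(PROCESSING, job_id)
    r.rpush(QUEUE, job_id)  # back onto the channel queue for re-execution
```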