nebu 0.1.116__tar.gz → 0.1.118__tar.gz

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (36)
  1. {nebu-0.1.116/src/nebu.egg-info → nebu-0.1.118}/PKG-INFO +1 -1
  2. {nebu-0.1.116 → nebu-0.1.118}/pyproject.toml +1 -1
  3. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/processors/consumer.py +179 -21
  4. nebu-0.1.118/src/nebu/processors/consumer_health_worker.py +262 -0
  5. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/processors/models.py +6 -0
  6. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/processors/processor.py +56 -30
  7. {nebu-0.1.116 → nebu-0.1.118/src/nebu.egg-info}/PKG-INFO +1 -1
  8. {nebu-0.1.116 → nebu-0.1.118}/src/nebu.egg-info/SOURCES.txt +1 -0
  9. {nebu-0.1.116 → nebu-0.1.118}/LICENSE +0 -0
  10. {nebu-0.1.116 → nebu-0.1.118}/README.md +0 -0
  11. {nebu-0.1.116 → nebu-0.1.118}/setup.cfg +0 -0
  12. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/__init__.py +0 -0
  13. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/auth.py +0 -0
  14. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/builders/builder.py +0 -0
  15. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/builders/models.py +0 -0
  16. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/cache.py +0 -0
  17. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/config.py +0 -0
  18. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/containers/container.py +0 -0
  19. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/containers/models.py +0 -0
  20. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/data.py +0 -0
  21. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/errors.py +0 -0
  22. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/logging.py +0 -0
  23. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/meta.py +0 -0
  24. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/namespaces/models.py +0 -0
  25. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/namespaces/namespace.py +0 -0
  26. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/orign.py +0 -0
  27. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/processors/consumer_process_worker.py +0 -0
  28. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/processors/decorate.py +0 -0
  29. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/processors/default.py +0 -0
  30. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/redis/models.py +0 -0
  31. {nebu-0.1.116 → nebu-0.1.118}/src/nebu/services/service.py +0 -0
  32. {nebu-0.1.116 → nebu-0.1.118}/src/nebu.egg-info/dependency_links.txt +0 -0
  33. {nebu-0.1.116 → nebu-0.1.118}/src/nebu.egg-info/requires.txt +0 -0
  34. {nebu-0.1.116 → nebu-0.1.118}/src/nebu.egg-info/top_level.txt +0 -0
  35. {nebu-0.1.116 → nebu-0.1.118}/tests/test_bucket.py +0 -0
  36. {nebu-0.1.116 → nebu-0.1.118}/tests/test_containers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nebu
3
- Version: 0.1.116
3
+ Version: 0.1.118
4
4
  Summary: A globally distributed container runtime
5
5
  Requires-Python: >=3.10.14
6
6
  Description-Content-Type: text/markdown
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "nebu"
3
- version = "0.1.116"
3
+ version = "0.1.118"
4
4
  description = "A globally distributed container runtime"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10.14"
@@ -33,11 +33,20 @@ local_namespace: Dict[str, Any] = {} # Namespace for included objects
33
33
  last_load_mtime: float = 0.0
34
34
  entrypoint_abs_path: Optional[str] = None
35
35
 
36
+ # Global health check subprocess
37
+ health_subprocess: Optional[subprocess.Popen] = None
38
+
36
39
  REDIS_CONSUMER_GROUP = os.environ.get("REDIS_CONSUMER_GROUP")
37
40
  REDIS_STREAM = os.environ.get("REDIS_STREAM")
38
41
  NEBU_EXECUTION_MODE = os.environ.get("NEBU_EXECUTION_MODE", "inline").lower()
39
42
  execution_mode = NEBU_EXECUTION_MODE
40
43
 
44
+ # Define health check stream and group names
45
+ REDIS_HEALTH_STREAM = f"{REDIS_STREAM}.health" if REDIS_STREAM else None
46
+ REDIS_HEALTH_CONSUMER_GROUP = (
47
+ f"{REDIS_CONSUMER_GROUP}-health" if REDIS_CONSUMER_GROUP else None
48
+ )
49
+
41
50
  if execution_mode not in ["inline", "subprocess"]:
42
51
  logger.warning(
43
52
  f"Invalid NEBU_EXECUTION_MODE: {NEBU_EXECUTION_MODE}. Must be 'inline' or 'subprocess'. Defaulting to 'inline'."
@@ -328,20 +337,34 @@ socks.set_default_proxy(socks.SOCKS5, "localhost", 1055)
328
337
  socket.socket = socks.socksocket
329
338
  logger.info("Configured SOCKS5 proxy for socket connections via localhost:1055")
330
339
 
331
- # Connect to Redis
332
- try:
333
- # Parse the Redis URL to handle potential credentials or specific DBs if needed
334
- # Although from_url should work now with the patched socket
335
- r = redis.from_url(
336
- REDIS_URL, decode_responses=True
337
- ) # Added decode_responses for convenience
338
- r.ping() # Test connection
339
- redis_info = REDIS_URL.split("@")[-1] if "@" in REDIS_URL else REDIS_URL
340
- logger.info(f"Connected to Redis via SOCKS proxy at {redis_info}")
341
- except Exception as e:
342
- logger.critical(f"Failed to connect to Redis via SOCKS proxy: {e}")
343
- logger.exception("Redis Connection Error Traceback:")
344
- sys.exit(1)
340
+ # Global Redis connection for the main consumer
341
+ r: redis.Redis # Initialized by connect_redis, which sys.exits on failure
342
+
343
+
344
+ # --- Connect to Redis (Main Consumer) ---
345
+ def connect_redis(redis_url: str) -> redis.Redis:
346
+ """Connects to Redis and returns the connection object."""
347
+ try:
348
+ # Parse the Redis URL to handle potential credentials or specific DBs if needed
349
+ # Although from_url should work now with the patched socket
350
+ logger.info(
351
+ f"Attempting to connect to Redis at {redis_url.split('@')[-1] if '@' in redis_url else redis_url}"
352
+ )
353
+ conn = redis.from_url(
354
+ redis_url, decode_responses=True
355
+ ) # Added decode_responses for convenience
356
+ conn.ping() # Test connection
357
+ redis_info = redis_url.split("@")[-1] if "@" in redis_url else redis_url
358
+ logger.info(f"Connected to Redis via SOCKS proxy at {redis_info}")
359
+ return conn
360
+ except Exception as e:
361
+ logger.critical(f"Failed to connect to Redis via SOCKS proxy: {e}")
362
+ logger.exception("Redis Connection Error Traceback:")
363
+ sys.exit(1)
364
+
365
+
366
+ r = connect_redis(REDIS_URL)
367
+
345
368
 
346
369
  # Create consumer group if it doesn't exist
347
370
  try:
@@ -360,6 +383,103 @@ except ResponseError as e:
360
383
  logger.exception("Consumer Group Creation Error Traceback:")
361
384
 
362
385
 
386
+ # --- Health Check Subprocess Management ---
387
+ def start_health_check_subprocess() -> Optional[subprocess.Popen]:
388
+ """Start the health check consumer subprocess."""
389
+ global REDIS_HEALTH_STREAM, REDIS_HEALTH_CONSUMER_GROUP
390
+
391
+ if not all([REDIS_URL, REDIS_HEALTH_STREAM, REDIS_HEALTH_CONSUMER_GROUP]):
392
+ logger.warning(
393
+ "[Consumer] Health check stream not configured. Health consumer subprocess not started."
394
+ )
395
+ return None
396
+
397
+ try:
398
+ # Type assertions to ensure variables are strings before using them
399
+ assert isinstance(REDIS_HEALTH_STREAM, str)
400
+ assert isinstance(REDIS_HEALTH_CONSUMER_GROUP, str)
401
+
402
+ # Prepare environment variables for the subprocess
403
+ health_env = os.environ.copy()
404
+ health_env["REDIS_HEALTH_STREAM"] = REDIS_HEALTH_STREAM
405
+ health_env["REDIS_HEALTH_CONSUMER_GROUP"] = REDIS_HEALTH_CONSUMER_GROUP
406
+
407
+ # Start the health check worker subprocess
408
+ health_cmd = [
409
+ sys.executable,
410
+ "-u", # Force unbuffered stdout/stderr
411
+ "-m",
412
+ "nebu.processors.consumer_health_worker",
413
+ ]
414
+
415
+ process = subprocess.Popen(
416
+ health_cmd,
417
+ stdout=subprocess.PIPE,
418
+ stderr=subprocess.STDOUT, # Combine stderr with stdout
419
+ text=True,
420
+ encoding="utf-8",
421
+ env=health_env,
422
+ bufsize=1, # Line buffered
423
+ )
424
+
425
+ logger.info(
426
+ f"[Consumer] Health check subprocess started with PID {process.pid}"
427
+ )
428
+ return process
429
+
430
+ except Exception as e:
431
+ logger.error(f"[Consumer] Failed to start health check subprocess: {e}")
432
+ logger.exception("Health Subprocess Start Error Traceback:")
433
+ return None
434
+
435
+
436
+ def monitor_health_subprocess(process: subprocess.Popen) -> None:
437
+ """Monitor the health check subprocess and log its output."""
438
+ try:
439
+ # Read output from the subprocess
440
+ if process.stdout:
441
+ for line in iter(process.stdout.readline, ""):
442
+ logger.info(f"[HealthSubprocess] {line.strip()}")
443
+ process.stdout.close() if process.stdout else None
444
+ except Exception as e:
445
+ logger.error(f"[Consumer] Error monitoring health subprocess: {e}")
446
+
447
+
448
+ def check_health_subprocess() -> bool:
449
+ """Check if the health subprocess is still running and restart if needed."""
450
+ global health_subprocess
451
+
452
+ if health_subprocess is None:
453
+ return False
454
+
455
+ # Check if process is still running
456
+ if health_subprocess.poll() is None:
457
+ return True # Still running
458
+
459
+ # Process has exited
460
+ exit_code = health_subprocess.returncode
461
+ logger.warning(
462
+ f"[Consumer] Health subprocess exited with code {exit_code}. Restarting..."
463
+ )
464
+
465
+ # Start a new health subprocess
466
+ health_subprocess = start_health_check_subprocess()
467
+
468
+ if health_subprocess:
469
+ # Start monitoring thread for the new subprocess
470
+ monitor_thread = threading.Thread(
471
+ target=monitor_health_subprocess, args=(health_subprocess,), daemon=True
472
+ )
473
+ monitor_thread.start()
474
+ logger.info(
475
+ "[Consumer] Health subprocess restarted and monitoring thread started."
476
+ )
477
+ return True
478
+ else:
479
+ logger.error("[Consumer] Failed to restart health subprocess.")
480
+ return False
481
+
482
+
363
483
  # Function to process messages
364
484
  def process_message(message_id: str, message_data: Dict[str, str]) -> None:
365
485
  # Access the globally managed user code elements
@@ -1088,11 +1208,35 @@ logger.info(
1088
1208
  f"[Consumer] Hot code reloading is {'DISABLED' if disable_hot_reload else 'ENABLED'}."
1089
1209
  )
1090
1210
 
1211
+ # Start the health check consumer subprocess
1212
+ if REDIS_HEALTH_STREAM and REDIS_HEALTH_CONSUMER_GROUP:
1213
+ health_subprocess = start_health_check_subprocess()
1214
+ if health_subprocess:
1215
+ # Start monitoring thread for subprocess output
1216
+ monitor_thread = threading.Thread(
1217
+ target=monitor_health_subprocess, args=(health_subprocess,), daemon=True
1218
+ )
1219
+ monitor_thread.start()
1220
+ logger.info(
1221
+ f"[Consumer] Health check subprocess for {REDIS_HEALTH_STREAM} started and monitoring thread started."
1222
+ )
1223
+ else:
1224
+ logger.error("[Consumer] Failed to start health check subprocess.")
1225
+ else:
1226
+ logger.warning(
1227
+ "[Consumer] Health check stream not configured. Health consumer subprocess not started."
1228
+ )
1229
+
1091
1230
  try:
1092
1231
  while True:
1093
1232
  logger.debug(
1094
1233
  f"[{datetime.now(timezone.utc).isoformat()}] --- Top of main loop ---"
1095
1234
  ) # Added log
1235
+
1236
+ # --- Check Health Subprocess Status ---
1237
+ if health_subprocess:
1238
+ check_health_subprocess()
1239
+
1096
1240
  # --- Check for Code Updates ---
1097
1241
  if not disable_hot_reload:
1098
1242
  logger.debug(
@@ -1356,18 +1500,21 @@ except ConnectionError as e:
1356
1500
  # Attempt to reconnect explicitly
1357
1501
  try:
1358
1502
  logger.info("Attempting Redis reconnection...")
1359
- # Close existing potentially broken connection? `r.close()` if available
1360
- r = redis.from_url(REDIS_URL, decode_responses=True)
1361
- r.ping()
1503
+ # Close existing potentially broken connection?
1504
+ if r: # Check if r was initialized
1505
+ try:
1506
+ r.close()
1507
+ except Exception:
1508
+ pass # Ignore errors during close
1509
+ r = connect_redis(REDIS_URL) # connect_redis will sys.exit on failure
1362
1510
  logger.info("Reconnected to Redis.")
1363
- except Exception as recon_e:
1511
+ except Exception as recon_e: # Should not be reached if connect_redis exits
1364
1512
  logger.error(f"Failed to reconnect to Redis: {recon_e}")
1365
- # Keep waiting
1366
1513
 
1367
1514
  except ResponseError as e:
1368
1515
  logger.error(f"Redis command error: {e}")
1369
1516
  # Should we exit or retry?
1370
- if "NOGROUP" in str(e):
1517
+ if r and "NOGROUP" in str(e): # Check if r is initialized
1371
1518
  logger.critical("Consumer group seems to have disappeared. Exiting.")
1372
1519
  sys.exit(1)
1373
1520
  time.sleep(1)
@@ -1379,4 +1526,15 @@ except Exception as e:
1379
1526
 
1380
1527
  finally:
1381
1528
  logger.info("Consumer loop exited.")
1382
- # Any other cleanup needed?
1529
+ # Cleanup health subprocess
1530
+ if health_subprocess and health_subprocess.poll() is None:
1531
+ logger.info("[Consumer] Terminating health check subprocess...")
1532
+ health_subprocess.terminate()
1533
+ try:
1534
+ health_subprocess.wait(timeout=5)
1535
+ except subprocess.TimeoutExpired:
1536
+ logger.warning(
1537
+ "[Consumer] Health subprocess did not terminate gracefully, killing it."
1538
+ )
1539
+ health_subprocess.kill()
1540
+ logger.info("[Consumer] Health subprocess cleanup complete.")
@@ -0,0 +1,262 @@
1
+ #!/usr/bin/env python3
2
+ import json
3
+ import logging
4
+ import os
5
+ import socket
6
+ import sys
7
+ import time
8
+ from typing import Any, Dict, List, Optional, Tuple, cast
9
+
10
+ import redis
11
+ import socks
12
+ from redis import ConnectionError, ResponseError
13
+ from redis.exceptions import TimeoutError as RedisTimeoutError
14
+
15
+ # Assuming these are imported from other modules
16
+ from nebu.processors.models import V1ProcessorHealthResponse
17
+
18
+
19
+ def setup_health_logging():
20
+ """Set up logging for the health check worker to write to a dedicated file."""
21
+ # Create logs directory if it doesn't exist
22
+ log_dir = os.path.join(os.getcwd(), "logs")
23
+ os.makedirs(log_dir, exist_ok=True)
24
+
25
+ # Create log file path with timestamp
26
+ log_file = os.path.join(log_dir, f"health_consumer_{os.getpid()}.log")
27
+
28
+ # Configure logging
29
+ logging.basicConfig(
30
+ level=logging.INFO,
31
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
32
+ handlers=[
33
+ logging.FileHandler(log_file),
34
+ logging.StreamHandler(
35
+ sys.stdout
36
+ ), # Also log to stdout for subprocess monitoring
37
+ ],
38
+ )
39
+
40
+ logger = logging.getLogger("HealthConsumer")
41
+ logger.info(f"Health check worker started. Logging to: {log_file}")
42
+ return logger
43
+
44
+
45
+ def process_health_check_message(
46
+ message_id: str,
47
+ message_data: Dict[str, str],
48
+ redis_conn: redis.Redis,
49
+ logger: logging.Logger,
50
+ health_stream: str,
51
+ health_group: str,
52
+ ) -> None:
53
+ """Processes a single health check message."""
54
+ logger.info(f"Processing health check message {message_id}: {message_data}")
55
+
56
+ health_status = "ok"
57
+ health_message: Optional[str] = "Health check processed successfully."
58
+ details: Optional[Dict[str, Any]] = None
59
+ return_stream: Optional[str] = None
60
+
61
+ try:
62
+ if "data" in message_data:
63
+ data = json.loads(message_data["data"])
64
+ logger.info(f"Health check data: {data}")
65
+ # Example: Extract return_stream if present in the health check data
66
+ return_stream = data.get("return_stream")
67
+ # Example: Update details if provided
68
+ if "check_details" in data:
69
+ details = {"processed_details": data["check_details"]}
70
+
71
+ except (json.JSONDecodeError, KeyError) as e:
72
+ logger.warning(f"Could not parse health check message data: {e}")
73
+ health_status = "error"
74
+ health_message = f"Failed to parse health check message data: {e}"
75
+ details = {"error": str(e)}
76
+
77
+ # Construct the health response
78
+ health_response = V1ProcessorHealthResponse(
79
+ status=health_status, message=health_message, details=details
80
+ )
81
+
82
+ logger.info(
83
+ f"Health response for message {message_id}: {health_response.model_dump_json()}"
84
+ )
85
+
86
+ # If a return_stream is specified, send the response there
87
+ if return_stream:
88
+ try:
89
+ # It's good practice to set a maxlen for the return stream to prevent it from growing indefinitely
90
+ redis_conn.xadd(
91
+ return_stream,
92
+ health_response.model_dump(), # type: ignore[arg-type]
93
+ maxlen=1000,
94
+ approximate=True,
95
+ )
96
+ logger.info(
97
+ f"Sent health response for {message_id} to stream: {return_stream}"
98
+ )
99
+ except Exception as e_resp_send:
100
+ logger.error(
101
+ f"Failed to send health response for {message_id} to stream {return_stream}: {e_resp_send}"
102
+ )
103
+
104
+ # Acknowledge the health check message
105
+ try:
106
+ redis_conn.xack(health_stream, health_group, message_id)
107
+ logger.info(f"Acknowledged health check message {message_id}")
108
+ except Exception as e_ack:
109
+ logger.error(
110
+ f"Failed to acknowledge health check message {message_id}: {e_ack}"
111
+ )
112
+
113
+
114
+ def main():
115
+ """Main function for the health check consumer subprocess."""
116
+ logger = setup_health_logging()
117
+
118
+ # Get environment variables
119
+ redis_url = os.environ.get("REDIS_URL")
120
+ health_stream = os.environ.get("REDIS_HEALTH_STREAM")
121
+ health_group = os.environ.get("REDIS_HEALTH_CONSUMER_GROUP")
122
+
123
+ if not all([redis_url, health_stream, health_group]):
124
+ logger.error(
125
+ "Missing required environment variables: REDIS_URL, REDIS_HEALTH_STREAM, REDIS_HEALTH_CONSUMER_GROUP"
126
+ )
127
+ sys.exit(1)
128
+
129
+ # Type assertions after validation
130
+ assert isinstance(redis_url, str)
131
+ assert isinstance(health_stream, str)
132
+ assert isinstance(health_group, str)
133
+
134
+ logger.info(
135
+ f"Starting health consumer for stream: {health_stream}, group: {health_group}"
136
+ )
137
+
138
+ # Configure SOCKS proxy
139
+ socks.set_default_proxy(socks.SOCKS5, "localhost", 1055)
140
+ socket.socket = socks.socksocket
141
+ logger.info("Configured SOCKS5 proxy for socket connections via localhost:1055")
142
+
143
+ health_redis_conn: Optional[redis.Redis] = None
144
+ health_consumer_name = f"health-consumer-{os.getpid()}-{socket.gethostname()}"
145
+
146
+ while True:
147
+ try:
148
+ if health_redis_conn is None:
149
+ logger.info("Connecting to Redis for health stream...")
150
+ health_redis_conn = redis.from_url(redis_url, decode_responses=True)
151
+ health_redis_conn.ping()
152
+ logger.info("Connected to Redis for health stream.")
153
+
154
+ # Create health consumer group if it doesn't exist
155
+ try:
156
+ health_redis_conn.xgroup_create(
157
+ health_stream, health_group, id="0", mkstream=True
158
+ )
159
+ logger.info(
160
+ f"Created consumer group {health_group} for stream {health_stream}"
161
+ )
162
+ except ResponseError as e_group:
163
+ if "BUSYGROUP" in str(e_group):
164
+ logger.info(f"Consumer group {health_group} already exists.")
165
+ else:
166
+ logger.error(f"Error creating health consumer group: {e_group}")
167
+ time.sleep(5)
168
+ health_redis_conn = None
169
+ continue
170
+ except Exception as e_group_other:
171
+ logger.error(
172
+ f"Unexpected error creating health consumer group: {e_group_other}"
173
+ )
174
+ time.sleep(5)
175
+ health_redis_conn = None
176
+ continue
177
+
178
+ # Read from health stream
179
+ assert health_redis_conn is not None
180
+
181
+ health_streams_arg: Dict[str, object] = {health_stream: ">"}
182
+ raw_messages = health_redis_conn.xreadgroup(
183
+ health_group,
184
+ health_consumer_name,
185
+ health_streams_arg, # type: ignore[arg-type]
186
+ count=1,
187
+ block=5000, # Block for 5 seconds
188
+ )
189
+
190
+ if raw_messages:
191
+ # Cast to expected type for decode_responses=True
192
+ messages = cast(
193
+ List[Tuple[str, List[Tuple[str, Dict[str, str]]]]], raw_messages
194
+ )
195
+ for _stream_name, stream_messages in messages:
196
+ for message_id, message_data in stream_messages:
197
+ process_health_check_message(
198
+ message_id,
199
+ message_data,
200
+ health_redis_conn,
201
+ logger,
202
+ health_stream,
203
+ health_group,
204
+ )
205
+
206
+ except (ConnectionError, RedisTimeoutError, TimeoutError) as e_conn:
207
+ logger.error(f"Redis connection error: {e_conn}. Reconnecting in 5s...")
208
+ if health_redis_conn:
209
+ try:
210
+ health_redis_conn.close()
211
+ except Exception:
212
+ pass
213
+ health_redis_conn = None
214
+ time.sleep(5)
215
+
216
+ except ResponseError as e_resp:
217
+ logger.error(f"Redis response error: {e_resp}")
218
+ if "NOGROUP" in str(e_resp):
219
+ logger.warning(
220
+ "Health consumer group disappeared. Attempting to recreate..."
221
+ )
222
+ if health_redis_conn:
223
+ try:
224
+ health_redis_conn.close()
225
+ except Exception:
226
+ pass
227
+ health_redis_conn = None
228
+ elif "UNBLOCKED" in str(e_resp):
229
+ logger.info(
230
+ "XREADGROUP unblocked, connection might have been closed. Reconnecting."
231
+ )
232
+ if health_redis_conn:
233
+ try:
234
+ health_redis_conn.close()
235
+ except Exception:
236
+ pass
237
+ health_redis_conn = None
238
+ time.sleep(1)
239
+ else:
240
+ time.sleep(5)
241
+
242
+ except KeyboardInterrupt:
243
+ logger.info("Received interrupt signal. Shutting down health consumer...")
244
+ break
245
+
246
+ except Exception as e:
247
+ logger.error(f"Unexpected error in health check consumer: {e}")
248
+ logger.exception("Traceback:")
249
+ time.sleep(5)
250
+
251
+ # Cleanup
252
+ if health_redis_conn:
253
+ try:
254
+ health_redis_conn.close()
255
+ except Exception:
256
+ pass
257
+
258
+ logger.info("Health check consumer shutdown complete.")
259
+
260
+
261
+ if __name__ == "__main__":
262
+ main()
@@ -149,3 +149,9 @@ class V1OpenAIStreamResponse(BaseModel):
149
149
  content: Any # Using Any for ChatCompletionResponse
150
150
  created_at: int
151
151
  user_id: Optional[str] = None
152
+
153
+
154
+ class V1ProcessorHealthResponse(BaseModel):
155
+ status: str
156
+ message: Optional[str] = None
157
+ details: Optional[Any] = None
@@ -1,7 +1,8 @@
1
1
  import json
2
2
  import threading
3
3
  import time
4
- import uuid
4
+
5
+ # import uuid # Removed unused import
5
6
  from typing import (
6
7
  Any,
7
8
  Dict,
@@ -23,6 +24,7 @@ from nebu.meta import V1ResourceMetaRequest, V1ResourceReference
23
24
  from nebu.processors.models import (
24
25
  V1ContainerRequest,
25
26
  V1Processor,
27
+ V1ProcessorHealthResponse,
26
28
  V1ProcessorRequest,
27
29
  V1Processors,
28
30
  V1ProcessorScaleRequest,
@@ -246,7 +248,7 @@ class Processor(Generic[InputType, OutputType]):
246
248
 
247
249
  # --- Wait for health check if requested ---
248
250
  if wait_for_healthy:
249
- self.wait_for_health_check()
251
+ self.wait_for_healthy()
250
252
 
251
253
  def __call__(
252
254
  self,
@@ -315,6 +317,9 @@ class Processor(Generic[InputType, OutputType]):
315
317
  if "error" in raw_response_json:
316
318
  raise Exception(raw_response_json["error"])
317
319
 
320
+ if "status" in raw_response_json:
321
+ return raw_response_json
322
+
318
323
  raw_content = raw_response_json.get("content")
319
324
  logger.debug(f">>> Raw content: {raw_content}")
320
325
 
@@ -556,11 +561,11 @@ class Processor(Generic[InputType, OutputType]):
556
561
  else:
557
562
  logger.info(f"No active log stream to stop for {self.name}.")
558
563
 
559
- def wait_for_health_check(
564
+ def wait_for_healthy(
560
565
  self, timeout: float = 3600.0, retry_interval: float = 5.0
561
566
  ) -> None:
562
567
  """
563
- Wait for the processor to respond to health checks.
568
+ Wait for the processor to respond to health checks using the health endpoint.
564
569
 
565
570
  Args:
566
571
  timeout: Maximum time to wait for health check in seconds
@@ -570,40 +575,25 @@ class Processor(Generic[InputType, OutputType]):
570
575
  raise ValueError("Processor not found, cannot perform health check")
571
576
 
572
577
  logger.info(
573
- f"Waiting for processor {self.processor.metadata.name} to be healthy..."
578
+ f"Waiting for processor {self.processor.metadata.name} to be healthy via health endpoint..."
574
579
  )
575
580
 
576
581
  start_time = time.time()
577
582
  while time.time() - start_time < timeout:
578
583
  try:
579
- # Create a health check message
580
- health_check_data = {
581
- "kind": "HealthCheck",
582
- "id": str(uuid.uuid4()),
583
- "content": {},
584
- "created_at": time.time(),
585
- }
586
-
587
- # Send health check and wait for response
588
- response = self.send(
589
- data=health_check_data, # type: ignore[arg-type]
590
- wait=True,
591
- timeout=30.0, # Short timeout for individual health check
584
+ health_response = self.health() # Use the new health() method
585
+ logger.info(
586
+ f">>> Health check response: {health_response.model_dump_json()}"
592
587
  )
593
- logger.info(f">>> Health check response: {response}")
594
588
 
595
589
  # Check if the response indicates health
596
- if response and isinstance(response, dict):
597
- status = response.get("status")
598
- if status == "healthy":
599
- logger.info(
600
- f"Processor {self.processor.metadata.name} is healthy!"
601
- )
602
- return
603
-
604
- logger.info(
605
- f"Health check attempt failed, retrying in {retry_interval}s..."
606
- )
590
+ if health_response.status == "ok": # Check for "ok" status
591
+ logger.info(f"Processor {self.processor.metadata.name} is healthy!")
592
+ return
593
+ else:
594
+ logger.info(
595
+ f"Processor {self.processor.metadata.name} reported status: {health_response.status}. Retrying in {retry_interval}s..."
596
+ )
607
597
 
608
598
  except Exception as e:
609
599
  logger.info(
@@ -616,3 +606,39 @@ class Processor(Generic[InputType, OutputType]):
616
606
  raise TimeoutError(
617
607
  f"Processor {self.processor.metadata.name} failed to become healthy within {timeout} seconds"
618
608
  )
609
+
610
+ def health(self) -> V1ProcessorHealthResponse:
611
+ """
612
+ Performs a health check on the processor by calling the health endpoint.
613
+ """
614
+ if (
615
+ not self.processor
616
+ or not self.processor.metadata.name
617
+ or not self.processor.metadata.namespace
618
+ ):
619
+ raise ValueError(
620
+ "Processor not found or missing metadata (name/namespace), cannot perform health check."
621
+ )
622
+
623
+ health_url = f"{self.orign_host}/v1/processors/{self.processor.metadata.namespace}/{self.processor.metadata.name}/health"
624
+ logger.debug(f"Calling health check endpoint: {health_url}")
625
+
626
+ try:
627
+ response = requests.get(
628
+ health_url,
629
+ headers={"Authorization": f"Bearer {self.api_key}"},
630
+ timeout=30.0, # Standard timeout for a health check
631
+ )
632
+ response.raise_for_status() # Raise an exception for HTTP errors
633
+ health_response_data = response.json()
634
+ return V1ProcessorHealthResponse.model_validate(health_response_data)
635
+ except requests.exceptions.RequestException as e:
636
+ logger.error(f"Health check request to {health_url} failed: {e}")
637
+ # Optionally, return a V1ProcessorHealthResponse indicating an error
638
+ # For now, re-raising the exception or a custom one might be better
639
+ raise RuntimeError(f"Failed to get health status: {e}") from e
640
+ except Exception as e:
641
+ logger.error(f"An unexpected error occurred during health check: {e}")
642
+ raise RuntimeError(
643
+ f"Unexpected error during health status retrieval: {e}"
644
+ ) from e
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nebu
3
- Version: 0.1.116
3
+ Version: 0.1.118
4
4
  Summary: A globally distributed container runtime
5
5
  Requires-Python: >=3.10.14
6
6
  Description-Content-Type: text/markdown
@@ -22,6 +22,7 @@ src/nebu/containers/models.py
22
22
  src/nebu/namespaces/models.py
23
23
  src/nebu/namespaces/namespace.py
24
24
  src/nebu/processors/consumer.py
25
+ src/nebu/processors/consumer_health_worker.py
25
26
  src/nebu/processors/consumer_process_worker.py
26
27
  src/nebu/processors/decorate.py
27
28
  src/nebu/processors/default.py
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes