nebu 0.1.116__tar.gz → 0.1.118__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nebu-0.1.116/src/nebu.egg-info → nebu-0.1.118}/PKG-INFO +1 -1
- {nebu-0.1.116 → nebu-0.1.118}/pyproject.toml +1 -1
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/processors/consumer.py +179 -21
- nebu-0.1.118/src/nebu/processors/consumer_health_worker.py +262 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/processors/models.py +6 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/processors/processor.py +56 -30
- {nebu-0.1.116 → nebu-0.1.118/src/nebu.egg-info}/PKG-INFO +1 -1
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu.egg-info/SOURCES.txt +1 -0
- {nebu-0.1.116 → nebu-0.1.118}/LICENSE +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/README.md +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/setup.cfg +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/__init__.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/auth.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/builders/builder.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/builders/models.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/cache.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/config.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/containers/container.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/containers/models.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/data.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/errors.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/logging.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/meta.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/namespaces/models.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/namespaces/namespace.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/orign.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/processors/consumer_process_worker.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/processors/decorate.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/processors/default.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/redis/models.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu/services/service.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu.egg-info/dependency_links.txt +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu.egg-info/requires.txt +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/src/nebu.egg-info/top_level.txt +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/tests/test_bucket.py +0 -0
- {nebu-0.1.116 → nebu-0.1.118}/tests/test_containers.py +0 -0
@@ -33,11 +33,20 @@ local_namespace: Dict[str, Any] = {} # Namespace for included objects
|
|
33
33
|
last_load_mtime: float = 0.0
|
34
34
|
entrypoint_abs_path: Optional[str] = None
|
35
35
|
|
36
|
+
# Global health check subprocess
|
37
|
+
health_subprocess: Optional[subprocess.Popen] = None
|
38
|
+
|
36
39
|
REDIS_CONSUMER_GROUP = os.environ.get("REDIS_CONSUMER_GROUP")
|
37
40
|
REDIS_STREAM = os.environ.get("REDIS_STREAM")
|
38
41
|
NEBU_EXECUTION_MODE = os.environ.get("NEBU_EXECUTION_MODE", "inline").lower()
|
39
42
|
execution_mode = NEBU_EXECUTION_MODE
|
40
43
|
|
44
|
+
# Define health check stream and group names
|
45
|
+
REDIS_HEALTH_STREAM = f"{REDIS_STREAM}.health" if REDIS_STREAM else None
|
46
|
+
REDIS_HEALTH_CONSUMER_GROUP = (
|
47
|
+
f"{REDIS_CONSUMER_GROUP}-health" if REDIS_CONSUMER_GROUP else None
|
48
|
+
)
|
49
|
+
|
41
50
|
if execution_mode not in ["inline", "subprocess"]:
|
42
51
|
logger.warning(
|
43
52
|
f"Invalid NEBU_EXECUTION_MODE: {NEBU_EXECUTION_MODE}. Must be 'inline' or 'subprocess'. Defaulting to 'inline'."
|
@@ -328,20 +337,34 @@ socks.set_default_proxy(socks.SOCKS5, "localhost", 1055)
|
|
328
337
|
socket.socket = socks.socksocket
|
329
338
|
logger.info("Configured SOCKS5 proxy for socket connections via localhost:1055")
|
330
339
|
|
331
|
-
#
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
340
|
+
# Global Redis connection for the main consumer
|
341
|
+
r: redis.Redis # Initialized by connect_redis, which sys.exits on failure
|
342
|
+
|
343
|
+
|
344
|
+
# --- Connect to Redis (Main Consumer) ---
|
345
|
+
def connect_redis(redis_url: str) -> redis.Redis:
    """Connect to Redis and return the client; exits the process on failure."""
    # Redact credentials (anything before '@') when logging the target.
    target = redis_url.split("@")[-1] if "@" in redis_url else redis_url
    try:
        logger.info(f"Attempting to connect to Redis at {target}")
        client = redis.from_url(redis_url, decode_responses=True)
        client.ping()  # fail fast if the server is unreachable
        logger.info(f"Connected to Redis via SOCKS proxy at {target}")
        return client
    except Exception as e:
        logger.critical(f"Failed to connect to Redis via SOCKS proxy: {e}")
        logger.exception("Redis Connection Error Traceback:")
        sys.exit(1)
|
364
|
+
|
365
|
+
|
366
|
+
r = connect_redis(REDIS_URL)
|
367
|
+
|
345
368
|
|
346
369
|
# Create consumer group if it doesn't exist
|
347
370
|
try:
|
@@ -360,6 +383,103 @@ except ResponseError as e:
|
|
360
383
|
logger.exception("Consumer Group Creation Error Traceback:")
|
361
384
|
|
362
385
|
|
386
|
+
# --- Health Check Subprocess Management ---
|
387
|
+
def start_health_check_subprocess() -> Optional[subprocess.Popen]:
    """Launch the dedicated health check consumer as a child process.

    Returns the Popen handle on success, or None when the health stream is
    not configured or the launch fails.
    """
    global REDIS_HEALTH_STREAM, REDIS_HEALTH_CONSUMER_GROUP

    # Without a configured URL/stream/group there is nothing to consume.
    if not all([REDIS_URL, REDIS_HEALTH_STREAM, REDIS_HEALTH_CONSUMER_GROUP]):
        logger.warning(
            "[Consumer] Health check stream not configured. Health consumer subprocess not started."
        )
        return None

    try:
        # Narrow Optional[str] -> str before placing the values in the env.
        assert isinstance(REDIS_HEALTH_STREAM, str)
        assert isinstance(REDIS_HEALTH_CONSUMER_GROUP, str)

        # The worker inherits our environment plus the health stream settings.
        worker_env = os.environ.copy()
        worker_env.update(
            REDIS_HEALTH_STREAM=REDIS_HEALTH_STREAM,
            REDIS_HEALTH_CONSUMER_GROUP=REDIS_HEALTH_CONSUMER_GROUP,
        )

        # Run the worker module with -u so its log lines arrive unbuffered.
        worker = subprocess.Popen(
            [sys.executable, "-u", "-m", "nebu.processors.consumer_health_worker"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # interleave stderr with stdout
            text=True,
            encoding="utf-8",
            env=worker_env,
            bufsize=1,  # line buffered
        )

        logger.info(
            f"[Consumer] Health check subprocess started with PID {worker.pid}"
        )
        return worker

    except Exception as e:
        logger.error(f"[Consumer] Failed to start health check subprocess: {e}")
        logger.exception("Health Subprocess Start Error Traceback:")
        return None
434
|
+
|
435
|
+
|
436
|
+
def monitor_health_subprocess(process: subprocess.Popen) -> None:
    """Forward the health subprocess's stdout to the consumer's logger.

    Blocks reading the child's stdout line by line until the pipe reaches
    EOF (i.e. the child exits), so this is intended to run on a daemon
    thread.

    Args:
        process: The health worker handle; expected to have been started
            with ``stdout=subprocess.PIPE`` and ``text=True``.
    """
    try:
        if process.stdout:
            try:
                for line in iter(process.stdout.readline, ""):
                    logger.info(f"[HealthSubprocess] {line.strip()}")
            finally:
                # Always release the pipe, even if logging a line raises.
                # (The original used a conditional *expression* as a
                # statement and skipped the close on mid-loop errors.)
                process.stdout.close()
    except Exception as e:
        logger.error(f"[Consumer] Error monitoring health subprocess: {e}")
|
446
|
+
|
447
|
+
|
448
|
+
def check_health_subprocess() -> bool:
    """Return True while the health subprocess is alive; restart it if it died.

    Mutates the module-level ``health_subprocess`` handle when a restart
    is attempted.
    """
    global health_subprocess

    # Never started (or a previous restart failed): nothing to watch.
    if health_subprocess is None:
        return False

    # poll() returning None means the child is still alive.
    if health_subprocess.poll() is None:
        return True

    exit_code = health_subprocess.returncode
    logger.warning(
        f"[Consumer] Health subprocess exited with code {exit_code}. Restarting..."
    )

    # Replace the dead child with a freshly launched one.
    health_subprocess = start_health_check_subprocess()

    if not health_subprocess:
        logger.error("[Consumer] Failed to restart health subprocess.")
        return False

    # Re-attach a daemon thread forwarding the new child's output to our log.
    threading.Thread(
        target=monitor_health_subprocess, args=(health_subprocess,), daemon=True
    ).start()
    logger.info(
        "[Consumer] Health subprocess restarted and monitoring thread started."
    )
    return True
|
481
|
+
|
482
|
+
|
363
483
|
# Function to process messages
|
364
484
|
def process_message(message_id: str, message_data: Dict[str, str]) -> None:
|
365
485
|
# Access the globally managed user code elements
|
@@ -1088,11 +1208,35 @@ logger.info(
|
|
1088
1208
|
f"[Consumer] Hot code reloading is {'DISABLED' if disable_hot_reload else 'ENABLED'}."
|
1089
1209
|
)
|
1090
1210
|
|
1211
|
+
# Start the health check consumer subprocess
|
1212
|
+
if REDIS_HEALTH_STREAM and REDIS_HEALTH_CONSUMER_GROUP:
|
1213
|
+
health_subprocess = start_health_check_subprocess()
|
1214
|
+
if health_subprocess:
|
1215
|
+
# Start monitoring thread for subprocess output
|
1216
|
+
monitor_thread = threading.Thread(
|
1217
|
+
target=monitor_health_subprocess, args=(health_subprocess,), daemon=True
|
1218
|
+
)
|
1219
|
+
monitor_thread.start()
|
1220
|
+
logger.info(
|
1221
|
+
f"[Consumer] Health check subprocess for {REDIS_HEALTH_STREAM} started and monitoring thread started."
|
1222
|
+
)
|
1223
|
+
else:
|
1224
|
+
logger.error("[Consumer] Failed to start health check subprocess.")
|
1225
|
+
else:
|
1226
|
+
logger.warning(
|
1227
|
+
"[Consumer] Health check stream not configured. Health consumer subprocess not started."
|
1228
|
+
)
|
1229
|
+
|
1091
1230
|
try:
|
1092
1231
|
while True:
|
1093
1232
|
logger.debug(
|
1094
1233
|
f"[{datetime.now(timezone.utc).isoformat()}] --- Top of main loop ---"
|
1095
1234
|
) # Added log
|
1235
|
+
|
1236
|
+
# --- Check Health Subprocess Status ---
|
1237
|
+
if health_subprocess:
|
1238
|
+
check_health_subprocess()
|
1239
|
+
|
1096
1240
|
# --- Check for Code Updates ---
|
1097
1241
|
if not disable_hot_reload:
|
1098
1242
|
logger.debug(
|
@@ -1356,18 +1500,21 @@ except ConnectionError as e:
|
|
1356
1500
|
# Attempt to reconnect explicitly
|
1357
1501
|
try:
|
1358
1502
|
logger.info("Attempting Redis reconnection...")
|
1359
|
-
# Close existing potentially broken connection?
|
1360
|
-
r
|
1361
|
-
|
1503
|
+
# Close existing potentially broken connection?
|
1504
|
+
if r: # Check if r was initialized
|
1505
|
+
try:
|
1506
|
+
r.close()
|
1507
|
+
except Exception:
|
1508
|
+
pass # Ignore errors during close
|
1509
|
+
r = connect_redis(REDIS_URL) # connect_redis will sys.exit on failure
|
1362
1510
|
logger.info("Reconnected to Redis.")
|
1363
|
-
except Exception as recon_e:
|
1511
|
+
except Exception as recon_e: # Should not be reached if connect_redis exits
|
1364
1512
|
logger.error(f"Failed to reconnect to Redis: {recon_e}")
|
1365
|
-
# Keep waiting
|
1366
1513
|
|
1367
1514
|
except ResponseError as e:
|
1368
1515
|
logger.error(f"Redis command error: {e}")
|
1369
1516
|
# Should we exit or retry?
|
1370
|
-
if "NOGROUP" in str(e):
|
1517
|
+
if r and "NOGROUP" in str(e): # Check if r is initialized
|
1371
1518
|
logger.critical("Consumer group seems to have disappeared. Exiting.")
|
1372
1519
|
sys.exit(1)
|
1373
1520
|
time.sleep(1)
|
@@ -1379,4 +1526,15 @@ except Exception as e:
|
|
1379
1526
|
|
1380
1527
|
finally:
|
1381
1528
|
logger.info("Consumer loop exited.")
|
1382
|
-
#
|
1529
|
+
# Cleanup health subprocess
|
1530
|
+
if health_subprocess and health_subprocess.poll() is None:
|
1531
|
+
logger.info("[Consumer] Terminating health check subprocess...")
|
1532
|
+
health_subprocess.terminate()
|
1533
|
+
try:
|
1534
|
+
health_subprocess.wait(timeout=5)
|
1535
|
+
except subprocess.TimeoutExpired:
|
1536
|
+
logger.warning(
|
1537
|
+
"[Consumer] Health subprocess did not terminate gracefully, killing it."
|
1538
|
+
)
|
1539
|
+
health_subprocess.kill()
|
1540
|
+
logger.info("[Consumer] Health subprocess cleanup complete.")
|
@@ -0,0 +1,262 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
import json
|
3
|
+
import logging
|
4
|
+
import os
|
5
|
+
import socket
|
6
|
+
import sys
|
7
|
+
import time
|
8
|
+
from typing import Any, Dict, List, Optional, Tuple, cast
|
9
|
+
|
10
|
+
import redis
|
11
|
+
import socks
|
12
|
+
from redis import ConnectionError, ResponseError
|
13
|
+
from redis.exceptions import TimeoutError as RedisTimeoutError
|
14
|
+
|
15
|
+
# Assuming these are imported from other modules
|
16
|
+
from nebu.processors.models import V1ProcessorHealthResponse
|
17
|
+
|
18
|
+
|
19
|
+
def setup_health_logging():
    """Configure logging for the health worker: a per-PID file plus stdout.

    The stdout handler lets the parent consumer capture and relay our
    output; the file under ./logs keeps a standalone record.
    """
    logs_path = os.path.join(os.getcwd(), "logs")
    os.makedirs(logs_path, exist_ok=True)

    # One log file per worker process, keyed by PID.
    worker_log = os.path.join(logs_path, f"health_consumer_{os.getpid()}.log")

    log_sinks = [
        logging.FileHandler(worker_log),
        logging.StreamHandler(sys.stdout),
    ]
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=log_sinks,
    )

    worker_logger = logging.getLogger("HealthConsumer")
    worker_logger.info(f"Health check worker started. Logging to: {worker_log}")
    return worker_logger
|
43
|
+
|
44
|
+
|
45
|
+
def process_health_check_message(
    message_id: str,
    message_data: Dict[str, str],
    redis_conn: redis.Redis,
    logger: logging.Logger,
    health_stream: str,
    health_group: str,
) -> None:
    """Processes a single health check message.

    Parses the optional JSON ``data`` field, builds a
    V1ProcessorHealthResponse, optionally publishes it to a caller-supplied
    ``return_stream``, and finally XACKs the message on the health stream.

    Args:
        message_id: Redis stream entry ID of the health check message.
        message_data: Raw field/value map from the stream entry.
        redis_conn: Connection used for the reply XADD and the XACK.
        logger: Worker logger to emit progress/errors to.
        health_stream: Stream the message was read from (for XACK).
        health_group: Consumer group name (for XACK).
    """
    logger.info(f"Processing health check message {message_id}: {message_data}")

    # Defaults describe a successful check; overwritten on parse failure.
    health_status = "ok"
    health_message: Optional[str] = "Health check processed successfully."
    details: Optional[Dict[str, Any]] = None
    return_stream: Optional[str] = None

    try:
        if "data" in message_data:
            data = json.loads(message_data["data"])
            logger.info(f"Health check data: {data}")
            # Example: Extract return_stream if present in the health check data
            return_stream = data.get("return_stream")
            # Example: Update details if provided
            if "check_details" in data:
                details = {"processed_details": data["check_details"]}

    except (json.JSONDecodeError, KeyError) as e:
        # Malformed payload: report "error" instead of crashing the worker.
        logger.warning(f"Could not parse health check message data: {e}")
        health_status = "error"
        health_message = f"Failed to parse health check message data: {e}"
        details = {"error": str(e)}

    # Construct the health response
    health_response = V1ProcessorHealthResponse(
        status=health_status, message=health_message, details=details
    )

    logger.info(
        f"Health response for message {message_id}: {health_response.model_dump_json()}"
    )

    # If a return_stream is specified, send the response there
    if return_stream:
        try:
            # It's good practice to set a maxlen for the return stream to prevent it from growing indefinitely
            # NOTE(review): model_dump() may contain None or dict values
            # (message/details), which XADD field maps may reject — confirm
            # the serialization against the consumer of return_stream.
            redis_conn.xadd(
                return_stream,
                health_response.model_dump(),  # type: ignore[arg-type]
                maxlen=1000,
                approximate=True,
            )
            logger.info(
                f"Sent health response for {message_id} to stream: {return_stream}"
            )
        except Exception as e_resp_send:
            # Reply failure is non-fatal; the message is still acknowledged below.
            logger.error(
                f"Failed to send health response for {message_id} to stream {return_stream}: {e_resp_send}"
            )

    # Acknowledge the health check message
    try:
        redis_conn.xack(health_stream, health_group, message_id)
        logger.info(f"Acknowledged health check message {message_id}")
    except Exception as e_ack:
        logger.error(
            f"Failed to acknowledge health check message {message_id}: {e_ack}"
        )
|
112
|
+
|
113
|
+
|
114
|
+
def main():
    """Main function for the health check consumer subprocess.

    Reads configuration from REDIS_URL / REDIS_HEALTH_STREAM /
    REDIS_HEALTH_CONSUMER_GROUP, routes all sockets through the local
    SOCKS5 proxy, then loops forever: (re)connect to Redis, ensure the
    consumer group exists, block-read one health message at a time and
    hand it to process_health_check_message. Connection-level errors
    drop the connection and retry; KeyboardInterrupt exits cleanly.
    """
    logger = setup_health_logging()

    # Get environment variables
    redis_url = os.environ.get("REDIS_URL")
    health_stream = os.environ.get("REDIS_HEALTH_STREAM")
    health_group = os.environ.get("REDIS_HEALTH_CONSUMER_GROUP")

    if not all([redis_url, health_stream, health_group]):
        logger.error(
            "Missing required environment variables: REDIS_URL, REDIS_HEALTH_STREAM, REDIS_HEALTH_CONSUMER_GROUP"
        )
        sys.exit(1)

    # Type assertions after validation
    assert isinstance(redis_url, str)
    assert isinstance(health_stream, str)
    assert isinstance(health_group, str)

    logger.info(
        f"Starting health consumer for stream: {health_stream}, group: {health_group}"
    )

    # Configure SOCKS proxy. NOTE(review): this monkeypatches socket.socket
    # process-wide — all subsequent connections go through the proxy.
    socks.set_default_proxy(socks.SOCKS5, "localhost", 1055)
    socket.socket = socks.socksocket
    logger.info("Configured SOCKS5 proxy for socket connections via localhost:1055")

    # None signals "needs (re)connect" to the loop below.
    health_redis_conn: Optional[redis.Redis] = None
    health_consumer_name = f"health-consumer-{os.getpid()}-{socket.gethostname()}"

    while True:
        try:
            if health_redis_conn is None:
                logger.info("Connecting to Redis for health stream...")
                health_redis_conn = redis.from_url(redis_url, decode_responses=True)
                health_redis_conn.ping()
                logger.info("Connected to Redis for health stream.")

                # Create health consumer group if it doesn't exist
                try:
                    health_redis_conn.xgroup_create(
                        health_stream, health_group, id="0", mkstream=True
                    )
                    logger.info(
                        f"Created consumer group {health_group} for stream {health_stream}"
                    )
                except ResponseError as e_group:
                    # BUSYGROUP simply means the group already exists — fine.
                    if "BUSYGROUP" in str(e_group):
                        logger.info(f"Consumer group {health_group} already exists.")
                    else:
                        logger.error(f"Error creating health consumer group: {e_group}")
                        time.sleep(5)
                        health_redis_conn = None
                        continue
                except Exception as e_group_other:
                    logger.error(
                        f"Unexpected error creating health consumer group: {e_group_other}"
                    )
                    time.sleep(5)
                    health_redis_conn = None
                    continue

            # Read from health stream
            assert health_redis_conn is not None

            health_streams_arg: Dict[str, object] = {health_stream: ">"}
            raw_messages = health_redis_conn.xreadgroup(
                health_group,
                health_consumer_name,
                health_streams_arg,  # type: ignore[arg-type]
                count=1,
                block=5000,  # Block for 5 seconds
            )

            if raw_messages:
                # Cast to expected type for decode_responses=True
                messages = cast(
                    List[Tuple[str, List[Tuple[str, Dict[str, str]]]]], raw_messages
                )
                for _stream_name, stream_messages in messages:
                    for message_id, message_data in stream_messages:
                        process_health_check_message(
                            message_id,
                            message_data,
                            health_redis_conn,
                            logger,
                            health_stream,
                            health_group,
                        )

        except (ConnectionError, RedisTimeoutError, TimeoutError) as e_conn:
            # Drop the connection and rebuild it on the next iteration.
            logger.error(f"Redis connection error: {e_conn}. Reconnecting in 5s...")
            if health_redis_conn:
                try:
                    health_redis_conn.close()
                except Exception:
                    pass
            health_redis_conn = None
            time.sleep(5)

        except ResponseError as e_resp:
            logger.error(f"Redis response error: {e_resp}")
            if "NOGROUP" in str(e_resp):
                # Stream/group vanished; reconnecting recreates the group.
                logger.warning(
                    "Health consumer group disappeared. Attempting to recreate..."
                )
                if health_redis_conn:
                    try:
                        health_redis_conn.close()
                    except Exception:
                        pass
                health_redis_conn = None
            elif "UNBLOCKED" in str(e_resp):
                logger.info(
                    "XREADGROUP unblocked, connection might have been closed. Reconnecting."
                )
                if health_redis_conn:
                    try:
                        health_redis_conn.close()
                    except Exception:
                        pass
                health_redis_conn = None
                time.sleep(1)
            else:
                time.sleep(5)

        except KeyboardInterrupt:
            logger.info("Received interrupt signal. Shutting down health consumer...")
            break

        except Exception as e:
            # Catch-all so the worker never dies on a single bad message.
            logger.error(f"Unexpected error in health check consumer: {e}")
            logger.exception("Traceback:")
            time.sleep(5)

    # Cleanup
    if health_redis_conn:
        try:
            health_redis_conn.close()
        except Exception:
            pass

    logger.info("Health check consumer shutdown complete.")
|
259
|
+
|
260
|
+
|
261
|
+
if __name__ == "__main__":
|
262
|
+
main()
|
@@ -149,3 +149,9 @@ class V1OpenAIStreamResponse(BaseModel):
|
|
149
149
|
content: Any # Using Any for ChatCompletionResponse
|
150
150
|
created_at: int
|
151
151
|
user_id: Optional[str] = None
|
152
|
+
|
153
|
+
|
154
|
+
class V1ProcessorHealthResponse(BaseModel):
    """Health check result reported for a processor."""

    # "ok" on success; the health worker sets "error" on parse failures.
    status: str
    # Optional human-readable explanation of the status.
    message: Optional[str] = None
    # Optional extra diagnostic payload; structure is producer-defined.
    details: Optional[Any] = None
|
@@ -1,7 +1,8 @@
|
|
1
1
|
import json
|
2
2
|
import threading
|
3
3
|
import time
|
4
|
-
|
4
|
+
|
5
|
+
# import uuid # Removed unused import
|
5
6
|
from typing import (
|
6
7
|
Any,
|
7
8
|
Dict,
|
@@ -23,6 +24,7 @@ from nebu.meta import V1ResourceMetaRequest, V1ResourceReference
|
|
23
24
|
from nebu.processors.models import (
|
24
25
|
V1ContainerRequest,
|
25
26
|
V1Processor,
|
27
|
+
V1ProcessorHealthResponse,
|
26
28
|
V1ProcessorRequest,
|
27
29
|
V1Processors,
|
28
30
|
V1ProcessorScaleRequest,
|
@@ -246,7 +248,7 @@ class Processor(Generic[InputType, OutputType]):
|
|
246
248
|
|
247
249
|
# --- Wait for health check if requested ---
|
248
250
|
if wait_for_healthy:
|
249
|
-
self.
|
251
|
+
self.wait_for_healthy()
|
250
252
|
|
251
253
|
def __call__(
|
252
254
|
self,
|
@@ -315,6 +317,9 @@ class Processor(Generic[InputType, OutputType]):
|
|
315
317
|
if "error" in raw_response_json:
|
316
318
|
raise Exception(raw_response_json["error"])
|
317
319
|
|
320
|
+
if "status" in raw_response_json:
|
321
|
+
return raw_response_json
|
322
|
+
|
318
323
|
raw_content = raw_response_json.get("content")
|
319
324
|
logger.debug(f">>> Raw content: {raw_content}")
|
320
325
|
|
@@ -556,11 +561,11 @@ class Processor(Generic[InputType, OutputType]):
|
|
556
561
|
else:
|
557
562
|
logger.info(f"No active log stream to stop for {self.name}.")
|
558
563
|
|
559
|
-
def
|
564
|
+
def wait_for_healthy(
|
560
565
|
self, timeout: float = 3600.0, retry_interval: float = 5.0
|
561
566
|
) -> None:
|
562
567
|
"""
|
563
|
-
Wait for the processor to respond to health checks.
|
568
|
+
Wait for the processor to respond to health checks using the health endpoint.
|
564
569
|
|
565
570
|
Args:
|
566
571
|
timeout: Maximum time to wait for health check in seconds
|
@@ -570,40 +575,25 @@ class Processor(Generic[InputType, OutputType]):
|
|
570
575
|
raise ValueError("Processor not found, cannot perform health check")
|
571
576
|
|
572
577
|
logger.info(
|
573
|
-
f"Waiting for processor {self.processor.metadata.name} to be healthy..."
|
578
|
+
f"Waiting for processor {self.processor.metadata.name} to be healthy via health endpoint..."
|
574
579
|
)
|
575
580
|
|
576
581
|
start_time = time.time()
|
577
582
|
while time.time() - start_time < timeout:
|
578
583
|
try:
|
579
|
-
#
|
580
|
-
|
581
|
-
"
|
582
|
-
"id": str(uuid.uuid4()),
|
583
|
-
"content": {},
|
584
|
-
"created_at": time.time(),
|
585
|
-
}
|
586
|
-
|
587
|
-
# Send health check and wait for response
|
588
|
-
response = self.send(
|
589
|
-
data=health_check_data, # type: ignore[arg-type]
|
590
|
-
wait=True,
|
591
|
-
timeout=30.0, # Short timeout for individual health check
|
584
|
+
health_response = self.health() # Use the new health() method
|
585
|
+
logger.info(
|
586
|
+
f">>> Health check response: {health_response.model_dump_json()}"
|
592
587
|
)
|
593
|
-
logger.info(f">>> Health check response: {response}")
|
594
588
|
|
595
589
|
# Check if the response indicates health
|
596
|
-
if
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
logger.info(
|
605
|
-
f"Health check attempt failed, retrying in {retry_interval}s..."
|
606
|
-
)
|
590
|
+
if health_response.status == "ok": # Check for "ok" status
|
591
|
+
logger.info(f"Processor {self.processor.metadata.name} is healthy!")
|
592
|
+
return
|
593
|
+
else:
|
594
|
+
logger.info(
|
595
|
+
f"Processor {self.processor.metadata.name} reported status: {health_response.status}. Retrying in {retry_interval}s..."
|
596
|
+
)
|
607
597
|
|
608
598
|
except Exception as e:
|
609
599
|
logger.info(
|
@@ -616,3 +606,39 @@ class Processor(Generic[InputType, OutputType]):
|
|
616
606
|
raise TimeoutError(
|
617
607
|
f"Processor {self.processor.metadata.name} failed to become healthy within {timeout} seconds"
|
618
608
|
)
|
609
|
+
|
610
|
+
    def health(self) -> V1ProcessorHealthResponse:
        """
        Performs a health check on the processor by calling the health endpoint.

        Returns:
            V1ProcessorHealthResponse: Parsed health payload from the server.

        Raises:
            ValueError: If the processor or its name/namespace metadata is missing.
            RuntimeError: If the HTTP request fails or the response cannot be handled.
        """
        # Both name and namespace are required to build the endpoint URL.
        if (
            not self.processor
            or not self.processor.metadata.name
            or not self.processor.metadata.namespace
        ):
            raise ValueError(
                "Processor not found or missing metadata (name/namespace), cannot perform health check."
            )

        health_url = f"{self.orign_host}/v1/processors/{self.processor.metadata.namespace}/{self.processor.metadata.name}/health"
        logger.debug(f"Calling health check endpoint: {health_url}")

        try:
            response = requests.get(
                health_url,
                headers={"Authorization": f"Bearer {self.api_key}"},
                timeout=30.0,  # Standard timeout for a health check
            )
            response.raise_for_status()  # Raise an exception for HTTP errors
            health_response_data = response.json()
            return V1ProcessorHealthResponse.model_validate(health_response_data)
        except requests.exceptions.RequestException as e:
            logger.error(f"Health check request to {health_url} failed: {e}")
            # Optionally, return a V1ProcessorHealthResponse indicating an error
            # For now, re-raising the exception or a custom one might be better
            raise RuntimeError(f"Failed to get health status: {e}") from e
        except Exception as e:
            logger.error(f"An unexpected error occurred during health check: {e}")
            raise RuntimeError(
                f"Unexpected error during health status retrieval: {e}"
            ) from e
|
@@ -22,6 +22,7 @@ src/nebu/containers/models.py
|
|
22
22
|
src/nebu/namespaces/models.py
|
23
23
|
src/nebu/namespaces/namespace.py
|
24
24
|
src/nebu/processors/consumer.py
|
25
|
+
src/nebu/processors/consumer_health_worker.py
|
25
26
|
src/nebu/processors/consumer_process_worker.py
|
26
27
|
src/nebu/processors/decorate.py
|
27
28
|
src/nebu/processors/default.py
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|