matrice-compute 0.1.20__py3-none-any.whl → 0.1.22__py3-none-any.whl

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -3,11 +3,12 @@
3
3
  import os
4
4
  import logging
5
5
  from matrice_common.utils import log_errors
6
- # from kafka import KafkaProducer, KafkaConsumer
6
+ from kafka import KafkaProducer, KafkaConsumer
7
7
  import uuid
8
8
  import json
9
9
  import time
10
10
  import base64
11
+ import threading
11
12
 
12
13
  # TODO: update /scaling to /compute
13
14
 
@@ -15,12 +16,13 @@ class Scaling:
15
16
 
16
17
  """Class providing scaling functionality for compute instances."""
17
18
 
18
- def __init__(self, session, instance_id=None):
19
+ def __init__(self, session, instance_id=None, enable_kafka=True):
19
20
  """Initialize Scaling instance.
20
21
 
21
22
  Args:
22
23
  session: Session object for making RPC calls
23
24
  instance_id: ID of the compute instance
25
+ enable_kafka: Enable Kafka communication (default True)
24
26
 
25
27
  Raises:
26
28
  Exception: If instance_id is not provided
@@ -34,38 +36,93 @@ class Scaling:
34
36
  self.rpc = session.rpc
35
37
  used_ports_str = os.environ.get("USED_PORTS", "")
36
38
  self.used_ports = set(int(p) for p in used_ports_str.split(",") if p.strip())
39
+
40
+ # Kafka configuration and initialization
41
+ self.enable_kafka = enable_kafka
42
+ self.kafka_producer = None
43
+ self.kafka_consumer = None
44
+ self.kafka_thread = None
45
+ self.kafka_running = False
46
+
47
+ # Maps correlation_id to threading.Event for request/response matching
48
+ self.pending_requests = {}
49
+ # Maps correlation_id to response data
50
+ self.response_map = {}
51
+ self.response_lock = threading.Lock()
52
+
53
+ if self.enable_kafka:
54
+ try:
55
+ self.kafka_config = {
56
+ "bootstrap_servers": self.get_kafka_bootstrap_servers(),
57
+ "action_request_topic": "action_requests",
58
+ "action_response_topic": "action_responses",
59
+ "compute_request_topic": "compute_requests",
60
+ "compute_response_topic": "compute_responses"
61
+ }
62
+
63
+ # Initialize single producer
64
+ self.kafka_producer = KafkaProducer(
65
+ bootstrap_servers=self.kafka_config["bootstrap_servers"],
66
+ value_serializer=lambda v: json.dumps(v).encode("utf-8"),
67
+ max_block_ms=5000 # Timeout if Kafka is down
68
+ )
69
+
70
+ # Initialize single consumer for both response topics
71
+ self.kafka_consumer = KafkaConsumer(
72
+ self.kafka_config["action_response_topic"],
73
+ self.kafka_config["compute_response_topic"],
74
+ bootstrap_servers=self.kafka_config["bootstrap_servers"],
75
+ group_id=f"py_compute_{instance_id}",
76
+ value_deserializer=lambda m: json.loads(m.decode("utf-8")),
77
+ auto_offset_reset='latest',
78
+ enable_auto_commit=True,
79
+ consumer_timeout_ms=1000, # Poll timeout
80
+ session_timeout_ms=60000, # Increase session timeout to 60s (default 30s)
81
+ heartbeat_interval_ms=3000, # Send heartbeat every 3s
82
+ max_poll_interval_ms=300000 # Max time between polls: 5 minutes
83
+ )
84
+
85
+ # Start background thread to handle responses
86
+ self.kafka_running = True
87
+ self.kafka_thread = threading.Thread(target=self._kafka_response_listener, daemon=True)
88
+ self.kafka_thread.start()
89
+
90
+ logging.info(f"Kafka enabled with bootstrap servers: {self.kafka_config['bootstrap_servers']}")
91
+ except Exception as e:
92
+ logging.warning(f"Failed to initialize Kafka, will use REST API only: {e}")
93
+ self.enable_kafka = False
94
+ self.kafka_producer = None
95
+ self.kafka_consumer = None
96
+
37
97
  logging.info(
38
- "Initialized Scaling with instance_id: %s",
98
+ "Initialized Scaling with instance_id: %s, Kafka enabled: %s",
39
99
  instance_id,
100
+ self.enable_kafka
40
101
  )
41
- # KAFKA TEMPORARILY DISABLED - Using REST API directly
42
- # self.kafka_config = {
43
- # "bootstrap_servers": self.get_kafka_bootstrap_servers(),
44
- # "api_request_topic": "action_requests",
45
- # "api_response_topic": "action_responses",
46
- # "scaling_request_topic": "compute_requests",
47
- # "scaling_response_topic": "compute_responses"
48
- # }
49
- # self.kafka_producer = KafkaProducer(
50
- # bootstrap_servers=self.kafka_config["bootstrap_servers"],
51
- # value_serializer=lambda v: json.dumps(v).encode("utf-8"),)
52
102
 
53
103
 
54
104
 
55
- # KAFKA TEMPORARILY DISABLED - Using REST API directly
56
- # @log_errors(default_return=(None, "Error creating Kafka producer", "Kafka producer creation failed"), log_error=True)
57
- # def get_kafka_bootstrap_servers(self):
58
- # """Get Kafka bootstrap servers from API and decode base64 fields."""
59
- # path = "/v1/actions/get_kafka_info"
60
- # response = self.rpc.get(path=path)
61
- # if not response or not response.get("success"):
62
- # raise ValueError(f"Failed to fetch Kafka config: {response.get('message', 'No response')}")
63
- # encoded_ip = response["data"]["ip"]
64
- # encoded_port = response["data"]["port"]
65
- # ip = base64.b64decode(encoded_ip).decode("utf-8")
66
- # port = base64.b64decode(encoded_port).decode("utf-8")
67
- # bootstrap_servers = f"{ip}:{port}"
68
- # return bootstrap_servers
105
+ @log_errors(default_return=None, log_error=True)
106
+ def get_kafka_bootstrap_servers(self):
107
+ """Get Kafka bootstrap servers from API and decode base64 fields.
108
+
109
+ Returns:
110
+ str: Kafka bootstrap servers in format "ip:port"
111
+
112
+ Raises:
113
+ ValueError: If unable to fetch Kafka configuration
114
+ """
115
+ path = "/v1/actions/get_kafka_info"
116
+ response = self.rpc.get(path=path)
117
+ if not response or not response.get("success"):
118
+ raise ValueError(f"Failed to fetch Kafka config: {response.get('message', 'No response')}")
119
+ encoded_ip = response["data"]["ip"]
120
+ encoded_port = response["data"]["port"]
121
+ ip = base64.b64decode(encoded_ip).decode("utf-8")
122
+ port = base64.b64decode(encoded_port).decode("utf-8")
123
+ bootstrap_servers = f"{ip}:{port}"
124
+ # logging.info(f"Retrieved Kafka bootstrap servers: {bootstrap_servers}")
125
+ return bootstrap_servers
69
126
 
70
127
  @log_errors(default_return=(None, "Error processing response", "Response processing failed"), log_error=True)
71
128
  def handle_response(self, resp, success_message, error_message):
@@ -90,65 +147,267 @@ class Scaling:
90
147
  message = error_message
91
148
  logging.error("%s: %s", message, error)
92
149
  return data, error, message
93
-
150
+
151
+ def _kafka_response_listener(self):
152
+ """
153
+ Background thread that continuously polls for Kafka responses.
154
+
155
+ This thread runs in the background and listens for responses from both
156
+ action_responses and compute_responses topics. When a response is received,
157
+ it matches the correlation ID to pending requests and wakes up the waiting thread.
158
+ """
159
+ logging.info("Kafka response listener thread started")
160
+
161
+ while self.kafka_running:
162
+ try:
163
+ # Poll for messages with 1 second timeout
164
+ message_batch = self.kafka_consumer.poll(timeout_ms=1000)
165
+
166
+ if message_batch:
167
+ for topic_partition, messages in message_batch.items():
168
+ for message in messages:
169
+ try:
170
+ msg = message.value
171
+ correlation_id = msg.get("correlationId")
172
+
173
+ if correlation_id:
174
+ with self.response_lock:
175
+ if correlation_id in self.pending_requests:
176
+ # Store response and signal waiting thread
177
+ self.response_map[correlation_id] = msg
178
+ self.pending_requests[correlation_id].set()
179
+ logging.debug(f"Received Kafka response for correlation_id: {correlation_id}")
180
+ else:
181
+ logging.warning(f"Received Kafka message without correlationId: {msg}")
182
+ except Exception as e:
183
+ logging.error(f"Error processing Kafka message: {e}")
184
+
185
+ except Exception as e:
186
+ if self.kafka_running: # Only log if not shutting down
187
+ logging.error(f"Error in Kafka response listener: {e}")
188
+ time.sleep(1) # Avoid tight loop on persistent errors
189
+
190
+ logging.info("Kafka response listener thread stopped")
191
+
192
+ def _send_kafka_request(self, api, payload, request_topic, response_topic, timeout=5):
193
+ """
194
+ Send a request via Kafka and wait for response using the persistent consumer.
195
+
196
+ Args:
197
+ api: API name to call
198
+ payload: Request payload dictionary
199
+ request_topic: Kafka topic to send request to
200
+ response_topic: Kafka topic to receive response from (not used, kept for signature)
201
+ timeout: Timeout in seconds to wait for response
202
+
203
+ Returns:
204
+ Tuple of (data, error, message, kafka_success)
205
+ kafka_success is True if response received, False if timeout/error
206
+ """
207
+ if not self.enable_kafka or not self.kafka_producer:
208
+ return None, "Kafka not enabled", "Kafka not available", False
209
+
210
+ correlation_id = str(uuid.uuid4())
211
+ request_message = {
212
+ "correlationId": correlation_id,
213
+ "api": api,
214
+ "payload": payload,
215
+ }
216
+
217
+ # Create event for this request
218
+ event = threading.Event()
219
+
220
+ with self.response_lock:
221
+ self.pending_requests[correlation_id] = event
222
+
223
+ try:
224
+ # Add auth token if available
225
+ headers = None
226
+ if hasattr(self.session.rpc, 'AUTH_TOKEN'):
227
+ self.session.rpc.AUTH_TOKEN.set_bearer_token()
228
+ auth_token = self.session.rpc.AUTH_TOKEN.bearer_token
229
+ auth_token = auth_token.replace("Bearer ", "")
230
+ headers = [("Authorization", bytes(f"{auth_token}", "utf-8"))]
231
+
232
+ # Send request
233
+ self.kafka_producer.send(request_topic, request_message, headers=headers)
234
+ logging.info(f"Sent Kafka request for {api} with correlation_id: {correlation_id}")
235
+
236
+ # Wait for response with timeout
237
+ if event.wait(timeout=timeout):
238
+ # Response received
239
+ with self.response_lock:
240
+ response = self.response_map.pop(correlation_id, None)
241
+ self.pending_requests.pop(correlation_id, None)
242
+
243
+ if response:
244
+ if response.get("status") == "success":
245
+ data = response.get("data")
246
+ logging.info(f"Kafka success for {api}")
247
+ return data, None, f"Fetched via Kafka for {api}", True
248
+ else:
249
+ error = response.get("error", "Unknown error")
250
+ logging.error(f"Kafka error response for {api}: {error}")
251
+ return None, error, f"Kafka error response for {api}", True
252
+ else:
253
+ logging.warning(f"Kafka response received but missing data for {api}")
254
+ return None, "Response missing data", "Kafka response error", False
255
+ else:
256
+ # Timeout
257
+ with self.response_lock:
258
+ self.pending_requests.pop(correlation_id, None)
259
+ logging.warning(f"Kafka response timeout for {api} after {timeout} seconds")
260
+ return None, "Kafka response timeout", "Kafka response timeout", False
261
+
262
+ except Exception as e:
263
+ # Cleanup on error
264
+ with self.response_lock:
265
+ self.pending_requests.pop(correlation_id, None)
266
+ logging.error(f"Kafka send error for {api}: {e}")
267
+ return None, f"Kafka error: {e}", "Kafka send failed", False
268
+
269
+ def _hybrid_request(self, api, payload, request_topic, response_topic, rest_fallback_func):
270
+ """
271
+ Hybrid request method: try Kafka first, fallback to REST, cache if both fail.
272
+
273
+ Args:
274
+ api: API name
275
+ payload: Request payload
276
+ request_topic: Kafka request topic
277
+ response_topic: Kafka response topic
278
+ rest_fallback_func: Function to call for REST fallback (should return same format as handle_response)
279
+
280
+ Returns:
281
+ Tuple of (data, error, message) matching the API response pattern
282
+ """
283
+ # Try Kafka first
284
+ if self.enable_kafka:
285
+ data, error, message, kafka_success = self._send_kafka_request(
286
+ api, payload, request_topic, response_topic, timeout=5
287
+ )
288
+
289
+ if kafka_success and error is None:
290
+ # Kafka succeeded
291
+ return data, error, message
292
+
293
+ # Kafka returned an error response (not transport error)
294
+ if kafka_success and error is not None:
295
+ logging.warning(f"Kafka returned error for {api}, falling back to REST")
296
+
297
+ # Kafka failed or disabled, try REST
298
+ logging.info(f"Using REST API for {api}")
299
+ try:
300
+ rest_response = rest_fallback_func()
301
+
302
+ # Return REST response (success or failure)
303
+ if rest_response and len(rest_response) == 3:
304
+ return rest_response
305
+ else:
306
+ # Unexpected REST response format
307
+ logging.error(f"REST API returned unexpected format for {api}")
308
+ return None, "Unexpected REST response format", "REST API error"
309
+
310
+ except Exception as e:
311
+ # REST failed
312
+ logging.error(f"REST API failed for {api}: {e}")
313
+ return None, str(e), "REST API failed"
314
+
315
+ def shutdown(self):
316
+ """Gracefully shutdown Kafka connections."""
317
+ if self.kafka_running:
318
+ logging.info("Shutting down Kafka connections...")
319
+ self.kafka_running = False
320
+
321
+ if self.kafka_thread:
322
+ self.kafka_thread.join(timeout=5)
323
+
324
+ if self.kafka_consumer:
325
+ self.kafka_consumer.close()
326
+
327
+ if self.kafka_producer:
328
+ self.kafka_producer.close()
329
+
330
+ logging.info("Kafka connections closed")
331
+
94
332
  @log_errors(log_error=True)
95
333
  def get_downscaled_ids(self):
96
- """Get IDs of downscaled instances.
334
+ """Get IDs of downscaled instances using Kafka (with REST fallback).
97
335
 
98
336
  Returns:
99
337
  Tuple of (data, error, message) from API response
100
338
  """
101
- logging.info(
102
- "Getting downscaled ids for instance %s",
103
- self.instance_id,
104
- )
105
- path = f"/v1/compute/down_scaled_ids/{self.instance_id}"
106
- resp = self.rpc.get(path=path)
107
- return self.handle_response(
108
- resp,
109
- "Downscaled ids info fetched successfully",
110
- "Could not fetch the Downscaled ids info",
339
+ logging.info("Getting downscaled ids for instance %s", self.instance_id)
340
+
341
+ payload = {"instance_id": self.instance_id}
342
+
343
+ def rest_fallback():
344
+ path = f"/v1/compute/down_scaled_ids/{self.instance_id}"
345
+ resp = self.rpc.get(path=path)
346
+ return self.handle_response(
347
+ resp,
348
+ "Downscaled ids info fetched successfully",
349
+ "Could not fetch the Downscaled ids info",
350
+ )
351
+
352
+ return self._hybrid_request(
353
+ api="get_downscaled_ids",
354
+ payload=payload,
355
+ request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
356
+ response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
357
+ rest_fallback_func=rest_fallback
111
358
  )
112
359
 
113
360
  @log_errors(default_return=(None, "API call failed", "Failed to stop instance"), log_error=True)
114
361
  def stop_instance(self):
115
- """Stop the compute instance.
362
+ """Stop the compute instance using Kafka (with REST fallback).
116
363
 
117
364
  Returns:
118
365
  Tuple of (data, error, message) from API response
119
366
  """
120
- logging.info(
121
- "Stopping instance %s",
122
- self.instance_id,
123
- )
124
- path = "/v1/compute/compute_instance/stop"
125
- resp = self.rpc.put(
126
- path=path,
127
- payload={
128
- "_idInstance": self.instance_id,
129
- "isForcedStop": False,
130
- },
131
- )
132
- return self.handle_response(
133
- resp,
134
- "Instance stopped successfully",
135
- "Could not stop the instance",
367
+ logging.info("Stopping instance %s", self.instance_id)
368
+
369
+ payload = {
370
+ "_idInstance": self.instance_id,
371
+ "isForcedStop": False,
372
+ }
373
+
374
+ def rest_fallback():
375
+ path = "/v1/compute/compute_instance/stop"
376
+ resp = self.rpc.put(path=path, payload=payload)
377
+ return self.handle_response(
378
+ resp,
379
+ "Instance stopped successfully",
380
+ "Could not stop the instance",
381
+ )
382
+
383
+ return self._hybrid_request(
384
+ api="stop_instance",
385
+ payload=payload,
386
+ request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
387
+ response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
388
+ rest_fallback_func=rest_fallback
136
389
  )
137
390
 
138
391
  @log_errors(log_error=True)
139
- def update_jupyter_token(
140
- self,
141
- token="",
142
- ):
143
- path = f"/v1/scaling/update_jupyter_notebook_token/{self.instance_id}"
144
- payload = {
145
- "token": token,
146
- }
147
- resp = self.rpc.put(path=path, payload=payload)
148
- return self.handle_response(
149
- resp,
150
- "Resources updated successfully",
151
- "Could not update the resources",
392
+ def update_jupyter_token(self, token=""):
393
+ """Update Jupyter notebook token using Kafka (with REST fallback)."""
394
+ payload = {"token": token, "instance_id": self.instance_id}
395
+
396
+ def rest_fallback():
397
+ path = f"/v1/scaling/update_jupyter_notebook_token/{self.instance_id}"
398
+ resp = self.rpc.put(path=path, payload={"token": token})
399
+ return self.handle_response(
400
+ resp,
401
+ "Resources updated successfully",
402
+ "Could not update the resources",
403
+ )
404
+
405
+ return self._hybrid_request(
406
+ api="update_jupyter_token",
407
+ payload=payload,
408
+ request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
409
+ response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
410
+ rest_fallback_func=rest_fallback
152
411
  )
153
412
 
154
413
  @log_errors(log_error=True)
@@ -167,7 +426,7 @@ class Scaling:
167
426
  createdAt=None,
168
427
  updatedAt=None,
169
428
  ):
170
- """Update status of an action.
429
+ """Update status of an action using Kafka (with REST fallback).
171
430
 
172
431
  Args:
173
432
  service_provider: Provider of the service
@@ -188,12 +447,10 @@ class Scaling:
188
447
  """
189
448
  if not action_record_id:
190
449
  return None, "Action record id is required", "Action record id is required"
191
- logging.info(
192
- "Updating action status for action %s",
193
- action_record_id,
194
- )
195
- path = "/v1/compute/update_action_status"
196
- payload_scaling = {
450
+
451
+ logging.info("Updating action status for action %s", action_record_id)
452
+
453
+ payload = {
197
454
  "instanceID": self.instance_id,
198
455
  "serviceProvider": service_provider,
199
456
  "actionRecordId": action_record_id,
@@ -208,11 +465,22 @@ class Scaling:
208
465
  "createdAt": createdAt,
209
466
  "updatedAt": updatedAt,
210
467
  }
211
- resp = self.rpc.put(path=path, payload=payload_scaling)
212
- return self.handle_response(
213
- resp,
214
- "Action status details updated successfully",
215
- "Could not update the action status details ",
468
+
469
+ def rest_fallback():
470
+ path = "/v1/compute/update_action_status"
471
+ resp = self.rpc.put(path=path, payload=payload)
472
+ return self.handle_response(
473
+ resp,
474
+ "Action status details updated successfully",
475
+ "Could not update the action status details ",
476
+ )
477
+
478
+ return self._hybrid_request(
479
+ api="update_action_status",
480
+ payload=payload,
481
+ request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
482
+ response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
483
+ rest_fallback_func=rest_fallback
216
484
  )
217
485
 
218
486
  @log_errors(log_error=True)
@@ -225,7 +493,7 @@ class Scaling:
225
493
  status,
226
494
  status_description,
227
495
  ):
228
- """Update status of an action.
496
+ """Update status of an action using Kafka (with REST fallback).
229
497
 
230
498
  Args:
231
499
  action_record_id: ID of the action record
@@ -235,11 +503,8 @@ class Scaling:
235
503
  status: Status to update
236
504
  status_description: Description of the status
237
505
  """
238
- logging.info(
239
- "Updating status for action %s",
240
- action_record_id,
241
- )
242
- url = "/v1/actions"
506
+ logging.info("Updating status for action %s", action_record_id)
507
+
243
508
  payload = {
244
509
  "_id": action_record_id,
245
510
  "action": action_type,
@@ -248,76 +513,91 @@ class Scaling:
248
513
  "status": status,
249
514
  "statusDescription": status_description,
250
515
  }
251
- self.rpc.put(path=url, payload=payload)
516
+
517
+ def rest_fallback():
518
+ url = "/v1/actions"
519
+ self.rpc.put(path=url, payload=payload)
520
+ return None, None, "Status updated"
521
+
522
+ return self._hybrid_request(
523
+ api="update_action",
524
+ payload=payload,
525
+ request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
526
+ response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
527
+ rest_fallback_func=rest_fallback
528
+ )
252
529
 
253
530
  @log_errors(log_error=True)
254
531
  def get_shutdown_details(self):
255
- """Get shutdown details for the instance.
532
+ """Get shutdown details for the instance using Kafka (with REST fallback).
256
533
 
257
534
  Returns:
258
535
  Tuple of (data, error, message) from API response
259
536
  """
260
- logging.info(
261
- "Getting shutdown details for instance %s",
262
- self.instance_id,
263
- )
264
- path = f"/v1/compute/get_shutdown_details/{self.instance_id}"
265
- resp = self.rpc.get(path=path)
266
- return self.handle_response(
267
- resp,
268
- "Shutdown info fetched successfully",
269
- "Could not fetch the shutdown details",
537
+ logging.info("Getting shutdown details for instance %s", self.instance_id)
538
+
539
+ payload = {"instance_id": self.instance_id}
540
+
541
+ def rest_fallback():
542
+ path = f"/v1/compute/get_shutdown_details/{self.instance_id}"
543
+ resp = self.rpc.get(path=path)
544
+ return self.handle_response(
545
+ resp,
546
+ "Shutdown info fetched successfully",
547
+ "Could not fetch the shutdown details",
548
+ )
549
+
550
+ return self._hybrid_request(
551
+ api="get_shutdown_details",
552
+ payload=payload,
553
+ request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
554
+ response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
555
+ rest_fallback_func=rest_fallback
270
556
  )
271
557
 
272
558
  @log_errors(log_error=True)
273
559
  def get_tasks_details(self):
274
- """Get task details for the instance.
560
+ """Get task details for the instance using Kafka (with REST fallback).
275
561
 
276
562
  Returns:
277
563
  Tuple of (data, error, message) from API response
278
564
  """
279
- logging.info(
280
- "Getting tasks details for instance %s",
281
- self.instance_id,
282
- )
283
- path = f"/v1/actions/fetch_instance_action_details/{self.instance_id}/action_details"
284
- resp = self.rpc.get(path=path)
285
- return self.handle_response(
286
- resp,
287
- "Task details fetched successfully",
288
- "Could not fetch the task details",
565
+ logging.info("Getting tasks details for instance %s", self.instance_id)
566
+
567
+ payload = {"instance_id": self.instance_id}
568
+
569
+ def rest_fallback():
570
+ path = f"/v1/actions/fetch_instance_action_details/{self.instance_id}/action_details"
571
+ resp = self.rpc.get(path=path)
572
+ return self.handle_response(
573
+ resp,
574
+ "Task details fetched successfully",
575
+ "Could not fetch the task details",
576
+ )
577
+
578
+ return self._hybrid_request(
579
+ api="get_tasks_details",
580
+ payload=payload,
581
+ request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
582
+ response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
583
+ rest_fallback_func=rest_fallback
289
584
  )
290
585
 
291
586
  @log_errors(log_error=True)
292
587
  def get_action_details(self, action_status_id):
293
- """Get details for a specific action using REST API.
294
-
588
+ """Get details for a specific action using Kafka (with REST fallback).
589
+
295
590
  Args:
296
591
  action_status_id: ID of the action status to fetch
297
-
592
+
298
593
  Returns:
299
594
  Tuple of (data, error, message) from API response
300
595
  """
301
596
  logging.info("Getting action details for action %s", action_status_id)
302
- # KAFKA TEMPORARILY DISABLED - Using REST API directly
303
- # api = "get_action_details"
304
- # payload = {"actionRecordId": action_status_id}
305
- # data, error, message, kafka_response_received = self._send_kafka_request(
306
- # api=api,
307
- # payload=payload,
308
- # request_topic=self.kafka_config["api_request_topic"],
309
- # response_topic=self.kafka_config["api_response_topic"],
310
- # timeout=60
311
- # )
312
- # # Check if Kafka response was received and if it's an error, log and fallback to REST API
313
- # if kafka_response_received:
314
- # if error:
315
- # logging.warning("Kafka returned error for get_action_details: %s. Falling back to REST API.", error)
316
- # else:
317
- # return data, error, message
318
-
319
- # Using REST API directly
320
- try:
597
+
598
+ payload = {"actionRecordId": action_status_id}
599
+
600
+ def rest_fallback():
321
601
  path = f"/v1/actions/action/{action_status_id}/details"
322
602
  resp = self.rpc.get(path=path)
323
603
  return self.handle_response(
@@ -325,10 +605,14 @@ class Scaling:
325
605
  "Task details fetched successfully",
326
606
  "Could not fetch the task details",
327
607
  )
328
- except Exception as e:
329
- logging.error("REST API failed (get_action_details): %s", e)
330
- return None, f"Failed via REST: {e}", "REST API failed"
331
-
608
+
609
+ return self._hybrid_request(
610
+ api="get_action_details",
611
+ payload=payload,
612
+ request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
613
+ response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
614
+ rest_fallback_func=rest_fallback
615
+ )
332
616
 
333
617
  @log_errors(log_error=True)
334
618
  def update_action(
@@ -342,8 +626,8 @@ class Scaling:
342
626
  service="",
343
627
  job_params=None,
344
628
  ):
345
- """Update an action using REST API.
346
-
629
+ """Update an action using Kafka (with REST fallback).
630
+
347
631
  Args:
348
632
  id: Action ID
349
633
  step_code: Step code
@@ -353,15 +637,15 @@ class Scaling:
353
637
  status_description: Description of the status
354
638
  service: Service name
355
639
  job_params: Job parameters dictionary
356
-
640
+
357
641
  Returns:
358
642
  Tuple of (data, error, message) from API response
359
643
  """
360
644
  if job_params is None:
361
645
  job_params = {}
646
+
362
647
  logging.info("Updating action %s", id)
363
- # KAFKA TEMPORARILY DISABLED - Using REST API directly
364
- # api = "update_action"
648
+
365
649
  payload = {
366
650
  "_id": id,
367
651
  "stepCode": step_code,
@@ -372,22 +656,8 @@ class Scaling:
372
656
  "serviceName": service,
373
657
  "jobParams": job_params,
374
658
  }
375
- # data, error, message, kafka_response_received = self._send_kafka_request(
376
- # api=api,
377
- # payload=payload,
378
- # request_topic=self.kafka_config["api_request_topic"],
379
- # response_topic=self.kafka_config["api_response_topic"],
380
- # timeout=60
381
- # )
382
- # # Check if Kafka response was received and if it's an error, log and fallback to REST API
383
- # if kafka_response_received:
384
- # if error:
385
- # logging.warning("Kafka returned error for update_action: %s. Falling back to REST API.", error)
386
- # else:
387
- # return data, error, message
388
-
389
- # Using REST API directly
390
- try:
659
+
660
+ def rest_fallback():
391
661
  path = "/v1/actions"
392
662
  resp = self.rpc.put(path=path, payload=payload)
393
663
  return self.handle_response(
@@ -395,62 +665,66 @@ class Scaling:
395
665
  "Error logged successfully",
396
666
  "Could not log the errors",
397
667
  )
398
- except Exception as e:
399
- logging.error("REST API failed (update_action): %s", e)
400
- return None, f"Failed via REST: {e}", "REST API failed"
668
+
669
+ return self._hybrid_request(
670
+ api="update_action",
671
+ payload=payload,
672
+ request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
673
+ response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
674
+ rest_fallback_func=rest_fallback
675
+ )
401
676
 
402
677
 
403
678
  @log_errors(log_error=True)
404
679
  def assign_jobs(self, is_gpu):
405
680
  """Assign jobs to the instance using REST API.
406
-
681
+
407
682
  Args:
408
683
  is_gpu: Boolean or any value indicating if this is a GPU instance.
409
684
  Will be converted to proper boolean.
410
-
685
+
411
686
  Returns:
412
687
  Tuple of (data, error, message) from API response
413
688
  """
414
689
  # Convert is_gpu to proper boolean
415
690
  is_gpu_bool = bool(is_gpu)
416
691
  logging.info("Assigning jobs for instance %s (GPU: %s)", self.instance_id, is_gpu_bool)
417
-
418
- # KAFKA TEMPORARILY DISABLED - Using REST API directly
419
- # api = "assign_jobs"
692
+
693
+ # Use REST API directly
694
+ is_gpu_str = str(is_gpu_bool).lower()
695
+ path = f"/v1/actions/assign_jobs/{is_gpu_str}/{self.instance_id}"
696
+ resp = self.rpc.get(path=path)
697
+ return self.handle_response(
698
+ resp,
699
+ "Pinged successfully",
700
+ "Could not ping the scaling jobs",
701
+ )
702
+
703
+ # # Kafka approach (commented out - using REST only)
420
704
  # payload = {
421
705
  # "instanceID": self.instance_id,
422
706
  # "isGPUInstance": is_gpu_bool,
423
707
  # }
424
-
425
- # data, error, message, kafka_response_received = self._send_kafka_request(
426
- # api=api,
708
+ #
709
+ # # Define REST fallback function
710
+ # def rest_fallback():
711
+ # is_gpu_str = str(is_gpu_bool).lower()
712
+ # path = f"/v1/actions/assign_jobs/{is_gpu_str}/{self.instance_id}"
713
+ # resp = self.rpc.get(path=path)
714
+ # return self.handle_response(
715
+ # resp,
716
+ # "Pinged successfully",
717
+ # "Could not ping the scaling jobs",
718
+ # )
719
+ #
720
+ # # Use hybrid approach: Kafka first, REST fallback, cache if both fail
721
+ # return self._hybrid_request(
722
+ # api="assign_jobs",
427
723
  # payload=payload,
428
- # request_topic=self.kafka_config["api_request_topic"],
429
- # response_topic=self.kafka_config["api_response_topic"],
430
- # timeout=60
724
+ # request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
725
+ # response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
726
+ # rest_fallback_func=rest_fallback
431
727
  # )
432
-
433
- # # Check if Kafka response was received and if it's an error, log and fallback to REST API
434
- # if kafka_response_received:
435
- # if error:
436
- # logging.warning("Kafka returned error for assign_jobs: %s. Falling back to REST API.", error)
437
- # else:
438
- # return data, error, message
439
-
440
- # Using REST API directly
441
- try:
442
- # Convert boolean to lowercase string for API endpoint
443
- is_gpu_str = str(is_gpu_bool).lower()
444
- path = f"/v1/actions/assign_jobs/{is_gpu_str}/{self.instance_id}"
445
- resp = self.rpc.get(path=path)
446
- return self.handle_response(
447
- resp,
448
- "Pinged successfully",
449
- "Could not ping the scaling jobs",
450
- )
451
- except Exception as e:
452
- logging.error("REST API failed (assign_jobs): %s", e)
453
- return None, f"Failed via REST: {e}", "REST API failed"
454
728
 
455
729
 
456
730
  @log_errors(log_error=True)
@@ -461,14 +735,14 @@ class Scaling:
461
735
  availableMemory=0,
462
736
  availableGPUMemory=0,
463
737
  ):
464
- """Update available resources for the instance using REST API.
465
-
738
+ """Update available resources for the instance using Kafka (with REST fallback).
739
+
466
740
  Args:
467
741
  availableCPU: Available CPU resources
468
742
  availableGPU: Available GPU resources
469
743
  availableMemory: Available memory
470
744
  availableGPUMemory: Available GPU memory
471
-
745
+
472
746
  Returns:
473
747
  Tuple of (data, error, message) from API response
474
748
  """
@@ -480,28 +754,9 @@ class Scaling:
480
754
  "availableGPUMemory": availableGPUMemory,
481
755
  "availableGPU": availableGPU,
482
756
  }
483
- # KAFKA TEMPORARILY DISABLED - Using REST API directly
484
- # api = "update_available_resources"
485
- # correlation_id = str(uuid.uuid4())
486
757
 
487
- # data, error, message, kafka_response_received = self._send_kafka_request(
488
- # api=api,
489
- # payload=payload,
490
- # request_topic=self.kafka_config["scaling_request_topic"],
491
- # response_topic=self.kafka_config["scaling_response_topic"],
492
- # timeout=60
493
- # )
494
-
495
- # # Check if Kafka response was received
496
- # # Response format: {'correlationId': 'id', 'status': 'success'/'error', 'data': ..., 'error': 'error message'}
497
- # if kafka_response_received:
498
- # if error:
499
- # logging.warning("Kafka returned error for update_available_resources: %s. Falling back to REST API.", error)
500
- # else:
501
- # return data, error, message
502
-
503
- # Using REST API directly
504
- try:
758
+ # Define REST fallback function
759
+ def rest_fallback():
505
760
  path = f"/v1/compute/update_available_resources/{self.instance_id}"
506
761
  resp = self.rpc.put(path=path, payload=payload)
507
762
  return self.handle_response(
@@ -509,45 +764,35 @@ class Scaling:
509
764
  "Resources updated successfully",
510
765
  "Could not update the resources",
511
766
  )
512
- except Exception as e:
513
- logging.error("REST API failed (update_available_resources): %s", e)
514
- return None, f"Failed to update available resources via REST: {e}", "REST API failed"
767
+
768
+ # Use hybrid approach: Kafka first, REST fallback, cache if both fail
769
+ return self._hybrid_request(
770
+ api="update_available_resources",
771
+ payload=payload,
772
+ request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
773
+ response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
774
+ rest_fallback_func=rest_fallback
775
+ )
515
776
 
516
777
  @log_errors(log_error=True)
517
778
  def update_action_docker_logs(self, action_record_id, log_content):
518
- """Update docker logs for an action using REST API.
519
-
779
+ """Update docker logs for an action using Kafka (with REST fallback).
780
+
520
781
  Args:
521
782
  action_record_id: ID of the action record
522
783
  log_content: Content of the logs to update
523
-
784
+
524
785
  Returns:
525
786
  Tuple of (data, error, message) from API response
526
787
  """
527
788
  logging.info("Updating docker logs for action %s", action_record_id)
528
- # KAFKA TEMPORARILY DISABLED - Using REST API directly
529
- # api = "update_action_docker_logs"
789
+
530
790
  payload = {
531
791
  "actionRecordId": action_record_id,
532
792
  "logContent": log_content,
533
793
  }
534
- # data, error, message, kafka_response_received = self._send_kafka_request(
535
- # api=api,
536
- # payload=payload,
537
- # request_topic=self.kafka_config["api_request_topic"],
538
- # response_topic=self.kafka_config["api_response_topic"],
539
- # timeout=60
540
- # )
541
794
 
542
- # # Check if Kafka response was received and if it's an error, log and fallback to REST API
543
- # if kafka_response_received:
544
- # if error:
545
- # logging.warning("Kafka returned error for update_action_docker_logs: %s. Falling back to REST API.", error)
546
- # else:
547
- # return data, error, message
548
-
549
- # Using REST API directly
550
- try:
795
+ def rest_fallback():
551
796
  path = "/v1/actions/update_action_docker_logs"
552
797
  resp = self.rpc.put(path=path, payload=payload)
553
798
  return self.handle_response(
@@ -555,40 +800,67 @@ class Scaling:
555
800
  "Docker logs updated successfully",
556
801
  "Could not update the docker logs",
557
802
  )
558
- except Exception as e:
559
- logging.error("REST API failed (update_action_docker_logs): %s", e)
560
- return None, f"Failed via REST: {e}", "REST API failed"
561
-
803
+
804
+ return self._hybrid_request(
805
+ api="update_action_docker_logs",
806
+ payload=payload,
807
+ request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
808
+ response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
809
+ rest_fallback_func=rest_fallback
810
+ )
562
811
 
563
812
  @log_errors(log_error=True)
564
813
  def get_docker_hub_credentials(self):
565
- """Get Docker Hub credentials.
814
+ """Get Docker Hub credentials using Kafka (with REST fallback).
566
815
 
567
816
  Returns:
568
817
  Tuple of (data, error, message) from API response
569
818
  """
570
819
  logging.info("Getting docker credentials")
571
- path = "/v1/compute/get_docker_hub_credentials"
572
- resp = self.rpc.get(path=path)
573
- return self.handle_response(
574
- resp,
575
- "Docker credentials fetched successfully",
576
- "Could not fetch the docker credentials",
820
+
821
+ payload = {}
822
+
823
+ def rest_fallback():
824
+ path = "/v1/compute/get_docker_hub_credentials"
825
+ resp = self.rpc.get(path=path)
826
+ return self.handle_response(
827
+ resp,
828
+ "Docker credentials fetched successfully",
829
+ "Could not fetch the docker credentials",
830
+ )
831
+
832
+ return self._hybrid_request(
833
+ api="get_docker_hub_credentials",
834
+ payload=payload,
835
+ request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
836
+ response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
837
+ rest_fallback_func=rest_fallback
577
838
  )
578
839
 
579
840
  @log_errors(log_error=True)
580
841
  def get_open_ports_config(self):
581
- """Get open ports configuration.
842
+ """Get open ports configuration using Kafka (with REST fallback).
582
843
 
583
844
  Returns:
584
845
  Tuple of (data, error, message) from API response
585
846
  """
586
- path = f"/v1/scaling/get_open_ports/{self.instance_id}"
587
- resp = self.rpc.get(path=path)
588
- return self.handle_response(
589
- resp,
590
- "Open ports config fetched successfully",
591
- "Could not fetch the open ports config",
847
+ payload = {"instance_id": self.instance_id}
848
+
849
+ def rest_fallback():
850
+ path = f"/v1/compute/get_open_ports/{self.instance_id}"
851
+ resp = self.rpc.get(path=path)
852
+ return self.handle_response(
853
+ resp,
854
+ "Open ports config fetched successfully",
855
+ "Could not fetch the open ports config",
856
+ )
857
+
858
+ return self._hybrid_request(
859
+ api="get_open_ports_config",
860
+ payload=payload,
861
+ request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
862
+ response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
863
+ rest_fallback_func=rest_fallback
592
864
  )
593
865
 
594
866
  @log_errors(default_return=None, log_error=True)
@@ -639,7 +911,7 @@ class Scaling:
639
911
 
640
912
  @log_errors(log_error=True)
641
913
  def get_model_secret_keys(self, secret_name):
642
- """Get model secret keys.
914
+ """Get model secret keys using Kafka (with REST fallback).
643
915
 
644
916
  Args:
645
917
  secret_name: Name of the secret
@@ -647,12 +919,23 @@ class Scaling:
647
919
  Returns:
648
920
  Tuple of (data, error, message) from API response
649
921
  """
650
- path = f"/v1/compute/get_models_secret_keys?secret_name={secret_name}"
651
- resp = self.rpc.get(path=path)
652
- return self.handle_response(
653
- resp,
654
- "Secret keys fetched successfully",
655
- "Could not fetch the secret keys",
922
+ payload = {"secret_name": secret_name}
923
+
924
+ def rest_fallback():
925
+ path = f"/v1/compute/get_models_secret_keys?secret_name={secret_name}"
926
+ resp = self.rpc.get(path=path)
927
+ return self.handle_response(
928
+ resp,
929
+ "Secret keys fetched successfully",
930
+ "Could not fetch the secret keys",
931
+ )
932
+
933
+ return self._hybrid_request(
934
+ api="get_model_secret_keys",
935
+ payload=payload,
936
+ request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
937
+ response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
938
+ rest_fallback_func=rest_fallback
656
939
  )
657
940
 
658
941
  @log_errors(log_error=True)
@@ -753,7 +1036,7 @@ class Scaling:
753
1036
 
754
1037
  @log_errors(log_error=True)
755
1038
  def stop_account_compute(self, account_number, alias):
756
- """Stop a compute instance for an account.
1039
+ """Stop a compute instance for an account using Kafka (with REST fallback).
757
1040
 
758
1041
  Args:
759
1042
  account_number: Account number
@@ -762,17 +1045,33 @@ class Scaling:
762
1045
  Returns:
763
1046
  Tuple of (data, error, message) from API response
764
1047
  """
765
- path = f"/v1/scaling/stop_account_compute/{account_number}/{alias}"
766
- resp = self.rpc.put(path=path)
767
- return self.handle_response(
768
- resp,
769
- "Compute instance stopped successfully",
770
- "Could not stop the compute instance",
1048
+ logging.info("Stopping account compute for %s/%s", account_number, alias)
1049
+
1050
+ payload = {
1051
+ "account_number": account_number,
1052
+ "alias": alias,
1053
+ }
1054
+
1055
+ def rest_fallback():
1056
+ path = f"/v1/compute/stop_account_compute/{account_number}/{alias}"
1057
+ resp = self.rpc.put(path=path)
1058
+ return self.handle_response(
1059
+ resp,
1060
+ "Compute instance stopped successfully",
1061
+ "Could not stop the compute instance",
1062
+ )
1063
+
1064
+ return self._hybrid_request(
1065
+ api="stop_account_compute",
1066
+ payload=payload,
1067
+ request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
1068
+ response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
1069
+ rest_fallback_func=rest_fallback
771
1070
  )
772
1071
 
773
1072
  @log_errors(log_error=True)
774
1073
  def restart_account_compute(self, account_number, alias):
775
- """Restart a compute instance for an account.
1074
+ """Restart a compute instance for an account using Kafka (with REST fallback).
776
1075
 
777
1076
  Args:
778
1077
  account_number: Account number
@@ -781,12 +1080,28 @@ class Scaling:
781
1080
  Returns:
782
1081
  Tuple of (data, error, message) from API response
783
1082
  """
784
- path = f"/v1/scaling/restart_account_compute/{account_number}/{alias}"
785
- resp = self.rpc.put(path=path)
786
- return self.handle_response(
787
- resp,
788
- "Compute instance restarted successfully",
789
- "Could not restart the compute instance",
1083
+ logging.info("Restarting account compute for %s/%s", account_number, alias)
1084
+
1085
+ payload = {
1086
+ "account_number": account_number,
1087
+ "alias": alias,
1088
+ }
1089
+
1090
+ def rest_fallback():
1091
+ path = f"/v1/compute/restart_account_compute/{account_number}/{alias}"
1092
+ resp = self.rpc.put(path=path)
1093
+ return self.handle_response(
1094
+ resp,
1095
+ "Compute instance restarted successfully",
1096
+ "Could not restart the compute instance",
1097
+ )
1098
+
1099
+ return self._hybrid_request(
1100
+ api="restart_account_compute",
1101
+ payload=payload,
1102
+ request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
1103
+ response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
1104
+ rest_fallback_func=rest_fallback
790
1105
  )
791
1106
 
792
1107
  @log_errors(log_error=True)
@@ -810,37 +1125,59 @@ class Scaling:
810
1125
 
811
1126
  @log_errors(log_error=True)
812
1127
  def get_all_instances_type(self):
813
- """Get all instance types.
1128
+ """Get all instance types using Kafka (with REST fallback).
814
1129
 
815
1130
  Returns:
816
1131
  Tuple of (data, error, message) from API response
817
1132
  """
818
- path = "/v1/scaling/get_all_instances_type"
819
- resp = self.rpc.get(path=path)
820
- return self.handle_response(
821
- resp,
822
- "All instance types fetched successfully",
823
- "Could not fetch the instance types",
1133
+ payload = {}
1134
+
1135
+ def rest_fallback():
1136
+ path = "/v1/compute/get_all_instances_type"
1137
+ resp = self.rpc.get(path=path)
1138
+ return self.handle_response(
1139
+ resp,
1140
+ "All instance types fetched successfully",
1141
+ "Could not fetch the instance types",
1142
+ )
1143
+
1144
+ return self._hybrid_request(
1145
+ api="get_all_instances_type",
1146
+ payload=payload,
1147
+ request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
1148
+ response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
1149
+ rest_fallback_func=rest_fallback
824
1150
  )
825
1151
 
826
1152
  @log_errors(log_error=True)
827
1153
  def get_compute_details(self):
828
- """Get compute instance details.
1154
+ """Get compute instance details using Kafka (with REST fallback).
829
1155
 
830
1156
  Returns:
831
1157
  Tuple of (data, error, message) from API response
832
1158
  """
833
- path = f"/v1/scaling/get_compute_details/{self.instance_id}"
834
- resp = self.rpc.get(path=path)
835
- return self.handle_response(
836
- resp,
837
- "Compute details fetched successfully",
838
- "Could not fetch the compute details",
1159
+ payload = {"instance_id": self.instance_id}
1160
+
1161
+ def rest_fallback():
1162
+ path = f"/v1/scaling/get_compute_details/{self.instance_id}"
1163
+ resp = self.rpc.get(path=path)
1164
+ return self.handle_response(
1165
+ resp,
1166
+ "Compute details fetched successfully",
1167
+ "Could not fetch the compute details",
1168
+ )
1169
+
1170
+ return self._hybrid_request(
1171
+ api="get_compute_details",
1172
+ payload=payload,
1173
+ request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
1174
+ response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
1175
+ rest_fallback_func=rest_fallback
839
1176
  )
840
1177
 
841
1178
  @log_errors(log_error=True)
842
1179
  def get_user_access_key_pair(self, user_id):
843
- """Get user access key pair.
1180
+ """Get user access key pair using Kafka (with REST fallback).
844
1181
 
845
1182
  Args:
846
1183
  user_id: ID of the user
@@ -848,17 +1185,28 @@ class Scaling:
848
1185
  Returns:
849
1186
  Tuple of (data, error, message) from API response
850
1187
  """
851
- path = f"/v1/compute/get_user_access_key_pair/{user_id}/{self.instance_id}"
852
- resp = self.rpc.get(path=path)
853
- return self.handle_response(
854
- resp,
855
- "User access key pair fetched successfully",
856
- "Could not fetch the user access key pair",
1188
+ payload = {"user_id": user_id, "instance_id": self.instance_id}
1189
+
1190
+ def rest_fallback():
1191
+ path = f"/v1/compute/get_user_access_key_pair/{user_id}/{self.instance_id}"
1192
+ resp = self.rpc.get(path=path)
1193
+ return self.handle_response(
1194
+ resp,
1195
+ "User access key pair fetched successfully",
1196
+ "Could not fetch the user access key pair",
1197
+ )
1198
+
1199
+ return self._hybrid_request(
1200
+ api="get_user_access_key_pair",
1201
+ payload=payload,
1202
+ request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
1203
+ response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
1204
+ rest_fallback_func=rest_fallback
857
1205
  )
858
1206
 
859
1207
  @log_errors(log_error=True)
860
1208
  def get_internal_api_key(self, action_id):
861
- """Get internal API key.
1209
+ """Get internal API key using Kafka (with REST fallback).
862
1210
 
863
1211
  Args:
864
1212
  action_id: ID of the action
@@ -866,107 +1214,22 @@ class Scaling:
866
1214
  Returns:
867
1215
  Tuple of (data, error, message) from API response
868
1216
  """
869
- path = f"/v1/actions/get_internal_api_key/{action_id}/{self.instance_id}"
870
- resp = self.rpc.get(path=path)
871
- return self.handle_response(
872
- resp,
873
- "internal keys fetched successfully",
874
- "Could not fetch internal keys",
1217
+ payload = {"action_id": action_id, "instance_id": self.instance_id}
1218
+
1219
+ def rest_fallback():
1220
+ path = f"/v1/actions/get_internal_api_key/{action_id}/{self.instance_id}"
1221
+ resp = self.rpc.get(path=path)
1222
+ return self.handle_response(
1223
+ resp,
1224
+ "internal keys fetched successfully",
1225
+ "Could not fetch internal keys",
1226
+ )
1227
+
1228
+ return self._hybrid_request(
1229
+ api="get_internal_api_key",
1230
+ payload=payload,
1231
+ request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
1232
+ response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
1233
+ rest_fallback_func=rest_fallback
875
1234
  )
876
1235
 
877
- # KAFKA TEMPORARILY DISABLED - Using REST API directly
878
- # @log_errors(log_error=True)
879
- # def handle_kafka_response(self, msg, success_message, error_message):
880
- # """
881
- # Helper to process Kafka response messages in a consistent way.
882
- # """
883
- # if msg.get("status") == "success":
884
- # data = msg.get("data")
885
- # error = None
886
- # message = success_message
887
- # logging.info(message)
888
- # else:
889
- # data = msg.get("data")
890
- # error = msg.get("error", "Unknown error")
891
- # message = error_message
892
- # logging.error("%s: %s", message, error)
893
- # return data, error, message
894
-
895
- # def _send_kafka_request(self, api, payload, request_topic, response_topic, timeout=60):
896
- # """
897
- # Helper to send a request to Kafka and wait for a response.
898
- # Returns (data, error, message, kafka_response_received) where kafka_response_received is True if a response was received (even if error), False if transport error/timeout.
899
- # """
900
- # correlation_id = str(uuid.uuid4())
901
- # request_message = {
902
- # "correlationId": correlation_id,
903
- # "api": api,
904
- # "payload": payload,
905
- # }
906
-
907
- # consumer = KafkaConsumer(
908
- # response_topic,
909
- # bootstrap_servers=self.kafka_config["bootstrap_servers"],
910
- # group_id=None,
911
- # value_deserializer=lambda m: json.loads(m.decode("utf-8")),
912
- # auto_offset_reset='latest',
913
- # enable_auto_commit=True,
914
- # )
915
-
916
- # try:
917
- # if hasattr(self.session.rpc, 'AUTH_TOKEN'):
918
- # self.session.rpc.AUTH_TOKEN.set_bearer_token()
919
- # auth_token = self.session.rpc.AUTH_TOKEN.bearer_token
920
- # auth_token = auth_token.replace("Bearer ", "")
921
- # headers = [("Authorization", bytes(f"{auth_token}", "utf-8"))]
922
- # else:
923
- # headers = None
924
- # self.kafka_producer.send(request_topic, request_message, headers=headers)
925
- # # self.kafka_producer.flush()
926
- # logging.info("Sent %s request to Kafka topic %s", api, request_topic)
927
- # except Exception as e:
928
- # logging.error("Kafka producer error: %s", e)
929
- # return None, f"Kafka producer error: {e}", "Kafka send failed", False
930
- # try:
931
- # start = time.time()
932
- # while time.time() - start < timeout:
933
- # # Poll for messages with a short timeout to avoid blocking forever
934
- # message_batch = consumer.poll(timeout_ms=1000)
935
- # if message_batch:
936
- # for topic_partition, messages in message_batch.items():
937
- # for message in messages:
938
- # print("trying to fetch message")
939
- # msg = message.value
940
- # if msg.get("correlationId") == correlation_id:
941
- # consumer.close()
942
- # # Always treat a received response as final, even if error
943
- # return self.handle_kafka_response(
944
- # msg,
945
- # f"Fetched via Kafka for {api}",
946
- # f"Kafka error response for {api}"
947
- # ) + (True,)
948
- # else:
949
- # print(f"No messages received, waiting... ({time.time() - start:.1f}s/{timeout}s)")
950
- #
951
- # consumer.close()
952
- # logging.warning("Kafka response timeout for %s after %d seconds", api, timeout)
953
- # return None, "Kafka response timeout", "Kafka response timeout", False
954
- # except Exception as e:
955
- # logging.error("Kafka consumer error: %s", e)
956
- # return None, f"Kafka consumer error: {e}", "Kafka consumer error", False
957
-
958
- # def _cache_failed_request(self, api, payload):
959
- # """Cache the failed request for retry. Here, we use a simple file cache as a placeholder."""
960
- # try:
961
- # cache_file = os.path.join(os.path.dirname(__file__), 'request_cache.json')
962
- # if os.path.exists(cache_file):
963
- # with open(cache_file, 'r') as f:
964
- # cache = json.load(f)
965
- # else:
966
- # cache = []
967
- # cache.append({"api": api, "payload": payload, "ts": time.time()})
968
- # with open(cache_file, 'w') as f:
969
- # json.dump(cache, f)
970
- # logging.info("Cached failed request for api %s", api)
971
- # except Exception as e:
972
- # logging.error("Failed to cache request: %s", e)