matrice-compute 0.1.21__py3-none-any.whl → 0.1.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,26 +3,18 @@
3
3
  import os
4
4
  import logging
5
5
  from matrice_common.utils import log_errors
6
- from kafka import KafkaProducer, KafkaConsumer
7
- import uuid
8
- import json
9
- import time
10
- import base64
11
- import threading
12
-
13
- # TODO: update /scaling to /compute
14
6
 
15
7
  class Scaling:
16
8
 
17
9
  """Class providing scaling functionality for compute instances."""
18
10
 
19
- def __init__(self, session, instance_id=None, enable_kafka=True):
11
+ def __init__(self, session, instance_id=None, enable_kafka=False):
20
12
  """Initialize Scaling instance.
21
13
 
22
14
  Args:
23
15
  session: Session object for making RPC calls
24
16
  instance_id: ID of the compute instance
25
- enable_kafka: Enable Kafka communication (default True)
17
+ enable_kafka: Deprecated parameter, kept for backward compatibility (ignored)
26
18
 
27
19
  Raises:
28
20
  Exception: If instance_id is not provided
@@ -37,92 +29,10 @@ class Scaling:
37
29
  used_ports_str = os.environ.get("USED_PORTS", "")
38
30
  self.used_ports = set(int(p) for p in used_ports_str.split(",") if p.strip())
39
31
 
40
- # Kafka configuration and initialization
41
- self.enable_kafka = enable_kafka
42
- self.kafka_producer = None
43
- self.kafka_consumer = None
44
- self.kafka_thread = None
45
- self.kafka_running = False
46
-
47
- # Maps correlation_id to threading.Event for request/response matching
48
- self.pending_requests = {}
49
- # Maps correlation_id to response data
50
- self.response_map = {}
51
- self.response_lock = threading.Lock()
52
-
53
- if self.enable_kafka:
54
- try:
55
- self.kafka_config = {
56
- "bootstrap_servers": self.get_kafka_bootstrap_servers(),
57
- "action_request_topic": "action_requests",
58
- "action_response_topic": "action_responses",
59
- "compute_request_topic": "compute_requests",
60
- "compute_response_topic": "compute_responses"
61
- }
62
-
63
- # Initialize single producer
64
- self.kafka_producer = KafkaProducer(
65
- bootstrap_servers=self.kafka_config["bootstrap_servers"],
66
- value_serializer=lambda v: json.dumps(v).encode("utf-8"),
67
- max_block_ms=5000 # Timeout if Kafka is down
68
- )
69
-
70
- # Initialize single consumer for both response topics
71
- self.kafka_consumer = KafkaConsumer(
72
- self.kafka_config["action_response_topic"],
73
- self.kafka_config["compute_response_topic"],
74
- bootstrap_servers=self.kafka_config["bootstrap_servers"],
75
- group_id=f"py_compute_{instance_id}",
76
- value_deserializer=lambda m: json.loads(m.decode("utf-8")),
77
- auto_offset_reset='latest',
78
- enable_auto_commit=True,
79
- consumer_timeout_ms=1000, # Poll timeout
80
- session_timeout_ms=60000, # Increase session timeout to 60s (default 30s)
81
- heartbeat_interval_ms=3000, # Send heartbeat every 3s
82
- max_poll_interval_ms=300000 # Max time between polls: 5 minutes
83
- )
84
-
85
- # Start background thread to handle responses
86
- self.kafka_running = True
87
- self.kafka_thread = threading.Thread(target=self._kafka_response_listener, daemon=True)
88
- self.kafka_thread.start()
89
-
90
- logging.info(f"Kafka enabled with bootstrap servers: {self.kafka_config['bootstrap_servers']}")
91
- except Exception as e:
92
- logging.warning(f"Failed to initialize Kafka, will use REST API only: {e}")
93
- self.enable_kafka = False
94
- self.kafka_producer = None
95
- self.kafka_consumer = None
96
-
97
32
  logging.info(
98
- "Initialized Scaling with instance_id: %s, Kafka enabled: %s",
99
- instance_id,
100
- self.enable_kafka
33
+ "Initialized Scaling with instance_id: %s (REST API only)",
34
+ instance_id
101
35
  )
102
-
103
-
104
-
105
- @log_errors(default_return=None, log_error=True)
106
- def get_kafka_bootstrap_servers(self):
107
- """Get Kafka bootstrap servers from API and decode base64 fields.
108
-
109
- Returns:
110
- str: Kafka bootstrap servers in format "ip:port"
111
-
112
- Raises:
113
- ValueError: If unable to fetch Kafka configuration
114
- """
115
- path = "/v1/actions/get_kafka_info"
116
- response = self.rpc.get(path=path)
117
- if not response or not response.get("success"):
118
- raise ValueError(f"Failed to fetch Kafka config: {response.get('message', 'No response')}")
119
- encoded_ip = response["data"]["ip"]
120
- encoded_port = response["data"]["port"]
121
- ip = base64.b64decode(encoded_ip).decode("utf-8")
122
- port = base64.b64decode(encoded_port).decode("utf-8")
123
- bootstrap_servers = f"{ip}:{port}"
124
- # logging.info(f"Retrieved Kafka bootstrap servers: {bootstrap_servers}")
125
- return bootstrap_servers
126
36
 
127
37
  @log_errors(default_return=(None, "Error processing response", "Response processing failed"), log_error=True)
128
38
  def handle_response(self, resp, success_message, error_message):
@@ -148,266 +58,52 @@ class Scaling:
148
58
  logging.error("%s: %s", message, error)
149
59
  return data, error, message
150
60
 
151
- def _kafka_response_listener(self):
152
- """
153
- Background thread that continuously polls for Kafka responses.
154
-
155
- This thread runs in the background and listens for responses from both
156
- action_responses and compute_responses topics. When a response is received,
157
- it matches the correlation ID to pending requests and wakes up the waiting thread.
158
- """
159
- logging.info("Kafka response listener thread started")
160
-
161
- while self.kafka_running:
162
- try:
163
- # Poll for messages with 1 second timeout
164
- message_batch = self.kafka_consumer.poll(timeout_ms=1000)
165
-
166
- if message_batch:
167
- for topic_partition, messages in message_batch.items():
168
- for message in messages:
169
- try:
170
- msg = message.value
171
- correlation_id = msg.get("correlationId")
172
-
173
- if correlation_id:
174
- with self.response_lock:
175
- if correlation_id in self.pending_requests:
176
- # Store response and signal waiting thread
177
- self.response_map[correlation_id] = msg
178
- self.pending_requests[correlation_id].set()
179
- logging.debug(f"Received Kafka response for correlation_id: {correlation_id}")
180
- else:
181
- logging.warning(f"Received Kafka message without correlationId: {msg}")
182
- except Exception as e:
183
- logging.error(f"Error processing Kafka message: {e}")
184
-
185
- except Exception as e:
186
- if self.kafka_running: # Only log if not shutting down
187
- logging.error(f"Error in Kafka response listener: {e}")
188
- time.sleep(1) # Avoid tight loop on persistent errors
189
-
190
- logging.info("Kafka response listener thread stopped")
191
-
192
- def _send_kafka_request(self, api, payload, request_topic, response_topic, timeout=5):
193
- """
194
- Send a request via Kafka and wait for response using the persistent consumer.
195
-
196
- Args:
197
- api: API name to call
198
- payload: Request payload dictionary
199
- request_topic: Kafka topic to send request to
200
- response_topic: Kafka topic to receive response from (not used, kept for signature)
201
- timeout: Timeout in seconds to wait for response
202
-
203
- Returns:
204
- Tuple of (data, error, message, kafka_success)
205
- kafka_success is True if response received, False if timeout/error
206
- """
207
- if not self.enable_kafka or not self.kafka_producer:
208
- return None, "Kafka not enabled", "Kafka not available", False
209
-
210
- correlation_id = str(uuid.uuid4())
211
- request_message = {
212
- "correlationId": correlation_id,
213
- "api": api,
214
- "payload": payload,
215
- }
216
-
217
- # Create event for this request
218
- event = threading.Event()
219
-
220
- with self.response_lock:
221
- self.pending_requests[correlation_id] = event
222
-
223
- try:
224
- # Add auth token if available
225
- headers = None
226
- if hasattr(self.session.rpc, 'AUTH_TOKEN'):
227
- self.session.rpc.AUTH_TOKEN.set_bearer_token()
228
- auth_token = self.session.rpc.AUTH_TOKEN.bearer_token
229
- auth_token = auth_token.replace("Bearer ", "")
230
- headers = [("Authorization", bytes(f"{auth_token}", "utf-8"))]
231
-
232
- # Send request
233
- self.kafka_producer.send(request_topic, request_message, headers=headers)
234
- logging.info(f"Sent Kafka request for {api} with correlation_id: {correlation_id}")
235
-
236
- # Wait for response with timeout
237
- if event.wait(timeout=timeout):
238
- # Response received
239
- with self.response_lock:
240
- response = self.response_map.pop(correlation_id, None)
241
- self.pending_requests.pop(correlation_id, None)
242
-
243
- if response:
244
- if response.get("status") == "success":
245
- data = response.get("data")
246
- logging.info(f"Kafka success for {api}")
247
- return data, None, f"Fetched via Kafka for {api}", True
248
- else:
249
- error = response.get("error", "Unknown error")
250
- logging.error(f"Kafka error response for {api}: {error}")
251
- return None, error, f"Kafka error response for {api}", True
252
- else:
253
- logging.warning(f"Kafka response received but missing data for {api}")
254
- return None, "Response missing data", "Kafka response error", False
255
- else:
256
- # Timeout
257
- with self.response_lock:
258
- self.pending_requests.pop(correlation_id, None)
259
- logging.warning(f"Kafka response timeout for {api} after {timeout} seconds")
260
- return None, "Kafka response timeout", "Kafka response timeout", False
261
-
262
- except Exception as e:
263
- # Cleanup on error
264
- with self.response_lock:
265
- self.pending_requests.pop(correlation_id, None)
266
- logging.error(f"Kafka send error for {api}: {e}")
267
- return None, f"Kafka error: {e}", "Kafka send failed", False
268
-
269
- def _hybrid_request(self, api, payload, request_topic, response_topic, rest_fallback_func):
270
- """
271
- Hybrid request method: try Kafka first, fallback to REST, cache if both fail.
272
-
273
- Args:
274
- api: API name
275
- payload: Request payload
276
- request_topic: Kafka request topic
277
- response_topic: Kafka response topic
278
- rest_fallback_func: Function to call for REST fallback (should return same format as handle_response)
279
-
280
- Returns:
281
- Tuple of (data, error, message) matching the API response pattern
282
- """
283
- # Try Kafka first
284
- if self.enable_kafka:
285
- data, error, message, kafka_success = self._send_kafka_request(
286
- api, payload, request_topic, response_topic, timeout=5
287
- )
288
-
289
- if kafka_success and error is None:
290
- # Kafka succeeded
291
- return data, error, message
292
-
293
- # Kafka returned an error response (not transport error)
294
- if kafka_success and error is not None:
295
- logging.warning(f"Kafka returned error for {api}, falling back to REST")
296
-
297
- # Kafka failed or disabled, try REST
298
- logging.info(f"Using REST API for {api}")
299
- try:
300
- rest_response = rest_fallback_func()
301
-
302
- # Return REST response (success or failure)
303
- if rest_response and len(rest_response) == 3:
304
- return rest_response
305
- else:
306
- # Unexpected REST response format
307
- logging.error(f"REST API returned unexpected format for {api}")
308
- return None, "Unexpected REST response format", "REST API error"
309
-
310
- except Exception as e:
311
- # REST failed
312
- logging.error(f"REST API failed for {api}: {e}")
313
- return None, str(e), "REST API failed"
314
-
315
- def shutdown(self):
316
- """Gracefully shutdown Kafka connections."""
317
- if self.kafka_running:
318
- logging.info("Shutting down Kafka connections...")
319
- self.kafka_running = False
320
-
321
- if self.kafka_thread:
322
- self.kafka_thread.join(timeout=5)
323
-
324
- if self.kafka_consumer:
325
- self.kafka_consumer.close()
326
-
327
- if self.kafka_producer:
328
- self.kafka_producer.close()
329
-
330
- logging.info("Kafka connections closed")
331
61
 
332
62
  @log_errors(log_error=True)
333
63
  def get_downscaled_ids(self):
334
- """Get IDs of downscaled instances using Kafka (with REST fallback).
64
+ """Get IDs of downscaled instances using REST API.
335
65
 
336
66
  Returns:
337
67
  Tuple of (data, error, message) from API response
338
68
  """
339
69
  logging.info("Getting downscaled ids for instance %s", self.instance_id)
340
-
341
- payload = {"instance_id": self.instance_id}
342
-
343
- def rest_fallback():
344
- path = f"/v1/compute/down_scaled_ids/{self.instance_id}"
345
- resp = self.rpc.get(path=path)
346
- return self.handle_response(
347
- resp,
348
- "Downscaled ids info fetched successfully",
349
- "Could not fetch the Downscaled ids info",
350
- )
351
-
352
- return self._hybrid_request(
353
- api="get_downscaled_ids",
354
- payload=payload,
355
- request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
356
- response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
357
- rest_fallback_func=rest_fallback
70
+ path = f"/v1/compute/down_scaled_ids/{self.instance_id}"
71
+ resp = self.rpc.get(path=path)
72
+ return self.handle_response(
73
+ resp,
74
+ "Downscaled ids info fetched successfully",
75
+ "Could not fetch the Downscaled ids info",
358
76
  )
359
77
 
360
78
  @log_errors(default_return=(None, "API call failed", "Failed to stop instance"), log_error=True)
361
79
  def stop_instance(self):
362
- """Stop the compute instance using Kafka (with REST fallback).
80
+ """Stop the compute instance using REST API.
363
81
 
364
82
  Returns:
365
83
  Tuple of (data, error, message) from API response
366
84
  """
367
85
  logging.info("Stopping instance %s", self.instance_id)
368
-
369
86
  payload = {
370
87
  "_idInstance": self.instance_id,
371
88
  "isForcedStop": False,
372
89
  }
373
-
374
- def rest_fallback():
375
- path = "/v1/compute/compute_instance/stop"
376
- resp = self.rpc.put(path=path, payload=payload)
377
- return self.handle_response(
378
- resp,
379
- "Instance stopped successfully",
380
- "Could not stop the instance",
381
- )
382
-
383
- return self._hybrid_request(
384
- api="stop_instance",
385
- payload=payload,
386
- request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
387
- response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
388
- rest_fallback_func=rest_fallback
90
+ path = "/v1/compute/compute_instance/stop"
91
+ resp = self.rpc.put(path=path, payload=payload)
92
+ return self.handle_response(
93
+ resp,
94
+ "Instance stopped successfully",
95
+ "Could not stop the instance",
389
96
  )
390
97
 
391
98
  @log_errors(log_error=True)
392
99
  def update_jupyter_token(self, token=""):
393
- """Update Jupyter notebook token using Kafka (with REST fallback)."""
394
- payload = {"token": token, "instance_id": self.instance_id}
395
-
396
- def rest_fallback():
397
- path = f"/v1/scaling/update_jupyter_notebook_token/{self.instance_id}"
398
- resp = self.rpc.put(path=path, payload={"token": token})
399
- return self.handle_response(
400
- resp,
401
- "Resources updated successfully",
402
- "Could not update the resources",
403
- )
404
-
405
- return self._hybrid_request(
406
- api="update_jupyter_token",
407
- payload=payload,
408
- request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
409
- response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
410
- rest_fallback_func=rest_fallback
100
+ """Update Jupyter notebook token using REST API."""
101
+ path = f"/v1/scaling/update_jupyter_notebook_token/{self.instance_id}"
102
+ resp = self.rpc.put(path=path, payload={"token": token})
103
+ return self.handle_response(
104
+ resp,
105
+ "Resources updated successfully",
106
+ "Could not update the resources",
411
107
  )
412
108
 
413
109
  @log_errors(log_error=True)
@@ -426,7 +122,7 @@ class Scaling:
426
122
  createdAt=None,
427
123
  updatedAt=None,
428
124
  ):
429
- """Update status of an action using Kafka (with REST fallback).
125
+ """Update status of an action using REST API.
430
126
 
431
127
  Args:
432
128
  service_provider: Provider of the service
@@ -466,21 +162,12 @@ class Scaling:
466
162
  "updatedAt": updatedAt,
467
163
  }
468
164
 
469
- def rest_fallback():
470
- path = "/v1/compute/update_action_status"
471
- resp = self.rpc.put(path=path, payload=payload)
472
- return self.handle_response(
473
- resp,
474
- "Action status details updated successfully",
475
- "Could not update the action status details ",
476
- )
477
-
478
- return self._hybrid_request(
479
- api="update_action_status",
480
- payload=payload,
481
- request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
482
- response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
483
- rest_fallback_func=rest_fallback
165
+ path = "/v1/compute/update_action_status"
166
+ resp = self.rpc.put(path=path, payload=payload)
167
+ return self.handle_response(
168
+ resp,
169
+ "Action status details updated successfully",
170
+ "Could not update the action status details ",
484
171
  )
485
172
 
486
173
  @log_errors(log_error=True)
@@ -493,7 +180,7 @@ class Scaling:
493
180
  status,
494
181
  status_description,
495
182
  ):
496
- """Update status of an action using Kafka (with REST fallback).
183
+ """Update status of an action using REST API.
497
184
 
498
185
  Args:
499
186
  action_record_id: ID of the action record
@@ -514,78 +201,45 @@ class Scaling:
514
201
  "statusDescription": status_description,
515
202
  }
516
203
 
517
- def rest_fallback():
518
- url = "/v1/actions"
519
- self.rpc.put(path=url, payload=payload)
520
- return None, None, "Status updated"
521
-
522
- return self._hybrid_request(
523
- api="update_action",
524
- payload=payload,
525
- request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
526
- response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
527
- rest_fallback_func=rest_fallback
528
- )
204
+ url = "/v1/actions"
205
+ self.rpc.put(path=url, payload=payload)
206
+ return None, None, "Status updated"
529
207
 
530
208
  @log_errors(log_error=True)
531
209
  def get_shutdown_details(self):
532
- """Get shutdown details for the instance using Kafka (with REST fallback).
210
+ """Get shutdown details for the instance using REST API.
533
211
 
534
212
  Returns:
535
213
  Tuple of (data, error, message) from API response
536
214
  """
537
215
  logging.info("Getting shutdown details for instance %s", self.instance_id)
538
-
539
- payload = {"instance_id": self.instance_id}
540
-
541
- def rest_fallback():
542
- path = f"/v1/compute/get_shutdown_details/{self.instance_id}"
543
- resp = self.rpc.get(path=path)
544
- return self.handle_response(
545
- resp,
546
- "Shutdown info fetched successfully",
547
- "Could not fetch the shutdown details",
548
- )
549
-
550
- return self._hybrid_request(
551
- api="get_shutdown_details",
552
- payload=payload,
553
- request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
554
- response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
555
- rest_fallback_func=rest_fallback
216
+ path = f"/v1/compute/get_shutdown_details/{self.instance_id}"
217
+ resp = self.rpc.get(path=path)
218
+ return self.handle_response(
219
+ resp,
220
+ "Shutdown info fetched successfully",
221
+ "Could not fetch the shutdown details",
556
222
  )
557
223
 
558
224
  @log_errors(log_error=True)
559
225
  def get_tasks_details(self):
560
- """Get task details for the instance using Kafka (with REST fallback).
226
+ """Get task details for the instance using REST API.
561
227
 
562
228
  Returns:
563
229
  Tuple of (data, error, message) from API response
564
230
  """
565
231
  logging.info("Getting tasks details for instance %s", self.instance_id)
566
-
567
- payload = {"instance_id": self.instance_id}
568
-
569
- def rest_fallback():
570
- path = f"/v1/actions/fetch_instance_action_details/{self.instance_id}/action_details"
571
- resp = self.rpc.get(path=path)
572
- return self.handle_response(
573
- resp,
574
- "Task details fetched successfully",
575
- "Could not fetch the task details",
576
- )
577
-
578
- return self._hybrid_request(
579
- api="get_tasks_details",
580
- payload=payload,
581
- request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
582
- response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
583
- rest_fallback_func=rest_fallback
232
+ path = f"/v1/actions/fetch_instance_action_details/{self.instance_id}/action_details"
233
+ resp = self.rpc.get(path=path)
234
+ return self.handle_response(
235
+ resp,
236
+ "Task details fetched successfully",
237
+ "Could not fetch the task details",
584
238
  )
585
239
 
586
240
  @log_errors(log_error=True)
587
241
  def get_action_details(self, action_status_id):
588
- """Get details for a specific action using Kafka (with REST fallback).
242
+ """Get details for a specific action using REST API.
589
243
 
590
244
  Args:
591
245
  action_status_id: ID of the action status to fetch
@@ -594,24 +248,12 @@ class Scaling:
594
248
  Tuple of (data, error, message) from API response
595
249
  """
596
250
  logging.info("Getting action details for action %s", action_status_id)
597
-
598
- payload = {"actionRecordId": action_status_id}
599
-
600
- def rest_fallback():
601
- path = f"/v1/actions/action/{action_status_id}/details"
602
- resp = self.rpc.get(path=path)
603
- return self.handle_response(
604
- resp,
605
- "Task details fetched successfully",
606
- "Could not fetch the task details",
607
- )
608
-
609
- return self._hybrid_request(
610
- api="get_action_details",
611
- payload=payload,
612
- request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
613
- response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
614
- rest_fallback_func=rest_fallback
251
+ path = f"/v1/actions/action/{action_status_id}/details"
252
+ resp = self.rpc.get(path=path)
253
+ return self.handle_response(
254
+ resp,
255
+ "Task details fetched successfully",
256
+ "Could not fetch the task details",
615
257
  )
616
258
 
617
259
  @log_errors(log_error=True)
@@ -626,7 +268,7 @@ class Scaling:
626
268
  service="",
627
269
  job_params=None,
628
270
  ):
629
- """Update an action using Kafka (with REST fallback).
271
+ """Update an action using REST API.
630
272
 
631
273
  Args:
632
274
  id: Action ID
@@ -657,27 +299,18 @@ class Scaling:
657
299
  "jobParams": job_params,
658
300
  }
659
301
 
660
- def rest_fallback():
661
- path = "/v1/actions"
662
- resp = self.rpc.put(path=path, payload=payload)
663
- return self.handle_response(
664
- resp,
665
- "Error logged successfully",
666
- "Could not log the errors",
667
- )
668
-
669
- return self._hybrid_request(
670
- api="update_action",
671
- payload=payload,
672
- request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
673
- response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
674
- rest_fallback_func=rest_fallback
302
+ path = "/v1/actions"
303
+ resp = self.rpc.put(path=path, payload=payload)
304
+ return self.handle_response(
305
+ resp,
306
+ "Error logged successfully",
307
+ "Could not log the errors",
675
308
  )
676
309
 
677
310
 
678
311
  @log_errors(log_error=True)
679
312
  def assign_jobs(self, is_gpu):
680
- """Assign jobs to the instance using Kafka (with REST fallback).
313
+ """Assign jobs to the instance using REST API.
681
314
 
682
315
  Args:
683
316
  is_gpu: Boolean or any value indicating if this is a GPU instance.
@@ -690,29 +323,14 @@ class Scaling:
690
323
  is_gpu_bool = bool(is_gpu)
691
324
  logging.info("Assigning jobs for instance %s (GPU: %s)", self.instance_id, is_gpu_bool)
692
325
 
693
- payload = {
694
- "instanceID": self.instance_id,
695
- "isGPUInstance": is_gpu_bool,
696
- }
697
-
698
- # Define REST fallback function
699
- def rest_fallback():
700
- is_gpu_str = str(is_gpu_bool).lower()
701
- path = f"/v1/actions/assign_jobs/{is_gpu_str}/{self.instance_id}"
702
- resp = self.rpc.get(path=path)
703
- return self.handle_response(
704
- resp,
705
- "Pinged successfully",
706
- "Could not ping the scaling jobs",
707
- )
708
-
709
- # Use hybrid approach: Kafka first, REST fallback, cache if both fail
710
- return self._hybrid_request(
711
- api="assign_jobs",
712
- payload=payload,
713
- request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
714
- response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
715
- rest_fallback_func=rest_fallback
326
+ # Use REST API directly
327
+ is_gpu_str = str(is_gpu_bool).lower()
328
+ path = f"/v1/actions/assign_jobs/{is_gpu_str}/{self.instance_id}"
329
+ resp = self.rpc.get(path=path)
330
+ return self.handle_response(
331
+ resp,
332
+ "Pinged successfully",
333
+ "Could not ping the scaling jobs",
716
334
  )
717
335
 
718
336
 
@@ -724,7 +342,7 @@ class Scaling:
724
342
  availableMemory=0,
725
343
  availableGPUMemory=0,
726
344
  ):
727
- """Update available resources for the instance using Kafka (with REST fallback).
345
+ """Update available resources for the instance using REST API.
728
346
 
729
347
  Args:
730
348
  availableCPU: Available CPU resources
@@ -744,28 +362,17 @@ class Scaling:
744
362
  "availableGPU": availableGPU,
745
363
  }
746
364
 
747
- # Define REST fallback function
748
- def rest_fallback():
749
- path = f"/v1/compute/update_available_resources/{self.instance_id}"
750
- resp = self.rpc.put(path=path, payload=payload)
751
- return self.handle_response(
752
- resp,
753
- "Resources updated successfully",
754
- "Could not update the resources",
755
- )
756
-
757
- # Use hybrid approach: Kafka first, REST fallback, cache if both fail
758
- return self._hybrid_request(
759
- api="update_available_resources",
760
- payload=payload,
761
- request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
762
- response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
763
- rest_fallback_func=rest_fallback
365
+ path = f"/v1/compute/update_available_resources/{self.instance_id}"
366
+ resp = self.rpc.put(path=path, payload=payload)
367
+ return self.handle_response(
368
+ resp,
369
+ "Resources updated successfully",
370
+ "Could not update the resources",
764
371
  )
765
372
 
766
373
  @log_errors(log_error=True)
767
374
  def update_action_docker_logs(self, action_record_id, log_content):
768
- """Update docker logs for an action using Kafka (with REST fallback).
375
+ """Update docker logs for an action using REST API.
769
376
 
770
377
  Args:
771
378
  action_record_id: ID of the action record
@@ -781,75 +388,43 @@ class Scaling:
781
388
  "logContent": log_content,
782
389
  }
783
390
 
784
- def rest_fallback():
785
- path = "/v1/actions/update_action_docker_logs"
786
- resp = self.rpc.put(path=path, payload=payload)
787
- return self.handle_response(
788
- resp,
789
- "Docker logs updated successfully",
790
- "Could not update the docker logs",
791
- )
792
-
793
- return self._hybrid_request(
794
- api="update_action_docker_logs",
795
- payload=payload,
796
- request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
797
- response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
798
- rest_fallback_func=rest_fallback
391
+ path = "/v1/actions/update_action_docker_logs"
392
+ resp = self.rpc.put(path=path, payload=payload)
393
+ return self.handle_response(
394
+ resp,
395
+ "Docker logs updated successfully",
396
+ "Could not update the docker logs",
799
397
  )
800
398
 
801
399
  @log_errors(log_error=True)
802
400
  def get_docker_hub_credentials(self):
803
- """Get Docker Hub credentials using Kafka (with REST fallback).
401
+ """Get Docker Hub credentials using REST API.
804
402
 
805
403
  Returns:
806
404
  Tuple of (data, error, message) from API response
807
405
  """
808
406
  logging.info("Getting docker credentials")
809
-
810
- payload = {}
811
-
812
- def rest_fallback():
813
- path = "/v1/compute/get_docker_hub_credentials"
814
- resp = self.rpc.get(path=path)
815
- return self.handle_response(
816
- resp,
817
- "Docker credentials fetched successfully",
818
- "Could not fetch the docker credentials",
819
- )
820
-
821
- return self._hybrid_request(
822
- api="get_docker_hub_credentials",
823
- payload=payload,
824
- request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
825
- response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
826
- rest_fallback_func=rest_fallback
407
+ path = "/v1/compute/get_docker_hub_credentials"
408
+ resp = self.rpc.get(path=path)
409
+ return self.handle_response(
410
+ resp,
411
+ "Docker credentials fetched successfully",
412
+ "Could not fetch the docker credentials",
827
413
  )
828
414
 
829
415
  @log_errors(log_error=True)
830
416
  def get_open_ports_config(self):
831
- """Get open ports configuration using Kafka (with REST fallback).
417
+ """Get open ports configuration using REST API.
832
418
 
833
419
  Returns:
834
420
  Tuple of (data, error, message) from API response
835
421
  """
836
- payload = {"instance_id": self.instance_id}
837
-
838
- def rest_fallback():
839
- path = f"/v1/compute/get_open_ports/{self.instance_id}"
840
- resp = self.rpc.get(path=path)
841
- return self.handle_response(
842
- resp,
843
- "Open ports config fetched successfully",
844
- "Could not fetch the open ports config",
845
- )
846
-
847
- return self._hybrid_request(
848
- api="get_open_ports_config",
849
- payload=payload,
850
- request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
851
- response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
852
- rest_fallback_func=rest_fallback
422
+ path = f"/v1/compute/get_open_ports/{self.instance_id}"
423
+ resp = self.rpc.get(path=path)
424
+ return self.handle_response(
425
+ resp,
426
+ "Open ports config fetched successfully",
427
+ "Could not fetch the open ports config",
853
428
  )
854
429
 
855
430
  @log_errors(default_return=None, log_error=True)
@@ -900,7 +475,7 @@ class Scaling:
900
475
 
901
476
  @log_errors(log_error=True)
902
477
  def get_model_secret_keys(self, secret_name):
903
- """Get model secret keys using Kafka (with REST fallback).
478
+ """Get model secret keys using REST API.
904
479
 
905
480
  Args:
906
481
  secret_name: Name of the secret
@@ -908,23 +483,12 @@ class Scaling:
908
483
  Returns:
909
484
  Tuple of (data, error, message) from API response
910
485
  """
911
- payload = {"secret_name": secret_name}
912
-
913
- def rest_fallback():
914
- path = f"/v1/compute/get_models_secret_keys?secret_name={secret_name}"
915
- resp = self.rpc.get(path=path)
916
- return self.handle_response(
917
- resp,
918
- "Secret keys fetched successfully",
919
- "Could not fetch the secret keys",
920
- )
921
-
922
- return self._hybrid_request(
923
- api="get_model_secret_keys",
924
- payload=payload,
925
- request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
926
- response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
927
- rest_fallback_func=rest_fallback
486
+ path = f"/v1/compute/get_models_secret_keys?secret_name={secret_name}"
487
+ resp = self.rpc.get(path=path)
488
+ return self.handle_response(
489
+ resp,
490
+ "Secret keys fetched successfully",
491
+ "Could not fetch the secret keys",
928
492
  )
929
493
 
930
494
  @log_errors(log_error=True)
@@ -1025,7 +589,7 @@ class Scaling:
1025
589
 
1026
590
  @log_errors(log_error=True)
1027
591
  def stop_account_compute(self, account_number, alias):
1028
- """Stop a compute instance for an account using Kafka (with REST fallback).
592
+ """Stop a compute instance for an account using REST API.
1029
593
 
1030
594
  Args:
1031
595
  account_number: Account number
@@ -1035,32 +599,17 @@ class Scaling:
1035
599
  Tuple of (data, error, message) from API response
1036
600
  """
1037
601
  logging.info("Stopping account compute for %s/%s", account_number, alias)
1038
-
1039
- payload = {
1040
- "account_number": account_number,
1041
- "alias": alias,
1042
- }
1043
-
1044
- def rest_fallback():
1045
- path = f"/v1/compute/stop_account_compute/{account_number}/{alias}"
1046
- resp = self.rpc.put(path=path)
1047
- return self.handle_response(
1048
- resp,
1049
- "Compute instance stopped successfully",
1050
- "Could not stop the compute instance",
1051
- )
1052
-
1053
- return self._hybrid_request(
1054
- api="stop_account_compute",
1055
- payload=payload,
1056
- request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
1057
- response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
1058
- rest_fallback_func=rest_fallback
602
+ path = f"/v1/compute/stop_account_compute/{account_number}/{alias}"
603
+ resp = self.rpc.put(path=path)
604
+ return self.handle_response(
605
+ resp,
606
+ "Compute instance stopped successfully",
607
+ "Could not stop the compute instance",
1059
608
  )
1060
609
 
1061
610
  @log_errors(log_error=True)
1062
611
  def restart_account_compute(self, account_number, alias):
1063
- """Restart a compute instance for an account using Kafka (with REST fallback).
612
+ """Restart a compute instance for an account using REST API.
1064
613
 
1065
614
  Args:
1066
615
  account_number: Account number
@@ -1070,27 +619,12 @@ class Scaling:
1070
619
  Tuple of (data, error, message) from API response
1071
620
  """
1072
621
  logging.info("Restarting account compute for %s/%s", account_number, alias)
1073
-
1074
- payload = {
1075
- "account_number": account_number,
1076
- "alias": alias,
1077
- }
1078
-
1079
- def rest_fallback():
1080
- path = f"/v1/compute/restart_account_compute/{account_number}/{alias}"
1081
- resp = self.rpc.put(path=path)
1082
- return self.handle_response(
1083
- resp,
1084
- "Compute instance restarted successfully",
1085
- "Could not restart the compute instance",
1086
- )
1087
-
1088
- return self._hybrid_request(
1089
- api="restart_account_compute",
1090
- payload=payload,
1091
- request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
1092
- response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
1093
- rest_fallback_func=rest_fallback
622
+ path = f"/v1/compute/restart_account_compute/{account_number}/{alias}"
623
+ resp = self.rpc.put(path=path)
624
+ return self.handle_response(
625
+ resp,
626
+ "Compute instance restarted successfully",
627
+ "Could not restart the compute instance",
1094
628
  )
1095
629
 
1096
630
  @log_errors(log_error=True)
@@ -1114,59 +648,37 @@ class Scaling:
1114
648
 
1115
649
  @log_errors(log_error=True)
1116
650
  def get_all_instances_type(self):
1117
- """Get all instance types using Kafka (with REST fallback).
651
+ """Get all instance types using REST API.
1118
652
 
1119
653
  Returns:
1120
654
  Tuple of (data, error, message) from API response
1121
655
  """
1122
- payload = {}
1123
-
1124
- def rest_fallback():
1125
- path = "/v1/compute/get_all_instances_type"
1126
- resp = self.rpc.get(path=path)
1127
- return self.handle_response(
1128
- resp,
1129
- "All instance types fetched successfully",
1130
- "Could not fetch the instance types",
1131
- )
1132
-
1133
- return self._hybrid_request(
1134
- api="get_all_instances_type",
1135
- payload=payload,
1136
- request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
1137
- response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
1138
- rest_fallback_func=rest_fallback
656
+ path = "/v1/compute/get_all_instances_type"
657
+ resp = self.rpc.get(path=path)
658
+ return self.handle_response(
659
+ resp,
660
+ "All instance types fetched successfully",
661
+ "Could not fetch the instance types",
1139
662
  )
1140
663
 
1141
664
  @log_errors(log_error=True)
1142
665
  def get_compute_details(self):
1143
- """Get compute instance details using Kafka (with REST fallback).
666
+ """Get compute instance details using REST API.
1144
667
 
1145
668
  Returns:
1146
669
  Tuple of (data, error, message) from API response
1147
670
  """
1148
- payload = {"instance_id": self.instance_id}
1149
-
1150
- def rest_fallback():
1151
- path = f"/v1/scaling/get_compute_details/{self.instance_id}"
1152
- resp = self.rpc.get(path=path)
1153
- return self.handle_response(
1154
- resp,
1155
- "Compute details fetched successfully",
1156
- "Could not fetch the compute details",
1157
- )
1158
-
1159
- return self._hybrid_request(
1160
- api="get_compute_details",
1161
- payload=payload,
1162
- request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
1163
- response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
1164
- rest_fallback_func=rest_fallback
671
+ path = f"/v1/scaling/get_compute_details/{self.instance_id}"
672
+ resp = self.rpc.get(path=path)
673
+ return self.handle_response(
674
+ resp,
675
+ "Compute details fetched successfully",
676
+ "Could not fetch the compute details",
1165
677
  )
1166
678
 
1167
679
  @log_errors(log_error=True)
1168
680
  def get_user_access_key_pair(self, user_id):
1169
- """Get user access key pair using Kafka (with REST fallback).
681
+ """Get user access key pair using REST API.
1170
682
 
1171
683
  Args:
1172
684
  user_id: ID of the user
@@ -1174,28 +686,17 @@ class Scaling:
1174
686
  Returns:
1175
687
  Tuple of (data, error, message) from API response
1176
688
  """
1177
- payload = {"user_id": user_id, "instance_id": self.instance_id}
1178
-
1179
- def rest_fallback():
1180
- path = f"/v1/compute/get_user_access_key_pair/{user_id}/{self.instance_id}"
1181
- resp = self.rpc.get(path=path)
1182
- return self.handle_response(
1183
- resp,
1184
- "User access key pair fetched successfully",
1185
- "Could not fetch the user access key pair",
1186
- )
1187
-
1188
- return self._hybrid_request(
1189
- api="get_user_access_key_pair",
1190
- payload=payload,
1191
- request_topic=self.kafka_config["compute_request_topic"] if self.enable_kafka else None,
1192
- response_topic=self.kafka_config["compute_response_topic"] if self.enable_kafka else None,
1193
- rest_fallback_func=rest_fallback
689
+ path = f"/v1/compute/get_user_access_key_pair/{user_id}/{self.instance_id}"
690
+ resp = self.rpc.get(path=path)
691
+ return self.handle_response(
692
+ resp,
693
+ "User access key pair fetched successfully",
694
+ "Could not fetch the user access key pair",
1194
695
  )
1195
696
 
1196
697
  @log_errors(log_error=True)
1197
698
  def get_internal_api_key(self, action_id):
1198
- """Get internal API key using Kafka (with REST fallback).
699
+ """Get internal API key using REST API.
1199
700
 
1200
701
  Args:
1201
702
  action_id: ID of the action
@@ -1203,22 +704,11 @@ class Scaling:
1203
704
  Returns:
1204
705
  Tuple of (data, error, message) from API response
1205
706
  """
1206
- payload = {"action_id": action_id, "instance_id": self.instance_id}
1207
-
1208
- def rest_fallback():
1209
- path = f"/v1/actions/get_internal_api_key/{action_id}/{self.instance_id}"
1210
- resp = self.rpc.get(path=path)
1211
- return self.handle_response(
1212
- resp,
1213
- "internal keys fetched successfully",
1214
- "Could not fetch internal keys",
1215
- )
1216
-
1217
- return self._hybrid_request(
1218
- api="get_internal_api_key",
1219
- payload=payload,
1220
- request_topic=self.kafka_config["action_request_topic"] if self.enable_kafka else None,
1221
- response_topic=self.kafka_config["action_response_topic"] if self.enable_kafka else None,
1222
- rest_fallback_func=rest_fallback
707
+ path = f"/v1/actions/get_internal_api_key/{action_id}/{self.instance_id}"
708
+ resp = self.rpc.get(path=path)
709
+ return self.handle_response(
710
+ resp,
711
+ "internal keys fetched successfully",
712
+ "Could not fetch internal keys",
1223
713
  )
1224
714
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: matrice_compute
3
- Version: 0.1.21
3
+ Version: 0.1.23
4
4
  Summary: Common server utilities for Matrice.ai services
5
5
  Author-email: "Matrice.ai" <dipendra@matrice.ai>
6
6
  License-Expression: MIT
@@ -7,11 +7,11 @@ matrice_compute/instance_utils.py,sha256=tCI_A3L5iohw62acmlXuOJns0DjIkvwN4znlUAI
7
7
  matrice_compute/prechecks.py,sha256=W9YmNF3RcLhOf4U8WBlExvFqDw1aGWSNTlJtA73lbDQ,17196
8
8
  matrice_compute/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  matrice_compute/resources_tracker.py,sha256=pkdt0aVKx_TpY_Sq---73w9INkDffZZe3mZGlp1EftE,22573
10
- matrice_compute/scaling.py,sha256=RBqfhButiocWueu-OeUbo8KquYs3BpYi2GiksFM9H10,45998
10
+ matrice_compute/scaling.py,sha256=CeT_lxJNkjJamRETG1lWaOtdSr5ySmcaMcqt7-lFRbo,23731
11
11
  matrice_compute/shutdown_manager.py,sha256=0MYV_AqygqR9NEntYf7atUC-PbWXyNkm1f-8c2aizgA,13234
12
12
  matrice_compute/task_utils.py,sha256=3qIutiQdYPyGRxH9ZwLbqdg8sZcnp6jp08pszWCRFl0,2820
13
- matrice_compute-0.1.21.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
14
- matrice_compute-0.1.21.dist-info/METADATA,sha256=_2gHnRw1cg9bwDjtSJ2_Dkb7XzTfrrQyvMWdFNB1RE8,1038
15
- matrice_compute-0.1.21.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
16
- matrice_compute-0.1.21.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
17
- matrice_compute-0.1.21.dist-info/RECORD,,
13
+ matrice_compute-0.1.23.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
14
+ matrice_compute-0.1.23.dist-info/METADATA,sha256=7FCjLIs4y-5IfN9P8FRdcSbIZhPbeOC8Cg9ZSCUWr6o,1038
15
+ matrice_compute-0.1.23.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
16
+ matrice_compute-0.1.23.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
17
+ matrice_compute-0.1.23.dist-info/RECORD,,