matrice-compute 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,20 @@
1
1
  """Module providing __init__ functionality."""
2
2
 
3
+ import subprocess
3
4
 
4
5
  from matrice_common.utils import dependencies_check
5
6
 
6
- dependencies_check(["docker", "psutil", "cryptography", "notebook", "aiohttp", "kafka-python"])
7
+ dependencies_check(
8
+ ["docker", "psutil", "cryptography", "notebook", "aiohttp", "kafka-python"]
9
+ )
10
+
11
+ subprocess.run( # Re-upgrade docker to avoid missing DOCKER_HOST connection error
12
+ ["pip", "install", "--upgrade", "docker"],
13
+ check=True,
14
+ stdout=subprocess.DEVNULL, # suppress normal output
15
+ stderr=subprocess.DEVNULL # suppress warnings/progress
16
+ )
17
+
7
18
  from matrice_compute.instance_manager import InstanceManager # noqa: E402
8
19
 
9
20
  __all__ = ["InstanceManager"]
@@ -74,7 +74,8 @@ class ActionInstance:
74
74
  "streaming_gateway": streaming_gateway_execute,
75
75
  "facial_recognition_setup": facial_recognition_setup_execute,
76
76
  "fe_fs_streaming": fe_fs_streaming_execute,
77
- "inference_ws_server": inference_ws_server_execute
77
+ "inference_ws_server": inference_ws_server_execute,
78
+ "lpr_setup": lpr_setup_execute
78
79
  }
79
80
  if self.action_type not in self.actions_map:
80
81
  raise ValueError(f"Unknown action type: {self.action_type}")
@@ -1100,6 +1101,36 @@ def facial_recognition_setup_execute(self: ActionInstance):
1100
1101
  # Docker Command run
1101
1102
  self.start(worker_cmd, "facial_recognition_setup")
1102
1103
 
1104
+ @log_errors(raise_exception=False)
1105
+ def lpr_setup_execute(self: ActionInstance):
1106
+ """
1107
+ Creates and sets up the database for the license plate server.
1108
+ """
1109
+ action_details = self.get_action_details()
1110
+
1111
+ if not action_details:
1112
+ return
1113
+ image = self.docker_container
1114
+ external_port = self.scaling.get_open_port()
1115
+
1116
+ self.setup_action_requirements(action_details)
1117
+
1118
+ # Add worker container run command
1119
+ worker_cmd = (
1120
+ f"docker run -d --pull=always "
1121
+ f"--name lpr-worker "
1122
+ f"-p {external_port}:8082 "
1123
+ f'-e ENV="{os.environ.get("ENV", "prod")}" '
1124
+ f'-e MATRICE_SECRET_ACCESS_KEY="{self.matrice_secret_access_key}" '
1125
+ f'-e MATRICE_ACCESS_KEY_ID="{self.matrice_access_key_id}" '
1126
+ f'-e ACTION_ID="{self.action_record_id}" '
1127
+ f"{image}"
1128
+ )
1129
+ print("Worker docker run command:", worker_cmd)
1130
+
1131
+ # Docker Command run
1132
+ self.start(worker_cmd, "lpr_setup")
1133
+
1103
1134
  @log_errors(raise_exception=False)
1104
1135
  def inference_ws_server_execute(self: ActionInstance):
1105
1136
  """
@@ -153,7 +153,8 @@ class InstanceManager:
153
153
  key,
154
154
  value,
155
155
  ) in manual_instance_info.items():
156
- os.environ[key] = value
156
+ if value is not None:
157
+ os.environ[key] = str(value)
157
158
  if not (os.environ.get("SERVICE_PROVIDER") and os.environ.get("INSTANCE_ID")):
158
159
  raise Exception(
159
160
  "SERVICE_PROVIDER and INSTANCE_ID must be set as environment variables or passed as arguments"
@@ -3,7 +3,7 @@
3
3
  import os
4
4
  import logging
5
5
  from matrice_common.utils import log_errors
6
- from kafka import KafkaProducer, KafkaConsumer
6
+ # from kafka import KafkaProducer, KafkaConsumer
7
7
  import uuid
8
8
  import json
9
9
  import time
@@ -37,32 +37,34 @@ class Scaling:
37
37
  "Initialized Scaling with instance_id: %s",
38
38
  instance_id,
39
39
  )
40
- self.kafka_config = {
41
- "bootstrap_servers": self.get_kafka_bootstrap_servers(),
42
- "api_request_topic": "action_requests",
43
- "api_response_topic": "action_responses",
44
- "scaling_request_topic": "compute_requests",
45
- "scaling_response_topic": "compute_responses"
46
- }
47
- self.kafka_producer = KafkaProducer(
48
- bootstrap_servers=self.kafka_config["bootstrap_servers"],
49
- value_serializer=lambda v: json.dumps(v).encode("utf-8"),)
40
+ # KAFKA TEMPORARILY DISABLED - Using REST API directly
41
+ # self.kafka_config = {
42
+ # "bootstrap_servers": self.get_kafka_bootstrap_servers(),
43
+ # "api_request_topic": "action_requests",
44
+ # "api_response_topic": "action_responses",
45
+ # "scaling_request_topic": "compute_requests",
46
+ # "scaling_response_topic": "compute_responses"
47
+ # }
48
+ # self.kafka_producer = KafkaProducer(
49
+ # bootstrap_servers=self.kafka_config["bootstrap_servers"],
50
+ # value_serializer=lambda v: json.dumps(v).encode("utf-8"),)
50
51
 
51
52
 
52
53
 
53
- @log_errors(default_return=(None, "Error creating Kafka producer", "Kafka producer creation failed"), log_error=True)
54
- def get_kafka_bootstrap_servers(self):
55
- """Get Kafka bootstrap servers from API and decode base64 fields."""
56
- path = "/v1/actions/get_kafka_info"
57
- response = self.rpc.get(path=path)
58
- if not response or not response.get("success"):
59
- raise ValueError(f"Failed to fetch Kafka config: {response.get('message', 'No response')}")
60
- encoded_ip = response["data"]["ip"]
61
- encoded_port = response["data"]["port"]
62
- ip = base64.b64decode(encoded_ip).decode("utf-8")
63
- port = base64.b64decode(encoded_port).decode("utf-8")
64
- bootstrap_servers = f"{ip}:{port}"
65
- return bootstrap_servers
54
+ # KAFKA TEMPORARILY DISABLED - Using REST API directly
55
+ # @log_errors(default_return=(None, "Error creating Kafka producer", "Kafka producer creation failed"), log_error=True)
56
+ # def get_kafka_bootstrap_servers(self):
57
+ # """Get Kafka bootstrap servers from API and decode base64 fields."""
58
+ # path = "/v1/actions/get_kafka_info"
59
+ # response = self.rpc.get(path=path)
60
+ # if not response or not response.get("success"):
61
+ # raise ValueError(f"Failed to fetch Kafka config: {response.get('message', 'No response')}")
62
+ # encoded_ip = response["data"]["ip"]
63
+ # encoded_port = response["data"]["port"]
64
+ # ip = base64.b64decode(encoded_ip).decode("utf-8")
65
+ # port = base64.b64decode(encoded_port).decode("utf-8")
66
+ # bootstrap_servers = f"{ip}:{port}"
67
+ # return bootstrap_servers
66
68
 
67
69
  @log_errors(default_return=(None, "Error processing response", "Response processing failed"), log_error=True)
68
70
  def handle_response(self, resp, success_message, error_message):
@@ -285,34 +287,44 @@ class Scaling:
285
287
 
286
288
  @log_errors(log_error=True)
287
289
  def get_action_details(self, action_status_id):
288
- """Get details for a specific action using Kafka, fallback to REST, then cache."""
290
+ """Get details for a specific action using REST API.
291
+
292
+ Args:
293
+ action_status_id: ID of the action status to fetch
294
+
295
+ Returns:
296
+ Tuple of (data, error, message) from API response
297
+ """
289
298
  logging.info("Getting action details for action %s", action_status_id)
290
- api = "get_action_details"
291
- payload = {"actionRecordId": action_status_id}
292
- # Try Kafka first
293
- data, error, message, kafka_response_received = self._send_kafka_request(
294
- api=api,
295
- payload=payload,
296
- request_topic=self.kafka_config["api_request_topic"],
297
- response_topic=self.kafka_config["api_response_topic"],
298
- timeout=60
299
- )
300
- if kafka_response_received:
301
- return data, error, message
302
-
303
- # Only if Kafka transport failed or timed out, try REST
299
+ # KAFKA TEMPORARILY DISABLED - Using REST API directly
300
+ # api = "get_action_details"
301
+ # payload = {"actionRecordId": action_status_id}
302
+ # data, error, message, kafka_response_received = self._send_kafka_request(
303
+ # api=api,
304
+ # payload=payload,
305
+ # request_topic=self.kafka_config["api_request_topic"],
306
+ # response_topic=self.kafka_config["api_response_topic"],
307
+ # timeout=60
308
+ # )
309
+ # # Check if Kafka response was received and if it's an error, log and fallback to REST API
310
+ # if kafka_response_received:
311
+ # if error:
312
+ # logging.warning("Kafka returned error for get_action_details: %s. Falling back to REST API.", error)
313
+ # else:
314
+ # return data, error, message
315
+
316
+ # Using REST API directly
304
317
  try:
305
318
  path = f"/v1/actions/action/{action_status_id}/details"
306
319
  resp = self.rpc.get(path=path)
307
320
  return self.handle_response(
308
321
  resp,
309
- "Task details fetched successfully (REST fallback)",
310
- "Could not fetch the task details (REST fallback)",
322
+ "Task details fetched successfully",
323
+ "Could not fetch the task details",
311
324
  )
312
325
  except Exception as e:
313
- logging.error("REST fallback failed: %s", e)
314
- self._cache_failed_request(api, payload)
315
- return None, f"Failed via Kafka and REST: {e}", "Cached for retry"
326
+ logging.error("REST API failed (get_action_details): %s", e)
327
+ return None, f"Failed via REST: {e}", "REST API failed"
316
328
 
317
329
 
318
330
  @log_errors(log_error=True)
@@ -327,11 +339,26 @@ class Scaling:
327
339
  service="",
328
340
  job_params=None,
329
341
  ):
330
- """Update an action using Kafka, fallback to REST, then cache."""
342
+ """Update an action using REST API.
343
+
344
+ Args:
345
+ id: Action ID
346
+ step_code: Step code
347
+ action_type: Type of action
348
+ status: Status of the action
349
+ sub_action: Sub-action details
350
+ status_description: Description of the status
351
+ service: Service name
352
+ job_params: Job parameters dictionary
353
+
354
+ Returns:
355
+ Tuple of (data, error, message) from API response
356
+ """
331
357
  if job_params is None:
332
358
  job_params = {}
333
359
  logging.info("Updating action %s", id)
334
- api = "update_action"
360
+ # KAFKA TEMPORARILY DISABLED - Using REST API directly
361
+ # api = "update_action"
335
362
  payload = {
336
363
  "_id": id,
337
364
  "stepCode": step_code,
@@ -342,63 +369,85 @@ class Scaling:
342
369
  "serviceName": service,
343
370
  "jobParams": job_params,
344
371
  }
345
- data, error, message, kafka_response_received = self._send_kafka_request(
346
- api=api,
347
- payload=payload,
348
- request_topic=self.kafka_config["api_request_topic"],
349
- response_topic=self.kafka_config["api_response_topic"],
350
- timeout=60
351
- )
352
- if kafka_response_received:
353
- return data, error, message
372
+ # data, error, message, kafka_response_received = self._send_kafka_request(
373
+ # api=api,
374
+ # payload=payload,
375
+ # request_topic=self.kafka_config["api_request_topic"],
376
+ # response_topic=self.kafka_config["api_response_topic"],
377
+ # timeout=60
378
+ # )
379
+ # # Check if Kafka response was received and if it's an error, log and fallback to REST API
380
+ # if kafka_response_received:
381
+ # if error:
382
+ # logging.warning("Kafka returned error for update_action: %s. Falling back to REST API.", error)
383
+ # else:
384
+ # return data, error, message
385
+
386
+ # Using REST API directly
354
387
  try:
355
388
  path = "/v1/actions"
356
389
  resp = self.rpc.put(path=path, payload=payload)
357
390
  return self.handle_response(
358
391
  resp,
359
- "Error logged successfully (REST fallback)",
360
- "Could not log the errors (REST fallback)",
392
+ "Error logged successfully",
393
+ "Could not log the errors",
361
394
  )
362
395
  except Exception as e:
363
- logging.error("REST fallback failed (update_action): %s", e)
364
- self._cache_failed_request(api, payload)
365
- return None, f"Failed via Kafka and REST: {e}", "Cached for retry"
396
+ logging.error("REST API failed (update_action): %s", e)
397
+ return None, f"Failed via REST: {e}", "REST API failed"
366
398
 
367
399
 
368
400
  @log_errors(log_error=True)
369
401
  def assign_jobs(self, is_gpu):
370
- """Assign jobs to the instance using Kafka, fallback to REST, then cache."""
371
- logging.info("Assigning jobs for instance %s (GPU: %s)", self.instance_id, is_gpu)
372
- api = "assign_jobs"
373
- payload = {
374
- "instanceID": self.instance_id,
375
- "isGPUInstance": is_gpu,
376
- }
377
-
378
- data, error, message, kafka_response_received = self._send_kafka_request(
379
- api=api,
380
- payload=payload,
381
- request_topic=self.kafka_config["api_request_topic"],
382
- response_topic=self.kafka_config["api_response_topic"],
383
- timeout=60
384
- )
385
-
386
- if kafka_response_received:
387
- return data, error, message
388
-
389
- # Fallback to REST
402
+ """Assign jobs to the instance using REST API.
403
+
404
+ Args:
405
+ is_gpu: Boolean or any value indicating if this is a GPU instance.
406
+ Will be converted to proper boolean.
407
+
408
+ Returns:
409
+ Tuple of (data, error, message) from API response
410
+ """
411
+ # Convert is_gpu to proper boolean
412
+ is_gpu_bool = bool(is_gpu)
413
+ logging.info("Assigning jobs for instance %s (GPU: %s)", self.instance_id, is_gpu_bool)
414
+
415
+ # KAFKA TEMPORARILY DISABLED - Using REST API directly
416
+ # api = "assign_jobs"
417
+ # payload = {
418
+ # "instanceID": self.instance_id,
419
+ # "isGPUInstance": is_gpu_bool,
420
+ # }
421
+
422
+ # data, error, message, kafka_response_received = self._send_kafka_request(
423
+ # api=api,
424
+ # payload=payload,
425
+ # request_topic=self.kafka_config["api_request_topic"],
426
+ # response_topic=self.kafka_config["api_response_topic"],
427
+ # timeout=60
428
+ # )
429
+
430
+ # # Check if Kafka response was received and if it's an error, log and fallback to REST API
431
+ # if kafka_response_received:
432
+ # if error:
433
+ # logging.warning("Kafka returned error for assign_jobs: %s. Falling back to REST API.", error)
434
+ # else:
435
+ # return data, error, message
436
+
437
+ # Using REST API directly
390
438
  try:
391
- path = f"/v1/actions/assign_jobs/{str(is_gpu)}/{self.instance_id}"
439
+ # Convert boolean to lowercase string for API endpoint
440
+ is_gpu_str = str(is_gpu_bool).lower()
441
+ path = f"/v1/actions/assign_jobs/{is_gpu_str}/{self.instance_id}"
392
442
  resp = self.rpc.get(path=path)
393
443
  return self.handle_response(
394
444
  resp,
395
- "Pinged successfully (REST fallback)",
396
- "Could not ping the scaling jobs (REST fallback)",
445
+ "Pinged successfully",
446
+ "Could not ping the scaling jobs",
397
447
  )
398
448
  except Exception as e:
399
- logging.error("REST fallback failed (assign_jobs): %s", e)
400
- self._cache_failed_request(api, payload)
401
- return None, f"Failed via Kafka and REST: {e}", "Cached for retry"
449
+ logging.error("REST API failed (assign_jobs): %s", e)
450
+ return None, f"Failed via REST: {e}", "REST API failed"
402
451
 
403
452
 
404
453
  @log_errors(log_error=True)
@@ -409,7 +458,17 @@ class Scaling:
409
458
  availableMemory=0,
410
459
  availableGPUMemory=0,
411
460
  ):
412
- """Update available resources for the instance using Kafka, fallback to REST, then cache."""
461
+ """Update available resources for the instance using REST API.
462
+
463
+ Args:
464
+ availableCPU: Available CPU resources
465
+ availableGPU: Available GPU resources
466
+ availableMemory: Available memory
467
+ availableGPUMemory: Available GPU memory
468
+
469
+ Returns:
470
+ Tuple of (data, error, message) from API response
471
+ """
413
472
  logging.info("Updating available resources for instance %s", self.instance_id)
414
473
  payload = {
415
474
  "instance_id": self.instance_id,
@@ -418,63 +477,84 @@ class Scaling:
418
477
  "availableGPUMemory": availableGPUMemory,
419
478
  "availableGPU": availableGPU,
420
479
  }
421
- api = "update_available_resources"
422
- correlation_id = str(uuid.uuid4())
423
-
424
-
425
- data, error, message, kafka_response_received = self._send_kafka_request(
426
- api=api,
427
- payload=payload,
428
- request_topic=self.kafka_config["scaling_request_topic"],
429
- response_topic=self.kafka_config["scaling_response_topic"],
430
- timeout=60
431
- )
432
-
433
- if kafka_response_received:
434
- return data, error, message
480
+ # KAFKA TEMPORARILY DISABLED - Using REST API directly
481
+ # api = "update_available_resources"
482
+ # correlation_id = str(uuid.uuid4())
483
+
484
+ # data, error, message, kafka_response_received = self._send_kafka_request(
485
+ # api=api,
486
+ # payload=payload,
487
+ # request_topic=self.kafka_config["scaling_request_topic"],
488
+ # response_topic=self.kafka_config["scaling_response_topic"],
489
+ # timeout=60
490
+ # )
491
+
492
+ # # Check if Kafka response was received
493
+ # # Response format: {'correlationId': 'id', 'status': 'success'/'error', 'data': ..., 'error': 'error message'}
494
+ # if kafka_response_received:
495
+ # if error:
496
+ # logging.warning("Kafka returned error for update_available_resources: %s. Falling back to REST API.", error)
497
+ # else:
498
+ # return data, error, message
499
+
500
+ # Using REST API directly
435
501
  try:
436
502
  path = f"/v1/scaling/update_available_resources/{self.instance_id}"
437
503
  resp = self.rpc.put(path=path, payload=payload)
438
504
  return self.handle_response(
439
505
  resp,
440
- "Resources updated successfully (REST fallback)",
441
- "Could not update the resources (REST fallback)",
506
+ "Resources updated successfully",
507
+ "Could not update the resources",
442
508
  )
443
509
  except Exception as e:
444
- logging.error("REST fallback failed (update_available_resources): %s", e)
445
- self._cache_failed_request(api, payload)
446
- return None, f"Failed to update available resources via Kafka and REST: {e}", "Cached for retry"
510
+ logging.error("REST API failed (update_available_resources): %s", e)
511
+ return None, f"Failed to update available resources via REST: {e}", "REST API failed"
447
512
 
448
513
  @log_errors(log_error=True)
449
514
  def update_action_docker_logs(self, action_record_id, log_content):
450
- """Update docker logs for an action using Kafka, fallback to REST, then cache."""
515
+ """Update docker logs for an action using REST API.
516
+
517
+ Args:
518
+ action_record_id: ID of the action record
519
+ log_content: Content of the logs to update
520
+
521
+ Returns:
522
+ Tuple of (data, error, message) from API response
523
+ """
451
524
  logging.info("Updating docker logs for action %s", action_record_id)
452
- api = "update_action_docker_logs"
525
+ # KAFKA TEMPORARILY DISABLED - Using REST API directly
526
+ # api = "update_action_docker_logs"
453
527
  payload = {
454
528
  "actionRecordId": action_record_id,
455
529
  "logContent": log_content,
456
-
457
530
  }
458
- data, error, message, kafka_response_received = self._send_kafka_request(
459
- api=api,
460
- payload=payload,
461
- request_topic=self.kafka_config["api_request_topic"],
462
- response_topic=self.kafka_config["api_response_topic"],
463
- timeout=60
464
- )
465
-
466
- if kafka_response_received:
467
- return data, error, message
531
+ # data, error, message, kafka_response_received = self._send_kafka_request(
532
+ # api=api,
533
+ # payload=payload,
534
+ # request_topic=self.kafka_config["api_request_topic"],
535
+ # response_topic=self.kafka_config["api_response_topic"],
536
+ # timeout=60
537
+ # )
538
+
539
+ # # Check if Kafka response was received and if it's an error, log and fallback to REST API
540
+ # if kafka_response_received:
541
+ # if error:
542
+ # logging.warning("Kafka returned error for update_action_docker_logs: %s. Falling back to REST API.", error)
543
+ # else:
544
+ # return data, error, message
545
+
546
+ # Using REST API directly
468
547
  try:
469
548
  path = "/v1/actions/update_action_docker_logs"
470
549
  resp = self.rpc.put(path=path, payload=payload)
471
550
  return self.handle_response(
472
551
  resp,
473
- "Docker logs updated successfully (REST fallback)",
474
- "Could not update the docker logs (REST fallback)",
552
+ "Docker logs updated successfully",
553
+ "Could not update the docker logs",
475
554
  )
476
555
  except Exception as e:
477
- logging.error("REST fallback failed (update_action_docker_logs): %s", e)
556
+ logging.error("REST API failed (update_action_docker_logs): %s", e)
557
+ return None, f"Failed via REST: {e}", "REST API failed"
478
558
 
479
559
 
480
560
  @log_errors(log_error=True)
@@ -533,7 +613,8 @@ class Scaling:
533
613
  if port in self.used_ports:
534
614
  continue
535
615
  self.used_ports.add(port)
536
- os.environ["USED_PORTS"] = ",".join(str(p) for p in self.used_ports)
616
+ ports_value = ",".join(str(p) for p in self.used_ports)
617
+ os.environ["USED_PORTS"] = str(ports_value)
537
618
  logging.info("Found available port: %s", port)
538
619
  return port
539
620
  logging.error(
@@ -790,98 +871,99 @@ class Scaling:
790
871
  "Could not fetch internal keys",
791
872
  )
792
873
 
793
- @log_errors(log_error=True)
794
- def handle_kafka_response(self, msg, success_message, error_message):
795
- """
796
- Helper to process Kafka response messages in a consistent way.
797
- """
798
- if msg.get("status") == "success":
799
- data = msg.get("data")
800
- error = None
801
- message = success_message
802
- logging.info(message)
803
- else:
804
- data = msg.get("data")
805
- error = msg.get("error", "Unknown error")
806
- message = error_message
807
- logging.error("%s: %s", message, error)
808
- return data, error, message
809
-
810
- def _send_kafka_request(self, api, payload, request_topic, response_topic, timeout=60):
811
- """
812
- Helper to send a request to Kafka and wait for a response.
813
- Returns (data, error, message, kafka_response_received) where kafka_response_received is True if a response was received (even if error), False if transport error/timeout.
814
- """
815
- correlation_id = str(uuid.uuid4())
816
- request_message = {
817
- "correlationId": correlation_id,
818
- "api": api,
819
- "payload": payload,
820
- }
821
-
822
- consumer = KafkaConsumer(
823
- response_topic,
824
- bootstrap_servers=self.kafka_config["bootstrap_servers"],
825
- group_id=None,
826
- value_deserializer=lambda m: json.loads(m.decode("utf-8")),
827
- auto_offset_reset='latest',
828
- enable_auto_commit=True,
829
- )
830
-
831
- try:
832
- if hasattr(self.session.rpc, 'AUTH_TOKEN'):
833
- self.session.rpc.AUTH_TOKEN.set_bearer_token()
834
- auth_token = self.session.rpc.AUTH_TOKEN.bearer_token
835
- auth_token = auth_token.replace("Bearer ", "")
836
- headers = [("Authorization", bytes(f"{auth_token}", "utf-8"))]
837
- else:
838
- headers = None
839
- self.kafka_producer.send(request_topic, request_message, headers=headers)
840
- # self.kafka_producer.flush()
841
- logging.info("Sent %s request to Kafka topic %s", api, request_topic)
842
- except Exception as e:
843
- logging.error("Kafka producer error: %s", e)
844
- return None, f"Kafka producer error: {e}", "Kafka send failed", False
845
- try:
846
- start = time.time()
847
- while time.time() - start < timeout:
848
- # Poll for messages with a short timeout to avoid blocking forever
849
- message_batch = consumer.poll(timeout_ms=1000)
850
- if message_batch:
851
- for topic_partition, messages in message_batch.items():
852
- for message in messages:
853
- print("trying to fetch message")
854
- msg = message.value
855
- if msg.get("correlationId") == correlation_id:
856
- consumer.close()
857
- # Always treat a received response as final, even if error
858
- return self.handle_kafka_response(
859
- msg,
860
- f"Fetched via Kafka for {api}",
861
- f"Kafka error response for {api}"
862
- ) + (True,)
863
- else:
864
- print(f"No messages received, waiting... ({time.time() - start:.1f}s/{timeout}s)")
865
-
866
- consumer.close()
867
- logging.warning("Kafka response timeout for %s after %d seconds", api, timeout)
868
- return None, "Kafka response timeout", "Kafka response timeout", False
869
- except Exception as e:
870
- logging.error("Kafka consumer error: %s", e)
871
- return None, f"Kafka consumer error: {e}", "Kafka consumer error", False
872
-
873
- def _cache_failed_request(self, api, payload):
874
- """Cache the failed request for retry. Here, we use a simple file cache as a placeholder."""
875
- try:
876
- cache_file = os.path.join(os.path.dirname(__file__), 'request_cache.json')
877
- if os.path.exists(cache_file):
878
- with open(cache_file, 'r') as f:
879
- cache = json.load(f)
880
- else:
881
- cache = []
882
- cache.append({"api": api, "payload": payload, "ts": time.time()})
883
- with open(cache_file, 'w') as f:
884
- json.dump(cache, f)
885
- logging.info("Cached failed request for api %s", api)
886
- except Exception as e:
887
- logging.error("Failed to cache request: %s", e)
874
+ # KAFKA TEMPORARILY DISABLED - Using REST API directly
875
+ # @log_errors(log_error=True)
876
+ # def handle_kafka_response(self, msg, success_message, error_message):
877
+ # """
878
+ # Helper to process Kafka response messages in a consistent way.
879
+ # """
880
+ # if msg.get("status") == "success":
881
+ # data = msg.get("data")
882
+ # error = None
883
+ # message = success_message
884
+ # logging.info(message)
885
+ # else:
886
+ # data = msg.get("data")
887
+ # error = msg.get("error", "Unknown error")
888
+ # message = error_message
889
+ # logging.error("%s: %s", message, error)
890
+ # return data, error, message
891
+
892
+ # def _send_kafka_request(self, api, payload, request_topic, response_topic, timeout=60):
893
+ # """
894
+ # Helper to send a request to Kafka and wait for a response.
895
+ # Returns (data, error, message, kafka_response_received) where kafka_response_received is True if a response was received (even if error), False if transport error/timeout.
896
+ # """
897
+ # correlation_id = str(uuid.uuid4())
898
+ # request_message = {
899
+ # "correlationId": correlation_id,
900
+ # "api": api,
901
+ # "payload": payload,
902
+ # }
903
+
904
+ # consumer = KafkaConsumer(
905
+ # response_topic,
906
+ # bootstrap_servers=self.kafka_config["bootstrap_servers"],
907
+ # group_id=None,
908
+ # value_deserializer=lambda m: json.loads(m.decode("utf-8")),
909
+ # auto_offset_reset='latest',
910
+ # enable_auto_commit=True,
911
+ # )
912
+
913
+ # try:
914
+ # if hasattr(self.session.rpc, 'AUTH_TOKEN'):
915
+ # self.session.rpc.AUTH_TOKEN.set_bearer_token()
916
+ # auth_token = self.session.rpc.AUTH_TOKEN.bearer_token
917
+ # auth_token = auth_token.replace("Bearer ", "")
918
+ # headers = [("Authorization", bytes(f"{auth_token}", "utf-8"))]
919
+ # else:
920
+ # headers = None
921
+ # self.kafka_producer.send(request_topic, request_message, headers=headers)
922
+ # # self.kafka_producer.flush()
923
+ # logging.info("Sent %s request to Kafka topic %s", api, request_topic)
924
+ # except Exception as e:
925
+ # logging.error("Kafka producer error: %s", e)
926
+ # return None, f"Kafka producer error: {e}", "Kafka send failed", False
927
+ # try:
928
+ # start = time.time()
929
+ # while time.time() - start < timeout:
930
+ # # Poll for messages with a short timeout to avoid blocking forever
931
+ # message_batch = consumer.poll(timeout_ms=1000)
932
+ # if message_batch:
933
+ # for topic_partition, messages in message_batch.items():
934
+ # for message in messages:
935
+ # print("trying to fetch message")
936
+ # msg = message.value
937
+ # if msg.get("correlationId") == correlation_id:
938
+ # consumer.close()
939
+ # # Always treat a received response as final, even if error
940
+ # return self.handle_kafka_response(
941
+ # msg,
942
+ # f"Fetched via Kafka for {api}",
943
+ # f"Kafka error response for {api}"
944
+ # ) + (True,)
945
+ # else:
946
+ # print(f"No messages received, waiting... ({time.time() - start:.1f}s/{timeout}s)")
947
+ #
948
+ # consumer.close()
949
+ # logging.warning("Kafka response timeout for %s after %d seconds", api, timeout)
950
+ # return None, "Kafka response timeout", "Kafka response timeout", False
951
+ # except Exception as e:
952
+ # logging.error("Kafka consumer error: %s", e)
953
+ # return None, f"Kafka consumer error: {e}", "Kafka consumer error", False
954
+
955
+ # def _cache_failed_request(self, api, payload):
956
+ # """Cache the failed request for retry. Here, we use a simple file cache as a placeholder."""
957
+ # try:
958
+ # cache_file = os.path.join(os.path.dirname(__file__), 'request_cache.json')
959
+ # if os.path.exists(cache_file):
960
+ # with open(cache_file, 'r') as f:
961
+ # cache = json.load(f)
962
+ # else:
963
+ # cache = []
964
+ # cache.append({"api": api, "payload": payload, "ts": time.time()})
965
+ # with open(cache_file, 'w') as f:
966
+ # json.dump(cache, f)
967
+ # logging.info("Cached failed request for api %s", api)
968
+ # except Exception as e:
969
+ # logging.error("Failed to cache request: %s", e)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: matrice_compute
3
- Version: 0.1.12
3
+ Version: 0.1.13
4
4
  Summary: Common server utilities for Matrice.ai services
5
5
  Author-email: "Matrice.ai" <dipendra@matrice.ai>
6
6
  License-Expression: MIT
@@ -1,17 +1,17 @@
1
- matrice_compute/__init__.py,sha256=HG5yzsY2dcQ0sGKwxMj-Sv2zDhbSC00slAdkcfS9nng,304
2
- matrice_compute/action_instance.py,sha256=6tyZehK7SfIu6NjXp4wFeYMY0BINShmXtoCXyimDKN0,58002
1
+ matrice_compute/__init__.py,sha256=ZzQcFsT005VCgq9VZUh565f4upOooEb_FwZ6RgweNZs,597
2
+ matrice_compute/action_instance.py,sha256=6IVMNODznEagFlwifjP1neO6OK0H46vuvMYDw02gYF0,58985
3
3
  matrice_compute/actions_manager.py,sha256=5U-xM6tl_Z6x96bi-c7AJM9ru80LqTN8f5Oce8dAu_A,7780
4
4
  matrice_compute/actions_scaledown_manager.py,sha256=pJ0nduNwHWZ10GnqJNx0Ok7cVWabQ_M8E2Vb9pH3A_k,2002
5
- matrice_compute/instance_manager.py,sha256=LhPOqrzmrs-QdorqgDOuBDHjpUkLPgCZovBdCBiVmVw,10103
5
+ matrice_compute/instance_manager.py,sha256=8USyX09ZxLvnVNIrjRogbyUeMCfgWnasuRqYkkVF4tQ,10146
6
6
  matrice_compute/instance_utils.py,sha256=tIFVUi8HJPy4GY-jtfVx2zIgmXNta7s3jCIRzBga1hI,21977
7
7
  matrice_compute/prechecks.py,sha256=W9YmNF3RcLhOf4U8WBlExvFqDw1aGWSNTlJtA73lbDQ,17196
8
8
  matrice_compute/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  matrice_compute/resources_tracker.py,sha256=My26LPglDHcQcTkxxiXwpfdqkpEAt3clrqJ-k1fAl1M,17878
10
- matrice_compute/scaling.py,sha256=8HfbKMsR7EI0rrLfKl_gz6FMO2Q4sLXELxGc3DcLwz8,31743
10
+ matrice_compute/scaling.py,sha256=hlPpEW8uggMKHW9kwu71obOnbNXhoqRlkmux4Fc3OP0,35202
11
11
  matrice_compute/shutdown_manager.py,sha256=0MYV_AqygqR9NEntYf7atUC-PbWXyNkm1f-8c2aizgA,13234
12
12
  matrice_compute/task_utils.py,sha256=ML9uTrYQiWgEMJitYxoGlVOa9KUXNKV_WqnousOTK6k,2762
13
- matrice_compute-0.1.12.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
14
- matrice_compute-0.1.12.dist-info/METADATA,sha256=__gJ0pkG07q5s8kOmdk8ItFWV3pfcHa-EUKDEe48xrY,1038
15
- matrice_compute-0.1.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
16
- matrice_compute-0.1.12.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
17
- matrice_compute-0.1.12.dist-info/RECORD,,
13
+ matrice_compute-0.1.13.dist-info/licenses/LICENSE.txt,sha256=_uQUZpgO0mRYL5-fPoEvLSbNnLPv6OmbeEDCHXhK6Qc,1066
14
+ matrice_compute-0.1.13.dist-info/METADATA,sha256=aX4hxZ2ll6w9miiYJ9Ed-FZtEVUEvwNb6vUplVYNm0w,1038
15
+ matrice_compute-0.1.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
16
+ matrice_compute-0.1.13.dist-info/top_level.txt,sha256=63Plr3L1GzBUWZO5JZaFkiv8IcB10xUPU-9w3i6ptvE,16
17
+ matrice_compute-0.1.13.dist-info/RECORD,,