matrice-compute 0.1.32__tar.gz → 0.1.33__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/PKG-INFO +1 -1
  2. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/matrice_compute.egg-info/PKG-INFO +1 -1
  3. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/src/matrice_compute/action_instance.py +10 -1
  4. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/src/matrice_compute/actions_manager.py +5 -1
  5. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/LICENSE.txt +0 -0
  6. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/README.md +0 -0
  7. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/matrice_compute.egg-info/SOURCES.txt +0 -0
  8. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/matrice_compute.egg-info/dependency_links.txt +0 -0
  9. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/matrice_compute.egg-info/not-zip-safe +0 -0
  10. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/matrice_compute.egg-info/top_level.txt +0 -0
  11. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/pyproject.toml +0 -0
  12. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/setup.cfg +0 -0
  13. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/setup.py +0 -0
  14. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/src/matrice_compute/__init__.py +0 -0
  15. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/src/matrice_compute/actions_scaledown_manager.py +0 -0
  16. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/src/matrice_compute/compute_operations_handler.py +0 -0
  17. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/src/matrice_compute/instance_manager.py +0 -0
  18. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/src/matrice_compute/instance_utils.py +0 -0
  19. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/src/matrice_compute/prechecks.py +0 -0
  20. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/src/matrice_compute/py.typed +0 -0
  21. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/src/matrice_compute/resources_tracker.py +0 -0
  22. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/src/matrice_compute/scaling.py +0 -0
  23. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/src/matrice_compute/shutdown_manager.py +0 -0
  24. {matrice_compute-0.1.32 → matrice_compute-0.1.33}/src/matrice_compute/task_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: matrice_compute
3
- Version: 0.1.32
3
+ Version: 0.1.33
4
4
  Summary: Common server utilities for Matrice.ai services
5
5
  Author-email: "Matrice.ai" <dipendra@matrice.ai>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: matrice_compute
3
- Version: 0.1.32
3
+ Version: 0.1.33
4
4
  Summary: Common server utilities for Matrice.ai services
5
5
  Author-email: "Matrice.ai" <dipendra@matrice.ai>
6
6
  License-Expression: MIT
@@ -939,7 +939,16 @@ class ActionInstance:
939
939
  env={**os.environ},
940
940
  )
941
941
 
942
- stdout, stderr = process.communicate(timeout=120)
942
+ # Use a longer timeout for docker run since --pull=always may need to
943
+ # download large images on first run. Default: 30 minutes (1800 seconds)
944
+ # Can be configured via DOCKER_START_TIMEOUT_SECONDS environment variable
945
+ docker_start_timeout = int(os.environ.get("DOCKER_START_TIMEOUT_SECONDS", 1800))
946
+ logging.info(
947
+ "Waiting for docker container to start for action %s (timeout: %d seconds)",
948
+ self.action_record_id,
949
+ docker_start_timeout,
950
+ )
951
+ stdout, stderr = process.communicate(timeout=docker_start_timeout)
943
952
 
944
953
  if process.returncode != 0:
945
954
  logging.error(
@@ -43,7 +43,11 @@ class ActionsManager:
43
43
  """
44
44
  actions = []
45
45
  logging.info("Polling backend for new jobs")
46
- fetched_actions, error, _ = self.scaling.assign_jobs(has_gpu())
46
+ result = self.scaling.assign_jobs(has_gpu())
47
+ if result is None:
48
+ logging.error("assign_jobs returned None")
49
+ return actions
50
+ fetched_actions, error, _ = result
47
51
  if error:
48
52
  logging.error("Error assigning jobs: %s", error)
49
53
  return actions