deltacat 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. deltacat/__init__.py +41 -15
  2. deltacat/aws/clients.py +12 -31
  3. deltacat/aws/constants.py +1 -1
  4. deltacat/aws/redshift/__init__.py +7 -2
  5. deltacat/aws/redshift/model/manifest.py +54 -50
  6. deltacat/aws/s3u.py +188 -218
  7. deltacat/catalog/delegate.py +151 -185
  8. deltacat/catalog/interface.py +78 -97
  9. deltacat/catalog/model/catalog.py +21 -21
  10. deltacat/catalog/model/table_definition.py +11 -9
  11. deltacat/compute/compactor/__init__.py +12 -16
  12. deltacat/compute/compactor/compaction_session.py +259 -316
  13. deltacat/compute/compactor/model/delta_annotated.py +60 -44
  14. deltacat/compute/compactor/model/delta_file_envelope.py +5 -6
  15. deltacat/compute/compactor/model/delta_file_locator.py +10 -8
  16. deltacat/compute/compactor/model/materialize_result.py +6 -7
  17. deltacat/compute/compactor/model/primary_key_index.py +38 -34
  18. deltacat/compute/compactor/model/pyarrow_write_result.py +3 -4
  19. deltacat/compute/compactor/model/round_completion_info.py +25 -19
  20. deltacat/compute/compactor/model/sort_key.py +18 -15
  21. deltacat/compute/compactor/steps/dedupe.py +152 -259
  22. deltacat/compute/compactor/steps/hash_bucket.py +57 -73
  23. deltacat/compute/compactor/steps/materialize.py +138 -99
  24. deltacat/compute/compactor/steps/rehash/rehash_bucket.py +13 -13
  25. deltacat/compute/compactor/steps/rehash/rewrite_index.py +11 -13
  26. deltacat/compute/compactor/utils/io.py +59 -47
  27. deltacat/compute/compactor/utils/primary_key_index.py +131 -90
  28. deltacat/compute/compactor/utils/round_completion_file.py +22 -23
  29. deltacat/compute/compactor/utils/system_columns.py +33 -42
  30. deltacat/compute/metastats/meta_stats.py +235 -157
  31. deltacat/compute/metastats/model/partition_stats_dict.py +7 -10
  32. deltacat/compute/metastats/model/stats_cluster_size_estimator.py +13 -5
  33. deltacat/compute/metastats/stats.py +95 -64
  34. deltacat/compute/metastats/utils/io.py +100 -53
  35. deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +5 -2
  36. deltacat/compute/metastats/utils/ray_utils.py +38 -33
  37. deltacat/compute/stats/basic.py +107 -69
  38. deltacat/compute/stats/models/delta_column_stats.py +11 -8
  39. deltacat/compute/stats/models/delta_stats.py +59 -32
  40. deltacat/compute/stats/models/delta_stats_cache_result.py +4 -1
  41. deltacat/compute/stats/models/manifest_entry_stats.py +12 -6
  42. deltacat/compute/stats/models/stats_result.py +24 -14
  43. deltacat/compute/stats/utils/intervals.py +16 -9
  44. deltacat/compute/stats/utils/io.py +86 -51
  45. deltacat/compute/stats/utils/manifest_stats_file.py +24 -33
  46. deltacat/constants.py +8 -10
  47. deltacat/io/__init__.py +2 -2
  48. deltacat/io/aws/redshift/redshift_datasource.py +157 -143
  49. deltacat/io/dataset.py +14 -17
  50. deltacat/io/read_api.py +36 -33
  51. deltacat/logs.py +94 -42
  52. deltacat/storage/__init__.py +18 -8
  53. deltacat/storage/interface.py +196 -213
  54. deltacat/storage/model/delta.py +45 -51
  55. deltacat/storage/model/list_result.py +12 -8
  56. deltacat/storage/model/namespace.py +4 -5
  57. deltacat/storage/model/partition.py +42 -42
  58. deltacat/storage/model/stream.py +29 -30
  59. deltacat/storage/model/table.py +14 -14
  60. deltacat/storage/model/table_version.py +32 -31
  61. deltacat/storage/model/types.py +1 -0
  62. deltacat/tests/stats/test_intervals.py +11 -24
  63. deltacat/tests/utils/test_record_batch_tables.py +284 -0
  64. deltacat/types/media.py +3 -4
  65. deltacat/types/tables.py +31 -21
  66. deltacat/utils/common.py +5 -11
  67. deltacat/utils/numpy.py +20 -22
  68. deltacat/utils/pandas.py +73 -100
  69. deltacat/utils/performance.py +3 -9
  70. deltacat/utils/placement.py +276 -231
  71. deltacat/utils/pyarrow.py +302 -89
  72. deltacat/utils/ray_utils/collections.py +2 -1
  73. deltacat/utils/ray_utils/concurrency.py +38 -32
  74. deltacat/utils/ray_utils/dataset.py +28 -28
  75. deltacat/utils/ray_utils/performance.py +5 -9
  76. deltacat/utils/ray_utils/runtime.py +9 -10
  77. {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/METADATA +22 -12
  78. deltacat-0.1.11.dist-info/RECORD +110 -0
  79. {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/WHEEL +1 -1
  80. deltacat/autoscaler/events/__init__.py +0 -0
  81. deltacat/autoscaler/events/compaction/__init__.py +0 -0
  82. deltacat/autoscaler/events/compaction/cluster.py +0 -82
  83. deltacat/autoscaler/events/compaction/collections/__init__.py +0 -0
  84. deltacat/autoscaler/events/compaction/collections/partition_key_value.py +0 -36
  85. deltacat/autoscaler/events/compaction/dispatcher.py +0 -28
  86. deltacat/autoscaler/events/compaction/input.py +0 -27
  87. deltacat/autoscaler/events/compaction/process.py +0 -25
  88. deltacat/autoscaler/events/compaction/session_manager.py +0 -13
  89. deltacat/autoscaler/events/compaction/utils.py +0 -216
  90. deltacat/autoscaler/events/compaction/workflow.py +0 -303
  91. deltacat/autoscaler/events/dispatcher.py +0 -95
  92. deltacat/autoscaler/events/dynamodb/__init__.py +0 -0
  93. deltacat/autoscaler/events/dynamodb/event_store.py +0 -164
  94. deltacat/autoscaler/events/event_store.py +0 -55
  95. deltacat/autoscaler/events/exceptions.py +0 -6
  96. deltacat/autoscaler/events/processor.py +0 -177
  97. deltacat/autoscaler/events/session_manager.py +0 -25
  98. deltacat/autoscaler/events/states.py +0 -88
  99. deltacat/autoscaler/events/workflow.py +0 -54
  100. deltacat/autoscaler/node_group.py +0 -230
  101. deltacat/autoscaler/utils.py +0 -69
  102. deltacat-0.1.8.dist-info/RECORD +0 -131
  103. /deltacat/{autoscaler → tests/utils}/__init__.py +0 -0
  104. {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/LICENSE +0 -0
  105. {deltacat-0.1.8.dist-info → deltacat-0.1.11.dist-info}/top_level.txt +0 -0
deltacat/utils/placement.py
@@ -1,241 +1,286 @@
- import ray
+ import logging
  import re
  import time
- import yaml
- import logging
- from typing import Optional, Union, List, Dict, Any, Callable, Tuple
- from ray.util.placement_group import (
-     placement_group,
-     placement_group_table,
-     get_current_placement_group
- )
+ from dataclasses import dataclass
+ from typing import Any, Dict, List, Optional, Tuple, Union
 
- from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
+ import ray
+ import yaml
  from ray.experimental.state.api import get_node, get_placement_group
-
+ from ray.util.placement_group import placement_group, placement_group_table
+ from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
 
  from deltacat import logs
- from deltacat.utils.ray_utils.runtime import live_node_resource_keys
+
  logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
 
- #Limitation of current node group or placement group manager
- #Must run on driver or head node bc state.api needs to query dashboard api server at 127.0.0.1.
- #Issue: https://github.com/ray-project/ray/issues/29959
-
- class NodeGroupManager():
-
-     def __init__(self,path: str, gname: str):
-         """Node Group Manager
-         Args:
-             path: cluster yaml file
-             gname: node group prefix, e.g., 'partition'
-         """
-         #cluster init status:
-         self.NODE_GROUP_PREFIX=gname
-         self.cluster_config=self._read_yaml(path)
-         self.init_groups = self._cluster_node_groups(self.cluster_config)
-         self.init_group_res = self._parse_node_resources()
-
-     def _cluster_node_groups(self, config: Dict[str, Any]) -> Dict[str, Any]:
-         """Get Worker Groups
-         Args:
-             config: cluster yaml data
-         Returns:
-             worker groups: a dict of worker node group
-
-         """
-         avail_node_types = list(config['available_node_types'].items())
-         #exclude head node type
-         head_node_types = [nt for nt in avail_node_types if 'resources' in nt[1] and 'CPU' in nt[1]['resources'] and nt[1]['resources']['CPU']==0][0]
-         worker_node_types = [x for x in avail_node_types if x !=head_node_types]
-         #assuming homogenous cluster
-         #in future, update with fleet resource
-         if len(worker_node_types)>0:
-             self.INSTANCE_TYPE = worker_node_types[0][1]['node_config']['InstanceType']
-         return worker_node_types
-
-
-     def _read_yaml(self, path: str) -> Dict[str, Any]:
-         with open(path, "rt") as f:
-             return yaml.safe_load(f)
-
-     def _update_groups(self) -> List[Tuple[str, float]]:
-         """
-         Node groups can come and go during runtime, whenever a node group is needed, we need to check the current available groups
-         Returns:
-             current_groups: dict of custom resource groups
-         """
-         #Add 1.1 second latency to avoid inconsistency issue between raylet and head
-         time.sleep(1.1)
-         all_available_res = ray.available_resources()
-         current_groups =[(k,all_available_res[k]) for k in all_available_res.keys() if self.NODE_GROUP_PREFIX in k]
-         return current_groups
-
-     def _parse_node_resources(self) -> Dict[str, Dict[str, float]]:
-         """
-         Parse resources per node to get detailed resource tighted to each node group
-         Returns:
-             group_res: a dict of resources, e.g., {'CPU':0,'memory':0,'object_store_memory':0}
-         """
-         all_available_resources= ray._private.state.state._available_resources_per_node()
-         group_keys = [x[0] for x in self.init_groups]
-         group_res={}
-         for k in group_keys:
-             group_res[k]={'CPU':0,'memory':0,'object_store_memory':0,'node_id':[]}
-         for v in all_available_resources.values():
-             keys =v.keys()
-             r = re.compile(self.NODE_GROUP_PREFIX)
-             partition=list(filter(r.match, list(keys)))
-             r = re.compile("node:")
-             node_id = list(filter(r.match, list(keys)))
-             if len(partition)>0:
-                 partition = partition[0]
-             if len(node_id)>0:
-                 node_id = node_id[0]
-             if self.NODE_GROUP_PREFIX in partition:
-                 group_res[partition]['CPU']+=v['CPU']
-                 group_res[partition]['memory']+=v['memory']
-                 group_res[partition]['object_store_memory']+=v['object_store_memory']
-                 group_res[partition]['node_id'].append(node_id)
-         return group_res
-
-     def _update_group_res(self, gname: str) -> Dict[str, Union[str, float]]:
-         """
-         Get the realtime resource of a node group
-         Args:
-             gname: name of node group
-         Returns:
-             group_res: dict of updated resource(cpu, memory, object store memory) for a given group
-         """
-         all_available_resources= ray._private.state.state._available_resources_per_node()
-         group_res={'CPU':0,'memory':0,'object_store_memory':0,'node_id':[]}
-         for v in all_available_resources.values():
-             keys =v.keys()
-             r = re.compile("node:")
-             node_id = list(filter(r.match, list(keys)))
-             if len(node_id)>0:
-                 node_id = node_id[0]
-             if gname in v.keys():
-                 group_res['CPU']+=v['CPU']
-                 group_res['memory']+=v['memory']
-                 group_res['object_store_memory']+=v['object_store_memory']
-                 group_res['node_id'].append(node_id)
-         return group_res
-
-     def get_one_group(self) -> Optional[Dict[str, Union[str, float]]]:
-         """
-         Pop up one node group
-         Returns:
-             group_res: dict of node group resource, {"group":"partition_1","CPU":2,...}
-         """
-         current_groups = self._update_groups()
-         if len(current_groups)>0:
-             gname = current_groups[-1][0]
-             group_res=self._update_group_res(gname)
-             group_res['group']=gname
-             try:
-                 group_res['group_res']=ray.available_resources()[gname]
-             except Exception as e:
-                 logger.info(f"There is no available resources for {gname}")
-                 return None
-             return group_res
-         else:
-             return None
-
-     def get_group_by_name(self, gname: str) -> Optional[Dict[str, Union[str, float]]]:
-         """
-         Get the specific node group given its pre-filled name
-         Args:
-             gname: name of the node group
-         Returns:
-             group_res: dict of node group resource
-
-         """
-         group_res=self._update_group_res(gname)
-         group_res['group']=gname
-         try:
-             group_res['group_res']=ray.available_resources()[gname]
-         except Exception as e:
-             logger.info(f"There is no available resources for {gname}")
-             return None
-         return group_res
-
-
- class PlacementGroupManager():
-     """Placement Group Manager
-     Create a list of placement group with the desired number of cpus
-     e.g., create a pg with 32 cpus, then this class will look for a node that has 32 cpus, and collect all
-     resources, including cpu, memory, and object store;
-     How to use:
-     ```
-     from deltacat.utils.placement import PlacementGroupManager as pgm
-     pgm = pgm(10, 32)
-     pg_configs = pgm.pgs
-     opts = pg_configs[0][0]
-     fun.options(**opts).remote()
-     ```
-     Args:
-         num_pgs: number of placement groups to be created
-         instance_cpus: number of cpus per instance
-     """
-     def __init__(self, num_pgs: int, instance_cpus: int, instance_type: int = 8, time_out: Optional[float] = None):
-         head_res_key = self.get_current_node_resource_key()
-         all_node_res_key = live_node_resource_keys()
-         all_node_res_key.remove(head_res_key)
-         num_bundles = (int)(instance_cpus/instance_type)
-         self._pg_configs = ray.get([_config.options(resources={head_res_key:0.01}).remote(instance_cpus, instance_type, all_node_res_key[i*num_bundles:(i+1)*num_bundles]) for i in range(num_pgs)])
-     @property
-     def pgs(self):
-         return self._pg_configs
-
-     def get_current_node_resource_key(self) -> str:
-         current_node_id = ray.get_runtime_context().node_id.hex()
-         for node in ray.nodes():
-             if node["NodeID"] == current_node_id:
-                 # Found the node.
-                 for key in node["Resources"].keys():
-                     if key.startswith("node:"):
-                         return key
+ # Limitation of current node group or placement group manager
+ # Must run on driver or head node bc state.api needs to query dashboard api server at 127.0.0.1.
+ # Issue: https://github.com/ray-project/ray/issues/29959
+
+
+ @dataclass
+ class PlacementGroupConfig:
+     def __init__(self, opts, resource):
+         self.opts = opts
+         self.resource = resource
+
+
+ class NodeGroupManager:
+     def __init__(self, path: str, gname: str):
+         """Node Group Manager
+         Args:
+             path: cluster yaml file
+             gname: node group prefix, e.g., 'partition'
+         """
+         # cluster init status:
+         self.NODE_GROUP_PREFIX = gname
+         self.cluster_config = self._read_yaml(path)
+         self.init_groups = self._cluster_node_groups(self.cluster_config)
+         self.init_group_res = self._parse_node_resources()
+
+     def _cluster_node_groups(self, config: Dict[str, Any]) -> Dict[str, Any]:
+         """Get Worker Groups
+         Args:
+             config: cluster yaml data
+         Returns:
+             worker groups: a dict of worker node group
+
+         """
+         avail_node_types = list(config["available_node_types"].items())
+         # exclude head node type
+         head_node_types = [
+             nt
+             for nt in avail_node_types
+             if "resources" in nt[1]
+             and "CPU" in nt[1]["resources"]
+             and nt[1]["resources"]["CPU"] == 0
+         ][0]
+         worker_node_types = [x for x in avail_node_types if x != head_node_types]
+         # assuming homogenous cluster
+         # in future, update with fleet resource
+         if len(worker_node_types) > 0:
+             self.INSTANCE_TYPE = worker_node_types[0][1]["node_config"]["InstanceType"]
+         return worker_node_types
+
+     def _read_yaml(self, path: str) -> Dict[str, Any]:
+         with open(path, "rt") as f:
+             return yaml.safe_load(f)
+
+     def _update_groups(self) -> List[Tuple[str, float]]:
+         """
+         Node groups can come and go during runtime, whenever a node group is needed, we need to check the current available groups
+         Returns:
+             current_groups: dict of custom resource groups
+         """
+         # Add 1.1 second latency to avoid inconsistency issue between raylet and head
+         time.sleep(1.1)
+         all_available_res = ray.available_resources()
+         current_groups = [
+             (k, all_available_res[k])
+             for k in all_available_res.keys()
+             if self.NODE_GROUP_PREFIX in k
+         ]
+         return current_groups
+
+     def _parse_node_resources(self) -> Dict[str, Dict[str, float]]:
+         """
+         Parse resources per node to get detailed resource tighted to each node group
+         Returns:
+             group_res: a dict of resources, e.g., {'CPU':0,'memory':0,'object_store_memory':0}
+         """
+         all_available_resources = (
+             ray._private.state.state._available_resources_per_node()
+         )
+         group_keys = [x[0] for x in self.init_groups]
+         group_res = {}
+         for k in group_keys:
+             group_res[k] = {
+                 "CPU": 0,
+                 "memory": 0,
+                 "object_store_memory": 0,
+                 "node_id": [],
+             }
+         for v in all_available_resources.values():
+             keys = v.keys()
+             r = re.compile(self.NODE_GROUP_PREFIX)
+             partition = list(filter(r.match, list(keys)))
+             r = re.compile("node:")
+             node_id = list(filter(r.match, list(keys)))
+             if len(partition) > 0:
+                 partition = partition[0]
+             if len(node_id) > 0:
+                 node_id = node_id[0]
+             if self.NODE_GROUP_PREFIX in partition:
+                 group_res[partition]["CPU"] += v["CPU"]
+                 group_res[partition]["memory"] += v["memory"]
+                 group_res[partition]["object_store_memory"] += v["object_store_memory"]
+                 group_res[partition]["node_id"].append(node_id)
+         return group_res
+
+     def _update_group_res(self, gname: str) -> Dict[str, Union[str, float]]:
+         """
+         Get the realtime resource of a node group
+         Args:
+             gname: name of node group
+         Returns:
+             group_res: dict of updated resource(cpu, memory, object store memory) for a given group
+         """
+         all_available_resources = (
+             ray._private.state.state._available_resources_per_node()
+         )
+         group_res = {"CPU": 0, "memory": 0, "object_store_memory": 0, "node_id": []}
+         for v in all_available_resources.values():
+             keys = v.keys()
+             r = re.compile("node:")
+             node_id = list(filter(r.match, list(keys)))
+             if len(node_id) > 0:
+                 node_id = node_id[0]
+             if gname in v.keys():
+                 group_res["CPU"] += v["CPU"]
+                 group_res["memory"] += v["memory"]
+                 group_res["object_store_memory"] += v["object_store_memory"]
+                 group_res["node_id"].append(node_id)
+         return group_res
+
+     def get_one_group(self) -> Optional[Dict[str, Union[str, float]]]:
+         """
+         Pop up one node group
+         Returns:
+             group_res: dict of node group resource, {"group":"partition_1","CPU":2,...}
+         """
+         current_groups = self._update_groups()
+         if len(current_groups) > 0:
+             gname = current_groups[-1][0]
+             group_res = self._update_group_res(gname)
+             group_res["group"] = gname
+             try:
+                 group_res["group_res"] = ray.available_resources()[gname]
+             except Exception as e:
+                 logger.info(f"Error: {e}. There is no available resources for {gname}")
+                 return None
+             return group_res
+         else:
+             return None
+
+     def get_group_by_name(self, gname: str) -> Optional[Dict[str, Union[str, float]]]:
+         """
+         Get the specific node group given its pre-filled name
+         Args:
+             gname: name of the node group
+         Returns:
+             group_res: dict of node group resource
+
+         """
+         group_res = self._update_group_res(gname)
+         group_res["group"] = gname
+         try:
+             group_res["group_res"] = ray.available_resources()[gname]
+         except Exception as e:
+             logger.info(f"Error: {e}. There is no available resources for {gname}")
+             return None
+         return group_res
+
+
+ class PlacementGroupManager:
+     """Placement Group Manager
+     Create a list of placement group with the desired number of cpus
+     e.g., create a pg with 32 cpus, then this class will look for a node that has 32 cpus, and collect all
+     resources, including cpu, memory, and object store;
+     How to use:
+     ```
+     from deltacat.utils.placement import PlacementGroupManager as pgm
+     pgm = pgm(10, 32)
+     pg_configs = pgm.pgs
+     opts = pg_configs[0][0]
+     fun.options(**opts).remote()
+     ```
+     Args:
+         num_pgs: number of placement groups to be created
+         instance_cpus: number of cpus per instance
+     """
+
+     def __init__(
+         self,
+         num_pgs: int,
+         total_cpus_per_pg: int,
+         cpu_per_bundle: int,
+         strategy="SPREAD",
+         capture_child_tasks=True,
+     ):
+         head_res_key = self.get_current_node_resource_key()
+         # run the task on head and consume a fractional cpu, so that pg can be created on non-head node
+         # if cpu_per_bundle is less than the cpus per node, the pg can still be created on head
+         # curent assumption is that the cpu_per_bundle = cpus per node
+         # TODO: figure out how to create pg on non-head explicitly
+         self._pg_configs = ray.get(
+             [
+                 _config.options(resources={head_res_key: 0.01}).remote(
+                     total_cpus_per_pg, cpu_per_bundle, strategy, capture_child_tasks
+                 )
+                 for i in range(num_pgs)
+             ]
+         )
+         # TODO: handle the cases where cpu_per_bundle is larger than max cpus per node, support it on ec2/flex/manta
+
+     @property
+     def pgs(self):
+         return self._pg_configs
+
+     def get_current_node_resource_key(self) -> str:
+         # on ec2: address="172.31.34.51:6379"
+         # on manta: address = "2600:1f10:4674:6815:aadb:2dc8:de61:bc8e:6379"
+         current_node_name = ray.experimental.internal_kv.global_gcs_client.address[:-5]
+         for node in ray.nodes():
+             if node["NodeName"] == current_node_name:
+                 # Found the node.
+                 for key in node["Resources"].keys():
+                     if key.startswith("node:"):
+                         return key
+
+
  @ray.remote(num_cpus=0.01)
- def _config(instance_cpus: int, instance_type: int, node_res_keys: List[str], time_out: Optional[float] = None) -> Tuple[Dict[str,Any], Dict[str,Any]]:
-     pg_config = None
-     try:
-         opts ={}
-         cluster_resources={}
-         num_bundles = (int)(instance_cpus/instance_type)
-         bundles = [{'CPU':instance_type,node_res_keys[i]:1} for i in range(num_bundles)]
-         pg = placement_group(bundles, strategy="SPREAD")
-         ray.get(pg.ready(), timeout=time_out)
-         if not pg:
-             return None
-         opts = {"scheduling_strategy":PlacementGroupSchedulingStrategy(
-             placement_group=pg, placement_group_capture_child_tasks=True)
-         }
-         pg_id = placement_group_table(pg)['placement_group_id']
-         pg_details = get_placement_group(pg_id)
-         bundles = pg_details['bundles']
-         node_ids =[]
-         for bd in bundles:
-             node_ids.append(bd['node_id'])
-         #query available resources given list of node id
-         all_nodes_available_res = ray._private.state.state._available_resources_per_node()
-         pg_res = {'CPU':0,'memory':0,'object_store_memory':0,'node_id':[]}
-         for node_id in node_ids:
-             if node_id in all_nodes_available_res:
-                 v = all_nodes_available_res[node_id]
-                 node_detail = get_node(node_id)
-                 pg_res['CPU']+=node_detail['resources_total']['CPU']
-                 pg_res['memory']+=v['memory']
-                 pg_res['object_store_memory']+=v['object_store_memory']
-         cluster_resources['CPU'] = int(pg_res['CPU'])
-         cluster_resources['memory'] = float(pg_res['memory'])
-         cluster_resources['object_store_memory'] = float(pg_res['object_store_memory'])
-         cluster_resources['node_id'] = node_res_keys
-         pg_config=[opts,cluster_resources]
-         logger.info(f"pg has resources:{cluster_resources}")
-
-     except Exception as e:
-         logger.error(f"placement group error:{e}")
-         pass
-     return pg_config
+ def _config(
+     total_cpus_per_pg: int,
+     cpu_per_node: int,
+     strategy="SPREAD",
+     capture_child_tasks=True,
+     time_out: Optional[float] = None,
+ ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+     pg_config = None
+     opts = {}
+     cluster_resources = {}
+     num_bundles = (int)(total_cpus_per_pg / cpu_per_node)
+     bundles = [{"CPU": cpu_per_node} for i in range(num_bundles)]
+     pg = placement_group(bundles, strategy=strategy)
+     ray.get(pg.ready(), timeout=time_out)
+     if not pg:
+         return None
+     opts = {
+         "scheduling_strategy": PlacementGroupSchedulingStrategy(
+             placement_group=pg, placement_group_capture_child_tasks=capture_child_tasks
+         )
+     }
+     pg_id = placement_group_table(pg)["placement_group_id"]
+     pg_details = get_placement_group(pg_id)
+     bundles = pg_details["bundles"]
+     node_ids = []
+     for bd in bundles:
+         node_ids.append(bd["node_id"])
+     # query available resources given list of node id
+     all_nodes_available_res = ray._private.state.state._available_resources_per_node()
+     pg_res = {"CPU": 0, "memory": 0, "object_store_memory": 0}
+     for node_id in node_ids:
+         if node_id in all_nodes_available_res:
+             v = all_nodes_available_res[node_id]
+             node_detail = get_node(node_id)
+             pg_res["CPU"] += node_detail["resources_total"]["CPU"]
+             pg_res["memory"] += v["memory"]
+             pg_res["object_store_memory"] += v["object_store_memory"]
+     cluster_resources["CPU"] = int(pg_res["CPU"])
+     cluster_resources["memory"] = float(pg_res["memory"])
+     cluster_resources["object_store_memory"] = float(pg_res["object_store_memory"])
+     pg_config = PlacementGroupConfig(opts, cluster_resources)
+     logger.info(f"pg has resources:{cluster_resources}")
 
+     return pg_config
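
Usage note: in 0.1.11 the `PlacementGroupManager` constructor changes from `(num_pgs, instance_cpus, instance_type, time_out)` to `(num_pgs, total_cpus_per_pg, cpu_per_bundle, strategy, capture_child_tasks)`, and each entry of `pgm.pgs` is now a `PlacementGroupConfig` (with `.opts` and `.resource`) rather than the old `[opts, cluster_resources]` list. The sketch below is inferred from the diff above and is not part of the package: the task name `my_task` and the CPU counts are illustrative assumptions, and per the module comment the manager should be constructed on the driver/head node.

```python
import ray

from deltacat.utils.placement import PlacementGroupManager

ray.init(address="auto")


@ray.remote
def my_task() -> str:
    # Hypothetical workload; any Ray task or actor can be scheduled the same way.
    return "ran inside a managed placement group"


# Ten placement groups, each reserving 32 CPUs as a single 32-CPU bundle
# (illustrative sizes; match these to your cluster's node shape).
pgm = PlacementGroupManager(num_pgs=10, total_cpus_per_pg=32, cpu_per_bundle=32)

pg_config = pgm.pgs[0]          # PlacementGroupConfig, not the old [opts, resources] list
opts = pg_config.opts           # {"scheduling_strategy": PlacementGroupSchedulingStrategy(...)}
resources = pg_config.resource  # {"CPU": ..., "memory": ..., "object_store_memory": ...}

print(ray.get(my_task.options(**opts).remote()), resources)
```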