deltacat 0.1.10.dev0__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. deltacat/__init__.py +41 -15
  2. deltacat/aws/clients.py +12 -31
  3. deltacat/aws/constants.py +1 -1
  4. deltacat/aws/redshift/__init__.py +7 -2
  5. deltacat/aws/redshift/model/manifest.py +54 -50
  6. deltacat/aws/s3u.py +176 -187
  7. deltacat/catalog/delegate.py +151 -185
  8. deltacat/catalog/interface.py +78 -97
  9. deltacat/catalog/model/catalog.py +21 -21
  10. deltacat/catalog/model/table_definition.py +11 -9
  11. deltacat/compute/compactor/__init__.py +12 -16
  12. deltacat/compute/compactor/compaction_session.py +237 -166
  13. deltacat/compute/compactor/model/delta_annotated.py +60 -44
  14. deltacat/compute/compactor/model/delta_file_envelope.py +5 -6
  15. deltacat/compute/compactor/model/delta_file_locator.py +10 -8
  16. deltacat/compute/compactor/model/materialize_result.py +6 -7
  17. deltacat/compute/compactor/model/primary_key_index.py +38 -34
  18. deltacat/compute/compactor/model/pyarrow_write_result.py +3 -4
  19. deltacat/compute/compactor/model/round_completion_info.py +25 -19
  20. deltacat/compute/compactor/model/sort_key.py +18 -15
  21. deltacat/compute/compactor/steps/dedupe.py +119 -94
  22. deltacat/compute/compactor/steps/hash_bucket.py +48 -47
  23. deltacat/compute/compactor/steps/materialize.py +86 -92
  24. deltacat/compute/compactor/steps/rehash/rehash_bucket.py +13 -13
  25. deltacat/compute/compactor/steps/rehash/rewrite_index.py +5 -5
  26. deltacat/compute/compactor/utils/io.py +59 -47
  27. deltacat/compute/compactor/utils/primary_key_index.py +91 -80
  28. deltacat/compute/compactor/utils/round_completion_file.py +22 -23
  29. deltacat/compute/compactor/utils/system_columns.py +33 -45
  30. deltacat/compute/metastats/meta_stats.py +235 -157
  31. deltacat/compute/metastats/model/partition_stats_dict.py +7 -10
  32. deltacat/compute/metastats/model/stats_cluster_size_estimator.py +13 -5
  33. deltacat/compute/metastats/stats.py +95 -64
  34. deltacat/compute/metastats/utils/io.py +100 -53
  35. deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +5 -2
  36. deltacat/compute/metastats/utils/ray_utils.py +38 -33
  37. deltacat/compute/stats/basic.py +107 -69
  38. deltacat/compute/stats/models/delta_column_stats.py +11 -8
  39. deltacat/compute/stats/models/delta_stats.py +59 -32
  40. deltacat/compute/stats/models/delta_stats_cache_result.py +4 -1
  41. deltacat/compute/stats/models/manifest_entry_stats.py +12 -6
  42. deltacat/compute/stats/models/stats_result.py +24 -14
  43. deltacat/compute/stats/utils/intervals.py +16 -9
  44. deltacat/compute/stats/utils/io.py +86 -51
  45. deltacat/compute/stats/utils/manifest_stats_file.py +24 -33
  46. deltacat/constants.py +4 -13
  47. deltacat/io/__init__.py +2 -2
  48. deltacat/io/aws/redshift/redshift_datasource.py +157 -143
  49. deltacat/io/dataset.py +14 -17
  50. deltacat/io/read_api.py +36 -33
  51. deltacat/logs.py +94 -42
  52. deltacat/storage/__init__.py +18 -8
  53. deltacat/storage/interface.py +196 -213
  54. deltacat/storage/model/delta.py +45 -51
  55. deltacat/storage/model/list_result.py +12 -8
  56. deltacat/storage/model/namespace.py +4 -5
  57. deltacat/storage/model/partition.py +42 -42
  58. deltacat/storage/model/stream.py +29 -30
  59. deltacat/storage/model/table.py +14 -14
  60. deltacat/storage/model/table_version.py +32 -31
  61. deltacat/storage/model/types.py +1 -0
  62. deltacat/tests/stats/test_intervals.py +11 -24
  63. deltacat/tests/utils/__init__.py +0 -0
  64. deltacat/tests/utils/test_record_batch_tables.py +284 -0
  65. deltacat/types/media.py +3 -4
  66. deltacat/types/tables.py +31 -21
  67. deltacat/utils/common.py +5 -11
  68. deltacat/utils/numpy.py +20 -22
  69. deltacat/utils/pandas.py +73 -100
  70. deltacat/utils/performance.py +3 -9
  71. deltacat/utils/placement.py +259 -230
  72. deltacat/utils/pyarrow.py +302 -89
  73. deltacat/utils/ray_utils/collections.py +2 -1
  74. deltacat/utils/ray_utils/concurrency.py +27 -28
  75. deltacat/utils/ray_utils/dataset.py +28 -28
  76. deltacat/utils/ray_utils/performance.py +5 -9
  77. deltacat/utils/ray_utils/runtime.py +9 -10
  78. {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/METADATA +1 -1
  79. deltacat-0.1.12.dist-info/RECORD +110 -0
  80. deltacat-0.1.10.dev0.dist-info/RECORD +0 -108
  81. {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/LICENSE +0 -0
  82. {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/WHEEL +0 -0
  83. {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/top_level.txt +0 -0
deltacat/utils/placement.py
@@ -1,257 +1,286 @@
- import ray
+ import logging
  import re
  import time
- import yaml
- import logging
  from dataclasses import dataclass
- from typing import Optional, Union, List, Dict, Any, Callable, Tuple
- from ray.util.placement_group import (
-     placement_group,
-     placement_group_table,
-     get_current_placement_group
- )
+ from typing import Any, Dict, List, Optional, Tuple, Union

- from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
+ import ray
+ import yaml
  from ray.experimental.state.api import get_node, get_placement_group
-
+ from ray.util.placement_group import placement_group, placement_group_table
+ from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy

  from deltacat import logs
- from deltacat.utils.ray_utils.runtime import live_node_resource_keys
+
  logger = logs.configure_deltacat_logger(logging.getLogger(__name__))

- #Limitation of current node group or placement group manager
- #Must run on driver or head node bc state.api needs to query dashboard api server at 127.0.0.1.
- #Issue: https://github.com/ray-project/ray/issues/29959
+ # Limitation of current node group or placement group manager
+ # Must run on driver or head node bc state.api needs to query dashboard api server at 127.0.0.1.
+ # Issue: https://github.com/ray-project/ray/issues/29959
+

  @dataclass
- class PlacementGroupConfig():
-     def __init__(self, opts, resource):
-         self.opts = opts
-         self.resource = resource
+ class PlacementGroupConfig:
+     def __init__(self, opts, resource):
+         self.opts = opts
+         self.resource = resource

- class NodeGroupManager():

-     def __init__(self,path: str, gname: str):
-         """Node Group Manager
-         Args:
-             path: cluster yaml file
-             gname: node group prefix, e.g., 'partition'
-         """
-         #cluster init status:
-         self.NODE_GROUP_PREFIX=gname
-         self.cluster_config=self._read_yaml(path)
-         self.init_groups = self._cluster_node_groups(self.cluster_config)
-         self.init_group_res = self._parse_node_resources()
-
-     def _cluster_node_groups(self, config: Dict[str, Any]) -> Dict[str, Any]:
-         """Get Worker Groups
-         Args:
-             config: cluster yaml data
-         Returns:
-             worker groups: a dict of worker node group
+ class NodeGroupManager:
+     def __init__(self, path: str, gname: str):
+         """Node Group Manager
+         Args:
+             path: cluster yaml file
+             gname: node group prefix, e.g., 'partition'
+         """
+         # cluster init status:
+         self.NODE_GROUP_PREFIX = gname
+         self.cluster_config = self._read_yaml(path)
+         self.init_groups = self._cluster_node_groups(self.cluster_config)
+         self.init_group_res = self._parse_node_resources()

-         """
-         avail_node_types = list(config['available_node_types'].items())
-         #exclude head node type
-         head_node_types = [nt for nt in avail_node_types if 'resources' in nt[1] and 'CPU' in nt[1]['resources'] and nt[1]['resources']['CPU']==0][0]
-         worker_node_types = [x for x in avail_node_types if x !=head_node_types]
-         #assuming homogenous cluster
-         #in future, update with fleet resource
-         if len(worker_node_types)>0:
-             self.INSTANCE_TYPE = worker_node_types[0][1]['node_config']['InstanceType']
-         return worker_node_types
+     def _cluster_node_groups(self, config: Dict[str, Any]) -> Dict[str, Any]:
+         """Get Worker Groups
+         Args:
+             config: cluster yaml data
+         Returns:
+             worker groups: a dict of worker node group

+         """
+         avail_node_types = list(config["available_node_types"].items())
+         # exclude head node type
+         head_node_types = [
+             nt
+             for nt in avail_node_types
+             if "resources" in nt[1]
+             and "CPU" in nt[1]["resources"]
+             and nt[1]["resources"]["CPU"] == 0
+         ][0]
+         worker_node_types = [x for x in avail_node_types if x != head_node_types]
+         # assuming homogenous cluster
+         # in future, update with fleet resource
+         if len(worker_node_types) > 0:
+             self.INSTANCE_TYPE = worker_node_types[0][1]["node_config"]["InstanceType"]
+         return worker_node_types

-     def _read_yaml(self, path: str) -> Dict[str, Any]:
-         with open(path, "rt") as f:
-             return yaml.safe_load(f)
+     def _read_yaml(self, path: str) -> Dict[str, Any]:
+         with open(path, "rt") as f:
+             return yaml.safe_load(f)

-     def _update_groups(self) -> List[Tuple[str, float]]:
-         """
-         Node groups can come and go during runtime, whenever a node group is needed, we need to check the current available groups
-         Returns:
-             current_groups: dict of custom resource groups
-         """
-         #Add 1.1 second latency to avoid inconsistency issue between raylet and head
-         time.sleep(1.1)
-         all_available_res = ray.available_resources()
-         current_groups =[(k,all_available_res[k]) for k in all_available_res.keys() if self.NODE_GROUP_PREFIX in k]
-         return current_groups
+     def _update_groups(self) -> List[Tuple[str, float]]:
+         """
+         Node groups can come and go during runtime, whenever a node group is needed, we need to check the current available groups
+         Returns:
+             current_groups: dict of custom resource groups
+         """
+         # Add 1.1 second latency to avoid inconsistency issue between raylet and head
+         time.sleep(1.1)
+         all_available_res = ray.available_resources()
+         current_groups = [
+             (k, all_available_res[k])
+             for k in all_available_res.keys()
+             if self.NODE_GROUP_PREFIX in k
+         ]
+         return current_groups

-     def _parse_node_resources(self) -> Dict[str, Dict[str, float]]:
-         """
-         Parse resources per node to get detailed resource tighted to each node group
-         Returns:
-             group_res: a dict of resources, e.g., {'CPU':0,'memory':0,'object_store_memory':0}
-         """
-         all_available_resources= ray._private.state.state._available_resources_per_node()
-         group_keys = [x[0] for x in self.init_groups]
-         group_res={}
-         for k in group_keys:
-             group_res[k]={'CPU':0,'memory':0,'object_store_memory':0,'node_id':[]}
-         for v in all_available_resources.values():
-             keys =v.keys()
-             r = re.compile(self.NODE_GROUP_PREFIX)
-             partition=list(filter(r.match, list(keys)))
-             r = re.compile("node:")
-             node_id = list(filter(r.match, list(keys)))
-             if len(partition)>0:
-                 partition = partition[0]
-             if len(node_id)>0:
-                 node_id = node_id[0]
-             if self.NODE_GROUP_PREFIX in partition:
-                 group_res[partition]['CPU']+=v['CPU']
-                 group_res[partition]['memory']+=v['memory']
-                 group_res[partition]['object_store_memory']+=v['object_store_memory']
-                 group_res[partition]['node_id'].append(node_id)
-         return group_res
+     def _parse_node_resources(self) -> Dict[str, Dict[str, float]]:
+         """
+         Parse resources per node to get detailed resource tighted to each node group
+         Returns:
+             group_res: a dict of resources, e.g., {'CPU':0,'memory':0,'object_store_memory':0}
+         """
+         all_available_resources = (
+             ray._private.state.state._available_resources_per_node()
+         )
+         group_keys = [x[0] for x in self.init_groups]
+         group_res = {}
+         for k in group_keys:
+             group_res[k] = {
+                 "CPU": 0,
+                 "memory": 0,
+                 "object_store_memory": 0,
+                 "node_id": [],
+             }
+         for v in all_available_resources.values():
+             keys = v.keys()
+             r = re.compile(self.NODE_GROUP_PREFIX)
+             partition = list(filter(r.match, list(keys)))
+             r = re.compile("node:")
+             node_id = list(filter(r.match, list(keys)))
+             if len(partition) > 0:
+                 partition = partition[0]
+             if len(node_id) > 0:
+                 node_id = node_id[0]
+             if self.NODE_GROUP_PREFIX in partition:
+                 group_res[partition]["CPU"] += v["CPU"]
+                 group_res[partition]["memory"] += v["memory"]
+                 group_res[partition]["object_store_memory"] += v["object_store_memory"]
+                 group_res[partition]["node_id"].append(node_id)
+         return group_res

-     def _update_group_res(self, gname: str) -> Dict[str, Union[str, float]]:
-         """
-         Get the realtime resource of a node group
-         Args:
-             gname: name of node group
-         Returns:
-             group_res: dict of updated resource(cpu, memory, object store memory) for a given group
-         """
-         all_available_resources= ray._private.state.state._available_resources_per_node()
-         group_res={'CPU':0,'memory':0,'object_store_memory':0,'node_id':[]}
-         for v in all_available_resources.values():
-             keys =v.keys()
-             r = re.compile("node:")
-             node_id = list(filter(r.match, list(keys)))
-             if len(node_id)>0:
-                 node_id = node_id[0]
-             if gname in v.keys():
-                 group_res['CPU']+=v['CPU']
-                 group_res['memory']+=v['memory']
-                 group_res['object_store_memory']+=v['object_store_memory']
-                 group_res['node_id'].append(node_id)
-         return group_res
+     def _update_group_res(self, gname: str) -> Dict[str, Union[str, float]]:
+         """
+         Get the realtime resource of a node group
+         Args:
+             gname: name of node group
+         Returns:
+             group_res: dict of updated resource(cpu, memory, object store memory) for a given group
+         """
+         all_available_resources = (
+             ray._private.state.state._available_resources_per_node()
+         )
+         group_res = {"CPU": 0, "memory": 0, "object_store_memory": 0, "node_id": []}
+         for v in all_available_resources.values():
+             keys = v.keys()
+             r = re.compile("node:")
+             node_id = list(filter(r.match, list(keys)))
+             if len(node_id) > 0:
+                 node_id = node_id[0]
+             if gname in v.keys():
+                 group_res["CPU"] += v["CPU"]
+                 group_res["memory"] += v["memory"]
+                 group_res["object_store_memory"] += v["object_store_memory"]
+                 group_res["node_id"].append(node_id)
+         return group_res

-     def get_one_group(self) -> Optional[Dict[str, Union[str, float]]]:
-         """
-         Pop up one node group
-         Returns:
-             group_res: dict of node group resource, {"group":"partition_1","CPU":2,...}
-         """
-         current_groups = self._update_groups()
-         if len(current_groups)>0:
-             gname = current_groups[-1][0]
-             group_res=self._update_group_res(gname)
-             group_res['group']=gname
-             try:
-                 group_res['group_res']=ray.available_resources()[gname]
-             except Exception as e:
-                 logger.info(f"There is no available resources for {gname}")
-                 return None
-             return group_res
-         else:
-             return None
+     def get_one_group(self) -> Optional[Dict[str, Union[str, float]]]:
+         """
+         Pop up one node group
+         Returns:
+             group_res: dict of node group resource, {"group":"partition_1","CPU":2,...}
+         """
+         current_groups = self._update_groups()
+         if len(current_groups) > 0:
+             gname = current_groups[-1][0]
+             group_res = self._update_group_res(gname)
+             group_res["group"] = gname
+             try:
+                 group_res["group_res"] = ray.available_resources()[gname]
+             except Exception as e:
+                 logger.info(f"Error: {e}. There is no available resources for {gname}")
+                 return None
+             return group_res
+         else:
+             return None

-     def get_group_by_name(self, gname: str) -> Optional[Dict[str, Union[str, float]]]:
-         """
-         Get the specific node group given its pre-filled name
-         Args:
-             gname: name of the node group
-         Returns:
-             group_res: dict of node group resource
+     def get_group_by_name(self, gname: str) -> Optional[Dict[str, Union[str, float]]]:
+         """
+         Get the specific node group given its pre-filled name
+         Args:
+             gname: name of the node group
+         Returns:
+             group_res: dict of node group resource

-         """
-         group_res=self._update_group_res(gname)
-         group_res['group']=gname
-         try:
-             group_res['group_res']=ray.available_resources()[gname]
-         except Exception as e:
-             logger.info(f"There is no available resources for {gname}")
-             return None
-         return group_res
+         """
+         group_res = self._update_group_res(gname)
+         group_res["group"] = gname
+         try:
+             group_res["group_res"] = ray.available_resources()[gname]
+         except Exception as e:
+             logger.info(f"Error: {e}. There is no available resources for {gname}")
+             return None
+         return group_res

- class PlacementGroupManager():
-     """Placement Group Manager
-     Create a list of placement group with the desired number of cpus
-     e.g., create a pg with 32 cpus, then this class will look for a node that has 32 cpus, and collect all
-     resources, including cpu, memory, and object store;
-     How to use:
-     ```
-     from deltacat.utils.placement import PlacementGroupManager as pgm
-     pgm = pgm(10, 32)
-     pg_configs = pgm.pgs
-     opts = pg_configs[0][0]
-     fun.options(**opts).remote()
-     ```
-     Args:
-         num_pgs: number of placement groups to be created
-         instance_cpus: number of cpus per instance
-     """
-     def __init__(self, num_pgs: int,
-                  total_cpus_per_pg: int,
-                  cpu_per_bundle: int,
-                  strategy="SPREAD",
-                  capture_child_tasks=True):
-         head_res_key = self.get_current_node_resource_key()
-         #run the task on head and consume a fractional cpu, so that pg can be created on non-head node
-         #if cpu_per_bundle is less than the cpus per node, the pg can still be created on head
-         #curent assumption is that the cpu_per_bundle = cpus per node
-         #TODO: figure out how to create pg on non-head explicitly
-         self._pg_configs = ray.get([_config.options(resources={head_res_key:0.01}).remote(total_cpus_per_pg, \
-             cpu_per_bundle, strategy, capture_child_tasks) for i in range(num_pgs)])
-         #TODO: handle the cases where cpu_per_bundle is larger than max cpus per node, support it on ec2/flex/manta
-
-     @property
-     def pgs(self):
-         return self._pg_configs

-     def get_current_node_resource_key(self) -> str:
-         #on ec2: address="172.31.34.51:6379"
-         #on manta: address = "2600:1f10:4674:6815:aadb:2dc8:de61:bc8e:6379"
-         current_node_name = ray.experimental.internal_kv.global_gcs_client.address[:-5]
-         for node in ray.nodes():
-             if node["NodeName"] == current_node_name:
-                 # Found the node.
-                 for key in node["Resources"].keys():
-                     if key.startswith("node:"):
-                         return key
+ class PlacementGroupManager:
+     """Placement Group Manager
+     Create a list of placement group with the desired number of cpus
+     e.g., create a pg with 32 cpus, then this class will look for a node that has 32 cpus, and collect all
+     resources, including cpu, memory, and object store;
+     How to use:
+     ```
+     from deltacat.utils.placement import PlacementGroupManager as pgm
+     pgm = pgm(10, 32)
+     pg_configs = pgm.pgs
+     opts = pg_configs[0][0]
+     fun.options(**opts).remote()
+     ```
+     Args:
+         num_pgs: number of placement groups to be created
+         instance_cpus: number of cpus per instance
+     """

- @ray.remote(num_cpus=0.01)
- def _config(total_cpus_per_pg: int,
-             cpu_per_node: int,
-             strategy="SPREAD",
-             capture_child_tasks=True,
-             time_out: Optional[float] = None) -> Tuple[Dict[str,Any], Dict[str,Any]]:
-     pg_config = None
-     opts ={}
-     cluster_resources={}
-     num_bundles = (int)(total_cpus_per_pg/cpu_per_node)
-     bundles = [{'CPU':cpu_per_node} for i in range(num_bundles)]
-     pg = placement_group(bundles, strategy=strategy)
-     ray.get(pg.ready(), timeout=time_out)
-     if not pg:
-         return None
-     opts = {"scheduling_strategy":PlacementGroupSchedulingStrategy(
-         placement_group=pg, placement_group_capture_child_tasks=capture_child_tasks)
-     }
-     pg_id = placement_group_table(pg)['placement_group_id']
-     pg_details = get_placement_group(pg_id)
-     bundles = pg_details['bundles']
-     node_ids = []
-     for bd in bundles:
-         node_ids.append(bd['node_id'])
-     #query available resources given list of node id
-     all_nodes_available_res = ray._private.state.state._available_resources_per_node()
-     pg_res = {'CPU':0,'memory':0,'object_store_memory':0}
-     for node_id in node_ids:
-         if node_id in all_nodes_available_res:
-             v = all_nodes_available_res[node_id]
-             node_detail = get_node(node_id)
-             pg_res['CPU']+=node_detail['resources_total']['CPU']
-             pg_res['memory']+=v['memory']
-             pg_res['object_store_memory']+=v['object_store_memory']
-     cluster_resources['CPU'] = int(pg_res['CPU'])
-     cluster_resources['memory'] = float(pg_res['memory'])
-     cluster_resources['object_store_memory'] = float(pg_res['object_store_memory'])
-     pg_config=PlacementGroupConfig(opts,cluster_resources)
-     logger.info(f"pg has resources:{cluster_resources}")
+     def __init__(
+         self,
+         num_pgs: int,
+         total_cpus_per_pg: int,
+         cpu_per_bundle: int,
+         strategy="SPREAD",
+         capture_child_tasks=True,
+     ):
+         head_res_key = self.get_current_node_resource_key()
+         # run the task on head and consume a fractional cpu, so that pg can be created on non-head node
+         # if cpu_per_bundle is less than the cpus per node, the pg can still be created on head
+         # curent assumption is that the cpu_per_bundle = cpus per node
+         # TODO: figure out how to create pg on non-head explicitly
+         self._pg_configs = ray.get(
+             [
+                 _config.options(resources={head_res_key: 0.01}).remote(
+                     total_cpus_per_pg, cpu_per_bundle, strategy, capture_child_tasks
+                 )
+                 for i in range(num_pgs)
+             ]
+         )
+         # TODO: handle the cases where cpu_per_bundle is larger than max cpus per node, support it on ec2/flex/manta

-     return pg_config
+     @property
+     def pgs(self):
+         return self._pg_configs
+
+     def get_current_node_resource_key(self) -> str:
+         # on ec2: address="172.31.34.51:6379"
+         # on manta: address = "2600:1f10:4674:6815:aadb:2dc8:de61:bc8e:6379"
+         current_node_name = ray.experimental.internal_kv.global_gcs_client.address[:-5]
+         for node in ray.nodes():
+             if node["NodeName"] == current_node_name:
+                 # Found the node.
+                 for key in node["Resources"].keys():
+                     if key.startswith("node:"):
+                         return key
+
+
+ @ray.remote(num_cpus=0.01)
+ def _config(
+     total_cpus_per_pg: int,
+     cpu_per_node: int,
+     strategy="SPREAD",
+     capture_child_tasks=True,
+     time_out: Optional[float] = None,
+ ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+     pg_config = None
+     opts = {}
+     cluster_resources = {}
+     num_bundles = (int)(total_cpus_per_pg / cpu_per_node)
+     bundles = [{"CPU": cpu_per_node} for i in range(num_bundles)]
+     pg = placement_group(bundles, strategy=strategy)
+     ray.get(pg.ready(), timeout=time_out)
+     if not pg:
+         return None
+     opts = {
+         "scheduling_strategy": PlacementGroupSchedulingStrategy(
+             placement_group=pg, placement_group_capture_child_tasks=capture_child_tasks
+         )
+     }
+     pg_id = placement_group_table(pg)["placement_group_id"]
+     pg_details = get_placement_group(pg_id)
+     bundles = pg_details["bundles"]
+     node_ids = []
+     for bd in bundles:
+         node_ids.append(bd["node_id"])
+     # query available resources given list of node id
+     all_nodes_available_res = ray._private.state.state._available_resources_per_node()
+     pg_res = {"CPU": 0, "memory": 0, "object_store_memory": 0}
+     for node_id in node_ids:
+         if node_id in all_nodes_available_res:
+             v = all_nodes_available_res[node_id]
+             node_detail = get_node(node_id)
+             pg_res["CPU"] += node_detail["resources_total"]["CPU"]
+             pg_res["memory"] += v["memory"]
+             pg_res["object_store_memory"] += v["object_store_memory"]
+     cluster_resources["CPU"] = int(pg_res["CPU"])
+     cluster_resources["memory"] = float(pg_res["memory"])
+     cluster_resources["object_store_memory"] = float(pg_res["object_store_memory"])
+     pg_config = PlacementGroupConfig(opts, cluster_resources)
+     logger.info(f"pg has resources:{cluster_resources}")

+     return pg_config
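The largest single change shown above is the rewrite of deltacat/utils/placement.py: the module is reformatted in black style, imports are reordered and pruned, and the remote `_config` task now wraps its result in a `PlacementGroupConfig` whose `opts` carries the `PlacementGroupSchedulingStrategy` and whose `resource` holds the aggregated CPU, memory, and object-store totals. The sketch below shows how a caller might consume that API in 0.1.12; it is illustrative only. `my_task`, the group sizes, and the attribute-style access to `PlacementGroupConfig` are assumptions (the class docstring in the diff still indexes `pg_configs[0][0]`), so verify them against the installed release.

```
import ray

from deltacat.utils.placement import PlacementGroupManager

# Assumes a Ray cluster is already running and reachable from the driver/head node.
ray.init(address="auto")


@ray.remote
def my_task(task_id: int) -> int:
    # Hypothetical workload; stands in for a compaction or stats step.
    return task_id


# Ten placement groups, each spanning 32 CPUs, with one 32-CPU bundle per group
# (mirroring the "cpu_per_bundle = cpus per node" assumption noted in the diff).
pgm = PlacementGroupManager(num_pgs=10, total_cpus_per_pg=32, cpu_per_bundle=32)

futures = []
for i, pg_config in enumerate(pgm.pgs):
    # _config returns None when a placement group could not be created, so skip those.
    if pg_config is None:
        continue
    # pg_config.opts holds the PlacementGroupSchedulingStrategy built by _config;
    # pg_config.resource reports the CPU/memory/object-store totals it collected.
    futures.append(my_task.options(**pg_config.opts).remote(i))

print(ray.get(futures))
```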