graphviper 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
graphviper/__init__.py ADDED
File without changes
@@ -0,0 +1 @@
1
+ from .client import local_client, slurm_cluster_client
@@ -0,0 +1,208 @@
1
+ """
2
+ MIT License
3
+
4
+ Copyright (c) 2022 Jonathan Simon Kenyon
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
23
+ """
24
+ from collections import defaultdict
25
+ from distributed import SchedulerPlugin
26
+ from dask.core import reverse_dict
27
+ from dask.base import tokenize
28
+ from dask.order import graph_metrics, ndependencies
29
+ import click
30
+ from distributed.diagnostics.plugin import SchedulerPlugin
31
+ import numpy as np
32
+
33
+
34
def unravel_deps(hlg_deps, name, unravelled_deps=None):
    """Construct the set of all transitive dependencies of a specific task.

    Parameters
    ----------
    hlg_deps : mapping
        Maps each task name to an iterable of the names it directly depends on.
    name : hashable
        Task whose dependencies are collected.
    unravelled_deps : set, optional
        Accumulator that is updated in place. A new set is created when it is
        omitted.

    Returns
    -------
    set
        ``unravelled_deps`` extended with every task reachable from ``name``.
        ``name`` itself is only included when it is reachable through a cycle.

    Raises
    ------
    KeyError
        If ``name`` or any reachable task is missing from ``hlg_deps``.
    """
    if unravelled_deps is None:
        unravelled_deps = set()

    # Expanded tasks are tracked separately from the accumulator so that a
    # pre-populated ``unravelled_deps`` never suppresses part of the traversal.
    expanded = {name}
    pending = [name]

    # An explicit stack replaces recursion: every task is expanded once, which
    # avoids exponential re-traversal of shared sub-graphs, the interpreter
    # recursion limit on deep graphs, and endless recursion on cycles.
    while pending:
        current = pending.pop()
        for dep in hlg_deps[current]:
            unravelled_deps.add(dep)
            if dep not in expanded:
                expanded.add(dep)
                pending.append(dep)

    return unravelled_deps
45
+
46
+
47
def get_node_depths(dependencies, root_nodes, metrics):
    """Compute a depth value for every key of ``dependencies``.

    Parameters
    ----------
    dependencies : dict
        Maps each task to the tasks it directly depends on.
    root_nodes : set
        Tasks regarded as roots of the graph.
    metrics : mapping
        Per-task metric sequences; only the last element of each is read.
        NOTE(review): presumably the output of ``dask.order.graph_metrics``
        - confirm what its last field represents.

    Returns
    -------
    dict
        For each task, the largest difference between the last metric of a
        root found among its transitive dependencies and its own last metric,
        or 0 when none of its transitive dependencies is a root.
    """

    def _depth_of(task):
        reachable_roots = root_nodes & unravel_deps(dependencies, task)
        own_metric = metrics[task][-1]
        if not reachable_roots:
            return 0
        return max(metrics[root][-1] - own_metric for root in reachable_roots)

    return {task: _depth_of(task) for task in dependencies}
59
+
60
+
61
class schedular(SchedulerPlugin):
    """Scheduler plugin that labels workers by host and can pin tasks to workers.

    Parameters
    ----------
    autorestrictor : bool
        When truthy, ``update_graph`` partitions each submitted graph and
        restricts every task it finds to specific workers.
    local_cache : bool
        When truthy, every worker that joins is given a resource named after
        the host part of its address.
    """

    def __init__(self, autorestrictor, local_cache):
        self.autorestrictor = autorestrictor
        self.local_cache = local_cache
        super().__init__()

    def add_worker(self, scheduler, worker):
        """Register a per-host resource for ``worker`` when local caching is on.

        ``worker`` is the worker address string. The resource label is the
        text between its last ``/`` and its last ``:``.
        """
        if self.local_cache:
            # Set the resource label to the ip of the node that the worker is
            # on, so that tasks that require a specific node can be assigned
            # to the correct worker.
            ip = worker[worker.rfind("/") + 1 : worker.rfind(":")]
            scheduler.add_resources(worker=worker, resources={ip: 1})

    def update_graph(self, scheduler, dsk=None, keys=None, restrictions=None, **kw):
        """Processes dependencies to assign tasks to specific workers.

        Does nothing unless ``autorestrictor`` is enabled. The dependency
        mapping is read from the ``dependencies`` keyword argument. When it is
        missing or empty, when no workers are connected, or when no suitable
        partition depth is found, the method returns without touching any
        task so that default scheduling applies.

        Otherwise every task belonging to a partition gets the chosen worker
        added to ``worker_restrictions`` and ``loose_restrictions`` set to
        ``False``.
        """
        if not self.autorestrictor:
            return

        print("Using autorestrictor")

        workers = list(scheduler.workers.keys())
        n_worker = len(workers)

        tasks = scheduler.tasks
        dependencies = kw.get("dependencies")

        # Without dependencies there is nothing to partition, and without
        # workers there is nobody to assign to (``min`` over an empty load
        # table would raise ValueError further down).
        if not dependencies or not workers:
            return

        dependents = reverse_dict(dependencies)

        _, total_dependencies = ndependencies(dependencies, dependents)
        # TODO: Avoid calling graph metrics.
        metrics = graph_metrics(dependencies, dependents, total_dependencies)

        # Terminal nodes have no dependents, root nodes have no dependencies.
        # Horizontal partition nodes are initialized as the terminal nodes.
        part_nodes = {k for (k, v) in dependents.items() if not v}
        root_nodes = {k for (k, v) in dependencies.items() if not v}

        # Figure out the depth of every task. Depth is defined as maximum
        # distance from a root node. TODO: Optimize get_node_depths.
        node_depths = get_node_depths(dependencies, root_nodes, metrics)
        max_depth = max(node_depths.values())

        # If we have fewer partition nodes than workers, we cannot utilise all
        # the workers and are likely dealing with a reduction. We work our way
        # back through the graph, starting at the deepest terminal nodes, and
        # try to find a depth at which there was enough work to utilise all
        # workers.
        while len(part_nodes) < n_worker and max_depth > 0:
            for pn in part_nodes.copy():
                if node_depths[pn] == max_depth:
                    # Replace the partition node by its direct dependencies.
                    part_nodes ^= {pn}
                    part_nodes |= dependencies[pn]
            max_depth -= 1
            if max_depth <= 0:
                return  # In this case, there is nothing we can do - fall back.

        part_roots = {}
        part_dependencies = {}
        part_dependents = {}

        for pn in part_nodes:
            # Get dependencies per partition node.
            part_dependencies[pn] = unravel_deps(dependencies, pn)
            # Get dependents per partition node.
            part_dependents[pn] = unravel_deps(dependents, pn)
            # Associate partition nodes with root nodes. A frozenset is
            # hashable, so it can identify the root set directly; no sorting
            # is needed, which would fail when keys mix str and tuple.
            part_roots[pn] = frozenset(root_nodes & part_dependencies[pn])

        # Unique root sets in first-seen order. TODO: This is very strict.
        # What about nodes with very similar roots?
        root_sets = list(dict.fromkeys(part_roots.values()))

        hash_map = defaultdict(set)
        group_offset = 0

        # Associate partition roots with a specific group if they are not a
        # subset of another, larger root set.
        for roots in root_sets:
            if any(roots < other for other in root_sets):  # Strict subset.
                continue
            hash_map[roots].add(group_offset)
            group_offset += 1

        # If roots were a subset, they should share the group of their
        # superset/s.
        for roots in root_sets:
            if not roots:  # Special case - no dependencies. Handled below.
                continue
            supersets = [other for other in root_sets if roots < other]
            if supersets:
                hash_map[roots] = set().union(
                    *[hash_map[other] for other in supersets]
                )

        task_groups = defaultdict(set)

        for pn in part_nodes:
            pdp = part_dependencies[pn]
            pdn = part_dependents[pn]

            if pdp:
                groups = hash_map[part_roots[pn]]
            else:  # Special case - no dependencies.
                groups = {group_offset}
                group_offset += 1

            for g in groups:
                task_groups[g] |= pdp | pdn | {pn}

        worker_loads = {wkr: 0 for wkr in workers}

        for task_group in task_groups.values():
            # Greedy balancing: give the group to the least loaded worker.
            assignee = min(worker_loads, key=worker_loads.get)
            worker_loads[assignee] += len(task_group)

            for task_name in task_group:
                try:
                    task = tasks[task_name]
                except KeyError:  # Keys may not have an associated task.
                    continue

                if task.worker_restrictions is None:
                    task.worker_restrictions = set()
                task.worker_restrictions |= {assignee}
                task.loose_restrictions = False
201
+
202
+
203
# Click command taking the scheduler object plus the --autorestrictor and
# --local_cache options. Documented with comments rather than a docstring so
# that the command's help text stays unchanged.
@click.command()
@click.option("--autorestrictor", default=False)
@click.option("--local_cache", default=False)
def dask_setup(scheduler, autorestrictor, local_cache):
    # Build the plugin from the parsed options and attach it to the scheduler.
    scheduler.add_plugin(schedular(autorestrictor, local_cache))
@@ -0,0 +1,71 @@
1
+ import click
2
+
3
+ from graphviper._logger import _setup_worker_logger
4
+
5
+
6
+ class _worker:
7
+ def __init__(self, local_cache, log_parms):
8
+ # print("init local cache")
9
+ self.local_cache = local_cache
10
+
11
+ print("log_parms", log_parms)
12
+ # /.lustre/aoc/projects/ngvla/viper/ngvla_sim/viper_
13
+ self.log_to_term = log_parms["log_to_term"]
14
+ self.log_to_file = log_parms["log_to_file"]
15
+ self.log_file = log_parms["log_file"]
16
+ self.log_level = log_parms["log_level"]
17
+
18
+ def get_logger(self):
19
+ return self.logger
20
+
21
+ def setup(self, worker):
22
+ """
23
+ Run when the plugin is attached to a worker. This happens when the plugin is registered
24
+ and attached to existing workers, or when a worker is created after the plugin has been
25
+ registered.
26
+ """
27
+
28
+ self.logger = _setup_worker_logger(
29
+ self.log_to_term,
30
+ self.log_to_file,
31
+ self.log_file,
32
+ self.log_level,
33
+ str(worker.id),
34
+ )
35
+ self.logger.debug(
36
+ "Logger created on worker " + str(worker.id) + ",*," + str(worker.address)
37
+ )
38
+ # Documentation https://distributed.dask.org/en/stable/worker.html#distributed.worker.Worker
39
+ self.worker = worker
40
+
41
+ if self.local_cache:
42
+ ip = worker.address[
43
+ worker.address.rfind("/") + 1 : worker.address.rfind(":")
44
+ ]
45
+ self.logger.debug(str(worker.id) + ",*," + ip)
46
+ worker.state.available_resources = {
47
+ **worker.state.available_resources,
48
+ **{ip: 1},
49
+ }
50
+ # print(worker.state.available_resources)
51
+
52
+
53
+ # https://github.com/dask/distributed/issues/4169
54
# Click command taking the worker object plus the local-cache and logging
# options. Documented with comments rather than a docstring so that the
# command's help text stays unchanged.
@click.command()
@click.option("--local_cache", default=False)
@click.option("--log_to_term", default=True)
@click.option("--log_to_file", default=False)
@click.option("--log_file", default="viper_")
@click.option("--log_level", default="INFO")
async def dask_setup(
    worker, local_cache, log_to_term, log_to_file, log_file, log_level
):
    # Bundle the individual logging options into the mapping _worker expects,
    # then register the plugin through the worker's client.
    plugin = _worker(
        local_cache,
        dict(
            log_to_term=log_to_term,
            log_to_file=log_to_file,
            log_file=log_file,
            log_level=log_level,
        ),
    )
    await worker.client.register_worker_plugin(plugin, name="viper_worker")
@@ -0,0 +1,248 @@
1
+ import warnings, time, os, psutil, multiprocessing, re
2
+ import dask
3
+ import copy
4
+ import os
5
+ import logging
6
+ import graphviper
7
+ import distributed
8
+ from graphviper.parameter_checking.check_logger_parms import (
9
+ check_logger_parms,
10
+ check_worker_logger_parms,
11
+ )
12
+ from graphviper.logger import setup_logger, get_logger
13
+ from graphviper.dask._worker import (
14
+ _worker,
15
+ ) # _worker_logger_plugin
16
+
17
+
18
def local_client(
    cores=None,
    memory_limit=None,
    autorestrictor=False,
    dask_local_dir=None,
    local_dir=None,
    wait_for_workers=True,
    log_parms={},
    worker_log_parms={},
):
    """Create a Dask client backed by a ``distributed.LocalCluster``.

    local_dir setting is only useful for testing since this function creates
    a local cluster. slurm_cluster_client should be used for a multinode
    cluster.

    https://github.com/dask/dask/issues/5577

    Parameters
    ----------
    cores : int, optional
        Number of single-threaded worker processes. Defaults to
        ``multiprocessing.cpu_count()``.
    memory_limit : str, optional
        Per-worker memory limit. Defaults to the currently available memory
        divided evenly between the workers, expressed in MB.
    autorestrictor : bool
        Enables the scheduler preload with ``--autorestrictor``.
    dask_local_dir : str, optional
        Used as Dask's ``temporary_directory`` when given.
    local_dir : str, optional
        When given, exported as ``VIPER_LOCAL_DIR`` and local caching is
        switched on.
    wait_for_workers : bool
        Block until all ``cores`` workers are up. Always done when local
        caching is on.
    log_parms : dict
        Checked with ``check_logger_parms`` and then passed as keyword
        arguments to ``setup_logger``.
    worker_log_parms : dict or None
        Checked with ``check_worker_logger_parms`` unless ``None``, then
        handed to the worker plugin, which reads the keys ``log_to_term``,
        ``log_to_file``, ``log_file`` and ``log_level``.

    Returns
    -------
    distributed.Client

    Raises
    ------
    AssertionError
        If either parameter check fails.
    """
    # The mutable defaults are never modified: both are deep-copied first.
    _log_parms = copy.deepcopy(log_parms)
    _worker_log_parms = copy.deepcopy(worker_log_parms)

    # The checks are called outside of ``assert`` so that they still run under
    # ``python -O``. AssertionError is kept for backward compatibility.
    # NOTE(review): the checkers presumably also fill in defaults - confirm.
    if not check_logger_parms(_log_parms):
        raise AssertionError(
            "######### ERROR: initialize_processing log_parms checking failed."
        )

    if _worker_log_parms is not None and not check_worker_logger_parms(
        _worker_log_parms
    ):
        raise AssertionError(
            "######### ERROR: initialize_processing log_parms checking failed."
        )

    if local_dir:
        os.environ["VIPER_LOCAL_DIR"] = local_dir
        local_cache = True
    else:
        local_cache = False

    setup_logger(**_log_parms)
    logger = get_logger()

    _set_up_dask(dask_local_dir)

    viper_path = graphviper.__path__[0]
    if local_cache or autorestrictor:
        # NOTE(review): verify that this relative path matches the location of
        # the scheduler preload module in the installed package.
        dask.config.set(
            {
                "distributed.scheduler.preload": os.path.join(
                    viper_path, "_concurrency/_dask/_scheduler.py"
                )
            }
        )
        dask.config.set(
            {
                "distributed.scheduler.preload-argv": [
                    "--local_cache",
                    local_cache,
                    "--autorestrictor",
                    autorestrictor,
                ]
            }
        )

    # Assigning the worker plugin through the "distributed.worker.preload"
    # config does not seem to work when using dask_jobqueue. Consequently
    # client.register_worker_plugin is used, so that the method of assigning a
    # worker plugin is the same for local_client and slurm_cluster_client.

    # Set up the distributed based multiprocessing environment.
    if cores is None:
        cores = multiprocessing.cpu_count()
    if memory_limit is None:
        available_mb = psutil.virtual_memory().available / (1024**2)
        memory_limit = str(round(available_mb / cores)) + "MB"

    cluster = distributed.LocalCluster(
        n_workers=cores, threads_per_worker=1, processes=True, memory_limit=memory_limit
    )
    client = distributed.Client(cluster)
    client.get_versions(check=True)

    # When constructing a graph that has local cache enabled all workers need
    # to be up and running.
    if local_cache or wait_for_workers:
        client.wait_for_workers(n_workers=cores)

    if local_cache or _worker_log_parms:
        plugin = _worker(local_cache, _worker_log_parms)
        client.register_worker_plugin(plugin, name="viper_worker")

    logger.info("Created client " + str(client))

    return client
113
+
114
+
115
def slurm_cluster_client(
    workers_per_node,
    cores_per_node,
    memory_per_node,
    number_of_nodes,
    queue,
    interface,
    python_env_dir,
    dask_local_dir,
    dask_log_dir,
    exclude_nodes="nmpost090",
    dashboard_port=9000,
    local_dir=None,
    autorestrictor=False,
    wait_for_workers=True,
    log_parms={},
    worker_log_parms={},
):
    """Create a Dask client backed by a ``dask_jobqueue.SLURMCluster``.

    https://github.com/dask/dask/issues/5577

    Parameters
    ----------
    workers_per_node : int
        Worker processes per SLURM job (``processes``).
    cores_per_node : int
        Cores per SLURM job (``cores``).
    memory_per_node : str
        Memory per SLURM job (``memory``).
    number_of_nodes : int
        The cluster is scaled to ``workers_per_node * number_of_nodes``
        workers.
    queue : str
        SLURM queue to submit to.
    interface : str
        Network interface, e.g. ``eth0`` or ``ib0``.
    python_env_dir : str
        Python executable used by the jobs, e.g.
        ``"/mnt/condor/jsteeb/viper_py/bin/python"``.
    dask_local_dir : str
        Worker local directory, e.g. ``"/mnt/condor/jsteeb"``. Also used as
        Dask's ``temporary_directory``.
    dask_log_dir : str
        Job log directory, e.g.
        ``"/.lustre/aoc/projects/ngvla/viper/ngvla_sim"``.
    exclude_nodes : str, optional
        Value of the SLURM ``--exclude`` directive. No directive is added
        when it is empty or ``None``.
    dashboard_port : int
        Port of the scheduler dashboard.
    local_dir : str, optional
        When given, exported as ``VIPER_LOCAL_DIR`` and local caching is
        switched on.
    autorestrictor : bool
        Enables the scheduler preload with ``--autorestrictor``.
    wait_for_workers : bool
        Block until all requested workers are up. Always done when local
        caching is on.
    log_parms : dict
        Checked with ``check_logger_parms`` and then passed as keyword
        arguments to ``setup_logger``.
    worker_log_parms : dict or None
        Checked with ``check_worker_logger_parms`` unless ``None``, then
        handed to the worker plugin, which reads the keys ``log_to_term``,
        ``log_to_file``, ``log_file`` and ``log_level``.

    Returns
    -------
    distributed.Client

    Raises
    ------
    AssertionError
        If either parameter check fails.
    """
    # Imported here so that dask_jobqueue is only required for SLURM use.
    from dask_jobqueue import SLURMCluster
    from distributed import Client

    # The mutable defaults are never modified: both are deep-copied first.
    _log_parms = copy.deepcopy(log_parms)
    _worker_log_parms = copy.deepcopy(worker_log_parms)

    # Use the checkers imported at module level (the underscore-prefixed names
    # used previously are not defined). They are called outside of ``assert``
    # so that they still run under ``python -O``.
    if not check_logger_parms(_log_parms):
        raise AssertionError(
            "######### ERROR: initialize_processing log_parms checking failed."
        )
    if _worker_log_parms is not None and not check_worker_logger_parms(
        _worker_log_parms
    ):
        raise AssertionError(
            "######### ERROR: initialize_processing log_parms checking failed."
        )

    if local_dir:
        os.environ["VIPER_LOCAL_DIR"] = local_dir
        local_cache = True
    else:
        local_cache = False

    # Viper logger for code that is not part of the Dask graph. The worker
    # logger is set up in the worker plugin.
    setup_logger(**_log_parms)
    logger = get_logger()

    _set_up_dask(dask_local_dir)

    # Same lookup as local_client; indexing works for a plain list as well.
    viper_path = graphviper.__path__[0]
    if local_cache or autorestrictor:
        # NOTE(review): verify that this relative path matches the location of
        # the scheduler preload module in the installed package.
        dask.config.set(
            {
                "distributed.scheduler.preload": os.path.join(
                    viper_path, "_concurrency/_dask/_scheduler.py"
                )
            }
        )
        dask.config.set(
            {
                "distributed.scheduler.preload-argv": [
                    "--local_cache",
                    local_cache,
                    "--autorestrictor",
                    autorestrictor,
                ]
            }
        )

    # Assigning the worker plugin through the "distributed.worker.preload"
    # config does not seem to work when using dask_jobqueue. Consequently
    # client.register_worker_plugin is used, so that the method of assigning a
    # worker plugin is the same for local_client and slurm_cluster_client.

    # Only emit the directive when there is something to exclude.
    job_extra_directives = ["--exclude=" + exclude_nodes] if exclude_nodes else []

    cluster = SLURMCluster(
        processes=workers_per_node,
        cores=cores_per_node,
        interface=interface,
        memory=memory_per_node,
        walltime="24:00:00",
        queue=queue,
        name="viper",
        python=python_env_dir,
        local_directory=dask_local_dir,
        log_directory=dask_log_dir,
        job_extra_directives=job_extra_directives,
        scheduler_options={"dashboard_address": ":" + str(dashboard_port)},
    )

    client = Client(cluster)

    n_workers = workers_per_node * number_of_nodes
    cluster.scale(n_workers)

    # When constructing a graph that has local cache enabled all workers need
    # to be up and running.
    if local_cache or wait_for_workers:
        client.wait_for_workers(n_workers=n_workers)

    if local_cache or _worker_log_parms:
        plugin = _worker(local_cache, _worker_log_parms)
        client.register_worker_plugin(plugin, name="viper_worker")

    logger.info("Created client " + str(client))

    return client
233
+
234
+
235
def _set_up_dask(local_directory):
    """Apply the Dask configuration shared by the client factories.

    ``local_directory`` becomes Dask's ``temporary_directory`` when it is
    truthy; the remaining settings are applied unconditionally, one
    ``dask.config.set`` call per entry.

    https://docs.dask.org/en/stable/how-to/customize-initialization.html
    """
    fixed_settings = {
        "distributed.scheduler.allowed-failures": 10,
        "distributed.scheduler.work-stealing": True,
        "distributed.scheduler.unknown-task-duration": "99m",
        "distributed.worker.memory.pause": False,
        "distributed.worker.memory.terminate": False,
        "distributed.comm.timeouts.connect": "3600s",
        "distributed.comm.timeouts.tcp": "3600s",
        "distributed.nanny.environ.OMP_NUM_THREADS": 1,
        "distributed.nanny.environ.MKL_NUM_THREADS": 1,
    }

    if local_directory:
        dask.config.set({"temporary_directory": local_directory})

    for option, value in fixed_settings.items():
        dask.config.set({option: value})
@@ -0,0 +1,8 @@
1
+ from .coordinate_utils import (
2
+ make_time_coord,
3
+ make_frequency_coord,
4
+ make_parallel_coord,
5
+ interpolate_data_coords_onto_parallel_coords,
6
+ )
7
+ from .map import map
8
+ from .reduce import reduce
File without changes