graphviper 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. graphviper-0.0.1/LICENSE.txt +28 -0
  2. graphviper-0.0.1/MANIFEST.in +4 -0
  3. graphviper-0.0.1/PKG-INFO +78 -0
  4. graphviper-0.0.1/README.md +7 -0
  5. graphviper-0.0.1/pyproject.toml +50 -0
  6. graphviper-0.0.1/setup.cfg +4 -0
  7. graphviper-0.0.1/src/graphviper/__init__.py +0 -0
  8. graphviper-0.0.1/src/graphviper/dask/__init__.py +1 -0
  9. graphviper-0.0.1/src/graphviper/dask/_scheduler.py +208 -0
  10. graphviper-0.0.1/src/graphviper/dask/_worker.py +71 -0
  11. graphviper-0.0.1/src/graphviper/dask/client.py +248 -0
  12. graphviper-0.0.1/src/graphviper/graph_tools/__init__.py +8 -0
  13. graphviper-0.0.1/src/graphviper/graph_tools/append.py +0 -0
  14. graphviper-0.0.1/src/graphviper/graph_tools/coordinate_utils.py +243 -0
  15. graphviper-0.0.1/src/graphviper/graph_tools/map.py +143 -0
  16. graphviper-0.0.1/src/graphviper/graph_tools/reduce.py +34 -0
  17. graphviper-0.0.1/src/graphviper/logger/__init__.py +1 -0
  18. graphviper-0.0.1/src/graphviper/logger/logger.py +86 -0
  19. graphviper-0.0.1/src/graphviper/parameter_checking/__init__.py +2 -0
  20. graphviper-0.0.1/src/graphviper/parameter_checking/check_logger_parms.py +56 -0
  21. graphviper-0.0.1/src/graphviper/parameter_checking/check_parms.py +335 -0
  22. graphviper-0.0.1/src/graphviper/utils/__init__.py +1 -0
  23. graphviper-0.0.1/src/graphviper/utils/display.py +10 -0
  24. graphviper-0.0.1/src/graphviper.egg-info/PKG-INFO +78 -0
  25. graphviper-0.0.1/src/graphviper.egg-info/SOURCES.txt +27 -0
  26. graphviper-0.0.1/src/graphviper.egg-info/dependency_links.txt +1 -0
  27. graphviper-0.0.1/src/graphviper.egg-info/requires.txt +36 -0
  28. graphviper-0.0.1/src/graphviper.egg-info/top_level.txt +1 -0
  29. graphviper-0.0.1/tests/test_graph_tools.py +99 -0
@@ -0,0 +1,28 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2023, Associated Universities, Inc.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ 3. Neither the name of the copyright holder nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,4 @@
1
+ include README.md
2
+ include MANIFEST.in
3
+ include LICENSE
4
+ include CONTRIBUTOR_LICENSING_AGREEMENT
@@ -0,0 +1,78 @@
1
+ Metadata-Version: 2.1
2
+ Name: graphviper
3
+ Version: 0.0.1
4
+ Summary: Astro Visibility and Image Parallel Execution Reduction
5
+ Author-email: Jan-Willem Steeb <jsteeb@nrao.edu>
6
+ License: BSD 3-Clause License
7
+
8
+ Copyright (c) 2023, Associated Universities, Inc.
9
+
10
+ Redistribution and use in source and binary forms, with or without
11
+ modification, are permitted provided that the following conditions are met:
12
+
13
+ 1. Redistributions of source code must retain the above copyright notice, this
14
+ list of conditions and the following disclaimer.
15
+
16
+ 2. Redistributions in binary form must reproduce the above copyright notice,
17
+ this list of conditions and the following disclaimer in the documentation
18
+ and/or other materials provided with the distribution.
19
+
20
+ 3. Neither the name of the copyright holder nor the names of its
21
+ contributors may be used to endorse or promote products derived from
22
+ this software without specific prior written permission.
23
+
24
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
28
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
+
35
+ Requires-Python: <3.12,>=3.8
36
+ Description-Content-Type: text/markdown
37
+ License-File: LICENSE.txt
38
+ Requires-Dist: astropy
39
+ Requires-Dist: dask
40
+ Requires-Dist: distributed
41
+ Requires-Dist: gdown
42
+ Requires-Dist: matplotlib
43
+ Requires-Dist: numba>=0.57.0
44
+ Requires-Dist: numpy<=1.25.0
45
+ Requires-Dist: prettytable
46
+ Requires-Dist: pytest
47
+ Requires-Dist: pytest-cov
48
+ Requires-Dist: pytest-html
49
+ Requires-Dist: scipy
50
+ Requires-Dist: xarray
51
+ Requires-Dist: zarr
52
+ Requires-Dist: bokeh
53
+ Requires-Dist: jupyterlab
54
+ Requires-Dist: pyarrow
55
+ Requires-Dist: graphviz
56
+ Requires-Dist: xradio
57
+ Requires-Dist: python_casacore>=3.5.2; sys_platform != "darwin"
58
+ Provides-Extra: docs
59
+ Requires-Dist: ipykernel; extra == "docs"
60
+ Requires-Dist: ipympl; extra == "docs"
61
+ Requires-Dist: ipython; extra == "docs"
62
+ Requires-Dist: jupyter-client; extra == "docs"
63
+ Requires-Dist: nbsphinx; extra == "docs"
64
+ Requires-Dist: recommonmark; extra == "docs"
65
+ Requires-Dist: scanpydoc; extra == "docs"
66
+ Requires-Dist: sphinx-autoapi; extra == "docs"
67
+ Requires-Dist: sphinx-autosummary-accessors; extra == "docs"
68
+ Requires-Dist: sphinx_rtd_theme; extra == "docs"
69
+ Requires-Dist: twine; extra == "docs"
70
+ Requires-Dist: pandoc; extra == "docs"
71
+
72
+ # GraphVIPER
73
+
74
+ [![Python 3.8 3.9 3.10 3.11](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue)](https://www.python.org/downloads/release/python-380/)
75
+
76
+ ###
77
+
78
+
@@ -0,0 +1,7 @@
1
+ # GraphVIPER
2
+
3
+ [![Python 3.8 3.9 3.10 3.11](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue)](https://www.python.org/downloads/release/python-380/)
4
+
5
+ ###
6
+
7
+
@@ -0,0 +1,50 @@
1
+ [project]
2
+ name = "graphviper"
3
+ version = "0.0.1"
4
+ description = "Astro Visibility and Image Parallel Execution Reduction"
5
+ authors = [
6
+ {name = "Jan-Willem Steeb", email="jsteeb@nrao.edu"},
7
+ ]
8
+ license = {file = "LICENSE.txt"}
9
+ readme = "README.md"
10
+ requires-python = ">= 3.8, < 3.12"
11
+
12
+ dependencies = [
13
+ 'astropy',
14
+ 'dask',
15
+ 'distributed',
16
+ 'gdown',
17
+ 'matplotlib',
18
+ 'numba>=0.57.0',
19
+ 'numpy<=1.25.0',
20
+ 'prettytable',
21
+ 'pytest',
22
+ 'pytest-cov',
23
+ 'pytest-html',
24
+ 'scipy',
25
+ 'xarray',
26
+ 'zarr',
27
+ 'bokeh',
28
+ 'jupyterlab',
29
+ 'pyarrow',
30
+ 'graphviz',
31
+ 'xradio',
32
+ 'python_casacore>=3.5.2; sys_platform != "darwin" '
33
+ ]
34
+
35
+
36
+ [project.optional-dependencies]
37
+ docs = [
38
+ 'ipykernel',
39
+ 'ipympl',
40
+ 'ipython',
41
+ 'jupyter-client',
42
+ 'nbsphinx',
43
+ 'recommonmark',
44
+ 'scanpydoc',
45
+ 'sphinx-autoapi',
46
+ 'sphinx-autosummary-accessors',
47
+ 'sphinx_rtd_theme',
48
+ 'twine',
49
+ 'pandoc'
50
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
File without changes
@@ -0,0 +1 @@
1
+ from .client import local_client, slurm_cluster_client
@@ -0,0 +1,208 @@
1
+ """
2
+ MIT License
3
+
4
+ Copyright (c) 2022 Jonathan Simon Kenyon
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
23
+ """
24
+ from collections import defaultdict
25
+ from distributed import SchedulerPlugin
26
+ from dask.core import reverse_dict
27
+ from dask.base import tokenize
28
+ from dask.order import graph_metrics, ndependencies
29
+ import click
30
+ from distributed.diagnostics.plugin import SchedulerPlugin
31
+ import numpy as np
32
+
33
+
def unravel_deps(hlg_deps, name, unravelled_deps=None):
    """Collect every direct and transitive dependency of a specific task.

    Parameters
    ----------
    hlg_deps : mapping
        Maps each task key to an iterable of the keys it depends on.
    name : hashable
        Key of the task whose dependencies are collected.
    unravelled_deps : set, optional
        Set that the discovered keys are added to (mutated in place). A new
        set is created when omitted.

    Returns
    -------
    set
        ``unravelled_deps`` with every key reachable from ``name`` added.
        ``name`` itself is only included if it is reachable from its own
        dependencies.

    Raises
    ------
    KeyError
        If a visited key is missing from ``hlg_deps``.
    """
    if unravelled_deps is None:
        unravelled_deps = set()

    # Walk the graph with an explicit stack and a per-call "seen" set: each
    # key is expanded exactly once, so shared dependencies (diamonds) are not
    # re-walked, deep graphs cannot hit the interpreter recursion limit and a
    # cyclic mapping still terminates.
    seen = set()
    pending = [name]
    while pending:
        for dep in hlg_deps[pending.pop()]:
            if dep not in seen:
                seen.add(dep)
                unravelled_deps.add(dep)
                pending.append(dep)

    return unravelled_deps
45
+
46
+
def get_node_depths(dependencies, root_nodes, metrics):
    """Compute a depth value for every key in ``dependencies``.

    For each key, the root nodes found among its transitive dependencies are
    looked up. The depth is the largest difference between the last metric
    entry of such a root and the last metric entry of the key itself. Keys
    with no root node among their dependencies get a depth of 0.
    """

    def depth_of(key):
        # Root nodes that this key (transitively) depends on.
        reachable_roots = root_nodes & unravel_deps(dependencies, key)
        own_metric = metrics[key][-1]
        if not reachable_roots:
            return 0
        return max(metrics[root][-1] - own_metric for root in reachable_roots)

    return {key: depth_of(key) for key in dependencies.keys()}
59
+
60
+
class schedular(SchedulerPlugin):
    """Scheduler plugin that can pin tasks to workers and label workers by IP.

    Parameters
    ----------
    autorestrictor : bool
        When truthy, ``update_graph`` partitions each submitted graph and
        restricts every task it can find to a single worker.
    local_cache : bool
        When truthy, ``add_worker`` registers a resource named after the
        worker's IP so that tasks can request a specific node.
    """

    def __init__(self, autorestrictor, local_cache):
        self.autorestrictor = autorestrictor
        self.local_cache = local_cache
        super().__init__()

    def add_worker(self, scheduler, worker):
        """Register the worker's IP as a resource when local caching is on."""
        if self.local_cache:
            # Set the resource label to the ip of the node that the worker is
            # on, so that tasks that require a specific node can be assigned
            # to the correct worker.
            ip = worker[worker.rfind("/") + 1 : worker.rfind(":")]
            scheduler.add_resources(worker=worker, resources={ip: 1})

    @staticmethod
    def _roots_token(roots):
        """Return a deterministic token for a set of root keys.

        Keys are ordered by their string form because graph keys may mix
        types (e.g. ``str`` and ``tuple``) that cannot be compared directly.
        """
        return tokenize(*sorted(roots, key=str))

    def update_graph(self, scheduler, dsk=None, keys=None, restrictions=None, **kw):
        """Processes dependencies to assign tasks to specific workers.

        Does nothing unless ``autorestrictor`` is enabled, a non-empty
        ``dependencies`` mapping (task key -> set of keys it depends on) is
        passed as a keyword argument and at least one worker is connected.
        Otherwise the graph is split into groups of tasks that share root
        nodes, each group is handed to the currently least loaded worker and
        the matching scheduler tasks are restricted to that worker.
        """
        if not self.autorestrictor:
            return

        print("Using autorestrictor")

        dependencies = kw.get("dependencies")
        if not dependencies:
            return

        workers = list(scheduler.workers.keys())
        n_worker = len(workers)
        if not workers:
            return  # Nobody to assign tasks to - leave the graph unrestricted.

        tasks = scheduler.tasks
        dependents = reverse_dict(dependencies)

        _, total_dependencies = ndependencies(dependencies, dependents)
        # TODO: Avoid calling graph metrics.
        metrics = graph_metrics(dependencies, dependents, total_dependencies)

        # Terminal nodes have no dependents, root nodes have no dependencies.
        # Horizontal partition nodes are initialized as the terminal nodes.
        part_nodes = {k for (k, v) in dependents.items() if not v}
        root_nodes = {k for (k, v) in dependencies.items() if not v}

        # Figure out the depth of every task. Depth is defined as maximum
        # distance from a root node. TODO: Optimize get_node_depths.
        node_depths = get_node_depths(dependencies, root_nodes, metrics)
        max_depth = max(node_depths.values())

        # If we have fewer partition nodes than workers, we cannot utilise all
        # the workers and are likely dealing with a reduction. We work our way
        # back through the graph, starting at the deepest terminal nodes, and
        # try to find a depth at which there was enough work to utilise all
        # workers.
        while len(part_nodes) < n_worker and max_depth > 0:
            for pn in part_nodes.copy():
                if node_depths[pn] == max_depth:
                    part_nodes.discard(pn)
                    part_nodes |= dependencies[pn]
            max_depth -= 1
            if max_depth <= 0:
                return  # In this case, there is nothing we can do - fall back.

        part_roots = {}
        part_dependencies = {}
        part_dependents = {}

        for pn in part_nodes:
            # Get dependencies per partition node.
            part_dependencies[pn] = unravel_deps(dependencies, pn)
            # Get dependents per partition node.
            part_dependents[pn] = unravel_deps(dependents, pn)
            # Associate partition nodes with root nodes.
            part_roots[pn] = root_nodes & part_dependencies[pn]

        # Create a unique token for each set of partition roots. TODO: This is
        # very strict. What about nodes with very similar roots? Tokenization
        # may be overkill too.
        root_tokens = {self._roots_token(v): v for v in part_roots.values()}

        hash_map = defaultdict(set)
        group_offset = 0

        # Associate partition roots with a specific group if they are not a
        # subset of another, larger root set.
        for k, v in root_tokens.items():
            if any(v < vv for vv in root_tokens.values()):  # Strict subset.
                continue
            hash_map[k].add(group_offset)
            group_offset += 1

        # If roots were a subset, they should share the group of their
        # superset/s.
        for k, v in root_tokens.items():
            if not v:  # Special case - no dependencies. Handled below.
                continue
            superset_tokens = [kk for kk, vv in root_tokens.items() if v < vv]
            if superset_tokens:
                hash_map[k] = set().union(
                    *[hash_map[kk] for kk in superset_tokens]
                )

        task_groups = defaultdict(set)

        for pn in part_nodes:
            pdp = part_dependencies[pn]
            pdn = part_dependents[pn]

            if pdp:
                groups = hash_map[self._roots_token(part_roots[pn])]
            else:  # Special case - no dependencies.
                groups = {group_offset}
                group_offset += 1

            for g in groups:
                task_groups[g] |= pdp | pdn | {pn}

        worker_loads = {wkr: 0 for wkr in workers}

        for task_group in task_groups.values():
            # Greedy balancing: the least loaded worker gets the next group.
            assignee = min(worker_loads, key=worker_loads.get)
            worker_loads[assignee] += len(task_group)

            for task_name in task_group:
                try:
                    task = tasks[task_name]
                except KeyError:  # Keys may not have an associated task.
                    continue

                if task.worker_restrictions is None:
                    task.worker_restrictions = set()
                task.worker_restrictions |= {assignee}
                task.loose_restrictions = False
201
+
202
+
# Click command that builds a ``schedular`` plugin from the two options and
# adds it to the given scheduler.
@click.command()
@click.option("--autorestrictor", default=False)
@click.option("--local_cache", default=False)
def dask_setup(scheduler, autorestrictor, local_cache):
    scheduler.add_plugin(schedular(autorestrictor, local_cache))
@@ -0,0 +1,71 @@
1
+ import click
2
+
3
+ from graphviper._logger import _setup_worker_logger
4
+
5
+
class _worker:
    """Worker plugin that creates a per-worker logger and, optionally, an IP resource.

    Parameters
    ----------
    local_cache : bool
        When truthy, ``setup`` adds a resource named after the worker's IP to
        the worker's available resources.
    log_parms : dict
        Must contain the keys ``log_to_term``, ``log_to_file``, ``log_file``
        and ``log_level``; the values are forwarded to
        ``_setup_worker_logger`` when the plugin is set up.
    """

    def __init__(self, local_cache, log_parms):
        self.local_cache = local_cache

        print("log_parms", log_parms)
        self.log_to_term = log_parms["log_to_term"]
        self.log_to_file = log_parms["log_to_file"]
        self.log_file = log_parms["log_file"]
        self.log_level = log_parms["log_level"]

        # Populated by ``setup``. Defined here so that ``get_logger`` can be
        # called safely on a plugin that has not been attached to a worker.
        self.logger = None
        self.worker = None

    def get_logger(self):
        """Return the logger created by ``setup``, or ``None`` before setup ran."""
        return self.logger

    def setup(self, worker):
        """
        Run when the plugin is attached to a worker. This happens when the plugin is registered
        and attached to existing workers, or when a worker is created after the plugin has been
        registered.
        """

        self.logger = _setup_worker_logger(
            self.log_to_term,
            self.log_to_file,
            self.log_file,
            self.log_level,
            str(worker.id),
        )
        self.logger.debug(
            f"Logger created on worker {worker.id},*,{worker.address}"
        )
        # Documentation https://distributed.dask.org/en/stable/worker.html#distributed.worker.Worker
        self.worker = worker

        if self.local_cache:
            # The IP is the part of the address between the last "/" and the
            # last ":" (i.e. scheme and port are stripped).
            address = worker.address
            ip = address[address.rfind("/") + 1 : address.rfind(":")]
            self.logger.debug(f"{worker.id},*,{ip}")
            worker.state.available_resources = {
                **worker.state.available_resources,
                **{ip: 1},
            }
51
+
52
+
53
+ # https://github.com/dask/distributed/issues/4169
# Click command that collects the logging options into a dict, builds a
# ``_worker`` plugin and registers it through the worker's client under the
# name "viper_worker".
@click.command()
@click.option("--local_cache", default=False)
@click.option("--log_to_term", default=True)
@click.option("--log_to_file", default=False)
@click.option("--log_file", default="viper_")
@click.option("--log_level", default="INFO")
async def dask_setup(
    worker, local_cache, log_to_term, log_to_file, log_file, log_level
):
    logging_options = dict(
        log_to_term=log_to_term,
        log_to_file=log_to_file,
        log_file=log_file,
        log_level=log_level,
    )
    await worker.client.register_worker_plugin(
        _worker(local_cache, logging_options), name="viper_worker"
    )