guidepost 0.3.0__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidepost-0.3.2/MANIFEST.in +4 -0
- guidepost-0.3.2/PKG-INFO +90 -0
- guidepost-0.3.2/README.md +59 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/guidepost/guidepost.py +20 -2
- {guidepost-0.3.0 → guidepost-0.3.2}/guidepost/seriation.py +39 -1
- guidepost-0.3.2/guidepost/static/guidepost.js +23182 -0
- guidepost-0.3.2/guidepost/static/guidepost.js.map +7 -0
- guidepost-0.3.2/guidepost/static/trailmark.js +21 -0
- guidepost-0.3.2/guidepost/static/trailmark.js.map +7 -0
- guidepost-0.3.2/guidepost/version.py +2 -0
- guidepost-0.3.2/guidepost.egg-info/PKG-INFO +90 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/guidepost.egg-info/SOURCES.txt +6 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/setup.py +1 -1
- {guidepost-0.3.0 → guidepost-0.3.2}/tests/test_list_parsing.py +4 -1
- guidepost-0.3.0/PKG-INFO +0 -225
- guidepost-0.3.0/guidepost/version.py +0 -2
- guidepost-0.3.0/guidepost.egg-info/PKG-INFO +0 -225
- {guidepost-0.3.0 → guidepost-0.3.2}/LICENSE +0 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/guidepost/__init__.py +0 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/guidepost/aggregation.py +0 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/guidepost/node_layout.py +0 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/guidepost/trailmark.py +0 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/guidepost/utils.py +0 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/guidepost.egg-info/dependency_links.txt +0 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/guidepost.egg-info/requires.txt +0 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/guidepost.egg-info/top_level.txt +0 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/pyproject.toml +0 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/setup.cfg +0 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/tests/test_aggregation.py +0 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/tests/test_node_layout.py +0 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/tests/test_seriation.py +0 -0
- {guidepost-0.3.0 → guidepost-0.3.2}/tutorials/__init__.py +0 -0
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
# `python -m build` builds the wheel from the sdist, so the compiled frontend
|
|
2
|
+
# bundles must be in the sdist for them to reach the installed wheel. They are
|
|
3
|
+
# gitignored (built by esbuild.config.js), so include them explicitly here.
|
|
4
|
+
recursive-include guidepost/static *.js *.js.map
|
guidepost-0.3.2/PKG-INFO
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: guidepost
|
|
3
|
+
Version: 0.3.2
|
|
4
|
+
Summary: Guidepost. An overview visualization for understanding supercomputer queue data.
|
|
5
|
+
Home-page: https://github.com/cscully-allison/guidepost
|
|
6
|
+
Author: Connor Scully-Allison
|
|
7
|
+
Author-email: cscullyallison@sci.utah.edu
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: numpy
|
|
15
|
+
Requires-Dist: pandas
|
|
16
|
+
Requires-Dist: scikit-learn
|
|
17
|
+
Requires-Dist: anywidget
|
|
18
|
+
Requires-Dist: traitlets
|
|
19
|
+
Requires-Dist: pyarrow>=14
|
|
20
|
+
Requires-Dist: duckdb>=0.10
|
|
21
|
+
Dynamic: author
|
|
22
|
+
Dynamic: author-email
|
|
23
|
+
Dynamic: classifier
|
|
24
|
+
Dynamic: description
|
|
25
|
+
Dynamic: description-content-type
|
|
26
|
+
Dynamic: home-page
|
|
27
|
+
Dynamic: license-file
|
|
28
|
+
Dynamic: requires-dist
|
|
29
|
+
Dynamic: requires-python
|
|
30
|
+
Dynamic: summary
|
|
31
|
+
|
|
32
|
+
# Guidepost
|
|
33
|
+
|
|
34
|
+
Guidepost is a Python library for visualizing High Performance Computing (HPC) job data in Jupyter notebooks. It turns a `pandas` DataFrame of job records into a single, linked, interactive overview — faceted heatmaps framed by histograms, a categorical bar chart, and a brushable color legend — so you can spot patterns in runtimes, queue waits, and resource usage, then export the exact records you care about back into Python.
|
|
35
|
+
|
|
36
|
+

|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install guidepost
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Quick start
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from guidepost import Guidepost
|
|
48
|
+
import pandas as pd
|
|
49
|
+
|
|
50
|
+
gp = Guidepost()
|
|
51
|
+
gp.load_data(pd.read_parquet("data/jobs_data.parquet"))
|
|
52
|
+
|
|
53
|
+
gp # display in a notebook cell
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Brush the heatmap or its histograms, then pull the selected rows back into Python:
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
df = gp.selection.dataframe
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Input is a `pandas` DataFrame with at least three numeric and two categorical columns (datetime columns are supported on the x-axis).
|
|
63
|
+
|
|
64
|
+
## Documentation
|
|
65
|
+
|
|
66
|
+
Full documentation lives in the **[Guidepost Wiki](https://github.com/cscully-allison/guidepost/wiki)**:
|
|
67
|
+
|
|
68
|
+
- [Getting Started](https://github.com/cscully-allison/guidepost/wiki/Getting-Started)
|
|
69
|
+
- [Data Requirements and Type Detection](https://github.com/cscully-allison/guidepost/wiki/Data-Requirements-and-Type-Detection)
|
|
70
|
+
- [Configuration](https://github.com/cscully-allison/guidepost/wiki/Configuration)
|
|
71
|
+
- [Understanding the Views](https://github.com/cscully-allison/guidepost/wiki/Understanding-the-Views) — and the per-view interaction guides
|
|
72
|
+
- [Selecting and Exporting Data](https://github.com/cscully-allison/guidepost/wiki/Selecting-and-Exporting-Data)
|
|
73
|
+
- [API Reference](https://github.com/cscully-allison/guidepost/wiki/API-Reference)
|
|
74
|
+
- [FAQ and Troubleshooting](https://github.com/cscully-allison/guidepost/wiki/FAQ-and-Troubleshooting)
|
|
75
|
+
|
|
76
|
+
## Contributing
|
|
77
|
+
|
|
78
|
+
Contributions are welcome. Fork the repository, create a branch for your feature or bugfix, and open a pull request with a description of your changes.
|
|
79
|
+
|
|
80
|
+
## License
|
|
81
|
+
|
|
82
|
+
Guidepost is licensed under the MIT License. See the `LICENSE` file for details.
|
|
83
|
+
|
|
84
|
+
## Acknowledgments
|
|
85
|
+
|
|
86
|
+
Guidepost was developed under the auspices of, and with funding provided by, the National Renewable Energy Laboratory (NREL), the National Science Foundation under NSF IIS-1844573 and IIS-2324465, and the Department of Energy under DE-SC0022044 and DE-SC0024635.
|
|
87
|
+
|
|
88
|
+
## Contact
|
|
89
|
+
|
|
90
|
+
For questions or feedback, reach out to the maintainer at <cscullyallison@sci.utah.edu>.
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Guidepost
|
|
2
|
+
|
|
3
|
+
Guidepost is a Python library for visualizing High Performance Computing (HPC) job data in Jupyter notebooks. It turns a `pandas` DataFrame of job records into a single, linked, interactive overview — faceted heatmaps framed by histograms, a categorical bar chart, and a brushable color legend — so you can spot patterns in runtimes, queue waits, and resource usage, then export the exact records you care about back into Python.
|
|
4
|
+
|
|
5
|
+

|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install guidepost
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick start
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from guidepost import Guidepost
|
|
17
|
+
import pandas as pd
|
|
18
|
+
|
|
19
|
+
gp = Guidepost()
|
|
20
|
+
gp.load_data(pd.read_parquet("data/jobs_data.parquet"))
|
|
21
|
+
|
|
22
|
+
gp # display in a notebook cell
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Brush the heatmap or its histograms, then pull the selected rows back into Python:
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
df = gp.selection.dataframe
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Input is a `pandas` DataFrame with at least three numeric and two categorical columns (datetime columns are supported on the x-axis).
|
|
32
|
+
|
|
33
|
+
## Documentation
|
|
34
|
+
|
|
35
|
+
Full documentation lives in the **[Guidepost Wiki](https://github.com/cscully-allison/guidepost/wiki)**:
|
|
36
|
+
|
|
37
|
+
- [Getting Started](https://github.com/cscully-allison/guidepost/wiki/Getting-Started)
|
|
38
|
+
- [Data Requirements and Type Detection](https://github.com/cscully-allison/guidepost/wiki/Data-Requirements-and-Type-Detection)
|
|
39
|
+
- [Configuration](https://github.com/cscully-allison/guidepost/wiki/Configuration)
|
|
40
|
+
- [Understanding the Views](https://github.com/cscully-allison/guidepost/wiki/Understanding-the-Views) — and the per-view interaction guides
|
|
41
|
+
- [Selecting and Exporting Data](https://github.com/cscully-allison/guidepost/wiki/Selecting-and-Exporting-Data)
|
|
42
|
+
- [API Reference](https://github.com/cscully-allison/guidepost/wiki/API-Reference)
|
|
43
|
+
- [FAQ and Troubleshooting](https://github.com/cscully-allison/guidepost/wiki/FAQ-and-Troubleshooting)
|
|
44
|
+
|
|
45
|
+
## Contributing
|
|
46
|
+
|
|
47
|
+
Contributions are welcome. Fork the repository, create a branch for your feature or bugfix, and open a pull request with a description of your changes.
|
|
48
|
+
|
|
49
|
+
## License
|
|
50
|
+
|
|
51
|
+
Guidepost is licensed under the MIT License. See the `LICENSE` file for details.
|
|
52
|
+
|
|
53
|
+
## Acknowledgments
|
|
54
|
+
|
|
55
|
+
Guidepost was developed under the auspices of, and with funding provided by, the National Renewable Energy Laboratory (NREL), the National Science Foundation under NSF IIS-1844573 and IIS-2324465, and the Department of Energy under DE-SC0022044 and DE-SC0024635.
|
|
56
|
+
|
|
57
|
+
## Contact
|
|
58
|
+
|
|
59
|
+
For questions or feedback, reach out to the maintainer at <cscullyallison@sci.utah.edu>.
|
|
@@ -21,6 +21,22 @@ from .aggregation import AggregationEngine
|
|
|
21
21
|
SYNTHETIC_FACET_COL = "__gp_no_grouping__"
|
|
22
22
|
SYNTHETIC_FACET_VALUE = "All records"
|
|
23
23
|
|
|
24
|
+
|
|
25
|
+
class Selection:
    """Wrapper around the records selected in the widget.

    The selected DataFrame is exposed as ``.dataframe``. This indirection
    leaves room to attach further selection metadata in the future without
    changing the ``gp.selection`` access pattern.

    Attributes:
        dataframe: The object passed to the constructor, stored as-is
            (no copy, no validation).
    """

    def __init__(self, dataframe: "pd.DataFrame") -> None:
        """Store ``dataframe`` unchanged on the instance."""
        self.dataframe = dataframe

    def __repr__(self) -> str:
        """Return ``<ClassName>(dataframe=<repr of the wrapped object>)``."""
        # Use the runtime class name so subclasses identify themselves;
        # for Selection itself the output is "Selection(dataframe=...)".
        return f"{type(self).__name__}(dataframe={self.dataframe!r})"
|
|
38
|
+
|
|
39
|
+
|
|
24
40
|
class Guidepost(anywidget.AnyWidget):
|
|
25
41
|
|
|
26
42
|
_esm = os.path.join(os.path.dirname(__file__), "static", "guidepost.js")
|
|
@@ -37,7 +53,6 @@ class Guidepost(anywidget.AnyWidget):
|
|
|
37
53
|
|
|
38
54
|
selected_records = traitlets.Unicode("[]").tag(sync=True)
|
|
39
55
|
records_df = pd.DataFrame()
|
|
40
|
-
selection = None
|
|
41
56
|
|
|
42
57
|
_summary_stats = traitlets.Dict({}).tag(sync=True)
|
|
43
58
|
|
|
@@ -272,7 +287,10 @@ class Guidepost(anywidget.AnyWidget):
|
|
|
272
287
|
|
|
273
288
|
@property
|
|
274
289
|
def selection(self):
|
|
275
|
-
|
|
290
|
+
# `selection` is intentionally an object wrapper rather than the bare
|
|
291
|
+
# DataFrame so additional selection metadata can be hung off it later
|
|
292
|
+
# without breaking callers. The DataFrame lives on `.dataframe`.
|
|
293
|
+
return Selection(self.retrieve_selected_data())
|
|
276
294
|
|
|
277
295
|
def retrieve_selected_data(self):
|
|
278
296
|
if self.cached_records_df is None:
|
|
@@ -167,11 +167,49 @@ def _seriate_component(members, A, node_list, SpectralEmbedding):
|
|
|
167
167
|
return sorted(members, key=lambda i: node_list[i])
|
|
168
168
|
|
|
169
169
|
sub = A[members][:, members].toarray()
|
|
170
|
+
|
|
171
|
+
# On symmetric structures (e.g. a clique) the smallest non-zero Laplacian
|
|
172
|
+
# eigenvalue is degenerate, so the 1-D spectral coordinate is not unique:
|
|
173
|
+
# the eigensolver returns an arbitrary basis vector from that eigenspace,
|
|
174
|
+
# which varies across runs, platforms, and BLAS threading. There is no
|
|
175
|
+
# canonical order in that case, so fall back to deterministic name order.
|
|
176
|
+
# (EigenVALUES are stable even when the corresponding eigenVECTORS are not,
|
|
177
|
+
# so this test is itself reproducible.)
|
|
178
|
+
if _spectral_order_is_degenerate(sub):
|
|
179
|
+
return sorted(members, key=lambda i: node_list[i])
|
|
180
|
+
|
|
170
181
|
emb = SpectralEmbedding(n_components=1, affinity="precomputed", random_state=0)
|
|
171
182
|
coords = emb.fit_transform(sub)[:, 0]
|
|
172
|
-
|
|
183
|
+
# Round so any residual floating-point noise on near-tied nodes collapses and
|
|
184
|
+
# the name-based secondary key decides their order deterministically.
|
|
185
|
+
local = sorted(
|
|
186
|
+
range(len(members)),
|
|
187
|
+
key=lambda t: (round(float(coords[t]), 9), node_list[members[t]]),
|
|
188
|
+
)
|
|
173
189
|
seq = [members[t] for t in local]
|
|
174
190
|
# Eigenvectors carry a sign ambiguity; canonicalize orientation by name.
|
|
175
191
|
if node_list[seq[0]] > node_list[seq[-1]]:
|
|
176
192
|
seq.reverse()
|
|
177
193
|
return seq
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _spectral_order_is_degenerate(sub, tol=1e-9):
|
|
197
|
+
"""True when the 1-D spectral embedding of ``sub`` is ill-defined.
|
|
198
|
+
|
|
199
|
+
The seriation uses the eigenvector of the smallest non-zero eigenvalue of
|
|
200
|
+
the symmetric normalized Laplacian. When that eigenvalue is (near-)repeated,
|
|
201
|
+
the eigenvector — and hence the node order — is not unique. We detect that
|
|
202
|
+
from the eigenvalue *spectrum*, which ``eigvalsh`` returns deterministically
|
|
203
|
+
regardless of solver/threading, unlike the eigenvectors themselves.
|
|
204
|
+
"""
|
|
205
|
+
deg = sub.sum(axis=1)
|
|
206
|
+
if np.any(deg <= 0):
|
|
207
|
+
return True # an isolated node has no meaningful spectral position
|
|
208
|
+
d_inv_sqrt = 1.0 / np.sqrt(deg)
|
|
209
|
+
laplacian = np.eye(sub.shape[0]) - (d_inv_sqrt[:, None] * sub * d_inv_sqrt[None, :])
|
|
210
|
+
evals = np.linalg.eigvalsh(laplacian) # ascending; evals[0] ~ 0 (connected)
|
|
211
|
+
if len(evals) < 3:
|
|
212
|
+
return False
|
|
213
|
+
# Degenerate iff the 2nd and 3rd smallest eigenvalues coincide, i.e. the
|
|
214
|
+
# smallest non-zero eigenvalue has multiplicity > 1.
|
|
215
|
+
return bool((evals[2] - evals[1]) < tol)
|