guidepost 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. guidepost-0.3.2/MANIFEST.in +4 -0
  2. guidepost-0.3.2/PKG-INFO +90 -0
  3. guidepost-0.3.2/README.md +59 -0
  4. {guidepost-0.3.0 → guidepost-0.3.2}/guidepost/guidepost.py +20 -2
  5. {guidepost-0.3.0 → guidepost-0.3.2}/guidepost/seriation.py +39 -1
  6. guidepost-0.3.2/guidepost/static/guidepost.js +23182 -0
  7. guidepost-0.3.2/guidepost/static/guidepost.js.map +7 -0
  8. guidepost-0.3.2/guidepost/static/trailmark.js +21 -0
  9. guidepost-0.3.2/guidepost/static/trailmark.js.map +7 -0
  10. guidepost-0.3.2/guidepost/version.py +2 -0
  11. guidepost-0.3.2/guidepost.egg-info/PKG-INFO +90 -0
  12. {guidepost-0.3.0 → guidepost-0.3.2}/guidepost.egg-info/SOURCES.txt +6 -0
  13. {guidepost-0.3.0 → guidepost-0.3.2}/setup.py +1 -1
  14. {guidepost-0.3.0 → guidepost-0.3.2}/tests/test_list_parsing.py +4 -1
  15. guidepost-0.3.0/PKG-INFO +0 -225
  16. guidepost-0.3.0/guidepost/version.py +0 -2
  17. guidepost-0.3.0/guidepost.egg-info/PKG-INFO +0 -225
  18. {guidepost-0.3.0 → guidepost-0.3.2}/LICENSE +0 -0
  19. {guidepost-0.3.0 → guidepost-0.3.2}/guidepost/__init__.py +0 -0
  20. {guidepost-0.3.0 → guidepost-0.3.2}/guidepost/aggregation.py +0 -0
  21. {guidepost-0.3.0 → guidepost-0.3.2}/guidepost/node_layout.py +0 -0
  22. {guidepost-0.3.0 → guidepost-0.3.2}/guidepost/trailmark.py +0 -0
  23. {guidepost-0.3.0 → guidepost-0.3.2}/guidepost/utils.py +0 -0
  24. {guidepost-0.3.0 → guidepost-0.3.2}/guidepost.egg-info/dependency_links.txt +0 -0
  25. {guidepost-0.3.0 → guidepost-0.3.2}/guidepost.egg-info/requires.txt +0 -0
  26. {guidepost-0.3.0 → guidepost-0.3.2}/guidepost.egg-info/top_level.txt +0 -0
  27. {guidepost-0.3.0 → guidepost-0.3.2}/pyproject.toml +0 -0
  28. {guidepost-0.3.0 → guidepost-0.3.2}/setup.cfg +0 -0
  29. {guidepost-0.3.0 → guidepost-0.3.2}/tests/test_aggregation.py +0 -0
  30. {guidepost-0.3.0 → guidepost-0.3.2}/tests/test_node_layout.py +0 -0
  31. {guidepost-0.3.0 → guidepost-0.3.2}/tests/test_seriation.py +0 -0
  32. {guidepost-0.3.0 → guidepost-0.3.2}/tutorials/__init__.py +0 -0
@@ -0,0 +1,4 @@
1
+ # `python -m build` builds the wheel from the sdist, so the compiled frontend
2
+ # bundles must be in the sdist for them to reach the installed wheel. They are
3
+ # gitignored (built by esbuild.config.js), so include them explicitly here.
4
+ recursive-include guidepost/static *.js *.js.map
@@ -0,0 +1,90 @@
1
+ Metadata-Version: 2.4
2
+ Name: guidepost
3
+ Version: 0.3.2
4
+ Summary: Guidepost. An overview visualization for understanding supercomputer queue data.
5
+ Home-page: https://github.com/cscully-allison/guidepost
6
+ Author: Connor Scully-Allison
7
+ Author-email: cscullyallison@sci.utah.edu
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.10
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: numpy
15
+ Requires-Dist: pandas
16
+ Requires-Dist: scikit-learn
17
+ Requires-Dist: anywidget
18
+ Requires-Dist: traitlets
19
+ Requires-Dist: pyarrow>=14
20
+ Requires-Dist: duckdb>=0.10
21
+ Dynamic: author
22
+ Dynamic: author-email
23
+ Dynamic: classifier
24
+ Dynamic: description
25
+ Dynamic: description-content-type
26
+ Dynamic: home-page
27
+ Dynamic: license-file
28
+ Dynamic: requires-dist
29
+ Dynamic: requires-python
30
+ Dynamic: summary
31
+
32
+ # Guidepost
33
+
34
+ Guidepost is a Python library for visualizing High Performance Computing (HPC) job data in Jupyter notebooks. It turns a `pandas` DataFrame of job records into a single, linked, interactive overview — faceted heatmaps framed by histograms, a categorical bar chart, and a brushable color legend — so you can spot patterns in runtimes, queue waits, and resource usage, then export the exact records you care about back into Python.
35
+
36
+ ![Annotated Guidepost visualization showing the data grouping name, color-by-categorical bar chart, and the current selection of records for export](https://i.postimg.cc/vTDMX2b3/temp-Image-MVb5ui.avif)
37
+
38
+ ## Installation
39
+
40
+ ```bash
41
+ pip install guidepost
42
+ ```
43
+
44
+ ## Quick start
45
+
46
+ ```python
47
+ from guidepost import Guidepost
48
+ import pandas as pd
49
+
50
+ gp = Guidepost()
51
+ gp.load_data(pd.read_parquet("data/jobs_data.parquet"))
52
+
53
+ gp # display in a notebook cell
54
+ ```
55
+
56
+ Brush the heatmap or its histograms, then pull the selected rows back into Python:
57
+
58
+ ```python
59
+ df = gp.selection.dataframe
60
+ ```
61
+
62
+ Input is a `pandas` DataFrame with at least three numeric and two categorical columns (datetime columns are supported on the x-axis).
63
+
64
+ ## Documentation
65
+
66
+ Full documentation lives in the **[Guidepost Wiki](https://github.com/cscully-allison/guidepost/wiki)**:
67
+
68
+ - [Getting Started](https://github.com/cscully-allison/guidepost/wiki/Getting-Started)
69
+ - [Data Requirements and Type Detection](https://github.com/cscully-allison/guidepost/wiki/Data-Requirements-and-Type-Detection)
70
+ - [Configuration](https://github.com/cscully-allison/guidepost/wiki/Configuration)
71
+ - [Understanding the Views](https://github.com/cscully-allison/guidepost/wiki/Understanding-the-Views) — and the per-view interaction guides
72
+ - [Selecting and Exporting Data](https://github.com/cscully-allison/guidepost/wiki/Selecting-and-Exporting-Data)
73
+ - [API Reference](https://github.com/cscully-allison/guidepost/wiki/API-Reference)
74
+ - [FAQ and Troubleshooting](https://github.com/cscully-allison/guidepost/wiki/FAQ-and-Troubleshooting)
75
+
76
+ ## Contributing
77
+
78
+ Contributions are welcome. Fork the repository, create a branch for your feature or bugfix, and open a pull request with a description of your changes.
79
+
80
+ ## License
81
+
82
+ Guidepost is licensed under the MIT License. See the `LICENSE` file for details.
83
+
84
+ ## Acknowledgments
85
+
86
+ Guidepost was developed under the auspices of, and with funding provided by, the National Renewable Energy Laboratory (NREL), the National Science Foundation under IIS-1844573 and IIS-2324465, and the Department of Energy under DE-SC0022044 and DE-SC0024635.
87
+
88
+ ## Contact
89
+
90
+ For questions or feedback, reach out to the maintainer at [cscullyallison@sci.utah.edu](mailto:cscullyallison@sci.utah.edu).
@@ -0,0 +1,59 @@
1
+ # Guidepost
2
+
3
+ Guidepost is a Python library for visualizing High Performance Computing (HPC) job data in Jupyter notebooks. It turns a `pandas` DataFrame of job records into a single, linked, interactive overview — faceted heatmaps framed by histograms, a categorical bar chart, and a brushable color legend — so you can spot patterns in runtimes, queue waits, and resource usage, then export the exact records you care about back into Python.
4
+
5
+ ![Annotated Guidepost visualization showing the data grouping name, color-by-categorical bar chart, and the current selection of records for export](https://i.postimg.cc/vTDMX2b3/temp-Image-MVb5ui.avif)
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install guidepost
11
+ ```
12
+
13
+ ## Quick start
14
+
15
+ ```python
16
+ from guidepost import Guidepost
17
+ import pandas as pd
18
+
19
+ gp = Guidepost()
20
+ gp.load_data(pd.read_parquet("data/jobs_data.parquet"))
21
+
22
+ gp # display in a notebook cell
23
+ ```
24
+
25
+ Brush the heatmap or its histograms, then pull the selected rows back into Python:
26
+
27
+ ```python
28
+ df = gp.selection.dataframe
29
+ ```
30
+
31
+ Input is a `pandas` DataFrame with at least three numeric and two categorical columns (datetime columns are supported on the x-axis).
32
+
33
+ ## Documentation
34
+
35
+ Full documentation lives in the **[Guidepost Wiki](https://github.com/cscully-allison/guidepost/wiki)**:
36
+
37
+ - [Getting Started](https://github.com/cscully-allison/guidepost/wiki/Getting-Started)
38
+ - [Data Requirements and Type Detection](https://github.com/cscully-allison/guidepost/wiki/Data-Requirements-and-Type-Detection)
39
+ - [Configuration](https://github.com/cscully-allison/guidepost/wiki/Configuration)
40
+ - [Understanding the Views](https://github.com/cscully-allison/guidepost/wiki/Understanding-the-Views) — and the per-view interaction guides
41
+ - [Selecting and Exporting Data](https://github.com/cscully-allison/guidepost/wiki/Selecting-and-Exporting-Data)
42
+ - [API Reference](https://github.com/cscully-allison/guidepost/wiki/API-Reference)
43
+ - [FAQ and Troubleshooting](https://github.com/cscully-allison/guidepost/wiki/FAQ-and-Troubleshooting)
44
+
45
+ ## Contributing
46
+
47
+ Contributions are welcome. Fork the repository, create a branch for your feature or bugfix, and open a pull request with a description of your changes.
48
+
49
+ ## License
50
+
51
+ Guidepost is licensed under the MIT License. See the `LICENSE` file for details.
52
+
53
+ ## Acknowledgments
54
+
55
+ Guidepost was developed under the auspices of, and with funding provided by, the National Renewable Energy Laboratory (NREL), the National Science Foundation under IIS-1844573 and IIS-2324465, and the Department of Energy under DE-SC0022044 and DE-SC0024635.
56
+
57
+ ## Contact
58
+
59
+ For questions or feedback, reach out to the maintainer at [cscullyallison@sci.utah.edu](mailto:cscullyallison@sci.utah.edu).
@@ -21,6 +21,22 @@ from .aggregation import AggregationEngine
21
21
  SYNTHETIC_FACET_COL = "__gp_no_grouping__"
22
22
  SYNTHETIC_FACET_VALUE = "All records"
23
23
 
24
+
25
class Selection:
    """Container for the records currently selected in the widget.

    The selected records are stored on the ``dataframe`` attribute. Handing
    callers this small object (instead of the bare DataFrame) keeps the
    ``gp.selection`` access pattern stable if more selection metadata is
    attached to it later.
    """

    def __init__(self, dataframe):
        # Stored as-is: no copy and no validation is performed here.
        self.dataframe = dataframe

    def __repr__(self):
        # Defer to the wrapped object's own repr so the selection stays
        # inspectable when echoed in a notebook or a debugger.
        return f"Selection(dataframe={self.dataframe!r})"
38
+
39
+
24
40
  class Guidepost(anywidget.AnyWidget):
25
41
 
26
42
  _esm = os.path.join(os.path.dirname(__file__), "static", "guidepost.js")
@@ -37,7 +53,6 @@ class Guidepost(anywidget.AnyWidget):
37
53
 
38
54
  selected_records = traitlets.Unicode("[]").tag(sync=True)
39
55
  records_df = pd.DataFrame()
40
- selection = None
41
56
 
42
57
  _summary_stats = traitlets.Dict({}).tag(sync=True)
43
58
 
@@ -272,7 +287,10 @@ class Guidepost(anywidget.AnyWidget):
272
287
 
273
288
  @property
274
289
  def selection(self):
275
- return self.retrieve_selected_data()
290
+ # `selection` is intentionally an object wrapper rather than the bare
291
+ # DataFrame so additional selection metadata can be hung off it later
292
+ # without breaking callers. The DataFrame lives on `.dataframe`.
293
+ return Selection(self.retrieve_selected_data())
276
294
 
277
295
  def retrieve_selected_data(self):
278
296
  if self.cached_records_df is None:
@@ -167,11 +167,49 @@ def _seriate_component(members, A, node_list, SpectralEmbedding):
167
167
  return sorted(members, key=lambda i: node_list[i])
168
168
 
169
169
  sub = A[members][:, members].toarray()
170
+
171
+ # On symmetric structures (e.g. a clique) the smallest non-zero Laplacian
172
+ # eigenvalue is degenerate, so the 1-D spectral coordinate is not unique:
173
+ # the eigensolver returns an arbitrary basis vector from that eigenspace,
174
+ # which varies across runs, platforms, and BLAS threading. There is no
175
+ # canonical order in that case, so fall back to deterministic name order.
176
+ # (EigenVALUES are stable even when the corresponding eigenVECTORS are not,
177
+ # so this test is itself reproducible.)
178
+ if _spectral_order_is_degenerate(sub):
179
+ return sorted(members, key=lambda i: node_list[i])
180
+
170
181
  emb = SpectralEmbedding(n_components=1, affinity="precomputed", random_state=0)
171
182
  coords = emb.fit_transform(sub)[:, 0]
172
- local = sorted(range(len(members)), key=lambda t: (coords[t], node_list[members[t]]))
183
+ # Round so any residual floating-point noise on near-tied nodes collapses and
184
+ # the name-based secondary key decides their order deterministically.
185
+ local = sorted(
186
+ range(len(members)),
187
+ key=lambda t: (round(float(coords[t]), 9), node_list[members[t]]),
188
+ )
173
189
  seq = [members[t] for t in local]
174
190
  # Eigenvectors carry a sign ambiguity; canonicalize orientation by name.
175
191
  if node_list[seq[0]] > node_list[seq[-1]]:
176
192
  seq.reverse()
177
193
  return seq
194
+
195
+
196
+ def _spectral_order_is_degenerate(sub, tol=1e-9):
197
+ """True when the 1-D spectral embedding of ``sub`` is ill-defined.
198
+
199
+ The seriation uses the eigenvector of the smallest non-zero eigenvalue of
200
+ the symmetric normalized Laplacian. When that eigenvalue is (near-)repeated,
201
+ the eigenvector — and hence the node order — is not unique. We detect that
202
+ from the eigenvalue *spectrum*, which ``eigvalsh`` returns deterministically
203
+ regardless of solver/threading, unlike the eigenvectors themselves.
204
+ """
205
+ deg = sub.sum(axis=1)
206
+ if np.any(deg <= 0):
207
+ return True # an isolated node has no meaningful spectral position
208
+ d_inv_sqrt = 1.0 / np.sqrt(deg)
209
+ laplacian = np.eye(sub.shape[0]) - (d_inv_sqrt[:, None] * sub * d_inv_sqrt[None, :])
210
+ evals = np.linalg.eigvalsh(laplacian) # ascending; evals[0] ~ 0 (connected)
211
+ if len(evals) < 3:
212
+ return False
213
+ # Degenerate iff the 2nd and 3rd smallest eigenvalues coincide, i.e. the
214
+ # smallest non-zero eigenvalue has multiplicity > 1.
215
+ return bool((evals[2] - evals[1]) < tol)