guidepost 0.3.1__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. guidepost-0.3.2/MANIFEST.in +4 -0
  2. {guidepost-0.3.1 → guidepost-0.3.2}/PKG-INFO +2 -11
  3. {guidepost-0.3.1 → guidepost-0.3.2}/README.md +1 -10
  4. {guidepost-0.3.1 → guidepost-0.3.2}/guidepost/seriation.py +39 -1
  5. guidepost-0.3.2/guidepost/static/guidepost.js +23182 -0
  6. guidepost-0.3.2/guidepost/static/guidepost.js.map +7 -0
  7. guidepost-0.3.2/guidepost/static/trailmark.js +21 -0
  8. guidepost-0.3.2/guidepost/static/trailmark.js.map +7 -0
  9. guidepost-0.3.2/guidepost/version.py +2 -0
  10. {guidepost-0.3.1 → guidepost-0.3.2}/guidepost.egg-info/PKG-INFO +2 -11
  11. {guidepost-0.3.1 → guidepost-0.3.2}/guidepost.egg-info/SOURCES.txt +5 -0
  12. {guidepost-0.3.1 → guidepost-0.3.2}/setup.py +1 -1
  13. {guidepost-0.3.1 → guidepost-0.3.2}/tests/test_list_parsing.py +4 -1
  14. guidepost-0.3.1/guidepost/version.py +0 -2
  15. {guidepost-0.3.1 → guidepost-0.3.2}/LICENSE +0 -0
  16. {guidepost-0.3.1 → guidepost-0.3.2}/guidepost/__init__.py +0 -0
  17. {guidepost-0.3.1 → guidepost-0.3.2}/guidepost/aggregation.py +0 -0
  18. {guidepost-0.3.1 → guidepost-0.3.2}/guidepost/guidepost.py +0 -0
  19. {guidepost-0.3.1 → guidepost-0.3.2}/guidepost/node_layout.py +0 -0
  20. {guidepost-0.3.1 → guidepost-0.3.2}/guidepost/trailmark.py +0 -0
  21. {guidepost-0.3.1 → guidepost-0.3.2}/guidepost/utils.py +0 -0
  22. {guidepost-0.3.1 → guidepost-0.3.2}/guidepost.egg-info/dependency_links.txt +0 -0
  23. {guidepost-0.3.1 → guidepost-0.3.2}/guidepost.egg-info/requires.txt +0 -0
  24. {guidepost-0.3.1 → guidepost-0.3.2}/guidepost.egg-info/top_level.txt +0 -0
  25. {guidepost-0.3.1 → guidepost-0.3.2}/pyproject.toml +0 -0
  26. {guidepost-0.3.1 → guidepost-0.3.2}/setup.cfg +0 -0
  27. {guidepost-0.3.1 → guidepost-0.3.2}/tests/test_aggregation.py +0 -0
  28. {guidepost-0.3.1 → guidepost-0.3.2}/tests/test_node_layout.py +0 -0
  29. {guidepost-0.3.1 → guidepost-0.3.2}/tests/test_seriation.py +0 -0
  30. {guidepost-0.3.1 → guidepost-0.3.2}/tutorials/__init__.py +0 -0
@@ -0,0 +1,4 @@
1
+ # `python -m build` builds the wheel from the sdist, so the compiled frontend
2
+ # bundles must be in the sdist for them to reach the installed wheel. They are
3
+ # gitignored (built by esbuild.config.js), so include them explicitly here.
4
+ recursive-include guidepost/static *.js *.js.map
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: guidepost
3
- Version: 0.3.1
3
+ Version: 0.3.2
4
4
  Summary: Guidepost. An overview visualization for understanding supercomputer queue data.
5
5
  Home-page: https://github.com/cscully-allison/guidepost
6
6
  Author: Connor Scully-Allison
@@ -50,22 +50,13 @@ import pandas as pd
50
50
  gp = Guidepost()
51
51
  gp.load_data(pd.read_parquet("data/jobs_data.parquet"))
52
52
 
53
- gp.vis_configs = {
54
- 'x': 'start_time', # x-axis (numeric or datetime)
55
- 'y': 'queue_wait', # y-axis (numeric)
56
- 'color': 'nodes_requested', # cell color (numeric)
57
- 'color_agg': 'avg', # aggregation for color
58
- 'categorical': 'user', # bar chart / filter
59
- 'facet_by': 'partition' # splits the data into groups
60
- }
61
-
62
53
  gp # display in a notebook cell
63
54
  ```
64
55
 
65
56
  Brush the heatmap or its histograms, then pull the selected rows back into Python:
66
57
 
67
58
  ```python
68
- df = gp.retrieve_selected_data() # or: gp.selection.dataframe
59
+ df = gp.selection.dataframe
69
60
  ```
70
61
 
71
62
  Input is a `pandas` DataFrame with at least three numeric and two categorical columns (datetime columns are supported on the x-axis).
@@ -19,22 +19,13 @@ import pandas as pd
19
19
  gp = Guidepost()
20
20
  gp.load_data(pd.read_parquet("data/jobs_data.parquet"))
21
21
 
22
- gp.vis_configs = {
23
- 'x': 'start_time', # x-axis (numeric or datetime)
24
- 'y': 'queue_wait', # y-axis (numeric)
25
- 'color': 'nodes_requested', # cell color (numeric)
26
- 'color_agg': 'avg', # aggregation for color
27
- 'categorical': 'user', # bar chart / filter
28
- 'facet_by': 'partition' # splits the data into groups
29
- }
30
-
31
22
  gp # display in a notebook cell
32
23
  ```
33
24
 
34
25
  Brush the heatmap or its histograms, then pull the selected rows back into Python:
35
26
 
36
27
  ```python
37
- df = gp.retrieve_selected_data() # or: gp.selection.dataframe
28
+ df = gp.selection.dataframe
38
29
  ```
39
30
 
40
31
  Input is a `pandas` DataFrame with at least three numeric and two categorical columns (datetime columns are supported on the x-axis).
@@ -167,11 +167,49 @@ def _seriate_component(members, A, node_list, SpectralEmbedding):
167
167
  return sorted(members, key=lambda i: node_list[i])
168
168
 
169
169
  sub = A[members][:, members].toarray()
170
+
171
+ # On symmetric structures (e.g. a clique) the smallest non-zero Laplacian
172
+ # eigenvalue is degenerate, so the 1-D spectral coordinate is not unique:
173
+ # the eigensolver returns an arbitrary basis vector from that eigenspace,
174
+ # which varies across runs, platforms, and BLAS threading. There is no
175
+ # canonical order in that case, so fall back to deterministic name order.
176
+ # (EigenVALUES are stable even when the corresponding eigenVECTORS are not,
177
+ # so this test is itself reproducible.)
178
+ if _spectral_order_is_degenerate(sub):
179
+ return sorted(members, key=lambda i: node_list[i])
180
+
170
181
  emb = SpectralEmbedding(n_components=1, affinity="precomputed", random_state=0)
171
182
  coords = emb.fit_transform(sub)[:, 0]
172
- local = sorted(range(len(members)), key=lambda t: (coords[t], node_list[members[t]]))
183
+ # Round so any residual floating-point noise on near-tied nodes collapses and
184
+ # the name-based secondary key decides their order deterministically.
185
+ local = sorted(
186
+ range(len(members)),
187
+ key=lambda t: (round(float(coords[t]), 9), node_list[members[t]]),
188
+ )
173
189
  seq = [members[t] for t in local]
174
190
  # Eigenvectors carry a sign ambiguity; canonicalize orientation by name.
175
191
  if node_list[seq[0]] > node_list[seq[-1]]:
176
192
  seq.reverse()
177
193
  return seq
194
+
195
+
196
def _spectral_order_is_degenerate(sub, tol=1e-9):
    """Return True when the 1-D spectral embedding of ``sub`` is ill-defined.

    The seriation orders nodes by the eigenvector of the smallest non-zero
    eigenvalue of the symmetric normalized Laplacian
    ``I - D^-1/2 @ sub @ D^-1/2``. That eigenvector, and hence the node
    order, is not unique when either

    * the zero eigenvalue is repeated (the graph is numerically disconnected,
      so the "first non-trivial" vector is drawn from the null space), or
    * the smallest non-zero eigenvalue is (near-)repeated, as on a clique.

    Both cases are detected from the eigenvalue *spectrum*, which is stable
    even when the corresponding eigenvectors are not.

    ``sub`` is a dense, square affinity matrix and is assumed to be symmetric
    (``eigvalsh`` only reads one triangle). ``tol`` is the absolute tolerance
    below which two eigenvalues are treated as equal; normalized-Laplacian
    eigenvalues lie in [0, 2], so an absolute tolerance is appropriate.

    Returns a plain ``bool``.
    """
    deg = sub.sum(axis=1)
    if np.any(deg <= 0):
        return True  # an isolated node has no meaningful spectral position
    d_inv_sqrt = 1.0 / np.sqrt(deg)
    laplacian = np.eye(sub.shape[0]) - (d_inv_sqrt[:, None] * sub * d_inv_sqrt[None, :])
    evals = np.linalg.eigvalsh(laplacian)  # ascending; evals[0] ~ 0
    if len(evals) < 3:
        # With fewer than three nodes there is no second candidate eigenvalue
        # to collide with, so the order is never reported as degenerate.
        return False
    # A second (near-)zero eigenvalue means more than one connected component:
    # the embedding coordinate then comes from an arbitrary basis of the null
    # space, so comparing evals[1] with evals[2] would test the wrong pair.
    if evals[1] < tol:
        return True
    # Connected case: degenerate iff the 2nd and 3rd smallest eigenvalues
    # coincide, i.e. the smallest non-zero eigenvalue has multiplicity > 1.
    return bool((evals[2] - evals[1]) < tol)