masster 0.3.12__tar.gz → 0.3.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. (The original page linked to further details here; the link is not preserved in this text.)

Files changed (82):
  1. {masster-0.3.12 → masster-0.3.14}/PKG-INFO +2 -1
  2. {masster-0.3.12 → masster-0.3.14}/pyproject.toml +4 -3
  3. {masster-0.3.12 → masster-0.3.14}/src/masster/_version.py +1 -1
  4. {masster-0.3.12 → masster-0.3.14}/src/masster/study/helpers.py +492 -2
  5. {masster-0.3.12 → masster-0.3.14}/src/masster/study/load.py +35 -7
  6. {masster-0.3.12 → masster-0.3.14}/src/masster/study/plot.py +261 -96
  7. {masster-0.3.12 → masster-0.3.14}/src/masster/study/processing.py +9 -0
  8. {masster-0.3.12 → masster-0.3.14}/src/masster/study/study.py +8 -25
  9. {masster-0.3.12 → masster-0.3.14}/uv.lock +15 -1
  10. masster-0.3.12/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +0 -199787
  11. masster-0.3.12/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
  12. masster-0.3.12/src/masster/docs/SCX_API_Documentation.md +0 -0
  13. masster-0.3.12/src/masster/docs/SCX_DLL_Analysis.md +0 -0
  14. {masster-0.3.12 → masster-0.3.14}/.github/workflows/publish.yml +0 -0
  15. {masster-0.3.12 → masster-0.3.14}/.github/workflows/security.yml +0 -0
  16. {masster-0.3.12 → masster-0.3.14}/.github/workflows/test.yml +0 -0
  17. {masster-0.3.12 → masster-0.3.14}/.gitignore +0 -0
  18. {masster-0.3.12 → masster-0.3.14}/.pre-commit-config.yaml +0 -0
  19. {masster-0.3.12 → masster-0.3.14}/LICENSE +0 -0
  20. {masster-0.3.12 → masster-0.3.14}/Makefile +0 -0
  21. {masster-0.3.12 → masster-0.3.14}/README.md +0 -0
  22. {masster-0.3.12 → masster-0.3.14}/TESTING.md +0 -0
  23. {masster-0.3.12 → masster-0.3.14}/demo/example_batch_process.py +0 -0
  24. {masster-0.3.12 → masster-0.3.14}/demo/example_sample_process.py +0 -0
  25. {masster-0.3.12 → masster-0.3.14}/src/masster/__init__.py +0 -0
  26. {masster-0.3.12 → masster-0.3.14}/src/masster/chromatogram.py +0 -0
  27. {masster-0.3.12 → masster-0.3.14}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +0 -0
  28. {masster-0.3.12 → masster-0.3.14}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  29. {masster-0.3.12 → masster-0.3.14}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  30. {masster-0.3.12 → masster-0.3.14}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  31. {masster-0.3.12 → masster-0.3.14}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  32. {masster-0.3.12 → masster-0.3.14}/src/masster/logger.py +0 -0
  33. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/__init__.py +0 -0
  34. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/defaults/__init__.py +0 -0
  35. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/defaults/find_adducts_def.py +0 -0
  36. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/defaults/find_features_def.py +0 -0
  37. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/defaults/find_ms2_def.py +0 -0
  38. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
  39. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/defaults/sample_def.py +0 -0
  40. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/h5.py +0 -0
  41. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/helpers.py +0 -0
  42. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/lib.py +0 -0
  43. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/load.py +0 -0
  44. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/parameters.py +0 -0
  45. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/plot.py +0 -0
  46. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/processing.py +0 -0
  47. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/quant.py +0 -0
  48. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/sample.py +0 -0
  49. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/sample5_schema.json +0 -0
  50. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/save.py +0 -0
  51. {masster-0.3.12 → masster-0.3.14}/src/masster/sample/sciex.py +0 -0
  52. {masster-0.3.12 → masster-0.3.14}/src/masster/spectrum.py +0 -0
  53. {masster-0.3.12 → masster-0.3.14}/src/masster/study/__init__.py +0 -0
  54. {masster-0.3.12 → masster-0.3.14}/src/masster/study/defaults/__init__.py +0 -0
  55. {masster-0.3.12 → masster-0.3.14}/src/masster/study/defaults/align_def.py +0 -0
  56. {masster-0.3.12 → masster-0.3.14}/src/masster/study/defaults/export_def.py +0 -0
  57. {masster-0.3.12 → masster-0.3.14}/src/masster/study/defaults/fill_chrom_def.py +0 -0
  58. {masster-0.3.12 → masster-0.3.14}/src/masster/study/defaults/fill_def.py +0 -0
  59. {masster-0.3.12 → masster-0.3.14}/src/masster/study/defaults/find_consensus_def.py +0 -0
  60. {masster-0.3.12 → masster-0.3.14}/src/masster/study/defaults/find_ms2_def.py +0 -0
  61. {masster-0.3.12 → masster-0.3.14}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
  62. {masster-0.3.12 → masster-0.3.14}/src/masster/study/defaults/integrate_def.py +0 -0
  63. {masster-0.3.12 → masster-0.3.14}/src/masster/study/defaults/merge_def.py +0 -0
  64. {masster-0.3.12 → masster-0.3.14}/src/masster/study/defaults/study_def.py +0 -0
  65. {masster-0.3.12 → masster-0.3.14}/src/masster/study/export.py +0 -0
  66. {masster-0.3.12 → masster-0.3.14}/src/masster/study/h5.py +0 -0
  67. {masster-0.3.12 → masster-0.3.14}/src/masster/study/helpers_optimized.py +0 -0
  68. {masster-0.3.12 → masster-0.3.14}/src/masster/study/parameters.py +0 -0
  69. {masster-0.3.12 → masster-0.3.14}/src/masster/study/save.py +0 -0
  70. {masster-0.3.12 → masster-0.3.14}/src/masster/study/study5_schema.json +0 -0
  71. {masster-0.3.12 → masster-0.3.14}/tests/conftest.py +0 -0
  72. {masster-0.3.12 → masster-0.3.14}/tests/test_chromatogram.py +0 -0
  73. {masster-0.3.12 → masster-0.3.14}/tests/test_defaults.py +0 -0
  74. {masster-0.3.12 → masster-0.3.14}/tests/test_imports.py +0 -0
  75. {masster-0.3.12 → masster-0.3.14}/tests/test_integration.py +0 -0
  76. {masster-0.3.12 → masster-0.3.14}/tests/test_logger.py +0 -0
  77. {masster-0.3.12 → masster-0.3.14}/tests/test_parameters.py +0 -0
  78. {masster-0.3.12 → masster-0.3.14}/tests/test_sample.py +0 -0
  79. {masster-0.3.12 → masster-0.3.14}/tests/test_spectrum.py +0 -0
  80. {masster-0.3.12 → masster-0.3.14}/tests/test_study.py +0 -0
  81. {masster-0.3.12 → masster-0.3.14}/tests/test_version.py +0 -0
  82. {masster-0.3.12 → masster-0.3.14}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: masster
3
- Version: 0.3.12
3
+ Version: 0.3.14
4
4
  Summary: Mass spectrometry data analysis package
5
5
  Project-URL: homepage, https://github.com/zamboni-lab/masster
6
6
  Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -684,6 +684,7 @@ Requires-Dist: alphabase>=1.0.0
684
684
  Requires-Dist: alpharaw>=0.4.8
685
685
  Requires-Dist: altair>=5.5.0
686
686
  Requires-Dist: bokeh>=3.7.3
687
+ Requires-Dist: cmap>=0.6.2
687
688
  Requires-Dist: datashader>=0.18.1
688
689
  Requires-Dist: h5py>=3.14.0
689
690
  Requires-Dist: holoviews>=1.21.0
@@ -1,7 +1,7 @@
1
1
 
2
2
  [project]
3
3
  name = "masster"
4
- version = "0.3.12"
4
+ version = "0.3.14"
5
5
  description = "Mass spectrometry data analysis package"
6
6
  authors = [
7
7
  { name = "Zamboni Lab" }
@@ -37,7 +37,7 @@ dependencies = [
37
37
  "hvplot>=0.11.3",
38
38
  "loguru>=0.7.3",
39
39
  "numpy>=2.0.0",
40
- # "marimo>=0.14.16",
40
+ # "marimo>=0.14.16",
41
41
  "matchms>=0.30.2",
42
42
  "matplotlib>=3.8.0",
43
43
  "pandas>=2.2.0",
@@ -50,7 +50,8 @@ dependencies = [
50
50
  "scipy>=1.12.0",
51
51
  "simple-parsing>=0.1.7",
52
52
  "tqdm>=4.65.0",
53
- "openpyxl>=3.1.5"
53
+ "openpyxl>=3.1.5",
54
+ "cmap>=0.6.2",
54
55
  ]
55
56
 
56
57
  [project.optional-dependencies]
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
 
4
- __version__ = "0.5.7"
4
+ __version__ = "0.3.14"
5
5
 
6
6
 
7
7
  def get_version():
@@ -1,3 +1,18 @@
1
+ """
2
+ helpers.py
3
+
4
+ This module contains helper functions for the Study class that handle various operations
5
+ like data retrieval, filtering, compression, and utility functions.
6
+
7
+ The functions are organized into the following sections:
8
+ 1. Chromatogram extraction functions (BPC, TIC, EIC, chrom matrix)
9
+ 2. Data retrieval helper functions (get_sample, get_consensus, etc.)
10
+ 3. UID helper functions (_get_*_uids)
11
+ 4. Data filtering and selection functions
12
+ 5. Data compression and restoration functions
13
+ 6. Utility functions (reset, naming, colors, schema ordering)
14
+ """
15
+
1
16
  from __future__ import annotations
2
17
 
3
18
  import os
@@ -10,6 +25,11 @@ from tqdm import tqdm
10
25
  from masster.chromatogram import Chromatogram
11
26
 
12
27
 
28
+ # =====================================================================================
29
+ # CHROMATOGRAM EXTRACTION FUNCTIONS
30
+ # =====================================================================================
31
+
32
+
13
33
  def get_bpc(owner, sample=None, rt_unit="s", label=None, original=False):
14
34
  """
15
35
  Return a Chromatogram object containing the Base Peak Chromatogram (BPC).
@@ -96,7 +116,6 @@ def get_bpc(owner, sample=None, rt_unit="s", label=None, original=False):
96
116
  if (mapping_rows is None or mapping_rows.is_empty()) and hasattr(s, "sample_path"):
97
117
  # attempt to match by sample_path or file name
98
118
  try:
99
- sample_paths = feats.select(["sample_uid", "sample_name", "sample_path"]) # type: ignore[arg-type]
100
119
  # find row where sample_path matches
101
120
  mapping_rows = feats.filter(pl.col("sample_path") == getattr(s, "file", None))
102
121
  except Exception:
@@ -290,6 +309,9 @@ def get_eic(owner, sample=None, mz=None, mz_tol=0.01, rt_unit="s", label=None):
290
309
  return chrom
291
310
 
292
311
 
312
+ # =====================================================================================
313
+ # DATA RETRIEVAL AND MATRIX FUNCTIONS
314
+ # =====================================================================================
293
315
 
294
316
 
295
317
  def get_chrom(self, uids=None, samples=None):
@@ -393,10 +415,14 @@ def get_chrom(self, uids=None, samples=None):
393
415
  # Create Polars DataFrame with complex objects
394
416
  df2_pivoted = pl.DataFrame(pivot_data)
395
417
 
396
- # Return as Polars DataFrame (can handle complex objects like Chromatogram)
397
418
  return df2_pivoted
398
419
 
399
420
 
421
+ # =====================================================================================
422
+ # UTILITY AND CONFIGURATION FUNCTIONS
423
+ # =====================================================================================
424
+
425
+
400
426
  def set_folder(self, folder):
401
427
  """
402
428
  Set the folder for saving and loading files.
@@ -424,6 +450,12 @@ def align_reset(self):
424
450
  # Ensure column order is maintained after with_columns operation
425
451
  self._ensure_features_df_schema_order()
426
452
 
453
+
454
+ # =====================================================================================
455
+ # DATA RETRIEVAL HELPER FUNCTIONS
456
+ # =====================================================================================
457
+
458
+
427
459
  # TODO I don't get this param
428
460
  def get_consensus(self, quant="chrom_area"):
429
461
  if self.consensus_df is None:
@@ -555,6 +587,11 @@ def get_consensus_matches(self, uids=None):
555
587
  return matches
556
588
 
557
589
 
590
+ # =====================================================================================
591
+ # UID HELPER FUNCTIONS
592
+ # =====================================================================================
593
+
594
+
558
595
  def fill_reset(self):
559
596
  # remove all features with filled=True
560
597
  if self.features_df is None:
@@ -757,6 +794,11 @@ def get_orphans(self):
757
794
  return not_in_consensus
758
795
 
759
796
 
797
+ # =====================================================================================
798
+ # DATA COMPRESSION AND RESTORATION FUNCTIONS
799
+ # =====================================================================================
800
+
801
+
760
802
  def compress(self, features=True, ms2=True, chrom=False, ms2_max=5):
761
803
  """
762
804
  Perform compress_features, compress_ms2, and compress_chrom operations.
@@ -1251,6 +1293,11 @@ def compress_chrom(self):
1251
1293
  self.logger.info(f"Compressed chromatograms: cleared {non_null_count} chromatogram objects from features_df")
1252
1294
 
1253
1295
 
1296
+ # =====================================================================================
1297
+ # SAMPLE MANAGEMENT AND NAMING FUNCTIONS
1298
+ # =====================================================================================
1299
+
1300
+
1254
1301
  def name_replace(self, replace_dict):
1255
1302
  """
1256
1303
  Replace sample names in samples_df based on a dictionary mapping.
@@ -1447,6 +1494,11 @@ def set_source(self, filename):
1447
1494
  self.logger.warning(f"Failed to update file_source for {failed_count} samples")
1448
1495
 
1449
1496
 
1497
+ # =====================================================================================
1498
+ # DATA FILTERING AND SELECTION FUNCTIONS
1499
+ # =====================================================================================
1500
+
1501
+
1450
1502
  def features_select(
1451
1503
  self,
1452
1504
  mz=None,
@@ -2222,3 +2274,441 @@ def consensus_delete(self, consensus):
2222
2274
  None (modifies self.consensus_df and related DataFrames in place)
2223
2275
  """
2224
2276
  self.consensus_filter(consensus)
2277
+
2278
+
2279
+ # =====================================================================================
2280
+ # COLOR PALETTE AND VISUALIZATION FUNCTIONS
2281
+ # =====================================================================================
2282
+
2283
+
2284
+ def sample_color(self, by=None, palette="Turbo256"):
2285
+ """
2286
+ Set sample colors in the sample_color column of samples_df.
2287
+
2288
+ When a new sample is added, this function resets all colors picking from the specified palette.
2289
+ The default palette is Turbo256.
2290
+
2291
+ Parameters:
2292
+ by (str or list, optional): Property to base colors on. Options:
2293
+ - 'sample_uid': Use sample_uid values to assign colors
2294
+ - 'sample_index': Use sample index (position) to assign colors
2295
+ - 'sample_type': Use sample_type values to assign colors
2296
+ - 'sample_name': Use sample_name values to assign colors
2297
+ - list of colors: Use provided list of hex color codes
2298
+ - None: Use sequential colors from palette (default)
2299
+ palette (str): Color palette to use. Options:
2300
+ - 'Turbo256': Turbo colormap (256 colors, perceptually uniform)
2301
+ - 'Viridis256': Viridis colormap (256 colors, perceptually uniform)
2302
+ - 'Plasma256': Plasma colormap (256 colors, perceptually uniform)
2303
+ - 'Inferno256': Inferno colormap (256 colors, perceptually uniform)
2304
+ - 'Magma256': Magma colormap (256 colors, perceptually uniform)
2305
+ - 'Cividis256': Cividis colormap (256 colors, colorblind-friendly)
2306
+ - 'Set1': Qualitative palette (9 distinct colors)
2307
+ - 'Set2': Qualitative palette (8 distinct colors)
2308
+ - 'Set3': Qualitative palette (12 distinct colors)
2309
+ - 'Tab10': Tableau 10 palette (10 distinct colors)
2310
+ - 'Tab20': Tableau 20 palette (20 distinct colors)
2311
+ - 'Dark2': Dark qualitative palette (8 colors)
2312
+ - 'Paired': Paired qualitative palette (12 colors)
2313
+ - 'Spectral': Spectral diverging colormap
2314
+ - 'Rainbow': Rainbow colormap
2315
+ - 'Coolwarm': Cool-warm diverging colormap
2316
+ - 'Seismic': Seismic diverging colormap
2317
+ - Any other colormap name supported by the cmap library
2318
+
2319
+ For a complete catalog of available colormaps, see:
2320
+ https://cmap-docs.readthedocs.io/en/latest/catalog/
2321
+
2322
+ Returns:
2323
+ None (modifies self.samples_df in place)
2324
+
2325
+ Example:
2326
+ # Set colors based on sample type
2327
+ study.sample_color(by='sample_type', palette='Set1')
2328
+
2329
+ # Set colors using a custom color list
2330
+ study.sample_color(by=['#FF0000', '#00FF00', '#0000FF'])
2331
+
2332
+ # Reset to default Turbo256 sequential colors
2333
+ study.sample_color()
2334
+ """
2335
+ if self.samples_df is None or len(self.samples_df) == 0:
2336
+ self.logger.warning("No samples found in study.")
2337
+ return
2338
+
2339
+ sample_count = len(self.samples_df)
2340
+
2341
+ # Handle custom color list
2342
+ if isinstance(by, list):
2343
+ if len(by) < sample_count:
2344
+ self.logger.warning(f"Provided color list has {len(by)} colors but {sample_count} samples. Repeating colors.")
2345
+ # Cycle through the provided colors if there aren't enough
2346
+ colors = []
2347
+ for i in range(sample_count):
2348
+ colors.append(by[i % len(by)])
2349
+ else:
2350
+ colors = by[:sample_count]
2351
+ else:
2352
+ # Use the new approach: sample colors evenly from the whole colormap
2353
+ if by is None:
2354
+ # Sequential colors evenly sampled from the colormap
2355
+ try:
2356
+ colors = _sample_colors_from_colormap(palette, sample_count)
2357
+ except ValueError as e:
2358
+ self.logger.error(f"Error sampling colors from colormap: {e}")
2359
+ return
2360
+
2361
+ elif by == 'sample_uid':
2362
+ # Use sample_uid to determine position in evenly sampled colormap
2363
+ sample_uids = self.samples_df['sample_uid'].to_list()
2364
+ try:
2365
+ # Sample colors evenly for the number of samples
2366
+ palette_colors = _sample_colors_from_colormap(palette, sample_count)
2367
+ colors = []
2368
+ for uid in sample_uids:
2369
+ # Use modulo to cycle through evenly sampled colors
2370
+ color_index = uid % len(palette_colors)
2371
+ colors.append(palette_colors[color_index])
2372
+ except ValueError as e:
2373
+ self.logger.error(f"Error sampling colors from colormap: {e}")
2374
+ return
2375
+
2376
+ elif by == 'sample_index':
2377
+ # Use sample index (position in DataFrame) with evenly sampled colors
2378
+ try:
2379
+ colors = _sample_colors_from_colormap(palette, sample_count)
2380
+ except ValueError as e:
2381
+ self.logger.error(f"Error sampling colors from colormap: {e}")
2382
+ return
2383
+
2384
+ elif by == 'sample_type':
2385
+ # Use sample_type to assign colors - same type gets same color
2386
+ # Sample colors evenly across colormap for unique types
2387
+ sample_types = self.samples_df['sample_type'].to_list()
2388
+ unique_types = list(set([t for t in sample_types if t is not None]))
2389
+
2390
+ try:
2391
+ # Sample colors evenly for unique types
2392
+ type_colors = _sample_colors_from_colormap(palette, len(unique_types))
2393
+ type_to_color = {}
2394
+
2395
+ for i, sample_type in enumerate(unique_types):
2396
+ type_to_color[sample_type] = type_colors[i]
2397
+
2398
+ colors = []
2399
+ for sample_type in sample_types:
2400
+ if sample_type is None:
2401
+ # Default to first color for None
2402
+ colors.append(type_colors[0] if type_colors else "#000000")
2403
+ else:
2404
+ colors.append(type_to_color[sample_type])
2405
+ except ValueError as e:
2406
+ self.logger.error(f"Error sampling colors from colormap: {e}")
2407
+ return
2408
+
2409
+ elif by == 'sample_name':
2410
+ # Use sample_name to assign colors - same name gets same color (unlikely but possible)
2411
+ # Sample colors evenly across colormap for unique names
2412
+ sample_names = self.samples_df['sample_name'].to_list()
2413
+ unique_names = list(set([n for n in sample_names if n is not None]))
2414
+
2415
+ try:
2416
+ # Sample colors evenly for unique names
2417
+ name_colors = _sample_colors_from_colormap(palette, len(unique_names))
2418
+ name_to_color = {}
2419
+
2420
+ for i, sample_name in enumerate(unique_names):
2421
+ name_to_color[sample_name] = name_colors[i]
2422
+
2423
+ colors = []
2424
+ for sample_name in sample_names:
2425
+ if sample_name is None:
2426
+ # Default to first color for None
2427
+ colors.append(name_colors[0] if name_colors else "#000000")
2428
+ else:
2429
+ colors.append(name_to_color[sample_name])
2430
+ except ValueError as e:
2431
+ self.logger.error(f"Error sampling colors from colormap: {e}")
2432
+ return
2433
+ else:
2434
+ self.logger.error(f"Invalid by value: {by}. Must be 'sample_uid', 'sample_index', 'sample_type', 'sample_name', a list of colors, or None.")
2435
+ return
2436
+
2437
+ # Update the sample_color column
2438
+ self.samples_df = self.samples_df.with_columns(
2439
+ pl.Series("sample_color", colors).alias("sample_color")
2440
+ )
2441
+
2442
+ if isinstance(by, list):
2443
+ self.logger.debug(f"Set sample colors using provided color list ({len(by)} colors)")
2444
+ elif by is None:
2445
+ self.logger.debug(f"Set sequential sample colors using {palette} palette")
2446
+ else:
2447
+ self.logger.debug(f"Set sample colors based on {by} using {palette} palette")
2448
+
2449
+
2450
+ def sample_color_reset(self):
2451
+ """
2452
+ Reset sample colors to default coloring using the 'turbo' colormap.
2453
+
2454
+ This function assigns colors by distributing samples evenly across the full
2455
+ turbo colormap range, ensuring maximum color diversity and visual distinction
2456
+ between samples.
2457
+
2458
+ Returns:
2459
+ None (modifies self.samples_df in place)
2460
+ """
2461
+ if self.samples_df is None or len(self.samples_df) == 0:
2462
+ self.logger.warning("No samples found in study.")
2463
+ return
2464
+
2465
+ try:
2466
+ from cmap import Colormap
2467
+
2468
+ # Use turbo colormap
2469
+ cm = Colormap('turbo')
2470
+
2471
+ # Get sample count and assign colors evenly distributed across colormap
2472
+ n_samples = len(self.samples_df)
2473
+ colors = []
2474
+
2475
+ # Distribute samples evenly across the full colormap range
2476
+ for i in range(n_samples):
2477
+ # Evenly distribute samples across colormap (avoiding endpoints to prevent white/black)
2478
+ normalized_value = (i + 0.5) / n_samples # +0.5 to center samples in their bins
2479
+ # Optionally, map to a subset of colormap to avoid extreme colors
2480
+ # Use 10% to 90% of colormap range for better color diversity
2481
+ normalized_value = 0.1 + (normalized_value * 0.8)
2482
+
2483
+ color_rgba = cm(normalized_value)
2484
+
2485
+ # Convert RGBA to hex
2486
+ if len(color_rgba) >= 3:
2487
+ r, g, b = color_rgba[:3]
2488
+ # Convert to 0-255 range if needed
2489
+ if max(color_rgba[:3]) <= 1.0:
2490
+ r, g, b = int(r * 255), int(g * 255), int(b * 255)
2491
+ hex_color = f"#{r:02x}{g:02x}{b:02x}"
2492
+ colors.append(hex_color)
2493
+
2494
+ # Update the sample_color column
2495
+ self.samples_df = self.samples_df.with_columns(
2496
+ pl.Series("sample_color", colors).alias("sample_color")
2497
+ )
2498
+
2499
+ self.logger.debug(f"Reset sample colors using turbo colormap with even distribution ({n_samples} samples)")
2500
+
2501
+ except ImportError:
2502
+ self.logger.error("cmap library is required for sample color reset. Install with: pip install cmap")
2503
+ except Exception as e:
2504
+ self.logger.error(f"Failed to reset sample colors: {e}")
2505
+
2506
+
2507
+ def _get_color_palette(palette_name):
2508
+ """
2509
+ Get color palette as a list of hex color codes using the cmap library.
2510
+
2511
+ Parameters:
2512
+ palette_name (str): Name of the palette
2513
+
2514
+ Returns:
2515
+ list: List of hex color codes
2516
+
2517
+ Raises:
2518
+ ValueError: If palette_name is not supported
2519
+ """
2520
+ try:
2521
+ from cmap import Colormap
2522
+ except ImportError:
2523
+ raise ValueError("cmap library is required for color palettes. Install with: pip install cmap")
2524
+
2525
+ # Map common palette names to cmap names
2526
+ palette_mapping = {
2527
+ # Scientific colormaps
2528
+ "Turbo256": "turbo",
2529
+ "Viridis256": "viridis",
2530
+ "Plasma256": "plasma",
2531
+ "Inferno256": "inferno",
2532
+ "Magma256": "magma",
2533
+ "Cividis256": "cividis",
2534
+
2535
+ # Qualitative palettes
2536
+ "Set1": "Set1",
2537
+ "Set2": "Set2",
2538
+ "Set3": "Set3",
2539
+ "Tab10": "tab10",
2540
+ "Tab20": "tab20",
2541
+ "Dark2": "Dark2",
2542
+ "Paired": "Paired",
2543
+
2544
+ # Additional useful palettes
2545
+ "Spectral": "Spectral",
2546
+ "Rainbow": "rainbow",
2547
+ "Coolwarm": "coolwarm",
2548
+ "Seismic": "seismic",
2549
+ }
2550
+
2551
+ # Get the cmap name
2552
+ cmap_name = palette_mapping.get(palette_name, palette_name.lower())
2553
+
2554
+ try:
2555
+ # Create colormap
2556
+ cm = Colormap(cmap_name)
2557
+
2558
+ # Determine number of colors to generate
2559
+ if "256" in palette_name:
2560
+ n_colors = 256
2561
+ elif palette_name in ["Set1"]:
2562
+ n_colors = 9
2563
+ elif palette_name in ["Set2", "Dark2"]:
2564
+ n_colors = 8
2565
+ elif palette_name in ["Set3", "Paired"]:
2566
+ n_colors = 12
2567
+ elif palette_name in ["Tab10"]:
2568
+ n_colors = 10
2569
+ elif palette_name in ["Tab20"]:
2570
+ n_colors = 20
2571
+ else:
2572
+ n_colors = 256 # Default for continuous colormaps
2573
+
2574
+ # Generate colors
2575
+ if n_colors <= 20:
2576
+ # For discrete palettes, use evenly spaced indices
2577
+ indices = [i / (n_colors - 1) for i in range(n_colors)]
2578
+ else:
2579
+ # For continuous palettes, use full range
2580
+ indices = [i / (n_colors - 1) for i in range(n_colors)]
2581
+
2582
+ # Get colors as RGBA and convert to hex
2583
+ colors = cm(indices)
2584
+ hex_colors = []
2585
+
2586
+ for color in colors:
2587
+ if len(color) >= 3: # RGBA or RGB
2588
+ r, g, b = color[:3]
2589
+ # Convert to 0-255 range if needed
2590
+ if max(color[:3]) <= 1.0:
2591
+ r, g, b = int(r * 255), int(g * 255), int(b * 255)
2592
+ hex_color = f"#{r:02x}{g:02x}{b:02x}"
2593
+ hex_colors.append(hex_color)
2594
+
2595
+ return hex_colors
2596
+
2597
+ except Exception as e:
2598
+ raise ValueError(f"Failed to create colormap '{cmap_name}': {e}. "
2599
+ f"Available palettes: {list(palette_mapping.keys())}")
2600
+
2601
+
2602
+ def _sample_colors_from_colormap(palette_name, n_colors):
2603
+ """
2604
+ Sample colors evenly from the whole colormap range, similar to sample_color_reset.
2605
+
2606
+ Parameters:
2607
+ palette_name (str): Name of the palette/colormap
2608
+ n_colors (int): Number of colors to sample
2609
+
2610
+ Returns:
2611
+ list: List of hex color codes sampled evenly from the colormap
2612
+
2613
+ Raises:
2614
+ ValueError: If palette_name is not supported
2615
+ """
2616
+ try:
2617
+ from cmap import Colormap
2618
+ except ImportError:
2619
+ raise ValueError("cmap library is required for color palettes. Install with: pip install cmap")
2620
+
2621
+ # Map common palette names to cmap names (same as _get_color_palette)
2622
+ palette_mapping = {
2623
+ # Scientific colormaps
2624
+ "Turbo256": "turbo",
2625
+ "Viridis256": "viridis",
2626
+ "Plasma256": "plasma",
2627
+ "Inferno256": "inferno",
2628
+ "Magma256": "magma",
2629
+ "Cividis256": "cividis",
2630
+
2631
+ # Qualitative palettes
2632
+ "Set1": "Set1",
2633
+ "Set2": "Set2",
2634
+ "Set3": "Set3",
2635
+ "Tab10": "tab10",
2636
+ "Tab20": "tab20",
2637
+ "Dark2": "Dark2",
2638
+ "Paired": "Paired",
2639
+
2640
+ # Additional useful palettes
2641
+ "Spectral": "Spectral",
2642
+ "Rainbow": "rainbow",
2643
+ "Coolwarm": "coolwarm",
2644
+ "Seismic": "seismic",
2645
+ }
2646
+
2647
+ # Get the cmap name
2648
+ cmap_name = palette_mapping.get(palette_name, palette_name.lower())
2649
+
2650
+ try:
2651
+ # Create colormap
2652
+ cm = Colormap(cmap_name)
2653
+
2654
+ colors = []
2655
+
2656
+ # Distribute samples evenly across the full colormap range (same approach as sample_color_reset)
2657
+ for i in range(n_colors):
2658
+ # Evenly distribute samples across colormap (avoiding endpoints to prevent white/black)
2659
+ normalized_value = (i + 0.5) / n_colors # +0.5 to center samples in their bins
2660
+ # Map to a subset of colormap to avoid extreme colors (use 10% to 90% range)
2661
+ normalized_value = 0.1 + (normalized_value * 0.8)
2662
+
2663
+ color_rgba = cm(normalized_value)
2664
+
2665
+ # Convert RGBA to hex
2666
+ if len(color_rgba) >= 3:
2667
+ r, g, b = color_rgba[:3]
2668
+ # Convert to 0-255 range if needed
2669
+ if max(color_rgba[:3]) <= 1.0:
2670
+ r, g, b = int(r * 255), int(g * 255), int(b * 255)
2671
+ hex_color = f"#{r:02x}{g:02x}{b:02x}"
2672
+ colors.append(hex_color)
2673
+
2674
+ return colors
2675
+
2676
+ except Exception as e:
2677
+ raise ValueError(f"Failed to create colormap '{cmap_name}': {e}. "
2678
+ f"Available palettes: {list(palette_mapping.keys())}")
2679
+
2680
+
2681
+ def _matplotlib_to_hex(color_dict):
2682
+ """Convert matplotlib color dictionary to list of hex colors."""
2683
+ return list(color_dict.values())
2684
+
2685
+
2686
+ # =====================================================================================
2687
+ # SCHEMA AND DATA STRUCTURE FUNCTIONS
2688
+ # =====================================================================================
2689
+
2690
+
2691
+ def _ensure_features_df_schema_order(self):
2692
+ """
2693
+ Ensure features_df columns are ordered according to study5_schema.json.
2694
+
2695
+ This method should be called after operations that might scramble the column order.
2696
+ """
2697
+ if self.features_df is None or self.features_df.is_empty():
2698
+ return
2699
+
2700
+ try:
2701
+ import os
2702
+ import json
2703
+ from masster.study.h5 import _reorder_columns_by_schema
2704
+
2705
+ # Load schema
2706
+ schema_path = os.path.join(os.path.dirname(__file__), "study5_schema.json")
2707
+ with open(schema_path, 'r') as f:
2708
+ schema = json.load(f)
2709
+
2710
+ # Reorder columns to match schema
2711
+ self.features_df = _reorder_columns_by_schema(self.features_df, schema, 'features_df')
2712
+
2713
+ except Exception as e:
2714
+ self.logger.warning(f"Failed to reorder features_df columns: {e}")