masster 0.3.13__tar.gz → 0.3.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- {masster-0.3.13 → masster-0.3.14}/PKG-INFO +2 -1
- {masster-0.3.13 → masster-0.3.14}/pyproject.toml +4 -3
- {masster-0.3.13 → masster-0.3.14}/src/masster/_version.py +1 -1
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/helpers.py +492 -2
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/load.py +7 -5
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/plot.py +261 -96
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/processing.py +9 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/study.py +8 -25
- {masster-0.3.13 → masster-0.3.14}/uv.lock +15 -1
- masster-0.3.13/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +0 -199787
- masster-0.3.13/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
- masster-0.3.13/src/masster/docs/SCX_API_Documentation.md +0 -0
- masster-0.3.13/src/masster/docs/SCX_DLL_Analysis.md +0 -0
- {masster-0.3.13 → masster-0.3.14}/.github/workflows/publish.yml +0 -0
- {masster-0.3.13 → masster-0.3.14}/.github/workflows/security.yml +0 -0
- {masster-0.3.13 → masster-0.3.14}/.github/workflows/test.yml +0 -0
- {masster-0.3.13 → masster-0.3.14}/.gitignore +0 -0
- {masster-0.3.13 → masster-0.3.14}/.pre-commit-config.yaml +0 -0
- {masster-0.3.13 → masster-0.3.14}/LICENSE +0 -0
- {masster-0.3.13 → masster-0.3.14}/Makefile +0 -0
- {masster-0.3.13 → masster-0.3.14}/README.md +0 -0
- {masster-0.3.13 → masster-0.3.14}/TESTING.md +0 -0
- {masster-0.3.13 → masster-0.3.14}/demo/example_batch_process.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/demo/example_sample_process.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/__init__.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/chromatogram.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/logger.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/__init__.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/defaults/__init__.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/defaults/find_adducts_def.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/defaults/find_features_def.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/defaults/find_ms2_def.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/defaults/sample_def.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/h5.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/helpers.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/lib.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/load.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/parameters.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/plot.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/processing.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/quant.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/sample.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/sample5_schema.json +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/save.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/sample/sciex.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/spectrum.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/__init__.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/defaults/__init__.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/defaults/align_def.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/defaults/export_def.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/defaults/fill_chrom_def.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/defaults/fill_def.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/defaults/find_consensus_def.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/defaults/find_ms2_def.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/defaults/integrate_def.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/defaults/merge_def.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/defaults/study_def.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/export.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/h5.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/helpers_optimized.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/parameters.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/save.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/src/masster/study/study5_schema.json +0 -0
- {masster-0.3.13 → masster-0.3.14}/tests/conftest.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/tests/test_chromatogram.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/tests/test_defaults.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/tests/test_imports.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/tests/test_integration.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/tests/test_logger.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/tests/test_parameters.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/tests/test_sample.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/tests/test_spectrum.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/tests/test_study.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/tests/test_version.py +0 -0
- {masster-0.3.13 → masster-0.3.14}/tox.ini +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: masster
|
|
3
|
-
Version: 0.3.13
|
|
3
|
+
Version: 0.3.14
|
|
4
4
|
Summary: Mass spectrometry data analysis package
|
|
5
5
|
Project-URL: homepage, https://github.com/zamboni-lab/masster
|
|
6
6
|
Project-URL: repository, https://github.com/zamboni-lab/masster
|
|
@@ -684,6 +684,7 @@ Requires-Dist: alphabase>=1.0.0
|
|
|
684
684
|
Requires-Dist: alpharaw>=0.4.8
|
|
685
685
|
Requires-Dist: altair>=5.5.0
|
|
686
686
|
Requires-Dist: bokeh>=3.7.3
|
|
687
|
+
Requires-Dist: cmap>=0.6.2
|
|
687
688
|
Requires-Dist: datashader>=0.18.1
|
|
688
689
|
Requires-Dist: h5py>=3.14.0
|
|
689
690
|
Requires-Dist: holoviews>=1.21.0
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
[project]
|
|
3
3
|
name = "masster"
|
|
4
|
-
version = "0.3.13"
|
|
4
|
+
version = "0.3.14"
|
|
5
5
|
description = "Mass spectrometry data analysis package"
|
|
6
6
|
authors = [
|
|
7
7
|
{ name = "Zamboni Lab" }
|
|
@@ -37,7 +37,7 @@ dependencies = [
|
|
|
37
37
|
"hvplot>=0.11.3",
|
|
38
38
|
"loguru>=0.7.3",
|
|
39
39
|
"numpy>=2.0.0",
|
|
40
|
-
# "marimo>=0.14.16",
|
|
40
|
+
# "marimo>=0.14.16",
|
|
41
41
|
"matchms>=0.30.2",
|
|
42
42
|
"matplotlib>=3.8.0",
|
|
43
43
|
"pandas>=2.2.0",
|
|
@@ -50,7 +50,8 @@ dependencies = [
|
|
|
50
50
|
"scipy>=1.12.0",
|
|
51
51
|
"simple-parsing>=0.1.7",
|
|
52
52
|
"tqdm>=4.65.0",
|
|
53
|
-
"openpyxl>=3.1.5"
|
|
53
|
+
"openpyxl>=3.1.5",
|
|
54
|
+
"cmap>=0.6.2",
|
|
54
55
|
]
|
|
55
56
|
|
|
56
57
|
[project.optional-dependencies]
|
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
"""
|
|
2
|
+
helpers.py
|
|
3
|
+
|
|
4
|
+
This module contains helper functions for the Study class that handle various operations
|
|
5
|
+
like data retrieval, filtering, compression, and utility functions.
|
|
6
|
+
|
|
7
|
+
The functions are organized into the following sections:
|
|
8
|
+
1. Chromatogram extraction functions (BPC, TIC, EIC, chrom matrix)
|
|
9
|
+
2. Data retrieval helper functions (get_sample, get_consensus, etc.)
|
|
10
|
+
3. UID helper functions (_get_*_uids)
|
|
11
|
+
4. Data filtering and selection functions
|
|
12
|
+
5. Data compression and restoration functions
|
|
13
|
+
6. Utility functions (reset, naming, colors, schema ordering)
|
|
14
|
+
"""
|
|
15
|
+
|
|
1
16
|
from __future__ import annotations
|
|
2
17
|
|
|
3
18
|
import os
|
|
@@ -10,6 +25,11 @@ from tqdm import tqdm
|
|
|
10
25
|
from masster.chromatogram import Chromatogram
|
|
11
26
|
|
|
12
27
|
|
|
28
|
+
# =====================================================================================
|
|
29
|
+
# CHROMATOGRAM EXTRACTION FUNCTIONS
|
|
30
|
+
# =====================================================================================
|
|
31
|
+
|
|
32
|
+
|
|
13
33
|
def get_bpc(owner, sample=None, rt_unit="s", label=None, original=False):
|
|
14
34
|
"""
|
|
15
35
|
Return a Chromatogram object containing the Base Peak Chromatogram (BPC).
|
|
@@ -96,7 +116,6 @@ def get_bpc(owner, sample=None, rt_unit="s", label=None, original=False):
|
|
|
96
116
|
if (mapping_rows is None or mapping_rows.is_empty()) and hasattr(s, "sample_path"):
|
|
97
117
|
# attempt to match by sample_path or file name
|
|
98
118
|
try:
|
|
99
|
-
sample_paths = feats.select(["sample_uid", "sample_name", "sample_path"]) # type: ignore[arg-type]
|
|
100
119
|
# find row where sample_path matches
|
|
101
120
|
mapping_rows = feats.filter(pl.col("sample_path") == getattr(s, "file", None))
|
|
102
121
|
except Exception:
|
|
@@ -290,6 +309,9 @@ def get_eic(owner, sample=None, mz=None, mz_tol=0.01, rt_unit="s", label=None):
|
|
|
290
309
|
return chrom
|
|
291
310
|
|
|
292
311
|
|
|
312
|
+
# =====================================================================================
|
|
313
|
+
# DATA RETRIEVAL AND MATRIX FUNCTIONS
|
|
314
|
+
# =====================================================================================
|
|
293
315
|
|
|
294
316
|
|
|
295
317
|
def get_chrom(self, uids=None, samples=None):
|
|
@@ -393,10 +415,14 @@ def get_chrom(self, uids=None, samples=None):
|
|
|
393
415
|
# Create Polars DataFrame with complex objects
|
|
394
416
|
df2_pivoted = pl.DataFrame(pivot_data)
|
|
395
417
|
|
|
396
|
-
# Return as Polars DataFrame (can handle complex objects like Chromatogram)
|
|
397
418
|
return df2_pivoted
|
|
398
419
|
|
|
399
420
|
|
|
421
|
+
# =====================================================================================
|
|
422
|
+
# UTILITY AND CONFIGURATION FUNCTIONS
|
|
423
|
+
# =====================================================================================
|
|
424
|
+
|
|
425
|
+
|
|
400
426
|
def set_folder(self, folder):
|
|
401
427
|
"""
|
|
402
428
|
Set the folder for saving and loading files.
|
|
@@ -424,6 +450,12 @@ def align_reset(self):
|
|
|
424
450
|
# Ensure column order is maintained after with_columns operation
|
|
425
451
|
self._ensure_features_df_schema_order()
|
|
426
452
|
|
|
453
|
+
|
|
454
|
+
# =====================================================================================
|
|
455
|
+
# DATA RETRIEVAL HELPER FUNCTIONS
|
|
456
|
+
# =====================================================================================
|
|
457
|
+
|
|
458
|
+
|
|
427
459
|
# TODO I don't get this param
|
|
428
460
|
def get_consensus(self, quant="chrom_area"):
|
|
429
461
|
if self.consensus_df is None:
|
|
@@ -555,6 +587,11 @@ def get_consensus_matches(self, uids=None):
|
|
|
555
587
|
return matches
|
|
556
588
|
|
|
557
589
|
|
|
590
|
+
# =====================================================================================
|
|
591
|
+
# UID HELPER FUNCTIONS
|
|
592
|
+
# =====================================================================================
|
|
593
|
+
|
|
594
|
+
|
|
558
595
|
def fill_reset(self):
|
|
559
596
|
# remove all features with filled=True
|
|
560
597
|
if self.features_df is None:
|
|
@@ -757,6 +794,11 @@ def get_orphans(self):
|
|
|
757
794
|
return not_in_consensus
|
|
758
795
|
|
|
759
796
|
|
|
797
|
+
# =====================================================================================
|
|
798
|
+
# DATA COMPRESSION AND RESTORATION FUNCTIONS
|
|
799
|
+
# =====================================================================================
|
|
800
|
+
|
|
801
|
+
|
|
760
802
|
def compress(self, features=True, ms2=True, chrom=False, ms2_max=5):
|
|
761
803
|
"""
|
|
762
804
|
Perform compress_features, compress_ms2, and compress_chrom operations.
|
|
@@ -1251,6 +1293,11 @@ def compress_chrom(self):
|
|
|
1251
1293
|
self.logger.info(f"Compressed chromatograms: cleared {non_null_count} chromatogram objects from features_df")
|
|
1252
1294
|
|
|
1253
1295
|
|
|
1296
|
+
# =====================================================================================
|
|
1297
|
+
# SAMPLE MANAGEMENT AND NAMING FUNCTIONS
|
|
1298
|
+
# =====================================================================================
|
|
1299
|
+
|
|
1300
|
+
|
|
1254
1301
|
def name_replace(self, replace_dict):
|
|
1255
1302
|
"""
|
|
1256
1303
|
Replace sample names in samples_df based on a dictionary mapping.
|
|
@@ -1447,6 +1494,11 @@ def set_source(self, filename):
|
|
|
1447
1494
|
self.logger.warning(f"Failed to update file_source for {failed_count} samples")
|
|
1448
1495
|
|
|
1449
1496
|
|
|
1497
|
+
# =====================================================================================
|
|
1498
|
+
# DATA FILTERING AND SELECTION FUNCTIONS
|
|
1499
|
+
# =====================================================================================
|
|
1500
|
+
|
|
1501
|
+
|
|
1450
1502
|
def features_select(
|
|
1451
1503
|
self,
|
|
1452
1504
|
mz=None,
|
|
@@ -2222,3 +2274,441 @@ def consensus_delete(self, consensus):
|
|
|
2222
2274
|
None (modifies self.consensus_df and related DataFrames in place)
|
|
2223
2275
|
"""
|
|
2224
2276
|
self.consensus_filter(consensus)
|
|
2277
|
+
|
|
2278
|
+
|
|
2279
|
+
# =====================================================================================
|
|
2280
|
+
# COLOR PALETTE AND VISUALIZATION FUNCTIONS
|
|
2281
|
+
# =====================================================================================
|
|
2282
|
+
|
|
2283
|
+
|
|
2284
|
+
def sample_color(self, by=None, palette="Turbo256"):
|
|
2285
|
+
"""
|
|
2286
|
+
Set sample colors in the sample_color column of samples_df.
|
|
2287
|
+
|
|
2288
|
+
When a new sample is added, this function resets all colors picking from the specified palette.
|
|
2289
|
+
The default palette is Turbo256.
|
|
2290
|
+
|
|
2291
|
+
Parameters:
|
|
2292
|
+
by (str or list, optional): Property to base colors on. Options:
|
|
2293
|
+
- 'sample_uid': Use sample_uid values to assign colors
|
|
2294
|
+
- 'sample_index': Use sample index (position) to assign colors
|
|
2295
|
+
- 'sample_type': Use sample_type values to assign colors
|
|
2296
|
+
- 'sample_name': Use sample_name values to assign colors
|
|
2297
|
+
- list of colors: Use provided list of hex color codes
|
|
2298
|
+
- None: Use sequential colors from palette (default)
|
|
2299
|
+
palette (str): Color palette to use. Options:
|
|
2300
|
+
- 'Turbo256': Turbo colormap (256 colors, perceptually uniform)
|
|
2301
|
+
- 'Viridis256': Viridis colormap (256 colors, perceptually uniform)
|
|
2302
|
+
- 'Plasma256': Plasma colormap (256 colors, perceptually uniform)
|
|
2303
|
+
- 'Inferno256': Inferno colormap (256 colors, perceptually uniform)
|
|
2304
|
+
- 'Magma256': Magma colormap (256 colors, perceptually uniform)
|
|
2305
|
+
- 'Cividis256': Cividis colormap (256 colors, colorblind-friendly)
|
|
2306
|
+
- 'Set1': Qualitative palette (9 distinct colors)
|
|
2307
|
+
- 'Set2': Qualitative palette (8 distinct colors)
|
|
2308
|
+
- 'Set3': Qualitative palette (12 distinct colors)
|
|
2309
|
+
- 'Tab10': Tableau 10 palette (10 distinct colors)
|
|
2310
|
+
- 'Tab20': Tableau 20 palette (20 distinct colors)
|
|
2311
|
+
- 'Dark2': Dark qualitative palette (8 colors)
|
|
2312
|
+
- 'Paired': Paired qualitative palette (12 colors)
|
|
2313
|
+
- 'Spectral': Spectral diverging colormap
|
|
2314
|
+
- 'Rainbow': Rainbow colormap
|
|
2315
|
+
- 'Coolwarm': Cool-warm diverging colormap
|
|
2316
|
+
- 'Seismic': Seismic diverging colormap
|
|
2317
|
+
- Any other colormap name supported by the cmap library
|
|
2318
|
+
|
|
2319
|
+
For a complete catalog of available colormaps, see:
|
|
2320
|
+
https://cmap-docs.readthedocs.io/en/latest/catalog/
|
|
2321
|
+
|
|
2322
|
+
Returns:
|
|
2323
|
+
None (modifies self.samples_df in place)
|
|
2324
|
+
|
|
2325
|
+
Example:
|
|
2326
|
+
# Set colors based on sample type
|
|
2327
|
+
study.sample_color(by='sample_type', palette='Set1')
|
|
2328
|
+
|
|
2329
|
+
# Set colors using a custom color list
|
|
2330
|
+
study.sample_color(by=['#FF0000', '#00FF00', '#0000FF'])
|
|
2331
|
+
|
|
2332
|
+
# Reset to default Turbo256 sequential colors
|
|
2333
|
+
study.sample_color()
|
|
2334
|
+
"""
|
|
2335
|
+
if self.samples_df is None or len(self.samples_df) == 0:
|
|
2336
|
+
self.logger.warning("No samples found in study.")
|
|
2337
|
+
return
|
|
2338
|
+
|
|
2339
|
+
sample_count = len(self.samples_df)
|
|
2340
|
+
|
|
2341
|
+
# Handle custom color list
|
|
2342
|
+
if isinstance(by, list):
|
|
2343
|
+
if len(by) < sample_count:
|
|
2344
|
+
self.logger.warning(f"Provided color list has {len(by)} colors but {sample_count} samples. Repeating colors.")
|
|
2345
|
+
# Cycle through the provided colors if there aren't enough
|
|
2346
|
+
colors = []
|
|
2347
|
+
for i in range(sample_count):
|
|
2348
|
+
colors.append(by[i % len(by)])
|
|
2349
|
+
else:
|
|
2350
|
+
colors = by[:sample_count]
|
|
2351
|
+
else:
|
|
2352
|
+
# Use the new approach: sample colors evenly from the whole colormap
|
|
2353
|
+
if by is None:
|
|
2354
|
+
# Sequential colors evenly sampled from the colormap
|
|
2355
|
+
try:
|
|
2356
|
+
colors = _sample_colors_from_colormap(palette, sample_count)
|
|
2357
|
+
except ValueError as e:
|
|
2358
|
+
self.logger.error(f"Error sampling colors from colormap: {e}")
|
|
2359
|
+
return
|
|
2360
|
+
|
|
2361
|
+
elif by == 'sample_uid':
|
|
2362
|
+
# Use sample_uid to determine position in evenly sampled colormap
|
|
2363
|
+
sample_uids = self.samples_df['sample_uid'].to_list()
|
|
2364
|
+
try:
|
|
2365
|
+
# Sample colors evenly for the number of samples
|
|
2366
|
+
palette_colors = _sample_colors_from_colormap(palette, sample_count)
|
|
2367
|
+
colors = []
|
|
2368
|
+
for uid in sample_uids:
|
|
2369
|
+
# Use modulo to cycle through evenly sampled colors
|
|
2370
|
+
color_index = uid % len(palette_colors)
|
|
2371
|
+
colors.append(palette_colors[color_index])
|
|
2372
|
+
except ValueError as e:
|
|
2373
|
+
self.logger.error(f"Error sampling colors from colormap: {e}")
|
|
2374
|
+
return
|
|
2375
|
+
|
|
2376
|
+
elif by == 'sample_index':
|
|
2377
|
+
# Use sample index (position in DataFrame) with evenly sampled colors
|
|
2378
|
+
try:
|
|
2379
|
+
colors = _sample_colors_from_colormap(palette, sample_count)
|
|
2380
|
+
except ValueError as e:
|
|
2381
|
+
self.logger.error(f"Error sampling colors from colormap: {e}")
|
|
2382
|
+
return
|
|
2383
|
+
|
|
2384
|
+
elif by == 'sample_type':
|
|
2385
|
+
# Use sample_type to assign colors - same type gets same color
|
|
2386
|
+
# Sample colors evenly across colormap for unique types
|
|
2387
|
+
sample_types = self.samples_df['sample_type'].to_list()
|
|
2388
|
+
unique_types = list(set([t for t in sample_types if t is not None]))
|
|
2389
|
+
|
|
2390
|
+
try:
|
|
2391
|
+
# Sample colors evenly for unique types
|
|
2392
|
+
type_colors = _sample_colors_from_colormap(palette, len(unique_types))
|
|
2393
|
+
type_to_color = {}
|
|
2394
|
+
|
|
2395
|
+
for i, sample_type in enumerate(unique_types):
|
|
2396
|
+
type_to_color[sample_type] = type_colors[i]
|
|
2397
|
+
|
|
2398
|
+
colors = []
|
|
2399
|
+
for sample_type in sample_types:
|
|
2400
|
+
if sample_type is None:
|
|
2401
|
+
# Default to first color for None
|
|
2402
|
+
colors.append(type_colors[0] if type_colors else "#000000")
|
|
2403
|
+
else:
|
|
2404
|
+
colors.append(type_to_color[sample_type])
|
|
2405
|
+
except ValueError as e:
|
|
2406
|
+
self.logger.error(f"Error sampling colors from colormap: {e}")
|
|
2407
|
+
return
|
|
2408
|
+
|
|
2409
|
+
elif by == 'sample_name':
|
|
2410
|
+
# Use sample_name to assign colors - same name gets same color (unlikely but possible)
|
|
2411
|
+
# Sample colors evenly across colormap for unique names
|
|
2412
|
+
sample_names = self.samples_df['sample_name'].to_list()
|
|
2413
|
+
unique_names = list(set([n for n in sample_names if n is not None]))
|
|
2414
|
+
|
|
2415
|
+
try:
|
|
2416
|
+
# Sample colors evenly for unique names
|
|
2417
|
+
name_colors = _sample_colors_from_colormap(palette, len(unique_names))
|
|
2418
|
+
name_to_color = {}
|
|
2419
|
+
|
|
2420
|
+
for i, sample_name in enumerate(unique_names):
|
|
2421
|
+
name_to_color[sample_name] = name_colors[i]
|
|
2422
|
+
|
|
2423
|
+
colors = []
|
|
2424
|
+
for sample_name in sample_names:
|
|
2425
|
+
if sample_name is None:
|
|
2426
|
+
# Default to first color for None
|
|
2427
|
+
colors.append(name_colors[0] if name_colors else "#000000")
|
|
2428
|
+
else:
|
|
2429
|
+
colors.append(name_to_color[sample_name])
|
|
2430
|
+
except ValueError as e:
|
|
2431
|
+
self.logger.error(f"Error sampling colors from colormap: {e}")
|
|
2432
|
+
return
|
|
2433
|
+
else:
|
|
2434
|
+
self.logger.error(f"Invalid by value: {by}. Must be 'sample_uid', 'sample_index', 'sample_type', 'sample_name', a list of colors, or None.")
|
|
2435
|
+
return
|
|
2436
|
+
|
|
2437
|
+
# Update the sample_color column
|
|
2438
|
+
self.samples_df = self.samples_df.with_columns(
|
|
2439
|
+
pl.Series("sample_color", colors).alias("sample_color")
|
|
2440
|
+
)
|
|
2441
|
+
|
|
2442
|
+
if isinstance(by, list):
|
|
2443
|
+
self.logger.debug(f"Set sample colors using provided color list ({len(by)} colors)")
|
|
2444
|
+
elif by is None:
|
|
2445
|
+
self.logger.debug(f"Set sequential sample colors using {palette} palette")
|
|
2446
|
+
else:
|
|
2447
|
+
self.logger.debug(f"Set sample colors based on {by} using {palette} palette")
|
|
2448
|
+
|
|
2449
|
+
|
|
2450
|
+
def sample_color_reset(self):
|
|
2451
|
+
"""
|
|
2452
|
+
Reset sample colors to default coloring using the 'turbo' colormap.
|
|
2453
|
+
|
|
2454
|
+
This function assigns colors by distributing samples evenly across the full
|
|
2455
|
+
turbo colormap range, ensuring maximum color diversity and visual distinction
|
|
2456
|
+
between samples.
|
|
2457
|
+
|
|
2458
|
+
Returns:
|
|
2459
|
+
None (modifies self.samples_df in place)
|
|
2460
|
+
"""
|
|
2461
|
+
if self.samples_df is None or len(self.samples_df) == 0:
|
|
2462
|
+
self.logger.warning("No samples found in study.")
|
|
2463
|
+
return
|
|
2464
|
+
|
|
2465
|
+
try:
|
|
2466
|
+
from cmap import Colormap
|
|
2467
|
+
|
|
2468
|
+
# Use turbo colormap
|
|
2469
|
+
cm = Colormap('turbo')
|
|
2470
|
+
|
|
2471
|
+
# Get sample count and assign colors evenly distributed across colormap
|
|
2472
|
+
n_samples = len(self.samples_df)
|
|
2473
|
+
colors = []
|
|
2474
|
+
|
|
2475
|
+
# Distribute samples evenly across the full colormap range
|
|
2476
|
+
for i in range(n_samples):
|
|
2477
|
+
# Evenly distribute samples across colormap (avoiding endpoints to prevent white/black)
|
|
2478
|
+
normalized_value = (i + 0.5) / n_samples # +0.5 to center samples in their bins
|
|
2479
|
+
# Optionally, map to a subset of colormap to avoid extreme colors
|
|
2480
|
+
# Use 10% to 90% of colormap range for better color diversity
|
|
2481
|
+
normalized_value = 0.1 + (normalized_value * 0.8)
|
|
2482
|
+
|
|
2483
|
+
color_rgba = cm(normalized_value)
|
|
2484
|
+
|
|
2485
|
+
# Convert RGBA to hex
|
|
2486
|
+
if len(color_rgba) >= 3:
|
|
2487
|
+
r, g, b = color_rgba[:3]
|
|
2488
|
+
# Convert to 0-255 range if needed
|
|
2489
|
+
if max(color_rgba[:3]) <= 1.0:
|
|
2490
|
+
r, g, b = int(r * 255), int(g * 255), int(b * 255)
|
|
2491
|
+
hex_color = f"#{r:02x}{g:02x}{b:02x}"
|
|
2492
|
+
colors.append(hex_color)
|
|
2493
|
+
|
|
2494
|
+
# Update the sample_color column
|
|
2495
|
+
self.samples_df = self.samples_df.with_columns(
|
|
2496
|
+
pl.Series("sample_color", colors).alias("sample_color")
|
|
2497
|
+
)
|
|
2498
|
+
|
|
2499
|
+
self.logger.debug(f"Reset sample colors using turbo colormap with even distribution ({n_samples} samples)")
|
|
2500
|
+
|
|
2501
|
+
except ImportError:
|
|
2502
|
+
self.logger.error("cmap library is required for sample color reset. Install with: pip install cmap")
|
|
2503
|
+
except Exception as e:
|
|
2504
|
+
self.logger.error(f"Failed to reset sample colors: {e}")
|
|
2505
|
+
|
|
2506
|
+
|
|
2507
|
+
def _get_color_palette(palette_name):
|
|
2508
|
+
"""
|
|
2509
|
+
Get color palette as a list of hex color codes using the cmap library.
|
|
2510
|
+
|
|
2511
|
+
Parameters:
|
|
2512
|
+
palette_name (str): Name of the palette
|
|
2513
|
+
|
|
2514
|
+
Returns:
|
|
2515
|
+
list: List of hex color codes
|
|
2516
|
+
|
|
2517
|
+
Raises:
|
|
2518
|
+
ValueError: If palette_name is not supported
|
|
2519
|
+
"""
|
|
2520
|
+
try:
|
|
2521
|
+
from cmap import Colormap
|
|
2522
|
+
except ImportError:
|
|
2523
|
+
raise ValueError("cmap library is required for color palettes. Install with: pip install cmap")
|
|
2524
|
+
|
|
2525
|
+
# Map common palette names to cmap names
|
|
2526
|
+
palette_mapping = {
|
|
2527
|
+
# Scientific colormaps
|
|
2528
|
+
"Turbo256": "turbo",
|
|
2529
|
+
"Viridis256": "viridis",
|
|
2530
|
+
"Plasma256": "plasma",
|
|
2531
|
+
"Inferno256": "inferno",
|
|
2532
|
+
"Magma256": "magma",
|
|
2533
|
+
"Cividis256": "cividis",
|
|
2534
|
+
|
|
2535
|
+
# Qualitative palettes
|
|
2536
|
+
"Set1": "Set1",
|
|
2537
|
+
"Set2": "Set2",
|
|
2538
|
+
"Set3": "Set3",
|
|
2539
|
+
"Tab10": "tab10",
|
|
2540
|
+
"Tab20": "tab20",
|
|
2541
|
+
"Dark2": "Dark2",
|
|
2542
|
+
"Paired": "Paired",
|
|
2543
|
+
|
|
2544
|
+
# Additional useful palettes
|
|
2545
|
+
"Spectral": "Spectral",
|
|
2546
|
+
"Rainbow": "rainbow",
|
|
2547
|
+
"Coolwarm": "coolwarm",
|
|
2548
|
+
"Seismic": "seismic",
|
|
2549
|
+
}
|
|
2550
|
+
|
|
2551
|
+
# Get the cmap name
|
|
2552
|
+
cmap_name = palette_mapping.get(palette_name, palette_name.lower())
|
|
2553
|
+
|
|
2554
|
+
try:
|
|
2555
|
+
# Create colormap
|
|
2556
|
+
cm = Colormap(cmap_name)
|
|
2557
|
+
|
|
2558
|
+
# Determine number of colors to generate
|
|
2559
|
+
if "256" in palette_name:
|
|
2560
|
+
n_colors = 256
|
|
2561
|
+
elif palette_name in ["Set1"]:
|
|
2562
|
+
n_colors = 9
|
|
2563
|
+
elif palette_name in ["Set2", "Dark2"]:
|
|
2564
|
+
n_colors = 8
|
|
2565
|
+
elif palette_name in ["Set3", "Paired"]:
|
|
2566
|
+
n_colors = 12
|
|
2567
|
+
elif palette_name in ["Tab10"]:
|
|
2568
|
+
n_colors = 10
|
|
2569
|
+
elif palette_name in ["Tab20"]:
|
|
2570
|
+
n_colors = 20
|
|
2571
|
+
else:
|
|
2572
|
+
n_colors = 256 # Default for continuous colormaps
|
|
2573
|
+
|
|
2574
|
+
# Generate colors
|
|
2575
|
+
if n_colors <= 20:
|
|
2576
|
+
# For discrete palettes, use evenly spaced indices
|
|
2577
|
+
indices = [i / (n_colors - 1) for i in range(n_colors)]
|
|
2578
|
+
else:
|
|
2579
|
+
# For continuous palettes, use full range
|
|
2580
|
+
indices = [i / (n_colors - 1) for i in range(n_colors)]
|
|
2581
|
+
|
|
2582
|
+
# Get colors as RGBA and convert to hex
|
|
2583
|
+
colors = cm(indices)
|
|
2584
|
+
hex_colors = []
|
|
2585
|
+
|
|
2586
|
+
for color in colors:
|
|
2587
|
+
if len(color) >= 3: # RGBA or RGB
|
|
2588
|
+
r, g, b = color[:3]
|
|
2589
|
+
# Convert to 0-255 range if needed
|
|
2590
|
+
if max(color[:3]) <= 1.0:
|
|
2591
|
+
r, g, b = int(r * 255), int(g * 255), int(b * 255)
|
|
2592
|
+
hex_color = f"#{r:02x}{g:02x}{b:02x}"
|
|
2593
|
+
hex_colors.append(hex_color)
|
|
2594
|
+
|
|
2595
|
+
return hex_colors
|
|
2596
|
+
|
|
2597
|
+
except Exception as e:
|
|
2598
|
+
raise ValueError(f"Failed to create colormap '{cmap_name}': {e}. "
|
|
2599
|
+
f"Available palettes: {list(palette_mapping.keys())}")
|
|
2600
|
+
|
|
2601
|
+
|
|
2602
|
+
def _sample_colors_from_colormap(palette_name, n_colors):
|
|
2603
|
+
"""
|
|
2604
|
+
Sample colors evenly from the whole colormap range, similar to sample_color_reset.
|
|
2605
|
+
|
|
2606
|
+
Parameters:
|
|
2607
|
+
palette_name (str): Name of the palette/colormap
|
|
2608
|
+
n_colors (int): Number of colors to sample
|
|
2609
|
+
|
|
2610
|
+
Returns:
|
|
2611
|
+
list: List of hex color codes sampled evenly from the colormap
|
|
2612
|
+
|
|
2613
|
+
Raises:
|
|
2614
|
+
ValueError: If palette_name is not supported
|
|
2615
|
+
"""
|
|
2616
|
+
try:
|
|
2617
|
+
from cmap import Colormap
|
|
2618
|
+
except ImportError:
|
|
2619
|
+
raise ValueError("cmap library is required for color palettes. Install with: pip install cmap")
|
|
2620
|
+
|
|
2621
|
+
# Map common palette names to cmap names (same as _get_color_palette)
|
|
2622
|
+
palette_mapping = {
|
|
2623
|
+
# Scientific colormaps
|
|
2624
|
+
"Turbo256": "turbo",
|
|
2625
|
+
"Viridis256": "viridis",
|
|
2626
|
+
"Plasma256": "plasma",
|
|
2627
|
+
"Inferno256": "inferno",
|
|
2628
|
+
"Magma256": "magma",
|
|
2629
|
+
"Cividis256": "cividis",
|
|
2630
|
+
|
|
2631
|
+
# Qualitative palettes
|
|
2632
|
+
"Set1": "Set1",
|
|
2633
|
+
"Set2": "Set2",
|
|
2634
|
+
"Set3": "Set3",
|
|
2635
|
+
"Tab10": "tab10",
|
|
2636
|
+
"Tab20": "tab20",
|
|
2637
|
+
"Dark2": "Dark2",
|
|
2638
|
+
"Paired": "Paired",
|
|
2639
|
+
|
|
2640
|
+
# Additional useful palettes
|
|
2641
|
+
"Spectral": "Spectral",
|
|
2642
|
+
"Rainbow": "rainbow",
|
|
2643
|
+
"Coolwarm": "coolwarm",
|
|
2644
|
+
"Seismic": "seismic",
|
|
2645
|
+
}
|
|
2646
|
+
|
|
2647
|
+
# Get the cmap name
|
|
2648
|
+
cmap_name = palette_mapping.get(palette_name, palette_name.lower())
|
|
2649
|
+
|
|
2650
|
+
try:
|
|
2651
|
+
# Create colormap
|
|
2652
|
+
cm = Colormap(cmap_name)
|
|
2653
|
+
|
|
2654
|
+
colors = []
|
|
2655
|
+
|
|
2656
|
+
# Distribute samples evenly across the full colormap range (same approach as sample_color_reset)
|
|
2657
|
+
for i in range(n_colors):
|
|
2658
|
+
# Evenly distribute samples across colormap (avoiding endpoints to prevent white/black)
|
|
2659
|
+
normalized_value = (i + 0.5) / n_colors # +0.5 to center samples in their bins
|
|
2660
|
+
# Map to a subset of colormap to avoid extreme colors (use 10% to 90% range)
|
|
2661
|
+
normalized_value = 0.1 + (normalized_value * 0.8)
|
|
2662
|
+
|
|
2663
|
+
color_rgba = cm(normalized_value)
|
|
2664
|
+
|
|
2665
|
+
# Convert RGBA to hex
|
|
2666
|
+
if len(color_rgba) >= 3:
|
|
2667
|
+
r, g, b = color_rgba[:3]
|
|
2668
|
+
# Convert to 0-255 range if needed
|
|
2669
|
+
if max(color_rgba[:3]) <= 1.0:
|
|
2670
|
+
r, g, b = int(r * 255), int(g * 255), int(b * 255)
|
|
2671
|
+
hex_color = f"#{r:02x}{g:02x}{b:02x}"
|
|
2672
|
+
colors.append(hex_color)
|
|
2673
|
+
|
|
2674
|
+
return colors
|
|
2675
|
+
|
|
2676
|
+
except Exception as e:
|
|
2677
|
+
raise ValueError(f"Failed to create colormap '{cmap_name}': {e}. "
|
|
2678
|
+
f"Available palettes: {list(palette_mapping.keys())}")
|
|
2679
|
+
|
|
2680
|
+
|
|
2681
|
+
def _matplotlib_to_hex(color_dict):
|
|
2682
|
+
"""Convert matplotlib color dictionary to list of hex colors."""
|
|
2683
|
+
return list(color_dict.values())
|
|
2684
|
+
|
|
2685
|
+
|
|
2686
|
+
# =====================================================================================
|
|
2687
|
+
# SCHEMA AND DATA STRUCTURE FUNCTIONS
|
|
2688
|
+
# =====================================================================================
|
|
2689
|
+
|
|
2690
|
+
|
|
2691
|
+
def _ensure_features_df_schema_order(self):
|
|
2692
|
+
"""
|
|
2693
|
+
Ensure features_df columns are ordered according to study5_schema.json.
|
|
2694
|
+
|
|
2695
|
+
This method should be called after operations that might scramble the column order.
|
|
2696
|
+
"""
|
|
2697
|
+
if self.features_df is None or self.features_df.is_empty():
|
|
2698
|
+
return
|
|
2699
|
+
|
|
2700
|
+
try:
|
|
2701
|
+
import os
|
|
2702
|
+
import json
|
|
2703
|
+
from masster.study.h5 import _reorder_columns_by_schema
|
|
2704
|
+
|
|
2705
|
+
# Load schema
|
|
2706
|
+
schema_path = os.path.join(os.path.dirname(__file__), "study5_schema.json")
|
|
2707
|
+
with open(schema_path, 'r') as f:
|
|
2708
|
+
schema = json.load(f)
|
|
2709
|
+
|
|
2710
|
+
# Reorder columns to match schema
|
|
2711
|
+
self.features_df = _reorder_columns_by_schema(self.features_df, schema, 'features_df')
|
|
2712
|
+
|
|
2713
|
+
except Exception as e:
|
|
2714
|
+
self.logger.warning(f"Failed to reorder features_df columns: {e}")
|
|
@@ -71,11 +71,8 @@ def add(
|
|
|
71
71
|
|
|
72
72
|
# Build search pattern
|
|
73
73
|
if any(char in folder for char in ["*", "?", "[", "]"]):
|
|
74
|
-
# If folder already contains glob patterns,
|
|
75
|
-
|
|
76
|
-
pattern = folder.replace("*.sample5", f"*{ext}")
|
|
77
|
-
else:
|
|
78
|
-
pattern = os.path.join(search_folder, "**", f"*{ext}")
|
|
74
|
+
# If folder already contains glob patterns, use it as-is
|
|
75
|
+
pattern = folder
|
|
79
76
|
else:
|
|
80
77
|
pattern = os.path.join(search_folder, "**", f"*{ext}")
|
|
81
78
|
|
|
@@ -233,6 +230,7 @@ def add_sample(self, file, type=None, reset=False, adducts=None):
|
|
|
233
230
|
"file_source": [getattr(ddaobj, "file_source", file)],
|
|
234
231
|
"ms1": [ms1_count],
|
|
235
232
|
"ms2": [ms2_count],
|
|
233
|
+
"sample_color": [None], # Will be set by set_sample_color below
|
|
236
234
|
},
|
|
237
235
|
schema={
|
|
238
236
|
"sample_uid": pl.Int64,
|
|
@@ -244,6 +242,7 @@ def add_sample(self, file, type=None, reset=False, adducts=None):
|
|
|
244
242
|
"file_source": pl.Utf8,
|
|
245
243
|
"ms1": pl.Int64,
|
|
246
244
|
"ms2": pl.Int64,
|
|
245
|
+
"sample_color": pl.Utf8,
|
|
247
246
|
},
|
|
248
247
|
)
|
|
249
248
|
self.samples_df = pl.concat([self.samples_df, new_sample])
|
|
@@ -309,6 +308,9 @@ def add_sample(self, file, type=None, reset=False, adducts=None):
|
|
|
309
308
|
# Ensure features_df column order matches schema
|
|
310
309
|
self._ensure_features_df_schema_order()
|
|
311
310
|
|
|
311
|
+
# Auto-assign colors when new sample is added (reset all colors using turbo colormap based on UID)
|
|
312
|
+
self.sample_color_reset()
|
|
313
|
+
|
|
312
314
|
self.logger.debug(
|
|
313
315
|
f"Added sample {sample_name} with {ddaobj.features.size()} features to the study.",
|
|
314
316
|
)
|