masster 0.3.17__tar.gz → 0.3.18__tar.gz

This diff shows the content changes between publicly released versions of the package as they appear in their respective public registries. The information in this diff is provided for informational purposes only.

Potentially problematic release.


This version of masster has been flagged as potentially problematic; see the registry's advisory page for more details.

Files changed (78) hide show
  1. {masster-0.3.17 → masster-0.3.18}/PKG-INFO +1 -1
  2. {masster-0.3.17 → masster-0.3.18}/pyproject.toml +1 -1
  3. {masster-0.3.17 → masster-0.3.18}/src/masster/_version.py +1 -1
  4. {masster-0.3.17 → masster-0.3.18}/src/masster/study/helpers.py +55 -35
  5. {masster-0.3.17 → masster-0.3.18}/src/masster/study/load.py +4 -3
  6. {masster-0.3.17 → masster-0.3.18}/src/masster/study/plot.py +66 -5
  7. {masster-0.3.17 → masster-0.3.18}/uv.lock +1 -1
  8. {masster-0.3.17 → masster-0.3.18}/.github/workflows/publish.yml +0 -0
  9. {masster-0.3.17 → masster-0.3.18}/.github/workflows/security.yml +0 -0
  10. {masster-0.3.17 → masster-0.3.18}/.github/workflows/test.yml +0 -0
  11. {masster-0.3.17 → masster-0.3.18}/.gitignore +0 -0
  12. {masster-0.3.17 → masster-0.3.18}/.pre-commit-config.yaml +0 -0
  13. {masster-0.3.17 → masster-0.3.18}/LICENSE +0 -0
  14. {masster-0.3.17 → masster-0.3.18}/Makefile +0 -0
  15. {masster-0.3.17 → masster-0.3.18}/README.md +0 -0
  16. {masster-0.3.17 → masster-0.3.18}/TESTING.md +0 -0
  17. {masster-0.3.17 → masster-0.3.18}/demo/example_batch_process.py +0 -0
  18. {masster-0.3.17 → masster-0.3.18}/demo/example_sample_process.py +0 -0
  19. {masster-0.3.17 → masster-0.3.18}/src/masster/__init__.py +0 -0
  20. {masster-0.3.17 → masster-0.3.18}/src/masster/chromatogram.py +0 -0
  21. {masster-0.3.17 → masster-0.3.18}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +0 -0
  22. {masster-0.3.17 → masster-0.3.18}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  23. {masster-0.3.17 → masster-0.3.18}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  24. {masster-0.3.17 → masster-0.3.18}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  25. {masster-0.3.17 → masster-0.3.18}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  26. {masster-0.3.17 → masster-0.3.18}/src/masster/logger.py +0 -0
  27. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/__init__.py +0 -0
  28. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/defaults/__init__.py +0 -0
  29. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/defaults/find_adducts_def.py +0 -0
  30. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/defaults/find_features_def.py +0 -0
  31. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/defaults/find_ms2_def.py +0 -0
  32. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
  33. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/defaults/sample_def.py +0 -0
  34. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/h5.py +0 -0
  35. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/helpers.py +0 -0
  36. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/lib.py +0 -0
  37. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/load.py +0 -0
  38. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/parameters.py +0 -0
  39. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/plot.py +0 -0
  40. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/processing.py +0 -0
  41. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/quant.py +0 -0
  42. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/sample.py +0 -0
  43. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/sample5_schema.json +0 -0
  44. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/save.py +0 -0
  45. {masster-0.3.17 → masster-0.3.18}/src/masster/sample/sciex.py +0 -0
  46. {masster-0.3.17 → masster-0.3.18}/src/masster/spectrum.py +0 -0
  47. {masster-0.3.17 → masster-0.3.18}/src/masster/study/__init__.py +0 -0
  48. {masster-0.3.17 → masster-0.3.18}/src/masster/study/defaults/__init__.py +0 -0
  49. {masster-0.3.17 → masster-0.3.18}/src/masster/study/defaults/align_def.py +0 -0
  50. {masster-0.3.17 → masster-0.3.18}/src/masster/study/defaults/export_def.py +0 -0
  51. {masster-0.3.17 → masster-0.3.18}/src/masster/study/defaults/fill_chrom_def.py +0 -0
  52. {masster-0.3.17 → masster-0.3.18}/src/masster/study/defaults/fill_def.py +0 -0
  53. {masster-0.3.17 → masster-0.3.18}/src/masster/study/defaults/find_consensus_def.py +0 -0
  54. {masster-0.3.17 → masster-0.3.18}/src/masster/study/defaults/find_ms2_def.py +0 -0
  55. {masster-0.3.17 → masster-0.3.18}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
  56. {masster-0.3.17 → masster-0.3.18}/src/masster/study/defaults/integrate_def.py +0 -0
  57. {masster-0.3.17 → masster-0.3.18}/src/masster/study/defaults/merge_def.py +0 -0
  58. {masster-0.3.17 → masster-0.3.18}/src/masster/study/defaults/study_def.py +0 -0
  59. {masster-0.3.17 → masster-0.3.18}/src/masster/study/export.py +0 -0
  60. {masster-0.3.17 → masster-0.3.18}/src/masster/study/h5.py +0 -0
  61. {masster-0.3.17 → masster-0.3.18}/src/masster/study/helpers_optimized.py +0 -0
  62. {masster-0.3.17 → masster-0.3.18}/src/masster/study/parameters.py +0 -0
  63. {masster-0.3.17 → masster-0.3.18}/src/masster/study/processing.py +0 -0
  64. {masster-0.3.17 → masster-0.3.18}/src/masster/study/save.py +0 -0
  65. {masster-0.3.17 → masster-0.3.18}/src/masster/study/study.py +0 -0
  66. {masster-0.3.17 → masster-0.3.18}/src/masster/study/study5_schema.json +0 -0
  67. {masster-0.3.17 → masster-0.3.18}/tests/conftest.py +0 -0
  68. {masster-0.3.17 → masster-0.3.18}/tests/test_chromatogram.py +0 -0
  69. {masster-0.3.17 → masster-0.3.18}/tests/test_defaults.py +0 -0
  70. {masster-0.3.17 → masster-0.3.18}/tests/test_imports.py +0 -0
  71. {masster-0.3.17 → masster-0.3.18}/tests/test_integration.py +0 -0
  72. {masster-0.3.17 → masster-0.3.18}/tests/test_logger.py +0 -0
  73. {masster-0.3.17 → masster-0.3.18}/tests/test_parameters.py +0 -0
  74. {masster-0.3.17 → masster-0.3.18}/tests/test_sample.py +0 -0
  75. {masster-0.3.17 → masster-0.3.18}/tests/test_spectrum.py +0 -0
  76. {masster-0.3.17 → masster-0.3.18}/tests/test_study.py +0 -0
  77. {masster-0.3.17 → masster-0.3.18}/tests/test_version.py +0 -0
  78. {masster-0.3.17 → masster-0.3.18}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: masster
3
- Version: 0.3.17
3
+ Version: 0.3.18
4
4
  Summary: Mass spectrometry data analysis package
5
5
  Project-URL: homepage, https://github.com/zamboni-lab/masster
6
6
  Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -1,7 +1,7 @@
1
1
 
2
2
  [project]
3
3
  name = "masster"
4
- version = "0.3.17"
4
+ version = "0.3.18"
5
5
  description = "Mass spectrometry data analysis package"
6
6
  authors = [
7
7
  { name = "Zamboni Lab" }
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
 
4
- __version__ = "0.3.17"
4
+ __version__ = "0.3.18"
5
5
 
6
6
 
7
7
  def get_version():
@@ -479,7 +479,9 @@ def get_consensus(self, quant="chrom_area"):
479
479
  # sort by consensus_id
480
480
  df1 = df1.sort_index()
481
481
 
482
- df2 = self.get_consensus_matrix(quant=quant)
482
+ df2_polars = self.get_consensus_matrix(quant=quant)
483
+ # Convert to pandas for merging (since the result is used for export)
484
+ df2 = df2_polars.to_pandas().set_index("consensus_uid")
483
485
  # sort df2 row by consensus_id
484
486
  df2 = df2.sort_index()
485
487
  # merge df and df2 on consensus_id
@@ -492,6 +494,7 @@ def get_consensus(self, quant="chrom_area"):
492
494
  def get_consensus_matrix(self, quant="chrom_area"):
493
495
  """
494
496
  Get a matrix of consensus features with samples as columns and consensus features as rows.
497
+ Optimized implementation that avoids expensive join operations.
495
498
  """
496
499
  if quant not in self.features_df.columns:
497
500
  self.logger.error(
@@ -499,41 +502,58 @@ def get_consensus_matrix(self, quant="chrom_area"):
499
502
  )
500
503
  return None
501
504
 
502
- # Use Polars join instead of pandas merge
503
- features_subset = self.features_df.select(["feature_uid", "sample_uid", quant])
504
- consensus_mapping_subset = self.consensus_mapping_df.select([
505
- "consensus_uid",
506
- "feature_uid",
507
- ])
508
-
509
- df1 = features_subset.join(
510
- consensus_mapping_subset,
511
- on="feature_uid",
512
- how="left",
513
- )
514
-
515
- # Convert to pandas for pivot operation (Polars pivot is still evolving)
516
- df1_pd = df1.to_pandas()
517
- df2 = df1_pd.pivot_table(
518
- index="consensus_uid",
519
- columns="sample_uid",
520
- values=quant,
521
- aggfunc="max",
522
- )
523
-
524
- # Create sample_uid to sample_name mapping using Polars
525
- sample_mapping = dict(
526
- self.samples_df.select(["sample_uid", "sample_name"]).iter_rows(),
527
- )
528
- # replace sample_uid with sample_name in df2
529
- df2 = df2.rename(columns=sample_mapping)
505
+ # Create a lookup dictionary from features_df for O(1) value access
506
+ feature_values = {}
507
+ for row in self.features_df.iter_rows(named=True):
508
+ feature_uid = row['feature_uid']
509
+ sample_uid = row['sample_uid']
510
+ value = row[quant] if row[quant] is not None else 0
511
+ feature_values[(feature_uid, sample_uid)] = value
512
+
513
+ # Build consensus matrix directly using the consensus_mapping_df
514
+ matrix_dict = {}
515
+ sample_mapping = dict(self.samples_df.select(["sample_uid", "sample_name"]).iter_rows())
516
+
517
+ for row in self.consensus_mapping_df.iter_rows(named=True):
518
+ consensus_uid = row['consensus_uid']
519
+ sample_uid = row['sample_uid']
520
+ feature_uid = row['feature_uid']
521
+
522
+ # Look up the quantification value
523
+ key = (feature_uid, sample_uid)
524
+ value = feature_values.get(key, 0)
525
+
526
+ if consensus_uid not in matrix_dict:
527
+ matrix_dict[consensus_uid] = {}
528
+
529
+ sample_name = sample_mapping.get(sample_uid, f"sample_{sample_uid}")
530
+
531
+ # Take max if multiple features map to same consensus/sample combination
532
+ if sample_name in matrix_dict[consensus_uid]:
533
+ matrix_dict[consensus_uid][sample_name] = max(matrix_dict[consensus_uid][sample_name], value)
534
+ else:
535
+ matrix_dict[consensus_uid][sample_name] = value
530
536
 
531
- # round to integer
532
- df2 = df2.round()
533
- # set consensus_id as uint64
534
- df2.index = df2.index.astype("uint64")
535
- # set index to consensus_id
536
- df2.index.name = "consensus_uid"
537
+ # Convert to Polars DataFrame with proper formatting
538
+ import polars as pl
539
+
540
+ # Convert matrix_dict to list of records for Polars
541
+ records = []
542
+ for consensus_uid, sample_values in matrix_dict.items():
543
+ record = {"consensus_uid": consensus_uid}
544
+ record.update(sample_values)
545
+ records.append(record)
546
+
547
+ # Create Polars DataFrame and set proper data types
548
+ df2 = pl.DataFrame(records)
549
+
550
+ # Fill null values with 0 and round numeric columns
551
+ numeric_cols = [col for col in df2.columns if col != "consensus_uid"]
552
+ df2 = df2.with_columns([
553
+ pl.col("consensus_uid").cast(pl.UInt64),
554
+ *[pl.col(col).fill_null(0).round(0) for col in numeric_cols]
555
+ ])
556
+
537
557
  return df2
538
558
 
539
559
 
@@ -1379,7 +1379,7 @@ def _add_sample_optimized(self, file, type=None, reset=False, adducts=None, skip
1379
1379
  self.samples_df = pl.concat([self.samples_df, new_sample])
1380
1380
 
1381
1381
  # SIMPLIFIED feature processing
1382
- current_sample_uid = len(self.samples_df) - 1
1382
+ current_sample_uid = len(self.samples_df)
1383
1383
 
1384
1384
  # Add required columns with minimal operations
1385
1385
  columns_to_add = [
@@ -1520,7 +1520,7 @@ def _add_sample_standard(self, file, type=None, reset=False, adducts=None, skip_
1520
1520
  self.samples_df = pl.concat([self.samples_df, new_sample])
1521
1521
 
1522
1522
  # SIMPLIFIED feature processing
1523
- current_sample_uid = len(self.samples_df) - 1
1523
+ current_sample_uid = len(self.samples_df)
1524
1524
 
1525
1525
  # Add required columns with minimal operations
1526
1526
  columns_to_add = [
@@ -1621,7 +1621,7 @@ def _add_sample_standard(self, file, type=None, reset=False, adducts=None, skip_
1621
1621
  self.samples_df = pl.concat([self.samples_df, new_sample])
1622
1622
 
1623
1623
  # SIMPLIFIED feature processing
1624
- current_sample_uid = len(self.samples_df) - 1
1624
+ current_sample_uid = len(self.samples_df)
1625
1625
 
1626
1626
  # Add required columns with minimal operations
1627
1627
  columns_to_add = [
@@ -1695,3 +1695,4 @@ def _sample_color_reset_optimized(self):
1695
1695
  )
1696
1696
 
1697
1697
  self.logger.debug(f"Reset sample colors (cached) for {n_samples} samples")
1698
+
@@ -17,7 +17,7 @@ hv.extension("bokeh")
17
17
  from bokeh.layouts import row as bokeh_row
18
18
 
19
19
 
20
- def plot_alignment(self, maps: bool = True, filename: str | None = None, width: int = 450, height: int = 450, markersize: int = 3):
20
+ def plot_alignment(self, maps: bool = True, samples: int | list[int | str] | None = None, filename: str | None = None, width: int = 450, height: int = 450, markersize: int = 3):
21
21
  """Visualize retention time alignment using two synchronized Bokeh scatter plots.
22
22
 
23
23
  - When ``maps=True`` the function reads ``self.features_maps`` (list of FeatureMap)
@@ -27,6 +27,11 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
27
27
 
28
28
  Parameters
29
29
  - maps: whether to use feature maps (default True).
30
+ - samples: Sample selection parameter, interpreted like in plot_samples_2d:
31
+ - None: show all samples
32
+ - int: show a random subset of N samples
33
+ - list of ints: show samples with these sample_uids
34
+ - list of strings: show samples with these sample_names
30
35
  - filename: optional HTML file path to save the plot.
31
36
  - width/height: pixel size of each subplot.
32
37
  - markersize: base marker size.
@@ -54,6 +59,32 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
54
59
  self.logger.error("No feature maps available for plotting.")
55
60
  return
56
61
 
62
+ # Get sample_uids to limit which samples to show
63
+ sample_uids_to_show = self._get_sample_uids(samples)
64
+
65
+ # Filter feature maps based on sample selection
66
+ if sample_uids_to_show is not None:
67
+ # Get sample indices for the selected sample_uids
68
+ selected_indices = []
69
+ if hasattr(self, 'samples_df') and self.samples_df is not None and not self.samples_df.is_empty():
70
+ samples_info = self.samples_df.to_pandas()
71
+ for idx, row in samples_info.iterrows():
72
+ if row.get('sample_uid') in sample_uids_to_show:
73
+ selected_indices.append(idx)
74
+ else:
75
+ # If no samples_df, just limit to the first N samples
76
+ if isinstance(samples, int):
77
+ selected_indices = list(range(min(samples, len(fmaps))))
78
+ else:
79
+ selected_indices = list(range(len(fmaps)))
80
+
81
+ # Filter feature maps to only include selected indices
82
+ fmaps = [fmaps[i] for i in selected_indices if i < len(fmaps)]
83
+
84
+ if not fmaps:
85
+ self.logger.error("No feature maps match the selected samples.")
86
+ return
87
+
57
88
  # Reference (first) sample: use current RT for both before and after
58
89
  ref = fmaps[0]
59
90
  ref_rt = [f.getRT() for f in ref]
@@ -143,6 +174,28 @@ def plot_alignment(self, maps: bool = True, filename: str | None = None, width:
143
174
  self.logger.error("No sample identifier column found in features_df.")
144
175
  return
145
176
 
177
+ # Get sample_uids to limit which samples to show
178
+ sample_uids_to_show = self._get_sample_uids(samples)
179
+
180
+ # Filter features_df based on sample selection if specified
181
+ if sample_uids_to_show is not None:
182
+ if sample_col == 'sample_uid':
183
+ features_df = features_df.filter(pl.col('sample_uid').is_in(sample_uids_to_show))
184
+ else:
185
+ # Need to convert sample names to sample_uids if using sample_name column
186
+ if 'sample_uid' in features_df.columns:
187
+ # Filter by sample_uid even though we're using sample_name as the primary column
188
+ features_df = features_df.filter(pl.col('sample_uid').is_in(sample_uids_to_show))
189
+ else:
190
+ # Convert sample_uids to sample_names and filter
191
+ sample_names_to_show = []
192
+ if hasattr(self, 'samples_df') and self.samples_df is not None:
193
+ for uid in sample_uids_to_show:
194
+ matching_rows = self.samples_df.filter(pl.col("sample_uid") == uid)
195
+ if not matching_rows.is_empty():
196
+ sample_names_to_show.append(matching_rows.row(0, named=True)["sample_name"])
197
+ features_df = features_df.filter(pl.col('sample_name').is_in(sample_names_to_show))
198
+
146
199
  # Get unique samples using Polars
147
200
  samples = features_df.select(pl.col(sample_col)).unique().to_series().to_list()
148
201
 
@@ -1649,11 +1702,19 @@ def plot_pca(
1649
1702
 
1650
1703
  self.logger.debug(f"Performing PCA on consensus matrix with shape: {consensus_matrix.shape}")
1651
1704
 
1652
- # Convert consensus matrix to numpy if it's not already
1653
- if hasattr(consensus_matrix, "values"):
1705
+ # Convert consensus matrix to numpy - handle both Polars and pandas DataFrames
1706
+ if hasattr(consensus_matrix, "to_numpy"):
1707
+ # Polars or pandas DataFrame
1708
+ if hasattr(consensus_matrix, "select"):
1709
+ # Polars DataFrame - exclude the consensus_uid column
1710
+ numeric_cols = [col for col in consensus_matrix.columns if col != "consensus_uid"]
1711
+ matrix_data = consensus_matrix.select(numeric_cols).to_numpy()
1712
+ else:
1713
+ # Pandas DataFrame
1714
+ matrix_data = consensus_matrix.to_numpy()
1715
+ elif hasattr(consensus_matrix, "values"):
1716
+ # Pandas DataFrame
1654
1717
  matrix_data = consensus_matrix.values
1655
- elif hasattr(consensus_matrix, "to_numpy"):
1656
- matrix_data = consensus_matrix.to_numpy()
1657
1718
  else:
1658
1719
  matrix_data = np.array(consensus_matrix)
1659
1720
 
@@ -1372,7 +1372,7 @@ wheels = [
1372
1372
 
1373
1373
  [[package]]
1374
1374
  name = "masster"
1375
- version = "0.3.17"
1375
+ version = "0.3.18"
1376
1376
  source = { editable = "." }
1377
1377
  dependencies = [
1378
1378
  { name = "alphabase" },
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes