masster 0.4.22__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/study/study.py CHANGED
@@ -52,36 +52,26 @@ import sys
52
52
  import polars as pl
53
53
 
54
54
  # Study-specific imports
55
- from masster.study.h5 import _load_study5
56
- from masster.study.h5 import _save_study5
57
- from masster.study.h5 import _save_study5_compressed
58
- from masster.study.h5 import _load_ms1
55
+ from masster.study.analysis import analyze_umap
59
56
  from masster.study.helpers import _get_consensus_uids
60
- from masster.study.helpers import _get_feature_uids
61
- from masster.study.helpers import _get_sample_uids
62
- from masster.study.helpers import _ensure_features_df_schema_order
57
+ from masster.study.helpers import _get_features_uids
58
+ from masster.study.helpers import _get_samples_uids
63
59
  from masster.study.helpers import compress
64
- from masster.study.helpers import compress_features
65
- from masster.study.helpers import compress_ms2
66
- from masster.study.helpers import compress_chrom
67
- from masster.study.helpers import restore_features
68
- from masster.study.helpers import restore_chrom
69
- from masster.study.helpers import restore_ms2
70
60
  from masster.study.helpers import decompress
71
61
  from masster.study.helpers import fill_reset
72
62
  from masster.study.helpers import get_chrom
73
- from masster.study.helpers import get_sample
63
+ from masster.study.helpers import get_samples
74
64
  from masster.study.helpers import get_consensus
75
65
  from masster.study.helpers import get_consensus_matches
76
66
  from masster.study.helpers import get_consensus_matrix
77
67
  from masster.study.helpers import get_orphans
68
+ from masster.study.helpers import get_sample_stats
78
69
  from masster.study.helpers import get_gaps_matrix
79
70
  from masster.study.helpers import get_gaps_stats
80
71
  from masster.study.helpers import align_reset
81
- from masster.study.helpers import set_folder
82
- from masster.study.helpers import set_source
83
- from masster.study.helpers import sample_color
84
- from masster.study.helpers import sample_color_reset
72
+ from masster.study.helpers import set_study_folder
73
+ from masster.study.helpers import set_samples_source
74
+ from masster.study.helpers import set_samples_color
85
75
  from masster.study.helpers import sample_name_replace
86
76
  from masster.study.helpers import sample_name_reset
87
77
  from masster.study.helpers import samples_select
@@ -94,61 +84,34 @@ from masster.study.helpers import consensus_filter
94
84
  from masster.study.helpers import consensus_delete
95
85
  from masster.study.load import add
96
86
  from masster.study.load import add_sample
97
- from masster.study.load import _add_samples_batch
98
- from masster.study.load import _add_sample_optimized
99
- from masster.study.load import _add_sample_standard
100
- from masster.study.load import _sample_color_reset_optimized
101
- from masster.study.load import fill_single
102
87
  from masster.study.load import fill
103
- from masster.study.load import _process_sample_for_parallel_fill
104
- from masster.study.load import _get_missing_consensus_sample_combinations
105
88
  from masster.study.load import load
106
- from masster.study.load import _load_consensusXML
107
- from masster.study.load import load_features
108
- from masster.study.load import sanitize
89
+ from masster.study.load import _load_features
109
90
  from masster.study.plot import plot_alignment
110
91
  from masster.study.plot import plot_consensus_2d
111
92
  from masster.study.plot import plot_samples_2d
112
93
  from masster.study.plot import plot_consensus_stats
113
94
  from masster.study.plot import plot_chrom
114
- from masster.study.plot import plot_pca
95
+ from masster.study.plot import plot_samples_pca
96
+ from masster.study.plot import plot_samples_umap
115
97
  from masster.study.plot import plot_bpc
116
98
  from masster.study.plot import plot_tic
117
99
  from masster.study.plot import plot_eic
118
100
  from masster.study.plot import plot_rt_correction
119
101
  from masster.study.processing import align
120
102
  from masster.study.merge import merge
121
- from masster.study.merge import _reset_consensus_data
122
- from masster.study.merge import _extract_consensus_features
123
- from masster.study.merge import _perform_adduct_grouping
124
- from masster.study.merge import _consensus_cleanup
125
- from masster.study.merge import _identify_adduct_by_mass_shift
126
- from masster.study.merge import _finalize_merge
127
- from masster.study.merge import _count_tight_clusters
128
103
  from masster.study.processing import integrate
129
104
  from masster.study.processing import find_ms2
130
105
  from masster.study.processing import find_iso
131
106
  from masster.study.processing import reset_iso
132
- from masster.study.parameters import store_history
107
+ from masster.study.parameters import update_history
133
108
  from masster.study.parameters import get_parameters
134
109
  from masster.study.parameters import update_parameters
135
110
  from masster.study.parameters import get_parameters_property
136
111
  from masster.study.parameters import set_parameters_property
137
- from masster.study.save import save, save_consensus, _save_consensusXML, save_samples
138
- from masster.study.export import (
139
- export_mgf,
140
- export_mztab,
141
- export_xlsx,
142
- export_parquet,
143
- _get_mgf_df,
144
- )
112
+ from masster.study.save import save, save_consensus, save_samples
113
+ from masster.study.export import export_mgf, export_mztab, export_xlsx, export_parquet
145
114
  from masster.study.id import lib_load, identify, get_id, id_reset, lib_reset
146
- from masster.study.id import (
147
- _get_adducts,
148
- _calculate_formula_mass_shift,
149
- _format_adduct_name,
150
- _parse_element_counts,
151
- )
152
115
 
153
116
  from masster.logger import MassterLogger
154
117
  from masster.study.defaults.study_def import study_defaults
@@ -253,8 +216,24 @@ class Study:
253
216
  For backward compatibility, original signature is supported:
254
217
  Study(folder=..., label=..., log_level=..., log_label=..., log_sink=...)
255
218
  """
256
- # Initialize default parameters
219
+ # ===== PARAMETER INITIALIZATION =====
220
+ auto_load_filename = self._init_parameters(filename, kwargs)
221
+
222
+ # ===== DATA STRUCTURES INITIALIZATION =====
223
+ self._init_data_structures()
224
+
225
+ # ===== LOGGER INITIALIZATION =====
226
+ self._init_logger()
227
+
228
+ # ===== AUTO-LOAD FILE IF PROVIDED =====
229
+ if auto_load_filename is not None:
230
+ self.load(filename=auto_load_filename)
257
231
 
232
+ # ===== SAMPLE CACHE =====
233
+ self._samples_cache = {}
234
+
235
+ def _init_parameters(self, filename, kwargs):
236
+ """Initialize parameters and handle filename for auto-loading."""
258
237
  # Handle filename parameter for automatic loading
259
238
  auto_load_filename = None
260
239
  if filename is not None:
@@ -281,13 +260,11 @@ class Study:
281
260
  if hasattr(params, key):
282
261
  params.set(key, value, validate=True)
283
262
 
284
- # Keeps a pointer to study5 whenever it's saved or loaded
285
- self.filename = None
286
-
287
- # Store parameter instance for method access
263
+ # Store parameter instance and initialize history
264
+ self.filename = None # Keeps a pointer to study5 whenever it's saved or loaded
288
265
  self.parameters = params
289
266
  self.history = {}
290
- self.store_history(["study"], params.to_dict())
267
+ self.update_history(["study"], params.to_dict())
291
268
 
292
269
  # Set instance attributes (ensure proper string values for logger)
293
270
  self.folder = params.folder
@@ -301,10 +278,15 @@ class Study:
301
278
  self.log_label = params.log_label + " | " if params.log_label else ""
302
279
  self.log_sink = params.log_sink
303
280
 
281
+ # Create folder if it doesn't exist
304
282
  if self.folder is not None and not os.path.exists(self.folder):
305
- # create the folder if it does not exist
306
283
  os.makedirs(self.folder)
284
+
285
+ return auto_load_filename
307
286
 
287
+ def _init_data_structures(self):
288
+ """Initialize all data structures used by the Study."""
289
+ # Sample information DataFrame
308
290
  self.samples_df = pl.DataFrame(
309
291
  {
310
292
  "sample_uid": [],
@@ -337,21 +319,24 @@ class Study:
337
319
  "num_ms2": pl.Int64,
338
320
  },
339
321
  )
322
+
323
+ # Feature-related data structures
340
324
  self.features_maps = []
341
325
  self.features_df = pl.DataFrame()
326
+
327
+ # Consensus-related data structures
342
328
  self.consensus_ms2 = pl.DataFrame()
343
329
  self.consensus_df = pl.DataFrame()
344
330
  self.consensus_map = None
345
331
  self.consensus_mapping_df = pl.DataFrame()
346
332
  self.alignment_ref_index = None
347
333
 
348
- # Library DataFrame (populated by lib_load)
349
- self.lib_df = pl.DataFrame()
334
+ # Library and identification data structures
335
+ self.lib_df = pl.DataFrame() # populated by lib_load
336
+ self.id_df = pl.DataFrame() # populated by identify
350
337
 
351
- # Identification results DataFrame (populated by identify)
352
- self.id_df = pl.DataFrame()
353
-
354
- # Initialize independent logger
338
+ def _init_logger(self):
339
+ """Initialize the logger for this Study instance."""
355
340
  self.logger = MassterLogger(
356
341
  instance_type="study",
357
342
  level=self.log_level.upper(),
@@ -361,85 +346,68 @@ class Study:
361
346
  self.logger.debug(f"Study folder: {self.folder}")
362
347
  self.logger.debug(f"Polarity: {self.polarity}")
363
348
 
364
- # Auto-load study file if filename was provided
365
- if auto_load_filename is not None:
366
- self.load(filename=auto_load_filename)
367
-
368
- # cache for Sample instances created/loaded by this Study
369
- self._samples_cache = {}
370
-
371
- # ===== ATTACH MODULE FUNCTIONS AS CLASS METHODS =====
372
-
373
349
  # === File I/O Operations ===
374
350
  load = load
375
351
  save = save
376
352
  save_consensus = save_consensus
377
353
  save_samples = save_samples
378
- sanitize = sanitize
379
- set_folder = set_folder
354
+ set_study_folder = set_study_folder
380
355
 
381
356
  # === Sample Management ===
382
357
  add = add
383
- add_folder = add # backward compatibility alias
384
358
  add_sample = add_sample
385
359
 
386
360
  # === Core Processing Operations ===
387
361
  align = align
388
- merge = merge
389
- find_consensus = merge # Backward compatibility alias
362
+ merge = merge
363
+
390
364
  find_ms2 = find_ms2
391
365
  find_iso = find_iso
392
366
  reset_iso = reset_iso
393
367
  iso_reset = reset_iso
394
368
  integrate = integrate
395
- integrate_chrom = integrate # Backward compatibility alias
369
+
396
370
  fill = fill
397
- fill_chrom = fill # Backward compatibility alias
398
- fill_single = fill_single
399
- fill_chrom_single = fill_single # Backward compatibility alias
371
+
400
372
  # === Data Retrieval and Access ===
401
373
  get_consensus = get_consensus
402
374
  get_chrom = get_chrom
403
- get_sample = get_sample
375
+ get_samples = get_samples
404
376
  get_consensus_matches = get_consensus_matches
405
377
  get_consensus_matrix = get_consensus_matrix
406
378
  get_gaps_matrix = get_gaps_matrix
407
379
  get_gaps_stats = get_gaps_stats
408
380
  get_orphans = get_orphans
381
+ get_sample_stats = get_sample_stats
409
382
 
410
383
  # === Data Selection and Filtering ===
411
384
  samples_select = samples_select
412
385
  samples_delete = samples_delete
386
+
413
387
  features_select = features_select
414
388
  features_filter = features_filter
415
389
  features_delete = features_delete
416
390
  consensus_select = consensus_select
417
391
  consensus_filter = consensus_filter
418
392
  consensus_delete = consensus_delete
419
- # Backward compatibility aliases
420
- filter_consensus = consensus_filter
421
- select_consensus = consensus_select
422
- filter_features = features_filter
423
- select_features = features_select
424
- consensus_find = merge
425
-
426
- # === Sample Metadata and Styling ===
427
- set_source = set_source
428
- sample_color = sample_color
429
- sample_color_reset = sample_color_reset
430
- reset_sample_color = sample_color_reset
431
- name_replace = sample_name_replace
432
- name_reset = sample_name_reset
433
- reset_name = sample_name_reset
434
393
 
394
+ # === Sample Metadata and Styling ===
395
+ set_samples_source = set_samples_source
396
+ set_samples_color = set_samples_color
397
+
398
+ samples_name_replace = sample_name_replace
399
+ samples_name_reset = sample_name_reset
400
+
401
+ # Backward compatibility aliases for renamed methods
402
+ set_folder = set_study_folder
403
+ set_source = set_samples_source
404
+ #sample_color = set_samples_color
405
+ #get_sample = get_samples
406
+ #load_features = _load_features
407
+ store_history = update_history
408
+
435
409
  # === Data Compression and Storage ===
436
410
  compress = compress
437
- compress_features = compress_features
438
- compress_ms2 = compress_ms2
439
- compress_chrom = compress_chrom
440
- restore_features = restore_features
441
- restore_chrom = restore_chrom
442
- restore_ms2 = restore_ms2
443
411
  decompress = decompress
444
412
 
445
413
  # === Reset Operations ===
@@ -453,12 +421,16 @@ class Study:
453
421
  plot_chrom = plot_chrom
454
422
  plot_consensus_2d = plot_consensus_2d
455
423
  plot_consensus_stats = plot_consensus_stats
456
- plot_pca = plot_pca
424
+ plot_samples_pca = plot_samples_pca
425
+ plot_samples_umap = plot_samples_umap
457
426
  plot_samples_2d = plot_samples_2d
458
427
  plot_bpc = plot_bpc
459
428
  plot_rt_correction = plot_rt_correction
460
429
  plot_tic = plot_tic
461
430
  plot_eic = plot_eic
431
+
432
+ # === Analysis Operations ===
433
+ analyze_umap = analyze_umap
462
434
 
463
435
  # === Export Operations ===
464
436
  export_mgf = export_mgf
@@ -468,6 +440,11 @@ class Study:
468
440
 
469
441
  # === Identification and Library Matching ===
470
442
  lib_load = lib_load
443
+
444
+ def lib_to_consensus(self, **kwargs):
445
+ """Create consensus features from library entries."""
446
+ from masster.study.id import lib_to_consensus as _lib_to_consensus
447
+ return _lib_to_consensus(self, **kwargs)
471
448
  identify = identify
472
449
  get_id = get_id
473
450
  id_reset = id_reset
@@ -476,45 +453,23 @@ class Study:
476
453
  reset_lib = lib_reset
477
454
 
478
455
  # === Parameter Management ===
479
- store_history = store_history
456
+ update_history = update_history
480
457
  get_parameters = get_parameters
481
458
  update_parameters = update_parameters
482
459
  get_parameters_property = get_parameters_property
483
460
  set_parameters_property = set_parameters_property
484
461
 
485
462
  # === Private/Internal Methods ===
486
- _add_samples_batch = _add_samples_batch
487
- _add_sample_optimized = _add_sample_optimized
488
- _add_sample_standard = _add_sample_standard
489
- _sample_color_reset_optimized = _sample_color_reset_optimized
490
- _load_study5 = _load_study5
491
- _save_study5 = _save_study5
492
- _save_study5_compressed = _save_study5_compressed
493
- _load_ms1 = _load_ms1
494
463
  _get_consensus_uids = _get_consensus_uids
495
- _get_feature_uids = _get_feature_uids
496
- _get_sample_uids = _get_sample_uids
497
- _ensure_features_df_schema_order = _ensure_features_df_schema_order
498
- _process_sample_for_parallel_fill = _process_sample_for_parallel_fill
499
- _get_missing_consensus_sample_combinations = (
500
- _get_missing_consensus_sample_combinations
501
- )
502
- _load_consensusXML = _load_consensusXML
503
- load_features = load_features
504
- _save_consensusXML = _save_consensusXML
505
- _get_mgf_df = _get_mgf_df
506
- _get_adducts = _get_adducts
507
- _calculate_formula_mass_shift = _calculate_formula_mass_shift
508
- _format_adduct_name = _format_adduct_name
509
- _parse_element_counts = _parse_element_counts
464
+ _get_features_uids = _get_features_uids
465
+ _get_samples_uids = _get_samples_uids
466
+ _load_features = _load_features
467
+
468
+ # Note: _load_study5 and _save_study5 are not exposed as class methods
469
+ # They are used internally by load() and save() methods only
510
470
 
511
471
  # === Merge Helper Methods ===
512
- _reset_consensus_data = _reset_consensus_data
513
- _extract_consensus_features = _extract_consensus_features
514
- _perform_adduct_grouping = _perform_adduct_grouping
515
- _consensus_cleanup = _consensus_cleanup
516
- _identify_adduct_by_mass_shift = _identify_adduct_by_mass_shift
517
- _finalize_merge = _finalize_merge
472
+ # (All merge helper methods are now internal to the merge module)
518
473
 
519
474
  # === Default Parameters ===
520
475
  study_defaults = study_defaults
@@ -612,6 +567,83 @@ class Study:
612
567
  except Exception as e:
613
568
  self.logger.error(f"Failed to reload current module {current_module}: {e}")
614
569
 
570
+ def _sanitize_null_ids(self):
571
+ """
572
+ Sanitize null feature_id and consensus_id values by replacing them with new integer IDs.
573
+ For feature_id: generates large sequential integers that can be converted by merge/align functions.
574
+ For consensus_id: uses 16-character UUID strings (as expected by merge function).
575
+ """
576
+ import uuid
577
+ import polars as pl
578
+ import time
579
+
580
+ # Sanitize features_df feature_id column
581
+ if hasattr(self, 'features_df') and self.features_df is not None and not self.features_df.is_empty():
582
+ # Check for null feature_ids
583
+ null_feature_ids = self.features_df.filter(pl.col("feature_id").is_null()).shape[0]
584
+ if null_feature_ids > 0:
585
+ self.logger.info(f"Sanitizing {null_feature_ids} null feature_id values with new integer IDs")
586
+
587
+ # Find the maximum existing feature_id (convert strings to int if possible)
588
+ max_existing_id = 0
589
+ existing_ids = self.features_df.filter(pl.col("feature_id").is_not_null())["feature_id"].to_list()
590
+ for fid in existing_ids:
591
+ try:
592
+ int_id = int(fid)
593
+ max_existing_id = max(max_existing_id, int_id)
594
+ except (ValueError, TypeError):
595
+ # Skip non-integer IDs
596
+ pass
597
+
598
+ # Generate new sequential integer IDs starting from max + timestamp offset
599
+ # Use timestamp to ensure uniqueness across different sanitization runs
600
+ base_id = max(max_existing_id + 1, int(time.time() * 1000000)) # Microsecond timestamp
601
+ new_int_ids = [str(base_id + i) for i in range(null_feature_ids)]
602
+ uid_index = 0
603
+
604
+ # Create a list to store all feature_ids
605
+ feature_ids = []
606
+ for feature_id in self.features_df["feature_id"].to_list():
607
+ if feature_id is None:
608
+ feature_ids.append(new_int_ids[uid_index])
609
+ uid_index += 1
610
+ else:
611
+ feature_ids.append(feature_id)
612
+
613
+ # Update the DataFrame with sanitized feature_ids
614
+ self.features_df = self.features_df.with_columns(
615
+ pl.Series("feature_id", feature_ids, dtype=pl.Utf8)
616
+ )
617
+
618
+ self.logger.info(f"Successfully sanitized {null_feature_ids} feature_id values")
619
+
620
+ # Sanitize consensus_df consensus_id column
621
+ if hasattr(self, 'consensus_df') and self.consensus_df is not None and not self.consensus_df.is_empty():
622
+ if "consensus_id" in self.consensus_df.columns:
623
+ null_consensus_ids = self.consensus_df.filter(pl.col("consensus_id").is_null()).shape[0]
624
+ if null_consensus_ids > 0:
625
+ self.logger.info(f"Sanitizing {null_consensus_ids} null consensus_id values with new UIDs")
626
+
627
+ # Generate new UIDs for null values using the same method as merge()
628
+ new_uids = [str(uuid.uuid4()).replace('-', '')[:16] for _ in range(null_consensus_ids)]
629
+ uid_index = 0
630
+
631
+ # Create a list to store all consensus_ids
632
+ consensus_ids = []
633
+ for consensus_id in self.consensus_df["consensus_id"].to_list():
634
+ if consensus_id is None:
635
+ consensus_ids.append(new_uids[uid_index])
636
+ uid_index += 1
637
+ else:
638
+ consensus_ids.append(consensus_id)
639
+
640
+ # Update the DataFrame with sanitized consensus_ids
641
+ self.consensus_df = self.consensus_df.with_columns(
642
+ pl.Series("consensus_id", consensus_ids, dtype=pl.Utf8)
643
+ )
644
+
645
+ self.logger.info(f"Successfully sanitized {null_consensus_ids} consensus_id values")
646
+
615
647
  def __dir__(self):
616
648
  """
617
649
  Custom __dir__ implementation to hide internal methods starting with '_'
@@ -627,16 +659,24 @@ class Study:
627
659
  'find_consensus', # alias for merge
628
660
  'integrate_chrom', # alias for integrate
629
661
  'fill_chrom', # alias for fill
630
- 'fill_chrom_single', # alias for fill_single
631
662
  'filter_consensus', # alias for consensus_filter
632
663
  'select_consensus', # alias for consensus_select
633
664
  'filter_features', # alias for features_filter
634
665
  'select_features', # alias for features_select
635
666
  'consensus_find', # alias for merge
667
+ # Backward compatibility for renamed methods
668
+ 'set_folder', # alias for set_study_folder
669
+ 'set_source', # alias for set_samples_source
670
+ 'sample_color', # alias for set_samples_color
671
+ 'get_sample', # alias for get_samples
672
+ 'load_features', # alias for _load_features
673
+ 'store_history', # alias for update_history
674
+ 'sample_color_reset', # alias for set_samples_color(by=None)
675
+ 'reset_sample_color', # alias for sample_color_reset
636
676
  }
637
677
 
638
678
  # Get all attributes from the class
639
- all_attrs = set()
679
+ all_attrs: set[str] = set()
640
680
 
641
681
  # Add attributes from the class and all its bases
642
682
  for cls in self.__class__.__mro__:
@@ -895,8 +935,9 @@ class Study:
895
935
  tight_clusters_count = 0
896
936
  if consensus_df_len > 0:
897
937
  try:
938
+ from masster.study.merge import _count_tight_clusters
898
939
  tight_clusters_count = _count_tight_clusters(self, mz_tol=0.04, rt_tol=0.3)
899
- except Exception as e:
940
+ except Exception:
900
941
  # If tight clusters calculation fails, just use 0
901
942
  tight_clusters_count = 0
902
943
 
@@ -952,7 +993,6 @@ class Study:
952
993
 
953
994
  print(summary)
954
995
 
955
-
956
996
  if __name__ == "__main__":
957
997
  # This block is executed when the script is run directly
958
998
  pass
@@ -327,6 +327,9 @@
327
327
  "formula": {
328
328
  "dtype": "pl.String"
329
329
  },
330
+ "iso": {
331
+ "dtype": "pl.Int64"
332
+ },
330
333
  "adduct": {
331
334
  "dtype": "pl.String"
332
335
  },
@@ -342,6 +345,9 @@
342
345
  "rt": {
343
346
  "dtype": "pl.Null"
344
347
  },
348
+ "quant_group": {
349
+ "dtype": "pl.Int64"
350
+ },
345
351
  "db_id": {
346
352
  "dtype": "pl.String"
347
353
  },
@@ -369,6 +375,9 @@
369
375
  },
370
376
  "score": {
371
377
  "dtype": "pl.Float64"
378
+ },
379
+ "iso": {
380
+ "dtype": "pl.Int64"
372
381
  }
373
382
  }
374
383
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: masster
3
- Version: 0.4.22
3
+ Version: 0.5.1
4
4
  Summary: Mass spectrometry data analysis package
5
5
  Project-URL: homepage, https://github.com/zamboni-lab/masster
6
6
  Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -686,6 +686,7 @@ Requires-Dist: bokeh>=3.7.3
686
686
  Requires-Dist: cmap>=0.6.2
687
687
  Requires-Dist: datashader>=0.18.1
688
688
  Requires-Dist: h5py>=3.14.0
689
+ Requires-Dist: hdbscan>=0.8.40
689
690
  Requires-Dist: holoviews>=1.21.0
690
691
  Requires-Dist: hvplot>=0.11.3
691
692
  Requires-Dist: loguru>=0.7.3
@@ -701,6 +702,7 @@ Requires-Dist: pythonnet>=3.0.0
701
702
  Requires-Dist: scikit-learn>=1.7.1
702
703
  Requires-Dist: scipy>=1.12.0
703
704
  Requires-Dist: tqdm>=4.65.0
705
+ Requires-Dist: umap-learn>=0.5.9.post2
704
706
  Provides-Extra: dev
705
707
  Requires-Dist: bandit>=1.7.0; extra == 'dev'
706
708
  Requires-Dist: black>=23.0.0; extra == 'dev'
@@ -726,12 +728,13 @@ Description-Content-Type: text/markdown
726
728
 
727
729
  # MASSter
728
730
 
729
- **MASSter** is a comprehensive Python package for mass spectrometry data analysis, designed for metabolomics and LC-MS data processing. It provides tools for feature detection, alignment, consensus building, and interactive visualization of mass spectrometry datasets. It is designed to deal with DDA, and hides functionalities for DIA and ZTScan DIA data.
731
+ **MASSter** is a Python package for the analysis of mass spectrometry data, tailored to metabolomics and LC-MS data processing. It is designed to deal with DDA data, and hides functionality for DIA and ZTScan DIA data. Sample-centric feature detection uses OpenMS. All other functionality (e.g. centroiding, RT alignment, adduct and isotopomer detection, merging of multiple samples, gap-filling, and quantification) was redesigned and engineered to maximize scalability (tested with 3000 LC-MS samples), speed, and quality of results.
730
732
 
731
733
  This is a poorly documented, stable branch of the development codebase in use in the Zamboni lab.
732
734
 
733
- Some of the core processing functions are derived from OpenMS. We use the same nomenclature and refer to their documentation for an explanation of the parameters. To a large extent, however, you should be able to use the defaults (=no parameters) when calling processing steps.
735
+ ## Prerequisites
734
736
 
737
+ **MASSter** reads raw (Thermo), wiff (SCIEX), or mzML data. It's recommended to provide raw, profile data.
735
738
 
736
739
  ## Installation
737
740
 
@@ -739,20 +742,60 @@ Some of the core processing functions are derived from OpenMS. We use the same n
739
742
  pip install masster
740
743
  ```
741
744
 
742
- ### Basic Workflow for analyzing LC-MS study with 2-... samples
745
+ ## Basic usage
746
+ ### Quick start: use the wizard
747
+
748
+ ```python
749
+ import masster
750
+ masster.wizard.execute(
751
+ source=r'..\..\folder_with_raw_data',
752
+ folder=r'..\..\folder_to_store_results'
753
+ )
754
+ ```
743
755
 
756
+ This will run a wizard that should perform all key steps and save the results to the `folder`.
757
+
758
+ ### Basic workflow for analyzing a single sample
744
759
  ```python
745
760
  import masster
761
+ sample = masster.Sample(filename='...') # full path to a *.raw, *.wiff, or *.mzML file
762
+ # process
763
+ sample.find_features(chrom_fwhm=0.5, noise=50) # for orbitrap data, set noise to 1e5
764
+ sample.find_adducts()
765
+ sample.find_ms2()
766
+
767
+ # access data
768
+ sample.features_df
769
+
770
+ # save results
771
+ sample.save() # stores to *.sample5, our custom hdf5 format
772
+ sample.export_mgf()
773
+
774
+ # some plots
775
+ sample.plot_bpc()
776
+ sample.plot_tic()
777
+ sample.plot_2d()
778
+ sample.plot_features_stats()
779
+
780
+ # explore methods
781
+ dir(sample)
782
+ ```
783
+
784
+ ### Basic Workflow for analyzing LC-MS study with 2-... samples
746
785
 
786
+ ```python
787
+ import masster
747
788
  # Initialize the Study object with the default folder
748
- study = masster.Study(default_folder=r'D:\...\mylcms')
789
+ study = masster.Study(folder=r'D:\...\mylcms')
749
790
 
750
791
  # Load data from folder with raw data, here: WIFF
751
792
  study.add(r'D:\...\...\...\*.wiff')
752
793
 
753
794
  # Perform retention time correction
754
- study.align(rt_max_diff=2.0)
795
+ study.align(rt_tol=2.0)
755
796
  study.plot_alignment()
797
+ study.plot_bpc()
798
+ study.plot_rt_correction()
756
799
 
757
800
  # Find consensus features
758
801
  study.merge(min_samples=3)
@@ -772,18 +815,15 @@ study.export_parquet()
772
815
 
773
816
  # Save the study to .study5
774
817
  study.save()
775
- ```
776
-
777
- ## Requirements
778
818
 
779
- - Python 3.11
780
- - Key dependencies: pandas, polars, numpy, scipy, matplotlib, bokeh, holoviews, panel
781
- - See `pyproject.toml` for complete dependency list
819
+ # Some of the plots...
820
+ study.plot_samples_pca()
821
+ study.plot_samples_umap()
822
+ study.plot_samples_2d()
823
+ ```
782
824
 
783
825
  ## License
784
-
785
826
  GNU Affero General Public License v3
786
827
 
787
828
  ## Citation
788
-
789
829
  If you use Masster in your research, please cite this repository.