masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/study/save.py CHANGED
@@ -48,11 +48,7 @@ def save(self, filename=None, add_timestamp=True, compress=False):
48
48
  # Log file size information for performance monitoring
49
49
  if hasattr(self, "features_df") and not self.features_df.is_empty():
50
50
  feature_count = len(self.features_df)
51
- sample_count = (
52
- len(self.samples_df)
53
- if hasattr(self, "samples_df") and not self.samples_df.is_empty()
54
- else 0
55
- )
51
+ sample_count = len(self.samples_df) if hasattr(self, "samples_df") and not self.samples_df.is_empty() else 0
56
52
  self.logger.debug(
57
53
  f"Saving study with {sample_count} samples and {feature_count} features to {filename}",
58
54
  )
@@ -60,14 +56,17 @@ def save(self, filename=None, add_timestamp=True, compress=False):
60
56
  # Use compressed mode for large datasets
61
57
  if compress:
62
58
  from masster.study.h5 import _save_study5_compressed
59
+
63
60
  _save_study5_compressed(self, filename)
64
61
  else:
65
62
  from masster.study.h5 import _save_study5
63
+
66
64
  _save_study5(self, filename)
67
65
 
68
66
  if self.consensus_map is not None:
69
67
  # save the features as a separate file
70
68
  from masster.study.save import _save_consensusXML
69
+
71
70
  _save_consensusXML(self, filename=filename.replace(".study5", ".consensusXML"))
72
71
  self.filename = filename
73
72
 
@@ -160,14 +159,15 @@ def _save_consensusXML(self, filename: str):
160
159
  if self.consensus_df is None or self.consensus_df.is_empty():
161
160
  self.logger.error("No consensus features found.")
162
161
  return
163
-
162
+
164
163
  # Build consensus map from consensus_df with proper consensus_id values
165
164
  import pyopenms as oms
165
+
166
166
  consensus_map = oms.ConsensusMap()
167
-
167
+
168
168
  # Set up file descriptions for all samples
169
169
  file_descriptions = consensus_map.getColumnHeaders()
170
- if hasattr(self, 'samples_df') and not self.samples_df.is_empty():
170
+ if hasattr(self, "samples_df") and not self.samples_df.is_empty():
171
171
  for i, sample_row in enumerate(self.samples_df.iter_rows(named=True)):
172
172
  file_description = file_descriptions.get(i, oms.ColumnHeader())
173
173
  file_description.filename = sample_row.get("sample_name", f"sample_{i}")
@@ -175,17 +175,17 @@ def _save_consensusXML(self, filename: str):
175
175
  file_description.unique_id = i + 1
176
176
  file_descriptions[i] = file_description
177
177
  consensus_map.setColumnHeaders(file_descriptions)
178
-
178
+
179
179
  # Add consensus features to the map (simplified version without individual features)
180
180
  for consensus_row in self.consensus_df.iter_rows(named=True):
181
181
  consensus_feature = oms.ConsensusFeature()
182
-
182
+
183
183
  # Set basic properties
184
184
  consensus_feature.setRT(float(consensus_row.get("rt", 0.0)))
185
185
  consensus_feature.setMZ(float(consensus_row.get("mz", 0.0)))
186
186
  consensus_feature.setIntensity(float(consensus_row.get("inty_mean", 0.0)))
187
187
  consensus_feature.setQuality(float(consensus_row.get("quality", 1.0)))
188
-
188
+
189
189
  # Set the unique consensus_id as the unique ID
190
190
  consensus_id_str = consensus_row.get("consensus_id", "")
191
191
  if consensus_id_str and len(consensus_id_str) == 16:
@@ -199,9 +199,9 @@ def _save_consensusXML(self, filename: str):
199
199
  else:
200
200
  # Fallback to consensus_uid
201
201
  consensus_feature.setUniqueId(consensus_row.get("consensus_uid", 0))
202
-
202
+
203
203
  consensus_map.push_back(consensus_feature)
204
-
204
+
205
205
  # Save the consensus map
206
206
  fh = oms.ConsensusXMLFile()
207
207
  fh.store(filename, consensus_map)
@@ -215,4 +215,5 @@ def save_consensus(self, **kwargs):
215
215
  self.logger.error("No consensus map found.")
216
216
  return
217
217
  from masster.study.save import _save_consensusXML
218
+
218
219
  _save_consensusXML(self, **kwargs)
masster/study/study.py CHANGED
@@ -80,7 +80,8 @@ from masster.study.load import add
80
80
  from masster.study.load import add_sample
81
81
  from masster.study.load import fill
82
82
  from masster.study.load import load
83
- #from masster.study.load import _load_features
83
+
84
+ # from masster.study.load import _load_features
84
85
  from masster.study.h5 import _load_ms1
85
86
  from masster.study.h5 import _load_study5
86
87
  from masster.study.h5 import _save_study5
@@ -215,13 +216,13 @@ class Study:
215
216
  """
216
217
  # ===== PARAMETER INITIALIZATION =====
217
218
  auto_load_filename = self._init_parameters(filename, kwargs)
218
-
219
+
219
220
  # ===== DATA STRUCTURES INITIALIZATION =====
220
221
  self._init_data_structures()
221
-
222
+
222
223
  # ===== LOGGER INITIALIZATION =====
223
224
  self._init_logger()
224
-
225
+
225
226
  # ===== AUTO-LOAD FILE IF PROVIDED =====
226
227
  if auto_load_filename is not None:
227
228
  self.load(filename=auto_load_filename)
@@ -266,11 +267,7 @@ class Study:
266
267
  # Set instance attributes (ensure proper string values for logger)
267
268
  self.folder = params.folder
268
269
  self.label = params.label
269
- self.polarity = (
270
- params.polarity
271
- if params.polarity in ["positive", "negative", "pos", "neg"]
272
- else "positive"
273
- )
270
+ self.polarity = params.polarity if params.polarity in ["positive", "negative", "pos", "neg"] else "positive"
274
271
  self.log_level = params.log_level.upper() if params.log_level else "INFO"
275
272
  self.log_label = params.log_label + " | " if params.log_label else ""
276
273
  self.log_sink = params.log_sink
@@ -278,7 +275,7 @@ class Study:
278
275
  # Create folder if it doesn't exist
279
276
  if self.folder is not None and not os.path.exists(self.folder):
280
277
  os.makedirs(self.folder)
281
-
278
+
282
279
  return auto_load_filename
283
280
 
284
281
  def _init_data_structures(self):
@@ -316,11 +313,11 @@ class Study:
316
313
  "num_ms2": pl.Int64,
317
314
  },
318
315
  )
319
-
316
+
320
317
  # Feature-related data structures
321
318
  self.features_maps = []
322
319
  self.features_df = pl.DataFrame()
323
-
320
+
324
321
  # Consensus-related data structures
325
322
  self.consensus_ms2 = pl.DataFrame()
326
323
  self.consensus_df = pl.DataFrame()
@@ -330,7 +327,7 @@ class Study:
330
327
 
331
328
  # Library and identification data structures
332
329
  self.lib_df = pl.DataFrame() # populated by lib_load
333
- self.id_df = pl.DataFrame() # populated by identify
330
+ self.id_df = pl.DataFrame() # populated by identify
334
331
 
335
332
  def _init_logger(self):
336
333
  """Initialize the logger for this Study instance."""
@@ -352,24 +349,24 @@ class Study:
352
349
  _load_ms1 = _load_ms1
353
350
  _load_study5 = _load_study5
354
351
  _save_study5 = _save_study5
355
-
352
+
356
353
  # === Sample Management ===
357
354
  add = add
358
355
  add_sample = add_sample
359
-
356
+
360
357
  # === Core Processing Operations ===
361
358
  align = align
362
- merge = merge
363
-
359
+ merge = merge
360
+
364
361
  find_ms2 = find_ms2
365
362
  find_iso = find_iso
366
363
  reset_iso = reset_iso
367
364
  iso_reset = reset_iso
368
365
  integrate = integrate
369
-
366
+
370
367
  fill = fill
371
- #_estimate_rt_original_for_filled_feature = _estimate_rt_original_for_filled_feature
372
-
368
+ # _estimate_rt_original_for_filled_feature = _estimate_rt_original_for_filled_feature
369
+
373
370
  # === Data Retrieval and Access ===
374
371
  get_consensus = get_consensus
375
372
  get_chrom = get_chrom
@@ -382,11 +379,11 @@ class Study:
382
379
  get_sample_stats = get_sample_stats
383
380
  get_consensus_stats = get_consensus_stats
384
381
  _get_adducts = _get_adducts
385
-
382
+
386
383
  # === Data Selection and Filtering ===
387
384
  samples_select = samples_select
388
385
  samples_delete = samples_delete
389
-
386
+
390
387
  features_select = features_select
391
388
  features_filter = features_filter
392
389
  features_delete = features_delete
@@ -397,22 +394,22 @@ class Study:
397
394
  # === Sample Metadata and Styling ===
398
395
  set_samples_source = set_samples_source
399
396
  set_samples_color = set_samples_color
400
-
397
+
401
398
  samples_name_replace = sample_name_replace
402
399
  samples_name_reset = sample_name_reset
403
-
400
+
404
401
  # Backward compatibility aliases for renamed methods
405
402
  set_folder = set_study_folder
406
- set_source = set_samples_source
407
- #sample_color = set_samples_color
408
- #get_sample = get_samples
409
- #load_features = _load_features
403
+ set_source = set_samples_source
404
+ # sample_color = set_samples_color
405
+ # get_sample = get_samples
406
+ # load_features = _load_features
410
407
  store_history = update_history
411
-
408
+
412
409
  # === Data Compression and Storage ===
413
410
  compress = compress
414
411
  decompress = decompress
415
-
412
+
416
413
  # === Reset Operations ===
417
414
  consensus_reset = consensus_reset
418
415
  fill_reset = fill_reset
@@ -435,27 +432,29 @@ class Study:
435
432
 
436
433
  # === Analysis Operations ===
437
434
  analyze_umap = analyze_umap
438
-
435
+
439
436
  # === Export Operations ===
440
437
  export_mgf = export_mgf
441
438
  export_mztab = export_mztab
442
439
  export_xlsx = export_xlsx
443
440
  export_parquet = export_parquet
444
-
441
+
445
442
  # === Identification and Library Matching ===
446
443
  lib_load = lib_load
447
-
444
+
448
445
  def lib_to_consensus(self, **kwargs):
449
446
  """Create consensus features from library entries."""
450
447
  from masster.study.id import lib_to_consensus as _lib_to_consensus
448
+
451
449
  return _lib_to_consensus(self, **kwargs)
450
+
452
451
  identify = identify
453
452
  get_id = get_id
454
453
  id_reset = id_reset
455
454
  reset_id = id_reset
456
455
  lib_reset = lib_reset
457
456
  reset_lib = lib_reset
458
-
457
+
459
458
  # === Oracle Import Operations ===
460
459
  import_oracle = import_oracle
461
460
 
@@ -465,12 +464,12 @@ class Study:
465
464
  update_parameters = update_parameters
466
465
  get_parameters_property = get_parameters_property
467
466
  set_parameters_property = set_parameters_property
468
-
467
+
469
468
  # === Private/Internal Methods ===
470
469
  _get_consensus_uids = _get_consensus_uids
471
470
  _get_features_uids = _get_features_uids
472
471
  _get_samples_uids = _get_samples_uids
473
-
472
+
474
473
  # === Default Parameters ===
475
474
  study_defaults = study_defaults
476
475
  align_defaults = align_defaults
@@ -506,10 +505,7 @@ class Study:
506
505
 
507
506
  # Get all currently loaded modules that are part of the study package
508
507
  for module_name in sys.modules:
509
- if (
510
- module_name.startswith(study_module_prefix)
511
- and module_name != current_module
512
- ):
508
+ if module_name.startswith(study_module_prefix) and module_name != current_module:
513
509
  study_modules.append(module_name)
514
510
 
515
511
  # Add core masster modules
@@ -524,20 +520,14 @@ class Study:
524
520
  sample_modules = []
525
521
  sample_module_prefix = f"{base_modname}.sample."
526
522
  for module_name in sys.modules:
527
- if (
528
- module_name.startswith(sample_module_prefix)
529
- and module_name != current_module
530
- ):
523
+ if module_name.startswith(sample_module_prefix) and module_name != current_module:
531
524
  sample_modules.append(module_name)
532
525
 
533
526
  # Add lib submodules
534
527
  lib_modules = []
535
528
  lib_module_prefix = f"{base_modname}.lib."
536
529
  for module_name in sys.modules:
537
- if (
538
- module_name.startswith(lib_module_prefix)
539
- and module_name != current_module
540
- ):
530
+ if module_name.startswith(lib_module_prefix) and module_name != current_module:
541
531
  lib_modules.append(module_name)
542
532
 
543
533
  all_modules_to_reload = core_modules + sample_modules + study_modules + lib_modules
@@ -565,73 +555,66 @@ class Study:
565
555
  self.logger.debug("Module reload completed")
566
556
  except Exception as e:
567
557
  self.logger.error(f"Failed to reload current module {current_module}: {e}")
568
-
558
+
569
559
  def __dir__(self):
570
560
  """
571
- Custom __dir__ implementation to hide internal methods starting with '_'
572
- and backward compatibility aliases from tab completion and dir() calls,
561
+ Custom __dir__ implementation to hide internal methods starting with '_'
562
+ and backward compatibility aliases from tab completion and dir() calls,
573
563
  while keeping them accessible to class methods.
574
-
564
+
575
565
  Returns:
576
566
  list: List of public attribute and method names (excluding internal and deprecated methods)
577
567
  """
578
568
  # Define backward compatibility aliases to hide
579
569
  backward_compatibility_aliases = {
580
- 'add_folder', # alias for add
581
- 'find_consensus', # alias for merge
582
- 'integrate_chrom', # alias for integrate
583
- 'fill_chrom', # alias for fill
584
- 'filter_consensus', # alias for consensus_filter
585
- 'select_consensus', # alias for consensus_select
586
- 'filter_features', # alias for features_filter
587
- 'select_features', # alias for features_select
588
- 'consensus_find', # alias for merge
570
+ "add_folder", # alias for add
571
+ "find_consensus", # alias for merge
572
+ "integrate_chrom", # alias for integrate
573
+ "fill_chrom", # alias for fill
574
+ "filter_consensus", # alias for consensus_filter
575
+ "select_consensus", # alias for consensus_select
576
+ "filter_features", # alias for features_filter
577
+ "select_features", # alias for features_select
578
+ "consensus_find", # alias for merge
589
579
  # Backward compatibility for renamed methods
590
- 'set_folder', # alias for set_study_folder
591
- 'set_source', # alias for set_samples_source
592
- 'sample_color', # alias for set_samples_color
593
- 'get_sample', # alias for get_samples
594
- 'load_features', # alias for _load_features
595
- 'store_history', # alias for update_history
596
- 'sample_color_reset', # alias for set_samples_color(by=None)
597
- 'reset_sample_color', # alias for sample_color_reset
580
+ "set_folder", # alias for set_study_folder
581
+ "set_source", # alias for set_samples_source
582
+ "sample_color", # alias for set_samples_color
583
+ "get_sample", # alias for get_samples
584
+ "load_features", # alias for _load_features
585
+ "store_history", # alias for update_history
586
+ "sample_color_reset", # alias for set_samples_color(by=None)
587
+ "reset_sample_color", # alias for sample_color_reset
598
588
  }
599
-
589
+
600
590
  # Get all attributes from the class
601
591
  all_attrs: set[str] = set()
602
-
592
+
603
593
  # Add attributes from the class and all its bases
604
594
  for cls in self.__class__.__mro__:
605
595
  all_attrs.update(cls.__dict__.keys())
606
-
596
+
607
597
  # Add instance attributes
608
598
  all_attrs.update(self.__dict__.keys())
609
-
599
+
610
600
  # Filter out attributes starting with '_' (but keep special methods like __init__, __str__, etc.)
611
601
  # Also filter out backward compatibility aliases
612
602
  public_attrs = [
613
- attr for attr in all_attrs
614
- if not attr.startswith('_') or attr.startswith('__') and attr.endswith('__')
603
+ attr for attr in all_attrs if not attr.startswith("_") or attr.startswith("__") and attr.endswith("__")
615
604
  ]
616
-
605
+
617
606
  # Remove backward compatibility aliases from the public attributes
618
607
  public_attrs = [attr for attr in public_attrs if attr not in backward_compatibility_aliases]
619
-
608
+
620
609
  return sorted(public_attrs)
621
610
 
622
611
  def __str__(self):
623
612
  """
624
613
  Return a short summary string with number of samples and consensus features.
625
614
  """
626
- samples = (
627
- len(self.samples_df)
628
- if (self.samples_df is not None and not self.samples_df.is_empty())
629
- else 0
630
- )
615
+ samples = len(self.samples_df) if (self.samples_df is not None and not self.samples_df.is_empty()) else 0
631
616
  consensus = (
632
- len(self.consensus_df)
633
- if (self.consensus_df is not None and not self.consensus_df.is_empty())
634
- else 0
617
+ len(self.consensus_df) if (self.consensus_df is not None and not self.consensus_df.is_empty()) else 0
635
618
  )
636
619
  return f"{samples} samples, {consensus} consensus"
637
620
 
@@ -699,8 +682,7 @@ class Study:
699
682
  max_samples = 0
700
683
 
701
684
  # Count only features where 'filled' == False
702
- if (self.features_df is not None and not self.features_df.is_empty() and
703
- "filled" in self.features_df.columns):
685
+ if self.features_df is not None and not self.features_df.is_empty() and "filled" in self.features_df.columns:
704
686
  unfilled_features_count = self.features_df.filter(
705
687
  ~self.features_df["filled"],
706
688
  ).height
@@ -708,8 +690,12 @@ class Study:
708
690
  unfilled_features_count = 0
709
691
 
710
692
  # Calculate features in consensus vs not in consensus (only for unfilled features)
711
- if (self.features_df is not None and not self.features_df.is_empty() and
712
- self.consensus_mapping_df is not None and not self.consensus_mapping_df.is_empty()):
693
+ if (
694
+ self.features_df is not None
695
+ and not self.features_df.is_empty()
696
+ and self.consensus_mapping_df is not None
697
+ and not self.consensus_mapping_df.is_empty()
698
+ ):
713
699
  # Get unfilled features only
714
700
  unfilled_features = (
715
701
  self.features_df.filter(~self.features_df["filled"])
@@ -730,17 +716,13 @@ class Study:
730
716
  unfilled_features = unfilled_features.with_columns(
731
717
  pl.col("feature_uid").cast(pl.Int64),
732
718
  )
733
- consensus_feature_uids = [
734
- int(uid) for uid in consensus_feature_uids
735
- ]
719
+ consensus_feature_uids = [int(uid) for uid in consensus_feature_uids]
736
720
  except Exception:
737
721
  # If casting fails, ensure both are strings
738
722
  unfilled_features = unfilled_features.with_columns(
739
723
  pl.col("feature_uid").cast(pl.Utf8),
740
724
  )
741
- consensus_feature_uids = [
742
- str(uid) for uid in consensus_feature_uids
743
- ]
725
+ consensus_feature_uids = [str(uid) for uid in consensus_feature_uids]
744
726
 
745
727
  # Count unfilled features that are in consensus
746
728
  in_consensus_count = unfilled_features.filter(
@@ -749,12 +731,8 @@ class Study:
749
731
 
750
732
  # Calculate ratios that sum to 100%
751
733
  total_unfilled = unfilled_features.height
752
- ratio_in_consensus_to_total = (
753
- (in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
754
- )
755
- ratio_not_in_consensus_to_total = (
756
- 100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
757
- )
734
+ ratio_in_consensus_to_total = (in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
735
+ ratio_not_in_consensus_to_total = 100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
758
736
  else:
759
737
  ratio_in_consensus_to_total = 0
760
738
  ratio_not_in_consensus_to_total = 0
@@ -789,8 +767,7 @@ class Study:
789
767
  )
790
768
 
791
769
  # Use more efficient counting - count non-null chroms only for features in consensus mapping
792
- if (self.consensus_mapping_df is not None and
793
- not self.consensus_mapping_df.is_empty()):
770
+ if self.consensus_mapping_df is not None and not self.consensus_mapping_df.is_empty():
794
771
  non_null_chroms = (
795
772
  self.features_df.join(
796
773
  self.consensus_mapping_df.select("feature_uid"),
@@ -805,9 +782,7 @@ class Study:
805
782
  else:
806
783
  non_null_chroms = 0
807
784
  total_possible = samples_df_len * consensus_df_len
808
- chrom_completeness = (
809
- non_null_chroms / total_possible if total_possible > 0 else 0
810
- )
785
+ chrom_completeness = non_null_chroms / total_possible if total_possible > 0 else 0
811
786
  else:
812
787
  chrom_completeness = 0
813
788
 
@@ -831,19 +806,13 @@ class Study:
831
806
  rt_spread_row = filtered.select(
832
807
  (pl.col("rt_max") - pl.col("rt_min")).mean(),
833
808
  ).row(0)
834
- rt_spread = (
835
- float(rt_spread_row[0])
836
- if rt_spread_row and rt_spread_row[0] is not None
837
- else 0.0
838
- )
809
+ rt_spread = float(rt_spread_row[0]) if rt_spread_row and rt_spread_row[0] is not None else 0.0
839
810
  else:
840
811
  rt_spread = -1.0
841
812
 
842
813
  # Calculate percentage of consensus features with MS2
843
814
  consensus_with_ms2_percentage = (
844
- (consensus_with_ms2_count / consensus_df_len * 100)
845
- if consensus_df_len > 0
846
- else 0
815
+ (consensus_with_ms2_count / consensus_df_len * 100) if consensus_df_len > 0 else 0
847
816
  )
848
817
 
849
818
  # Total MS2 spectra count
@@ -865,6 +834,7 @@ class Study:
865
834
  if consensus_df_len > 0:
866
835
  try:
867
836
  from masster.study.merge import _count_tight_clusters
837
+
868
838
  tight_clusters_count = _count_tight_clusters(self, mz_tol=0.04, rt_tol=0.3)
869
839
  except Exception:
870
840
  # If tight clusters calculation fails, just use 0
@@ -874,25 +844,13 @@ class Study:
874
844
  consensus_warning = f" {_WARNING_SYMBOL}" if consensus_df_len < 50 else ""
875
845
 
876
846
  rt_spread_text = "N/A" if rt_spread < 0 else f"{rt_spread:.3f}s"
877
- rt_spread_warning = (
878
- f" {_WARNING_SYMBOL}"
879
- if rt_spread >= 0 and (rt_spread > 5 or rt_spread < 0.1)
880
- else ""
881
- )
847
+ rt_spread_warning = f" {_WARNING_SYMBOL}" if rt_spread >= 0 and (rt_spread > 5 or rt_spread < 0.1) else ""
882
848
 
883
849
  chrom_completeness_pct = chrom_completeness * 100
884
- chrom_warning = (
885
- f" {_WARNING_SYMBOL}"
886
- if chrom_completeness_pct < 10 and chrom_completeness_pct >= 0
887
- else ""
888
- )
850
+ chrom_warning = f" {_WARNING_SYMBOL}" if chrom_completeness_pct < 10 and chrom_completeness_pct >= 0 else ""
889
851
 
890
852
  max_samples_warning = ""
891
- if (
892
- isinstance(max_samples, (int, float))
893
- and samples_df_len > 0
894
- and max_samples > 0
895
- ):
853
+ if isinstance(max_samples, (int, float)) and samples_df_len > 0 and max_samples > 0:
896
854
  if max_samples < samples_df_len / 3.0:
897
855
  max_samples_warning = f" {_WARNING_SYMBOL}"
898
856
  elif max_samples < samples_df_len * 0.8:
@@ -923,5 +881,6 @@ class Study:
923
881
 
924
882
  print(summary)
925
883
 
926
- if __name__ == "__main__":
884
+
885
+ if __name__ == "__main__":
927
886
  pass