masster-0.3.16.tar.gz → masster-0.3.17.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. See the advisory linked from the original page for more details.

Files changed (78)
  1. {masster-0.3.16 → masster-0.3.17}/PKG-INFO +1 -1
  2. {masster-0.3.16 → masster-0.3.17}/pyproject.toml +1 -1
  3. {masster-0.3.16 → masster-0.3.17}/src/masster/_version.py +1 -1
  4. {masster-0.3.16 → masster-0.3.17}/src/masster/study/defaults/align_def.py +9 -0
  5. {masster-0.3.16 → masster-0.3.17}/src/masster/study/load.py +88 -43
  6. {masster-0.3.16 → masster-0.3.17}/src/masster/study/processing.py +149 -120
  7. {masster-0.3.16 → masster-0.3.17}/uv.lock +1 -1
  8. {masster-0.3.16 → masster-0.3.17}/.github/workflows/publish.yml +0 -0
  9. {masster-0.3.16 → masster-0.3.17}/.github/workflows/security.yml +0 -0
  10. {masster-0.3.16 → masster-0.3.17}/.github/workflows/test.yml +0 -0
  11. {masster-0.3.16 → masster-0.3.17}/.gitignore +0 -0
  12. {masster-0.3.16 → masster-0.3.17}/.pre-commit-config.yaml +0 -0
  13. {masster-0.3.16 → masster-0.3.17}/LICENSE +0 -0
  14. {masster-0.3.16 → masster-0.3.17}/Makefile +0 -0
  15. {masster-0.3.16 → masster-0.3.17}/README.md +0 -0
  16. {masster-0.3.16 → masster-0.3.17}/TESTING.md +0 -0
  17. {masster-0.3.16 → masster-0.3.17}/demo/example_batch_process.py +0 -0
  18. {masster-0.3.16 → masster-0.3.17}/demo/example_sample_process.py +0 -0
  19. {masster-0.3.16 → masster-0.3.17}/src/masster/__init__.py +0 -0
  20. {masster-0.3.16 → masster-0.3.17}/src/masster/chromatogram.py +0 -0
  21. {masster-0.3.16 → masster-0.3.17}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +0 -0
  22. {masster-0.3.16 → masster-0.3.17}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  23. {masster-0.3.16 → masster-0.3.17}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  24. {masster-0.3.16 → masster-0.3.17}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  25. {masster-0.3.16 → masster-0.3.17}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  26. {masster-0.3.16 → masster-0.3.17}/src/masster/logger.py +0 -0
  27. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/__init__.py +0 -0
  28. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/defaults/__init__.py +0 -0
  29. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/defaults/find_adducts_def.py +0 -0
  30. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/defaults/find_features_def.py +0 -0
  31. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/defaults/find_ms2_def.py +0 -0
  32. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
  33. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/defaults/sample_def.py +0 -0
  34. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/h5.py +0 -0
  35. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/helpers.py +0 -0
  36. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/lib.py +0 -0
  37. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/load.py +0 -0
  38. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/parameters.py +0 -0
  39. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/plot.py +0 -0
  40. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/processing.py +0 -0
  41. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/quant.py +0 -0
  42. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/sample.py +0 -0
  43. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/sample5_schema.json +0 -0
  44. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/save.py +0 -0
  45. {masster-0.3.16 → masster-0.3.17}/src/masster/sample/sciex.py +0 -0
  46. {masster-0.3.16 → masster-0.3.17}/src/masster/spectrum.py +0 -0
  47. {masster-0.3.16 → masster-0.3.17}/src/masster/study/__init__.py +0 -0
  48. {masster-0.3.16 → masster-0.3.17}/src/masster/study/defaults/__init__.py +0 -0
  49. {masster-0.3.16 → masster-0.3.17}/src/masster/study/defaults/export_def.py +0 -0
  50. {masster-0.3.16 → masster-0.3.17}/src/masster/study/defaults/fill_chrom_def.py +0 -0
  51. {masster-0.3.16 → masster-0.3.17}/src/masster/study/defaults/fill_def.py +0 -0
  52. {masster-0.3.16 → masster-0.3.17}/src/masster/study/defaults/find_consensus_def.py +0 -0
  53. {masster-0.3.16 → masster-0.3.17}/src/masster/study/defaults/find_ms2_def.py +0 -0
  54. {masster-0.3.16 → masster-0.3.17}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
  55. {masster-0.3.16 → masster-0.3.17}/src/masster/study/defaults/integrate_def.py +0 -0
  56. {masster-0.3.16 → masster-0.3.17}/src/masster/study/defaults/merge_def.py +0 -0
  57. {masster-0.3.16 → masster-0.3.17}/src/masster/study/defaults/study_def.py +0 -0
  58. {masster-0.3.16 → masster-0.3.17}/src/masster/study/export.py +0 -0
  59. {masster-0.3.16 → masster-0.3.17}/src/masster/study/h5.py +0 -0
  60. {masster-0.3.16 → masster-0.3.17}/src/masster/study/helpers.py +0 -0
  61. {masster-0.3.16 → masster-0.3.17}/src/masster/study/helpers_optimized.py +0 -0
  62. {masster-0.3.16 → masster-0.3.17}/src/masster/study/parameters.py +0 -0
  63. {masster-0.3.16 → masster-0.3.17}/src/masster/study/plot.py +0 -0
  64. {masster-0.3.16 → masster-0.3.17}/src/masster/study/save.py +0 -0
  65. {masster-0.3.16 → masster-0.3.17}/src/masster/study/study.py +0 -0
  66. {masster-0.3.16 → masster-0.3.17}/src/masster/study/study5_schema.json +0 -0
  67. {masster-0.3.16 → masster-0.3.17}/tests/conftest.py +0 -0
  68. {masster-0.3.16 → masster-0.3.17}/tests/test_chromatogram.py +0 -0
  69. {masster-0.3.16 → masster-0.3.17}/tests/test_defaults.py +0 -0
  70. {masster-0.3.16 → masster-0.3.17}/tests/test_imports.py +0 -0
  71. {masster-0.3.16 → masster-0.3.17}/tests/test_integration.py +0 -0
  72. {masster-0.3.16 → masster-0.3.17}/tests/test_logger.py +0 -0
  73. {masster-0.3.16 → masster-0.3.17}/tests/test_parameters.py +0 -0
  74. {masster-0.3.16 → masster-0.3.17}/tests/test_sample.py +0 -0
  75. {masster-0.3.16 → masster-0.3.17}/tests/test_spectrum.py +0 -0
  76. {masster-0.3.16 → masster-0.3.17}/tests/test_study.py +0 -0
  77. {masster-0.3.16 → masster-0.3.17}/tests/test_version.py +0 -0
  78. {masster-0.3.16 → masster-0.3.17}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: masster
3
- Version: 0.3.16
3
+ Version: 0.3.17
4
4
  Summary: Mass spectrometry data analysis package
5
5
  Project-URL: homepage, https://github.com/zamboni-lab/masster
6
6
  Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -1,7 +1,7 @@
1
1
 
2
2
  [project]
3
3
  name = "masster"
4
- version = "0.3.16"
4
+ version = "0.3.17"
5
5
  description = "Mass spectrometry data analysis package"
6
6
  authors = [
7
7
  { name = "Zamboni Lab" }
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
 
4
- __version__ = "0.3.16"
4
+ __version__ = "0.3.17"
5
5
 
6
6
 
7
7
  def get_version():
@@ -24,6 +24,7 @@ class align_defaults:
24
24
  skip_blanks (bool): Whether to skip blank samples. Default is False.
25
25
 
26
26
  KD algorithm specific parameters:
27
+ min_samples (int): Minimum number of samples required for KD alignment. Default is 3.
27
28
  nr_partitions (int): Number of partitions in m/z dimension. Default is 100.
28
29
  warp_enabled (bool): Enable non-linear retention time transformation. Default is True.
29
30
  warp_rt_tol (float): RT tolerance for the LOWESS fit. Default is 5.0.
@@ -59,6 +60,7 @@ class align_defaults:
59
60
  algo: str = "pc"
60
61
 
61
62
  # KD algorithm specific parameters
63
+ min_samples: int = 3
62
64
  nr_partitions: int = 100
63
65
  warp_enabled: bool = True
64
66
  warp_rt_tol: float = 5.0
@@ -137,6 +139,13 @@ class align_defaults:
137
139
  "allowed_values": ["pc", "kd"],
138
140
  },
139
141
  # KD algorithm specific parameters
142
+ "min_samples": {
143
+ "dtype": int,
144
+ "description": "Minimum number of samples required for KD alignment algorithm",
145
+ "default": 3,
146
+ "min_value": 2,
147
+ "max_value": 1000,
148
+ },
140
149
  "nr_partitions": {
141
150
  "dtype": int,
142
151
  "description": "Number of partitions in m/z dimension for KD algorithm",
@@ -961,51 +961,96 @@ def _get_missing_consensus_sample_combinations(self, uids):
961
961
  """
962
962
  Efficiently identify which consensus_uid/sample combinations are missing.
963
963
  Returns a list of tuples: (consensus_uid, sample_uid, sample_name, sample_path)
964
+
965
+ Optimized for common scenarios:
966
+ - Early termination for fully-filled studies
967
+ - Efficient dictionary lookups instead of expensive DataFrame joins
968
+ - Smart handling of sparse vs dense missing data patterns
964
969
  """
965
- # Get all consensus UIDs we're interested in
966
- consensus_uids_set = set(uids)
967
-
968
- # Get all sample UIDs and create lookup
969
- all_sample_info = {}
970
- for row in self.samples_df.select([
971
- "sample_uid",
972
- "sample_name",
973
- "sample_path",
974
- ]).iter_rows(named=True):
975
- all_sample_info[row["sample_uid"]] = {
976
- "sample_name": row["sample_name"],
977
- "sample_path": row["sample_path"],
978
- }
979
-
980
- # Get existing consensus/sample combinations from consensus_mapping_df
981
- existing_combinations = set()
982
- consensus_mapping_filtered = self.consensus_mapping_df.filter(
983
- pl.col("consensus_uid").is_in(list(consensus_uids_set)),
984
- )
985
-
986
- # Join with features_df to get sample_uid information
987
- existing_features = consensus_mapping_filtered.join(
988
- self.features_df.select(["feature_uid", "sample_uid"]),
989
- on="feature_uid",
990
- how="inner",
970
+ if not uids:
971
+ return []
972
+
973
+ n_consensus = len(uids)
974
+ n_samples = len(self.samples_df)
975
+ total_possible = n_consensus * n_samples
976
+
977
+ # Quick early termination check for fully/nearly filled studies
978
+ # This handles the common case where fill() is run on an already-filled study
979
+ consensus_counts = (
980
+ self.consensus_mapping_df
981
+ .filter(pl.col("consensus_uid").is_in(uids))
982
+ .group_by("consensus_uid")
983
+ .agg(pl.count("feature_uid").alias("count"))
991
984
  )
992
-
993
- for row in existing_features.select(["consensus_uid", "sample_uid"]).iter_rows():
994
- existing_combinations.add((row[0], row[1])) # (consensus_uid, sample_uid)
995
-
996
- # Find missing combinations
997
- missing_combinations = []
998
- for consensus_uid in consensus_uids_set:
999
- for sample_uid, sample_info in all_sample_info.items():
1000
- if (consensus_uid, sample_uid) not in existing_combinations:
1001
- missing_combinations.append((
1002
- consensus_uid,
1003
- sample_uid,
1004
- sample_info["sample_name"],
1005
- sample_info["sample_path"],
1006
- ))
1007
-
1008
- return missing_combinations
985
+
986
+ total_existing = consensus_counts["count"].sum() if not consensus_counts.is_empty() else 0
987
+
988
+ # If >95% filled, likely no gaps (common case)
989
+ if total_existing >= total_possible * 0.95:
990
+ self.logger.debug(f"Study appears {total_existing/total_possible*100:.1f}% filled, using sparse optimization")
991
+
992
+ # For sparse missing data, check each consensus feature individually
993
+ missing_combinations = []
994
+ uids_set = set(uids)
995
+
996
+ # Build efficient lookups
997
+ feature_to_sample = dict(
998
+ self.features_df.select(["feature_uid", "sample_uid"]).iter_rows()
999
+ )
1000
+
1001
+ # Get existing combinations for target UIDs only
1002
+ existing_by_consensus = {}
1003
+ for consensus_uid, feature_uid in self.consensus_mapping_df.select(["consensus_uid", "feature_uid"]).iter_rows():
1004
+ if consensus_uid in uids_set and feature_uid in feature_to_sample:
1005
+ if consensus_uid not in existing_by_consensus:
1006
+ existing_by_consensus[consensus_uid] = set()
1007
+ existing_by_consensus[consensus_uid].add(feature_to_sample[feature_uid])
1008
+
1009
+ # Get sample info once
1010
+ all_samples = list(
1011
+ self.samples_df.select(["sample_uid", "sample_name", "sample_path"]).iter_rows()
1012
+ )
1013
+
1014
+ # Check for missing combinations
1015
+ for consensus_uid in uids:
1016
+ existing_samples = existing_by_consensus.get(consensus_uid, set())
1017
+ for sample_uid, sample_name, sample_path in all_samples:
1018
+ if sample_uid not in existing_samples:
1019
+ missing_combinations.append((consensus_uid, sample_uid, sample_name, sample_path))
1020
+
1021
+ return missing_combinations
1022
+
1023
+ else:
1024
+ # For studies with many gaps, use bulk operations
1025
+ self.logger.debug(f"Study {total_existing/total_possible*100:.1f}% filled, using bulk optimization")
1026
+
1027
+ # Build efficient lookups
1028
+ uids_set = set(uids)
1029
+ feature_to_sample = dict(
1030
+ self.features_df.select(["feature_uid", "sample_uid"]).iter_rows()
1031
+ )
1032
+
1033
+ # Build existing combinations set
1034
+ existing_combinations = {
1035
+ (consensus_uid, feature_to_sample[feature_uid])
1036
+ for consensus_uid, feature_uid in self.consensus_mapping_df.select(["consensus_uid", "feature_uid"]).iter_rows()
1037
+ if consensus_uid in uids_set and feature_uid in feature_to_sample
1038
+ }
1039
+
1040
+ # Get all sample info
1041
+ all_samples = list(
1042
+ self.samples_df.select(["sample_uid", "sample_name", "sample_path"]).iter_rows()
1043
+ )
1044
+
1045
+ # Generate all missing combinations
1046
+ missing_combinations = [
1047
+ (consensus_uid, sample_uid, sample_name, sample_path)
1048
+ for consensus_uid in uids
1049
+ for sample_uid, sample_name, sample_path in all_samples
1050
+ if (consensus_uid, sample_uid) not in existing_combinations
1051
+ ]
1052
+
1053
+ return missing_combinations
1009
1054
 
1010
1055
 
1011
1056
  def sanitize(self):
@@ -33,6 +33,7 @@ def align(self, **kwargs):
33
33
  - algo (str): Alignment algorithm ('pc' for PoseClustering, 'kd' for KD).
34
34
 
35
35
  KD algorithm specific parameters:
36
+ - min_samples (int): Minimum number of samples required for KD alignment.
36
37
  - nr_partitions (int): Number of partitions in m/z dimension.
37
38
  - warp_enabled (bool): Enable non-linear retention time transformation.
38
39
  - warp_rt_tol (float): RT tolerance for the LOWESS fit.
@@ -87,131 +88,17 @@ def align(self, **kwargs):
87
88
 
88
89
  fmaps = self.features_maps
89
90
 
90
- # Initialize OpenMS parameters
91
- params_oms = oms.Param()
92
- # Choose alignment algorithm based on parameter
91
+ # Choose alignment algorithm
93
92
  algo = params.get("algo").lower()
94
-
95
- # Set common parameters for both algorithms
96
- if algo == "pc":
97
- # Parameters specific to PoseClustering
98
- params_oms.setValue("pairfinder:distance_intensity:log_transform", "disabled")
99
- params_oms.setValue("pairfinder:ignore_charge", "true")
100
- params_oms.setValue("max_num_peaks_considered", 1000)
101
- params_oms.setValue("pairfinder:distance_RT:max_difference", params.get("rt_max_diff"))
102
- params_oms.setValue("pairfinder:distance_MZ:max_difference", params.get("mz_max_diff"))
103
- params_oms.setValue("superimposer:rt_pair_distance_fraction", params.get("rt_pair_distance_frac"))
104
- params_oms.setValue("superimposer:mz_pair_max_distance", params.get("mz_pair_max_distance"))
105
- params_oms.setValue("superimposer:num_used_points", params.get("num_used_points"))
106
- params_oms.setValue("pairfinder:distance_MZ:exponent", 3.0)
107
- params_oms.setValue("pairfinder:distance_RT:exponent", 2.0)
108
-
109
- """
110
- {b'max_num_peaks_considered': 1000,
111
- b'superimposer:mz_pair_max_distance': 0.5,
112
- b'superimposer:rt_pair_distance_fraction': 0.1,
113
- b'superimposer:num_used_points': 2000,
114
- b'superimposer:scaling_bucket_size': 0.005,
115
- b'superimposer:shift_bucket_size': 3.0,
116
- b'superimposer:max_shift': 1000.0,
117
- b'superimposer:max_scaling': 2.0,
118
- b'superimposer:dump_buckets': '',
119
- b'superimposer:dump_pairs': '',
120
- b'pairfinder:second_nearest_gap': 2.0,
121
- b'pairfinder:use_identifications': 'false',
122
- b'pairfinder:ignore_charge': 'false',
123
- b'pairfinder:ignore_adduct': 'true',
124
- b'pairfinder:distance_RT:max_difference': 100.0,
125
- b'pairfinder:distance_RT:exponent': 1.0,
126
- b'pairfinder:distance_RT:weight': 1.0,
127
- b'pairfinder:distance_MZ:max_difference': 0.3,
128
- b'pairfinder:distance_MZ:unit': 'Da',
129
- b'pairfinder:distance_MZ:exponent': 2.0,
130
- b'pairfinder:distance_MZ:weight': 1.0,
131
- b'pairfinder:distance_intensity:exponent': 1.0,
132
- b'pairfinder:distance_intensity:weight': 0.0,
133
- b'pairfinder:distance_intensity:log_transform': 'disabled'}
134
- """
135
- elif algo == "kd":
136
- # Parameters specific to KD algorithm
137
- params_oms.setValue("mz_unit", "Da")
138
- params_oms.setValue("nr_partitions", params.get("nr_partitions"))
139
-
140
- # Warp parameters for non-linear RT transformation
141
- params_oms.setValue("warp:enabled", "true" if params.get("warp_enabled") else "false")
142
- params_oms.setValue("warp:rt_tol", params.get("warp_rt_tol"))
143
- params_oms.setValue("warp:mz_tol", params.get("warp_mz_tol"))
144
- params_oms.setValue("warp:max_pairwise_log_fc", params.get("warp_max_pairwise_log_fc"))
145
- params_oms.setValue("warp:min_rel_cc_size", params.get("warp_min_rel_cc_size"))
146
- params_oms.setValue("warp:max_nr_conflicts", params.get("warp_max_nr_conflicts"))
147
-
148
- # Link parameters
149
- params_oms.setValue("link:rt_tol", params.get("link_rt_tol"))
150
- params_oms.setValue("link:mz_tol", params.get("link_mz_tol"))
151
- params_oms.setValue("link:charge_merging", params.get("link_charge_merging"))
152
- params_oms.setValue("link:adduct_merging", params.get("link_adduct_merging"))
153
-
154
- # Distance parameters
155
- params_oms.setValue("distance_RT:exponent", params.get("distance_RT_exponent"))
156
- params_oms.setValue("distance_RT:weight", params.get("distance_RT_weight"))
157
- params_oms.setValue("distance_MZ:exponent", params.get("distance_MZ_exponent"))
158
- params_oms.setValue("distance_MZ:weight", params.get("distance_MZ_weight"))
159
- params_oms.setValue("distance_intensity:exponent", params.get("distance_intensity_exponent"))
160
- params_oms.setValue("distance_intensity:weight", params.get("distance_intensity_weight"))
161
- params_oms.setValue("distance_intensity:log_transform", params.get("distance_intensity_log_transform"))
162
-
163
- # LOWESS parameters
164
- params_oms.setValue("LOWESS:span", params.get("LOWESS_span"))
165
- params_oms.setValue("LOWESS:num_iterations", params.get("LOWESS_num_iterations"))
166
- params_oms.setValue("LOWESS:delta", params.get("LOWESS_delta"))
167
- params_oms.setValue("LOWESS:interpolation_type", params.get("LOWESS_interpolation_type"))
168
- params_oms.setValue("LOWESS:extrapolation_type", params.get("LOWESS_extrapolation_type"))
169
-
93
+
170
94
  if algo == "pc":
171
- aligner = oms.MapAlignmentAlgorithmPoseClustering()
172
- self.logger.info("Starting alignment with PoseClustering")
173
- # set ref_index to feature map index with largest number of features
174
- ref_index = [i[0] for i in sorted(enumerate([fm.size() for fm in fmaps]), key=lambda x: x[1])][-1]
175
- self.logger.debug(
176
- f"Reference map is {self.samples_df.row(ref_index, named=True)['sample_name']}",
177
- )
178
- aligner.setParameters(params_oms)
179
- aligner.setReference(fmaps[ref_index])
180
- self.logger.debug(f"Parameters for alignment: {params}")
181
- # perform alignment and transformation of feature maps to the reference map (exclude reference map)
182
- tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
183
- for index, fm in tqdm(
184
- list(enumerate(fmaps)),
185
- total=len(fmaps),
186
- desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Align feature maps",
187
- disable=tdqm_disable,
188
- ):
189
- if index == ref_index:
190
- continue
191
- if params.get("skip_blanks") and self.samples_df.row(index, named=True)["sample_type"] == "blank":
192
- continue
193
- trafo = oms.TransformationDescription()
194
- aligner.align(fm, trafo)
195
- transformer = oms.MapAlignmentTransformer()
196
- transformer.transformRetentionTimes(fm, trafo, True)
197
-
198
- self.alignment_ref_index = ref_index
199
-
95
+ _align_pose_clustering(self, fmaps, params)
96
+
200
97
  elif algo == "kd":
201
- # KD algorithm requires num_maps and Param parameters
202
- num_maps = len(fmaps)
203
- aligner = oms.MapAlignmentAlgorithmKD(3, params_oms)
204
- self.logger.info(f"Starting alignment with KD algorithm using {num_maps} maps")
205
-
206
- kdtree = oms.KDTreeFeatureMaps()
207
- kdtree.addMaps(fmaps) # Add all feature maps to the KDTree
208
- # kdtree.optimizeTree()
209
- aligner.addRTFitData(kdtree)
210
- aligner.fitLOWESS()
211
- aligner.transform(kdtree)
212
-
98
+ _align_kd_algorithm(self, fmaps, params)
213
99
  else:
214
100
  self.logger.error(f"Unknown alignment algorithm '{algo}'")
101
+ self.logger.error(f"Unknown alignment algorithm '{algo}'")
215
102
 
216
103
  # check if rt_original exists in features_df, if not, add it after rt
217
104
  if "rt_original" not in self.features_df.columns:
@@ -1163,3 +1050,145 @@ def _find_closest_valley(chrom, rt, dir="left", threshold=0.9):
1163
1050
  else:
1164
1051
  break
1165
1052
  return chrom.rt[idx]
1053
+
1054
+
1055
+ def _align_pose_clustering(study_obj, fmaps, params):
1056
+ """Perform alignment using PoseClustering algorithm."""
1057
+ import pyopenms as oms
1058
+ from tqdm import tqdm
1059
+ from datetime import datetime
1060
+
1061
+ # Create PC-specific OpenMS parameters
1062
+ params_oms = oms.Param()
1063
+ params_oms.setValue("pairfinder:distance_intensity:log_transform", "disabled")
1064
+ params_oms.setValue("pairfinder:ignore_charge", "true")
1065
+ params_oms.setValue("max_num_peaks_considered", 1000)
1066
+ params_oms.setValue("pairfinder:distance_RT:max_difference", params.get("rt_max_diff"))
1067
+ params_oms.setValue("pairfinder:distance_MZ:max_difference", params.get("mz_max_diff"))
1068
+ params_oms.setValue("superimposer:rt_pair_distance_fraction", params.get("rt_pair_distance_frac"))
1069
+ params_oms.setValue("superimposer:mz_pair_max_distance", params.get("mz_pair_max_distance"))
1070
+ params_oms.setValue("superimposer:num_used_points", params.get("num_used_points"))
1071
+ params_oms.setValue("pairfinder:distance_MZ:exponent", 3.0)
1072
+ params_oms.setValue("pairfinder:distance_RT:exponent", 2.0)
1073
+
1074
+ aligner = oms.MapAlignmentAlgorithmPoseClustering()
1075
+ study_obj.logger.info("Starting alignment with PoseClustering")
1076
+
1077
+ # Set ref_index to feature map index with largest number of features
1078
+ ref_index = [i[0] for i in sorted(enumerate([fm.size() for fm in fmaps]), key=lambda x: x[1])][-1]
1079
+ study_obj.logger.debug(
1080
+ f"Reference map is {study_obj.samples_df.row(ref_index, named=True)['sample_name']}",
1081
+ )
1082
+
1083
+ aligner.setParameters(params_oms)
1084
+ aligner.setReference(fmaps[ref_index])
1085
+ study_obj.logger.debug(f"Parameters for alignment: {params}")
1086
+
1087
+ # Perform alignment and transformation of feature maps to the reference map (exclude reference map)
1088
+ tdqm_disable = study_obj.log_level not in ["TRACE", "DEBUG", "INFO"]
1089
+ for index, fm in tqdm(
1090
+ list(enumerate(fmaps)),
1091
+ total=len(fmaps),
1092
+ desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {study_obj.log_label}Align feature maps",
1093
+ disable=tdqm_disable,
1094
+ ):
1095
+ if index == ref_index:
1096
+ continue
1097
+ if params.get("skip_blanks") and study_obj.samples_df.row(index, named=True)["sample_type"] == "blank":
1098
+ continue
1099
+ trafo = oms.TransformationDescription()
1100
+ aligner.align(fm, trafo)
1101
+ transformer = oms.MapAlignmentTransformer()
1102
+ transformer.transformRetentionTimes(fm, trafo, True)
1103
+
1104
+ study_obj.alignment_ref_index = ref_index
1105
+
1106
+
1107
+ def _align_kd_algorithm(study_obj, fmaps, params):
1108
+ """Perform alignment using KD algorithm."""
1109
+ import pyopenms as oms
1110
+
1111
+ num_maps = len(fmaps)
1112
+ study_obj.logger.info(f"Starting alignment with KD algorithm using {num_maps} maps")
1113
+
1114
+ try:
1115
+ # Use the EXACT approach from test_oms.py that works
1116
+ # First parameter is DIMENSIONS (3), not min_samples!
1117
+ study_obj.logger.debug("Creating MapAlignmentAlgorithmKD with 3 dimensions and empty parameters...")
1118
+ empty_params = oms.Param() # Empty params - this is what worked in test_oms.py!
1119
+ aligner = oms.MapAlignmentAlgorithmKD(3, empty_params) # 3 = dimensions, not min_samples
1120
+ study_obj.logger.debug("Created MapAlignmentAlgorithmKD successfully")
1121
+
1122
+ # Create KD-tree structure
1123
+ kdtree = oms.KDTreeFeatureMaps()
1124
+
1125
+ # Set all required warping parameters based on OpenMS requirements
1126
+ kd_params = oms.Param()
1127
+ # Core warp parameters that OpenMS expects
1128
+ kd_params.setValue(b"warp:min_rel_cc_size", 0.2, b"Minimum relative connected component size")
1129
+ kd_params.setValue(b"warp:max_ratio_small_big", 0.5, b"Maximum ratio of small to big connected component")
1130
+ kd_params.setValue(b"warp:min_score", 0.3, b"Minimum score for warping")
1131
+ kd_params.setValue(b"warp:rt_tol", 5.0, b"RT tolerance for feature matching")
1132
+ kd_params.setValue(b"warp:mz_tol", 0.015, b"m/z tolerance for feature matching")
1133
+ # Additional potentially required parameters
1134
+ kd_params.setValue(b"warp:max_shift", 30.0, b"Maximum RT shift allowed")
1135
+ kd_params.setValue(b"warp:bins", 100, b"Number of bins for warping")
1136
+ kdtree.setParameters(kd_params)
1137
+
1138
+ # Add all feature maps to KD-tree (NO limiting - this worked with 38k features!)
1139
+ study_obj.logger.debug("Adding maps to KD-tree structure...")
1140
+ kdtree.addMaps(fmaps)
1141
+ study_obj.logger.debug("Successfully added maps to KD-tree")
1142
+
1143
+ # Add RT fitting data (this is where the magic happens)
1144
+ study_obj.logger.debug("Adding RT fitting data to aligner...")
1145
+ aligner.addRTFitData(kdtree)
1146
+ study_obj.logger.debug("Successfully added RT fitting data")
1147
+
1148
+ # Perform LOWESS fitting
1149
+ study_obj.logger.debug("Performing LOWESS fitting...")
1150
+ aligner.fitLOWESS()
1151
+ study_obj.logger.debug("Successfully completed LOWESS fitting")
1152
+
1153
+ # Apply transformations to feature maps
1154
+ study_obj.logger.debug("Applying transformations to feature maps...")
1155
+ for i, fmap in enumerate(fmaps):
1156
+ trafo = oms.TransformationDescription()
1157
+ aligner.getTransformation(i, trafo)
1158
+ oms.MapAlignmentTransformer.transformRetentionTimes(fmap, trafo, True)
1159
+
1160
+ study_obj.logger.info("KD alignment completed successfully")
1161
+
1162
+ except Exception as e:
1163
+ study_obj.logger.error(f"KD alignment failed with error: {e}")
1164
+ study_obj.logger.info("Falling back to PoseClustering alignment...")
1165
+
1166
+ # Fallback to pose clustering with basic parameters
1167
+ _align_pose_clustering_fallback(study_obj, fmaps, params)
1168
+
1169
+
1170
+ def _align_pose_clustering_fallback(study_obj, fmaps, params):
1171
+ """Fallback PoseClustering alignment with minimal parameters."""
1172
+ import pyopenms as oms
1173
+
1174
+ aligner = oms.MapAlignmentAlgorithmPoseClustering()
1175
+ ref_index = [i[0] for i in sorted(enumerate([fm.size() for fm in fmaps]), key=lambda x: x[1])][-1]
1176
+
1177
+ # Set up basic parameters for pose clustering
1178
+ pc_params = oms.Param()
1179
+ pc_params.setValue("max_num_peaks_considered", 1000)
1180
+ pc_params.setValue("pairfinder:distance_RT:max_difference", params.get("rt_max_diff"))
1181
+ pc_params.setValue("pairfinder:distance_MZ:max_difference", params.get("mz_max_diff"))
1182
+
1183
+ aligner.setParameters(pc_params)
1184
+ aligner.setReference(fmaps[ref_index])
1185
+
1186
+ for index, fm in enumerate(fmaps):
1187
+ if index == ref_index:
1188
+ continue
1189
+ trafo = oms.TransformationDescription()
1190
+ aligner.align(fm, trafo)
1191
+ transformer = oms.MapAlignmentTransformer()
1192
+ transformer.transformRetentionTimes(fm, trafo, True)
1193
+
1194
+ study_obj.alignment_ref_index = ref_index
@@ -1372,7 +1372,7 @@ wheels = [
1372
1372
 
1373
1373
  [[package]]
1374
1374
  name = "masster"
1375
- version = "0.3.16"
1375
+ version = "0.3.17"
1376
1376
  source = { editable = "." }
1377
1377
  dependencies = [
1378
1378
  { name = "alphabase" },
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes