MsTargetPeaker 0.3.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. mstargetpeaker-0.3.5/LICENSE +21 -0
  2. mstargetpeaker-0.3.5/MsTargetPeaker/MsTargetPeaker.cfg +68 -0
  3. mstargetpeaker-0.3.5/MsTargetPeaker/MsTargetPeaker.py +225 -0
  4. mstargetpeaker-0.3.5/MsTargetPeaker/MsTargetPeakerEnv.py +705 -0
  5. mstargetpeaker-0.3.5/MsTargetPeaker/MsTargetReporter.py +36 -0
  6. mstargetpeaker-0.3.5/MsTargetPeaker/__init__.py +7 -0
  7. mstargetpeaker-0.3.5/MsTargetPeaker/mcts/MCTS_DPW.py +178 -0
  8. mstargetpeaker-0.3.5/MsTargetPeaker/mcts/__init__.py +1 -0
  9. mstargetpeaker-0.3.5/MsTargetPeaker/policy/MsTargetPeaker_env66_40-2.state_dict.pth +0 -0
  10. mstargetpeaker-0.3.5/MsTargetPeaker/tmsqe/ChromatogramDB.py +321 -0
  11. mstargetpeaker-0.3.5/MsTargetPeaker/tmsqe/PeakQualityEval.py +442 -0
  12. mstargetpeaker-0.3.5/MsTargetPeaker/tmsqe/PeakQualityReport.py +1142 -0
  13. mstargetpeaker-0.3.5/MsTargetPeaker/tmsqe/QualityEncoder.py +165 -0
  14. mstargetpeaker-0.3.5/MsTargetPeaker/tmsqe/__init__.py +4 -0
  15. mstargetpeaker-0.3.5/MsTargetPeaker/tmsqe/encoder/quality_encoder_type1.pickle +0 -0
  16. mstargetpeaker-0.3.5/MsTargetPeaker/tmsqe/encoder/quality_encoder_type2.pickle +0 -0
  17. mstargetpeaker-0.3.5/MsTargetPeaker/tmsqe/encoder/quality_encoder_type3.pickle +0 -0
  18. mstargetpeaker-0.3.5/MsTargetPeaker/utils/__init__.py +3 -0
  19. mstargetpeaker-0.3.5/MsTargetPeaker/utils/check_chrom_tsv.py +56 -0
  20. mstargetpeaker-0.3.5/MsTargetPeaker/utils/merge_peak_csv.py +76 -0
  21. mstargetpeaker-0.3.5/MsTargetPeaker/utils/split_chrom_tsv.py +34 -0
  22. mstargetpeaker-0.3.5/MsTargetPeaker.egg-info/PKG-INFO +172 -0
  23. mstargetpeaker-0.3.5/MsTargetPeaker.egg-info/SOURCES.txt +29 -0
  24. mstargetpeaker-0.3.5/MsTargetPeaker.egg-info/dependency_links.txt +1 -0
  25. mstargetpeaker-0.3.5/MsTargetPeaker.egg-info/entry_points.txt +6 -0
  26. mstargetpeaker-0.3.5/MsTargetPeaker.egg-info/requires.txt +8 -0
  27. mstargetpeaker-0.3.5/MsTargetPeaker.egg-info/top_level.txt +1 -0
  28. mstargetpeaker-0.3.5/PKG-INFO +172 -0
  29. mstargetpeaker-0.3.5/README.md +155 -0
  30. mstargetpeaker-0.3.5/pyproject.toml +34 -0
  31. mstargetpeaker-0.3.5/setup.cfg +4 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 MsTargetPeaker
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,68 @@
1
+ [DEFAULT]
2
+ Speed = SuperFast
3
+ SearchMode = MRM
4
+ ParallelProcessNumber = 8
5
+ InternalStandardType = heavy
6
+ MaxTimeStep = 5
7
+ PolicyPath = None
8
+ Device = auto
9
+
10
+ [MCTSParam]
11
+ MaxStep = 5
12
+ EvalMode = 'policy'
13
+ Alpha = 0.4
14
+ Beta = 0.25
15
+ K = 2**0.5
16
+
17
+ [SearchParam.MRM]
18
+ UseWeightedPBAR = 1
19
+ Threshold = [ 100, 8, 6, 4, 2, 1, 0.0125]
20
+ SelectionNoise = [ 0.1, 0.3, 0.5, 1.0, -1.0, -1.0, -1.0]
21
+ PBARHeavyWeight = [ 0.67, 0.67, 0.67, 0.67, 1.0, 1.0, 1.0]
22
+ PairRatioFactor = [ 0.5, 0.5, 0.5, 0.5, 0.2, 0.2, 0.2]
23
+ TopNIon = [ 1, 1, 1, 1, 3, 3, 3]
24
+ IntensityPower = [ 1, 1, 1, 1, 6, 8, 8]
25
+ QualityPower = [ 2, 2, 2, 2, 2, 1, 1]
26
+ UseConsensus = [ 0, 0, 0, 1, 1, 1, 0]
27
+ ConsensusThresholds = [ 0, 0, 0, 4, 2, 0.1, 0.1]
28
+ UseRefIfNoConsensus = [ 0, 0, 0, 0, 1, 1, 1]
29
+ Overwrite = [ 0, 0, 0, 1, 1, 1, 0]
30
+
31
+
32
+ [SearchParam.PRM]
33
+ UseWeightedPBAR = 0
34
+ Threshold = [ 100, 8, 6, 4, 3, 2, 1]
35
+ SelectionNoise = [ 0.1, 0.3, 0.5, 1.0, -1.0, -1.0, -1.0]
36
+ PBARHeavyWeight = [ 0.67, 0.67, 0.67, 0.67, 1.0, 1.0, 1.0]
37
+ PairRatioFactor = [ 0.5, 0.5, 0.5, 0.5, 0.2, 0.2, 0.2]
38
+ TopNIon = [ 1, 1, 1, 1, 3, 3, 3]
39
+ IntensityPower = [ 1, 1, 1, 1, 4, 6, 6]
40
+ QualityPower = [ 2, 2, 2, 2, 2, 2, 2]
41
+ UseConsensus = [ 0, 0, 0, 1, 1, 1, 1]
42
+ ConsensusThresholds = [ 0, 0, 0, 2, 1, 0, 0]
43
+ UseRefIfNoConsensus = [ 0, 0, 0, 0, 1, 1, 1]
44
+ Overwrite = [ 0, 0, 0, 1, 1, 1, 0]
45
+
46
+
47
+ # Speed settings for MCTS cycles. You can increase search rounds by adding more cycle numbers in the list.
48
+ # Or, create a new set of the speed settings with a custom name.
49
+
50
+ # 95% reduction of MCTS cycles
51
+ [Speed.UltraFast]
52
+ MCTSCycle = [15, 25, 50, 50, 50, 50, 50]
53
+
54
+ # 90% reduction of MCTS cycles
55
+ [Speed.SuperFast]
56
+ MCTSCycle = [30, 50, 100, 100, 100, 100, 100]
57
+
58
+ # 80% reduction of MCTS cycles
59
+ [Speed.Faster]
60
+ MCTSCycle = [60, 100, 200, 200, 200, 200, 200]
61
+
62
+ # 50% reduction of MCTS cycles
63
+ [Speed.Fast]
64
+ MCTSCycle = [150, 250, 500, 500, 500, 500, 500]
65
+
66
+ # 0% reduction of MCTS cycles
67
+ [Speed.Standard]
68
+ MCTSCycle = [300, 500, 1000, 1000, 1000, 1000, 1000]
@@ -0,0 +1,225 @@
1
+ from argparse import ArgumentParser
2
+ import configparser
3
+ import json
4
+ from time import time
5
+ from os import environ as os_environ
6
+ from numpy import split as np_split, arange as np_arange, array as np_array, median as np_median, asarray as np_asarray, inf as np_inf
7
+ from pandas import DataFrame
8
+ from torch import multiprocessing as th_mp
9
+ from torch.cuda import is_available as th_cuda_is_available
10
+ from torch.backends import mps as torch_mps
11
+ from os.path import join as os_path_join, dirname as os_path_dirname
12
+ from MsTargetPeaker import MsTargetPeakerEnv
13
+ import MsTargetPeaker
14
+ os_environ["OMP_NUM_THREADS"] = "1"
15
+
16
+ def main():
17
+ th_mp.freeze_support()
18
+ parser = ArgumentParser()
19
+ parser.add_argument('--version', '-v', help='show the version of the tmasque package', action='version', version=MsTargetPeaker.__version__)
20
+ parser.add_argument("chromatogram_tsv", type=str, help="The chromatogram tsv file path")
21
+ parser.add_argument("output_peak_boundary_csv", type=str, help="The output peak_boundary csv file path")
22
+ parser.add_argument("--speed", '-s', type=str, default="SuperFast", help="The speed mode of SuperFast (10X), SuperFast (10X), Faster (5X), Fast (2X), or Standard (1X speed). (default: SuperFast).")
23
+ parser.add_argument("--mode", '-m', type=str, default="MRM", help="The search mode defined in the config file. Default search modes can be MRM or PRM. (default: MRM)")
24
+ parser.add_argument("--config", '-c', type=str, default=None, help="The file path for customized config file. If unset, use the default config. (default: None)")
25
+ parser.add_argument('--picked', type=str, default=None, help="The previously picked boundaries to continue picking. (default: None)")
26
+ parser.add_argument('--process_num', '-p', type=int, default=4, help="The parallel processing number to calculate quality feature values for all peak groups (default: 4)")
27
+ parser.add_argument('--prescreen', '-pre', type=int, default=50, help="Prescreen peaks for better peak boundaries as initial state. (default: 50)")
28
+ parser.add_argument('--internal_standard_type', '-r', type=str, default=None, choices=['heavy', 'light'], help="Set the internal standard reference to heavy or light ions. (default: heavy)")
29
+ parser.add_argument('--start_round', '-sr', type=int, default=1, help="Specify the start MCTS round set in the config file. This can be helpful when using continuous peak search. (default: 1)")
30
+ parser.add_argument('--end_round', '-er', type=int, default=7, help="Specify the end MCTS round set in the config file. This can be helpful when using continuous peak search. (default: 7)")
31
+ parser.add_argument('--device', '-d', type=str, default='cpu', help="Use cpu or cuda device for model peak picking. (default: cpu)")
32
+ args = parser.parse_args()
33
+ print('Input Parameters')
34
+ for arg in vars(args):
35
+ print(arg, getattr(args, arg))
36
+ print('#########################################################')
37
+ start = time()
38
+ config = configparser.ConfigParser()
39
+ if args.config:
40
+ config.read(args.config)
41
+ else:
42
+ config.read(os_path_join(os_path_dirname(__file__), 'MsTargetPeaker.cfg'))
43
+ default_conf = config['DEFAULT']
44
+ speed = default_conf['Speed']
45
+ if 'speed' in args or args.speed:
46
+ speed = args.speed
47
+ search_mode = default_conf['SearchMode']
48
+ if 'search' in args or args.mode:
49
+ search_mode = args.mode
50
+ device = default_conf['Device']
51
+ policy_path = None if default_conf['PolicyPath'] == 'None' else default_conf['PolicyPath']
52
+ process_num = float(default_conf['ParallelProcessNumber'])
53
+ internal_standard_type = default_conf['InternalStandardType']
54
+ max_step = int(default_conf['MaxTimeStep'])
55
+ mcts_param_conf = config['MCTSParam']
56
+ mcts_param = {
57
+ "alpha": float(mcts_param_conf['Alpha']),
58
+ "beta": float(mcts_param_conf['Beta']),
59
+ "K": eval(mcts_param_conf['K']),
60
+ "eval_mode": mcts_param_conf['EvalMode']
61
+ }
62
+ MCTS_cycles = json.loads(config[f'Speed.{speed}']['MCTSCycle'])
63
+
64
+ searchParams = config[f'SearchParam.{search_mode}']
65
+ use_weighted_PBAR = int(searchParams['UseWeightedPBAR'])
66
+ threshold_list = json.loads(searchParams['Threshold'])
67
+ selection_noise_list = json.loads(searchParams['SelectionNoise'])
68
+ pbar_heavy_weight = json.loads(searchParams['PBARHeavyWeight'])
69
+ pair_ratio_factor_list = json.loads(searchParams['PairRatioFactor'])
70
+ top_n_ion_list = json.loads(searchParams['TopNIon'])
71
+ intensity_power_list = json.loads(searchParams['IntensityPower'])
72
+ quality_power_list = json.loads(searchParams['QualityPower'])
73
+ use_consensus_list = json.loads(searchParams['UseConsensus'])
74
+ consensus_threshold_list = json.loads(searchParams['ConsensusThresholds'])
75
+ use_ref_if_no_consensus_list = json.loads(searchParams['UseRefIfNoConsensus'])
76
+ overwrite_list = json.loads(searchParams['Overwrite'])
77
+
78
+ output_peak_boundary_csv = args.output_peak_boundary_csv
79
+ th_mp.set_start_method('spawn')
80
+ start_idx = args.start_round - 1
81
+ end_idx = args.end_round - 1
82
+ if 'device' in args or args.device:
83
+ device = args.device
84
+ if device == 'auto':
85
+ device = 'cuda' if th_cuda_is_available() else 'cpu'
86
+
87
+ if 'process_num' in args or args.process_num:
88
+ process_num = args.process_num
89
+ if 'internal_standard_type' in args or args.internal_standard_type:
90
+ internal_standard_type = args.internal_standard_type
91
+
92
+ current_csv_file = args.picked
93
+ canStop = False
94
+ total_steps = len(MCTS_cycles)
95
+ if total_steps > len(threshold_list):
96
+ total_steps = len(threshold_list)
97
+ for step in range(total_steps):
98
+ if step < start_idx:
99
+ continue
100
+ if step > end_idx:
101
+ break
102
+ print(f"[Round {step+1}] Start.")
103
+ env = MsTargetPeakerEnv(args.chromatogram_tsv, policy_path=policy_path, picked_peak_csv=current_csv_file,
104
+ max_step=max_step, device=device,
105
+ internal_standard_type=internal_standard_type,
106
+ pair_ratio_factor= pair_ratio_factor_list[step],
107
+ pbar_heavy_weight = pbar_heavy_weight[step],
108
+ intensity_power=intensity_power_list[step],
109
+ quality_power=quality_power_list[step],
110
+ top_n_ion=top_n_ion_list[step],
111
+ use_weighted_pbar = True if use_weighted_PBAR == 1 else False,
112
+ use_kde=current_csv_file if use_consensus_list[step] == 1 else None,
113
+ threshold_for_kde=consensus_threshold_list[step],
114
+ use_ref_if_no_kde= True if use_ref_if_no_consensus_list[step] == 1 else False
115
+ )
116
+ all_samples, samples_to_process, need_repicking, pre_picked = env.get_sample_lists(threshold_list[step])
117
+ sample_batches = np_split(samples_to_process, np_arange(process_num, len(samples_to_process), process_num))
118
+ prescreen = args.prescreen
119
+ sample_batches = [(batch, prescreen) for batch in sample_batches]
120
+ print(f'Reading peak groups with {prescreen} pre-screens ...')
121
+ with th_mp.Pool(args.process_num) as pool:
122
+ chrom_list = pool.starmap(env.get_chrom_list_from_sample, sample_batches)
123
+ pool.close()
124
+ pool.join()
125
+ chrom_list = [ y for x in list(filter(lambda x: x is not None, chrom_list)) for y in x]
126
+ chrom_list = np_asarray(chrom_list, dtype='object')
127
+ print(f'[Round {step+1}] Number of peak groups (reward < {threshold_list[step]}) to be processed: {len(chrom_list)}/{len(all_samples)} ({round(100*len(chrom_list)/len(all_samples), 2) if len(all_samples) > 0 else 0.00}%)')
128
+ if overwrite_list[step]:
129
+ print(f'[Round {step+1}] Ignore pre-selected peaks.')
130
+ print(f"RUN MCTS-DPW with {mcts_param['eval_mode']} evaluation")
131
+ option = {'cycle': MCTS_cycles[step], 'alpha': mcts_param['alpha'], 'beta': mcts_param['beta'],
132
+ 'K': mcts_param['K'], 'eval_mode': mcts_param['eval_mode'], 'selection_noise': selection_noise_list[step]}
133
+ print(f'[Round {step+1} Option] {option}')
134
+ chrom_len = len(chrom_list)
135
+ arg_list = [(chrom, option, f"[Round {step+1} ({idx+1}/{chrom_len})]") for idx, chrom in enumerate(chrom_list)]
136
+ results = []
137
+ total_size = len(arg_list)
138
+ poolsize = total_size if process_num > total_size else process_num
139
+ print(f'Pool size: {poolsize}')
140
+ if poolsize > 0:
141
+ pool = th_mp.Pool(poolsize)
142
+ results = pool.starmap(env.run_mcts, arg_list)
143
+ pool.close()
144
+ pool.join()
145
+ print('Finished MCTS')
146
+ else:
147
+ canStop = True
148
+ #results = list(chain.from_iterable(results))
149
+ result_df = DataFrame(results)
150
+ if not result_df.empty:
151
+ result_df.columns = ['File Name', 'Peptide Modified Sequence', 'Min Start Time', 'Max End Time', 'Type1Reward', 'Type2Reward',
152
+ 'FinalReward', 'PBAR', 'PBARFactor','PairRatioConsistencyMedian',
153
+ 'PairRatioConsistencyFactor', 'PeakModality', 'PeakModalityFactor', 'IntensityQuantile',
154
+ 'IntensityQuantileFactor', 'PeakStartFactor', 'PeakEndFactor', 'PeakBoundaryFactor', 'Note']
155
+ non_qualified = result_df[result_df['FinalReward'] < threshold_list[step]]
156
+ else:
157
+ non_qualified = []
158
+ print(f"[Round {step+1}] #Peak Groups with Final Reward < {threshold_list[step]}: {len(non_qualified)}")
159
+ print(f'[Round {step+1}] Outputing picked peaks to {output_peak_boundary_csv} ...')
160
+ if need_repicking is None:
161
+ output_df = result_df
162
+ else:
163
+ output_arr = []
164
+ improved_counts = 0
165
+ improved_scores = []
166
+ for file, pep in all_samples:
167
+ if pep in need_repicking and file in need_repicking[pep]:
168
+ previous_result = need_repicking[pep][file]
169
+ if result_df.empty:
170
+ output_arr.append(previous_result)
171
+ else:
172
+ repick_result = result_df[(result_df['Peptide Modified Sequence'] == pep) & (result_df['File Name'] == file)]
173
+ if len(repick_result) >= 1:
174
+ repicked = repick_result.iloc[0]
175
+ previous_reward = previous_result['FinalReward']
176
+ repicked_reward = repicked['FinalReward']
177
+ if repicked_reward > previous_reward:
178
+ improved_counts += 1
179
+ improved_scores.append(repicked['FinalReward'] - previous_result['FinalReward'])
180
+ if overwrite_list[step] or (repicked_reward >= previous_reward):
181
+ output_arr.append(repicked)
182
+ else:
183
+ output_arr.append(previous_result)
184
+ else:
185
+ output_arr.append(previous_result) #Should not exist
186
+ else:
187
+ previous_result = pre_picked[(pre_picked['Peptide Modified Sequence'] == pep) & (pre_picked['File Name'] == file)] if pre_picked is not None else []
188
+ if len(previous_result) >= 1:
189
+ output_arr.append(previous_result.iloc[0])
190
+ output_df = DataFrame(output_arr)
191
+ improved_scores = np_array(improved_scores)
192
+ if len(improved_scores) > 0:
193
+ improved_mean = improved_scores.mean()
194
+ improved_median = np_median(improved_scores)
195
+ improved_min = improved_scores.min()
196
+ improved_max = improved_scores.max()
197
+ print(f'Improved chromatograms: {improved_counts}')
198
+ print(f'Improved scores => mean: {improved_mean}; median: {improved_median}; min: {improved_min}; max: {improved_max}')
199
+ else:
200
+ print('No improvement')
201
+ print('#########################################################')
202
+ print(f'[Round {step+1}] Score distributions:')
203
+ total_len = len(output_df)
204
+ if total_len > 0:
205
+ ranges = [-1, 0, 0.01, 1, 2, 4, 6, 8, 10, 12]
206
+ for idx, range_start in enumerate(ranges):
207
+ if idx + 1 < len(ranges):
208
+ range_end = ranges[idx + 1]
209
+ else:
210
+ range_end = np_inf
211
+ in_range_data = output_df[(output_df['FinalReward'] >= range_start) & (output_df['FinalReward'] < range_end)]
212
+ in_range_len = len(in_range_data)
213
+ print(f'[{range_start}, {range_end}): {in_range_len} ({ round(100*in_range_len/total_len, 2) }%)')
214
+ print('#########################################################')
215
+ print(f'[Round {step+1}] Output peak boundary csv file to ' + output_peak_boundary_csv)
216
+ current_csv_file = output_peak_boundary_csv
217
+ output_df.to_csv(output_peak_boundary_csv, index=False)
218
+ #output_df.to_csv(f'{output_peak_boundary_csv}.step{step+1}.txt', index=False)
219
+ if canStop:
220
+ break
221
+ end = time()
222
+ print('Total execution time: %.2f seconds' % (end - start))
223
+
224
+ if __name__ == '__main__':
225
+ main()