PyEvoMotion 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ """
2
+ The main functionality of the ``PyEvoMotion`` project is abstracted into the following classes:
3
+
4
+ * :class:`PyEvoMotion` - The main class that encapsulates the entire analysis.
5
+ * :class:`PyEvoMotionBase` - The base class that provides basic utility functions inherited by :class:`PyEvoMotion`.
6
+ * :class:`PyEvoMotionParser` - The class that provides the functionality to parse the input data for the analysis, inherited by :class:`PyEvoMotion`.
7
+ """
8
+
9
+ from .core.core import PyEvoMotion
10
+ from .core.base import PyEvoMotionBase
11
+ from .core.parser import PyEvoMotionParser
PyEvoMotion/cli.py ADDED
@@ -0,0 +1,440 @@
1
+ """
2
+ Command line interface for :class:`PyEvoMotion`.
3
+
4
+ It parses the arguments from the command line and runs the analysis with the specified parameters.
5
+
6
+ This module is not meant to be inherited from, but to be used as a standalone script in the command line.
7
+ """
8
+
9
+ import json
10
+ import argparse
11
+ from datetime import datetime
12
+
13
+ from .core.core import PyEvoMotion
14
+ from .utils import check_and_install_mafft
15
+
16
+ PACKAGE_DESCRIPTION = "PyEvoMotion"
17
+ BANNER = r"""
18
+ Welcome to Rodrigolab's
19
+ _____ ______ __ __ _ _
20
+ | __ \ | ____| | \/ | | | (_)
21
+ | |__) | _| |____ _____ | \ / | ___ | |_ _ ___ _ __
22
+ | ___/ | | | __\ \ / / _ \| |\/| |/ _ \| __| |/ _ \| '_ \
23
+ | | | |_| | |___\ V / (_) | | | | (_) | |_| | (_) | | | |
24
+ |_| \__, |______\_/ \___/|_| |_|\___/ \__|_|\___/|_| |_|
25
+ __/ |
26
+ |___/
27
+ """
28
+
29
class _ArgumentParserWithHelpOnError(argparse.ArgumentParser):
    """
    Custom ArgumentParser that prints the help message when an error occurs.
    """

    def error(self, message: str) -> None:
        """
        Print the help message and the error message, then terminate.

        Both texts are written to standard error, so that a failed invocation
        never pollutes whatever is consuming standard output. The process
        terminates with exit status 2.

        :param message: the error message to print.
        :type message: str
        :raises SystemExit: always, with exit status 2.
        """
        # ``ArgumentParser.exit`` writes its message to stderr before exiting,
        # so the full help text and the error are emitted in a single call.
        self.exit(2, f"{self.format_help()}\nError: {message}\n\n")
44
+
45
+ class _ParseFilter(argparse.Action):
46
+ """
47
+ Custom action to parse the filters from the command line.
48
+
49
+ The filters are passed as key-value pairs, where the key is followed by multiple values, specified in square brackets.
50
+ """
51
+ def __call__(self, _: argparse.ArgumentParser, namespace: argparse.Namespace, values: list[str], option_string: str | None = None) -> None:
52
+ """
53
+ Call the action to parse the filters.
54
+
55
+ :param _: the parser.
56
+ :type _: argparse.ArgumentParser
57
+ :param namespace: the namespace to store the parsed filters.
58
+ :type namespace: argparse.Namespace
59
+ :param values: the values to parse.
60
+ :type values: list[str]
61
+ :param option_string: the option string.
62
+ :type option_string: str
63
+ :raises ValueError: if the values are not in the correct format.
64
+ """
65
+
66
+ setattr(namespace, self.dest, self.parse_filters(values))
67
+
68
+ @staticmethod
69
+ def parse_filters(values: list[str] | None) -> dict[str, str | list[str]] | None:
70
+ """
71
+ Parse the filters from the values.
72
+
73
+ :param values: the values to parse.
74
+ :type values: list[str] | None
75
+ :return: the parsed filters as a dictionary.
76
+ :rtype: dict[str, str | list[str]] | None
77
+ """
78
+
79
+ if values is None: return None
80
+
81
+ # Create an iterator to process values one by one
82
+ cleaned_values = []
83
+ buffer = []
84
+ inside_brackets = False
85
+
86
+ # Loop through the input values and handle brackets
87
+ for value in values:
88
+ if value.startswith('[') and value.endswith(']'): # Single value inside brackets
89
+ cleaned_values.append(value[1:-1])
90
+ if value.startswith('['): # Start of a bracketed group
91
+ inside_brackets = True
92
+ buffer.append(value[1:]) # Strip the '['
93
+ elif value.endswith(']'): # End of a bracketed group
94
+ buffer.append(value[:-1]) # Strip the ']'
95
+ cleaned_values.append(buffer)
96
+ buffer = []
97
+ inside_brackets = False
98
+ elif inside_brackets: # Values inside the brackets
99
+ buffer.append(value)
100
+ else: # Regular values outside of brackets
101
+ cleaned_values.append(value)
102
+
103
+ return dict(zip(
104
+ cleaned_values[::2],
105
+ cleaned_values[1::2]
106
+ ))
107
+
108
+ class _ParseGenomePosition(argparse.Action):
109
+ """
110
+ Custom action to parse the genome positions from the command line.
111
+
112
+ The genome positions are passed as a string with two dots separating the start and end positions. Open start or end positions are allowed by omitting the first or last position, respectively.
113
+ """
114
+ def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: str, option_string: str | None = None):
115
+ """
116
+ Call the action to parse the genome positions.
117
+
118
+ :param parser: the parser.
119
+ :type parser: argparse.ArgumentParser
120
+ :param namespace: the namespace to store the parsed genome positions.
121
+ :type namespace: argparse.Namespace
122
+ :param values: the values to parse.
123
+ :type values: str
124
+ :param option_string: the option string.
125
+ :type option_string: str
126
+ :raises ValueError: if the values are not in the correct format.
127
+ """
128
+
129
+
130
+
131
+ setattr(namespace, self.dest, self.parse_genome_position(parser, values))
132
+
133
+ @staticmethod
134
+ def parse_genome_position(parser: argparse.ArgumentParser, values: str | None) -> tuple[int, int] | None:
135
+ """
136
+ Parse the genome positions from the values.
137
+
138
+ :param parser: the parser.
139
+ :type parser: argparse.ArgumentParser
140
+ :param values: the values to parse.
141
+ :type values: str | None
142
+ :return: the parsed genome positions.
143
+ :rtype: tuple[int, int] | None
144
+ :raises ValueError: if the values are not in the correct format.
145
+ """
146
+
147
+ if values is None: return None
148
+
149
+ if not(".." in values):
150
+ parser.error("The genome positions must be separated by two dots. Example: 1..1000")
151
+
152
+ _split = values.split("..")
153
+
154
+ positions = []
155
+ for el in _split:
156
+ if not el.isdigit() and el != "":
157
+ parser.error("The genome positions must be positive integers")
158
+ positions.append(0 if el == "" else int(el))
159
+
160
+ return tuple(positions)
161
+
162
+ class _ParseDateRange(argparse.Action):
163
+ """
164
+ Custom action to parse the date range from the command line.
165
+
166
+ The date range is passed as a string with two dots separating the start and end dates. The format must be YYYY-MM-DD.
167
+ """
168
+ def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: str, option_string: str | None = None):
169
+
170
+ setattr(namespace, self.dest, self.parse_date_range(parser, values))
171
+
172
+ @staticmethod
173
+ def parse_date_range(parser: argparse.ArgumentParser, values: str | None) -> tuple[datetime | None, datetime | None] | None:
174
+ """
175
+ Parse the date range from the values.
176
+
177
+ :param parser: the parser.
178
+ :type parser: argparse.ArgumentParser
179
+ :param values: the values to parse.
180
+ :type values: str | None
181
+ :return: the parsed date range.
182
+ :rtype: tuple[datetime | None, datetime | None] | None
183
+ """
184
+
185
+ if values is None: return None
186
+
187
+ if not(".." in values):
188
+ parser.error("The date range must be separated by two dots. Example: 2020-01-01..2020-12-31")
189
+ if values.count(".") > 2:
190
+ parser.error("The date range must contain '..' as separator")
191
+
192
+ _split = values.split("..")
193
+
194
+ range = []
195
+ for date in _split:
196
+ if date == "":
197
+ range.append(None)
198
+ continue
199
+ try:
200
+ range.append(datetime.strptime(date, "%Y-%m-%d"))
201
+ except ValueError:
202
+ parser.error("Incorrect date format, should be YYYY-MM-DD")
203
+
204
+ return tuple(range)
205
+
206
+
207
def _build_parser() -> argparse.ArgumentParser:
    """
    Build the parser that declares every command line argument.

    :return: the configured parser.
    :rtype: argparse.ArgumentParser
    """

    parser = _ArgumentParserWithHelpOnError(description=PACKAGE_DESCRIPTION)
    parser.add_argument(
        "seqs",
        type=str,
        help="Path to the input fasta file containing the sequences."
    )
    parser.add_argument(
        "meta",
        type=str,
        help="Path to the corresponding metadata file for the sequences."
    )
    parser.add_argument(
        "out",
        type=str,
        help="Path to the output filename prefix used to save the different results."
    )
    parser.add_argument(
        "-dt",
        "--delta_t",
        type=str,
        default="7D",
        help="Time interval to calculate the statistics. Default is 7 days (7D)."
    )
    parser.add_argument(
        "-sh",
        "--show",
        action="store_true",
        help="Show the plots of the analysis."
    )
    parser.add_argument(
        "-ep",
        "--export_plots",
        action="store_true",
        help="Export the plots of the analysis."
    )
    parser.add_argument(
        "-l",
        "--length_filter",
        type=int,
        default=0,
        help="Length filter for the sequences (removes sequences with length less than the specified value). Default is 0."
    )
    parser.add_argument(
        "-n",
        "--n_threshold",
        type=int,
        default=2,
        help="Minimum number of sequences required in a time interval to compute statistics. Default is 2."
    )
    parser.add_argument(
        "-xj",
        "--export_json",
        action="store_true",
        help="Export the run arguments to a json file."
    )
    parser.add_argument(
        "-ij",
        "--import_json",
        type=str,
        help="Import the run arguments from a JSON file. If this argument is passed, the other arguments are ignored. The JSON file must contain the mandatory keys 'seqs', 'meta', and 'out'."
    )
    parser.add_argument(
        "-k",
        "--kind",
        type=str,
        choices=["all", "total", "substitutions", "indels"],
        default="all",
        help="Kind of mutations to consider for the analysis. Default is 'all'."
    )
    parser.add_argument(
        "-f",
        "--filter",
        nargs='+',  # Accepts multiple arguments
        action=_ParseFilter,
        default=None,
        help="Specify filters to be applied on the data with keys followed by values. If the values are multiple, they must be enclosed in square brackets. Example: --filter key1 value1 key2 [value2 value3] key3 value4. If either the keys or values contain spaces, they must be enclosed in quotes. keys must be present in the metadata file as columns for the filter to be applied. Use '*' as a wildcard, for example Bio* to filter all columns starting with 'Bio'."
    )
    parser.add_argument(
        "-gp",
        "--genome_positions",
        type=str,
        action=_ParseGenomePosition,
        default=None,
        help="Genome positions to restrict the analysis. The positions must be separated by two dots. Example: 1..1000. Open start or end positions are allowed by omitting the first or last position, respectively. If not specified, the whole reference genome is considered."
    )
    parser.add_argument(
        "-dr",
        "--date_range",
        type=str,
        action=_ParseDateRange,
        default=None,
        help="Date range to filter the data. The date range must be separated by two dots and the format must be YYYY-MM-DD. Example: 2020-01-01..2020-12-31. If not specified, the whole dataset is considered. Note that if the origin is specified, the most restrictive date range is considered."
    )

    return parser


def _namespace_from_json(parser: argparse.ArgumentParser, path: str) -> argparse.Namespace:
    """
    Build the arguments namespace from a JSON file instead of the command line.

    Arguments missing from the JSON file take the default declared in the
    parser. Any problem with the file is reported through ``parser.error``.

    :param parser: the parser that declares the arguments and their defaults.
    :type parser: argparse.ArgumentParser
    :param path: path to the JSON file.
    :type path: str
    :return: the namespace populated from the JSON file.
    :rtype: argparse.Namespace
    """

    try:
        with open(path, "r", encoding="utf-8") as file:
            json_args = json.load(file)
    except (OSError, ValueError) as exc:
        # ValueError covers both invalid JSON and undecodable bytes
        parser.error(f"Could not read the JSON file '{path}': {exc}")

    # Checks if the JSON file contains the minimum required keys
    if not isinstance(json_args, dict) or not {"seqs", "meta", "out"}.issubset(json_args):
        parser.error("The JSON file must contain the keys 'seqs', 'meta', and 'out'")

    namespace = argparse.Namespace()

    for action in parser._actions:
        # Actions such as --help store nothing in the namespace
        if action.default is argparse.SUPPRESS:
            continue

        if action.dest not in json_args:
            # If no value from JSON, use the default value
            setattr(namespace, action.dest, action.default)
            continue

        value = json_args[action.dest]

        # The command line would reject these values, so the JSON path must too
        if action.choices is not None and value not in action.choices:
            parser.error(
                f"argument {action.dest}: invalid choice: {value!r} "
                f"(choose from {', '.join(map(repr, action.choices))})"
            )

        if isinstance(action, (_ParseFilter, _ParseGenomePosition, _ParseDateRange)):
            # If the argument has a custom action, apply the action manually
            action(parser, namespace, value)
        else:
            # For regular arguments, just set them in the namespace
            setattr(namespace, action.dest, value)

    return namespace


def _parse_arguments() -> argparse.Namespace:
    """
    Parse the arguments from the command line.

    If the ``-ij``/``--import_json`` argument is passed, the arguments are read
    from that JSON file and the rest of the command line is ignored.

    :return: the parsed arguments.
    :rtype: argparse.Namespace
    """

    # True parser. If the -ij argument is not passed, it will be used to parse the arguments
    parser = _build_parser()

    # Initial parser to parse just the -ij argument; unknown arguments are
    # tolerated so that the mandatory positionals are not required here
    json_input_parser = argparse.ArgumentParser(add_help=False)
    json_input_parser.add_argument(
        "-ij",
        "--import_json",
        type=str
    )
    json_input_args, _ = json_input_parser.parse_known_args()

    # If the -ij argument is passed, the arguments are imported from the JSON file
    if json_input_args.import_json:
        return _namespace_from_json(parser, json_input_args.import_json)

    return parser.parse_args()
350
+
351
def _simple_serializer(k: str, v: object) -> object:
    """
    Simple serializer to convert the arguments to JSON.

    Only the ``date_range`` argument needs conversion: its dates are written
    back in the ``YYYY-MM-DD..YYYY-MM-DD`` form accepted on the command line,
    with an open bound written as an empty string. Every other value, as well
    as an unset (``None``) date range, is returned unchanged.

    :param k: the key of the argument.
    :type k: str
    :param v: the value of the argument.
    :type v: object
    :return: the serialized value.
    :rtype: object
    """

    # date_range defaults to None, which cannot be iterated
    if k == "date_range" and v is not None:
        return "..".join(
            date.strftime("%Y-%m-%d") if date else ""
            for date in v
        )
    return v
366
+
367
def _main() -> None:
    """
    Command line interface for :class:`PyEvoMotion`.

    It parses the arguments from the command line and runs the analysis with the specified parameters.

    :raises SystemExit: with code 0 once every output has been written.
    """

    check_and_install_mafft()

    print(BANNER)
    args = _parse_arguments()

    # If the -xj argument is passed, the arguments are exported to a JSON file before running the analysis altogether
    if args.export_json:
        with open(f"{args.out}_run_args.json", "w", encoding="utf-8") as file:
            json.dump(
                {
                    k: _simple_serializer(k, v)
                    for k, v in vars(args).items()
                    if k not in ["export_json", "import_json"]
                },
                file,
                indent=4
            )

    # Instantiates the PyEvoMotion class, which parses the data on construction
    instance = PyEvoMotion(
        args.seqs,
        args.meta,
        dt=args.delta_t,
        filters=args.filter,
        positions=args.genome_positions,
        date_range=args.date_range,
    )

    # Exports the data to a TSV file
    instance.data.to_csv(
        f"{args.out}.tsv",
        sep="\t",
        index=False
    )

    # Runs the analysis
    stats, reg = instance.analysis(
        length=args.length_filter,
        n_threshold=args.n_threshold,
        show=args.show,
        mutation_kind=args.kind,
        export_plots_filename=(
            f"{args.out}_plots"
            if args.export_plots
            else None
        )
    )

    # Drops the "model" entry of every regression result before dumping to JSON.
    # New dictionaries are built so the results returned by the analysis are left untouched.
    _reg = {
        name: {
            field: value
            for field, value in result.items()
            if field != "model"
        }
        for name, result in reg.items()
    }

    # Exports the statistic results to TSV file
    stats.to_csv(
        f"{args.out}_stats.tsv",
        sep="\t",
        index=False
    )

    # Exports the regression models to a JSON file
    with open(f"{args.out}_regression_results.json", "w", encoding="utf-8") as file:
        json.dump(_reg, file, indent=4)

    # Exits the program with code 0 (success). SystemExit is raised directly
    # because the ``exit`` builtin is only injected by the ``site`` module.
    raise SystemExit(0)


if __name__ == "__main__":
    _main()
@@ -0,0 +1,7 @@
1
+ """
2
+ The main functionality of the ``PyEvoMotion`` project is abstracted into the following classes:
3
+
4
+ * :class:`PyEvoMotion` - The main class that encapsulates the entire analysis.
5
+ * :class:`PyEvoMotionBase` - The base class that provides basic utility functions inherited by :class:`PyEvoMotion`.
6
+ * :class:`PyEvoMotionParser` - The class that provides the functionality to parse the input data for the analysis, inherited by :class:`PyEvoMotion`.
7
+ """