PyEvoMotion 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@
5
5
  "delta_t": "7D",
6
6
  "show": false,
7
7
  "export_plots": true,
8
+ "confidence_level": 0.95,
8
9
  "length_filter": 0,
9
10
  "kind": "total",
10
11
  "filter": null,
share/figUSA_stats.tsv CHANGED
@@ -1,34 +1,34 @@
1
- date mean number of mutations var number of mutations size
2
- 2020-12-17 34.666666666666664 5.066666666666665 6
3
- 2020-12-24 33.976190476190474 3.7311265969802596 42
4
- 2020-12-31 34.19230769230769 2.546953046953047 78
5
- 2021-01-07 35.15238095238095 5.4188644688644745 105
6
- 2021-01-14 36.39887640449438 4.094236018536149 178
7
- 2021-01-21 37.67487684729064 4.557137979807834 203
8
- 2021-01-28 38.28421052631579 4.2745737583395 285
9
- 2021-02-04 38.86852589641434 4.146645418326694 251
10
- 2021-02-11 39.538699690402474 4.584677807049599 323
11
- 2021-02-18 40.1993769470405 5.741374610591899 321
12
- 2021-02-25 40.53168044077135 5.183385842351184 363
13
- 2021-03-04 40.43465045592705 7.520868856104973 329
14
- 2021-03-11 41.03508771929825 6.573545300200646 342
15
- 2021-03-18 41.24050632911393 7.756730707447153 395
16
- 2021-03-25 41.31578947368421 6.91238966982674 323
17
- 2021-04-01 41.463611859838274 6.357456108399504 371
18
- 2021-04-08 41.80653266331658 8.771040340240246 398
19
- 2021-04-15 42.03133903133903 8.596157916157916 351
20
- 2021-04-22 42.390243902439025 7.597295864262986 369
21
- 2021-04-29 42.429752066115704 7.4004383361490325 363
22
- 2021-05-06 42.70674486803519 7.531395549422115 341
23
- 2021-05-13 42.96060606060606 9.405738233397797 330
24
- 2021-05-20 43.21902017291066 11.183105395545644 347
25
- 2021-05-27 43.90149253731343 11.154937885423184 335
26
- 2021-06-03 43.76347305389221 11.958902015788244 334
27
- 2021-06-10 44.061624649859944 11.277090611525502 357
28
- 2021-06-17 44.22287390029326 11.103122304640328 341
29
- 2021-06-24 44.62145110410095 9.539791558519338 317
30
- 2021-07-01 44.512121212121215 11.460338951828312 330
31
- 2021-07-08 44.526490066225165 8.841488636113615 302
32
- 2021-07-15 44.91830065359477 9.275270545376623 306
33
- 2021-07-22 44.656462585034014 9.31161338255439 294
34
- 2021-07-29 45.55797101449275 9.13895059769385 138
1
+ date mean number of mutations var number of mutations size dt_idx
2
+ 2020-12-17 34.666666666666664 5.066666666666665 6 0.0
3
+ 2020-12-24 33.976190476190474 3.7311265969802596 42 1.0
4
+ 2020-12-31 34.19230769230769 2.546953046953047 78 2.0
5
+ 2021-01-07 35.15238095238095 5.4188644688644745 105 3.0
6
+ 2021-01-14 36.39887640449438 4.094236018536149 178 4.0
7
+ 2021-01-21 37.67487684729064 4.557137979807834 203 5.0
8
+ 2021-01-28 38.28421052631579 4.2745737583395 285 6.0
9
+ 2021-02-04 38.86852589641434 4.146645418326694 251 7.0
10
+ 2021-02-11 39.538699690402474 4.584677807049599 323 8.0
11
+ 2021-02-18 40.1993769470405 5.741374610591899 321 9.0
12
+ 2021-02-25 40.53168044077135 5.183385842351184 363 10.0
13
+ 2021-03-04 40.43465045592705 7.520868856104973 329 11.0
14
+ 2021-03-11 41.03508771929825 6.573545300200646 342 12.0
15
+ 2021-03-18 41.24050632911393 7.756730707447153 395 13.0
16
+ 2021-03-25 41.31578947368421 6.91238966982674 323 14.0
17
+ 2021-04-01 41.463611859838274 6.357456108399504 371 15.0
18
+ 2021-04-08 41.80653266331658 8.771040340240246 398 16.0
19
+ 2021-04-15 42.03133903133903 8.596157916157916 351 17.0
20
+ 2021-04-22 42.390243902439025 7.597295864262986 369 18.0
21
+ 2021-04-29 42.429752066115704 7.4004383361490325 363 19.0
22
+ 2021-05-06 42.70674486803519 7.531395549422115 341 20.0
23
+ 2021-05-13 42.96060606060606 9.405738233397797 330 21.0
24
+ 2021-05-20 43.21902017291066 11.183105395545644 347 22.0
25
+ 2021-05-27 43.90149253731343 11.154937885423184 335 23.0
26
+ 2021-06-03 43.76347305389221 11.958902015788244 334 24.0
27
+ 2021-06-10 44.061624649859944 11.277090611525502 357 25.0
28
+ 2021-06-17 44.22287390029326 11.103122304640328 341 26.0
29
+ 2021-06-24 44.62145110410095 9.539791558519338 317 27.0
30
+ 2021-07-01 44.512121212121215 11.460338951828312 330 28.0
31
+ 2021-07-08 44.526490066225165 8.841488636113615 302 29.0
32
+ 2021-07-15 44.91830065359477 9.275270545376623 306 30.0
33
+ 2021-07-22 44.656462585034014 9.31161338255439 294 31.0
34
+ 2021-07-29 45.55797101449275 9.13895059769385 138 32.0
@@ -0,0 +1,107 @@
1
+ #´:°•.°+.*•´.*:˚.°*.˚•´.°:°•.°•.*•´.*:˚.°*.˚•´.°:°•.°+.*•´.*:#
2
+ # IMPORTS #
3
+ #.•°:°.´+˚.*°.˚:*.´•*.+°.•°:´*.´•*.•°.•°:°.´:•˚°.*°.˚:*.´+°.•#
4
+
5
+ import os
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
+ from Bio import SeqIO
10
+ from Bio.SeqRecord import SeqRecord
11
+
12
+ #´:°•.°+.*•´.*:˚.°*.˚•´.°:°•.°•.*•´.*:˚.°*.˚•´.°:°•.°+.*•´.*:#
13
+ # CONSTANTS #
14
+ #.•°:°.´+˚.*°.˚:*.´•*.+°.•°:´*.´•*.•°.•°:°.´:•˚°.*°.˚:*.´+°.•#
15
+
16
# FASTA file whose first record is used as the reference sequence.
WUHAN_REF_PATH = "tests/data/test3/test3UK.fasta"

# Maps an integer index 0..40 to a timestamp; the 41 timestamps are spaced
# seven days apart, starting at 2020-01-01.
DATE_MAP = dict(enumerate(
    pd.date_range(start="2020-01-01", periods=41, freq="7D")
))

# Alphabet from which replacement bases are drawn.
NUCLEOTIDES = {"A", "C", "G", "T"}

# Root directory scanned for the .txt inputs.
TEST5_BASE_PATH = "tests/data/test5"
23
+
24
+ #´:°•.°+.*•´.*:˚.°*.˚•´.°:°•.°•.*•´.*:˚.°*.˚•´.°:°•.°+.*•´.*:#
25
+ # FUNCTIONS #
26
+ #.•°:°.´+˚.*°.˚:*.´•*.+°.•°:´*.´•*.•°.•°:°.´:•˚°.*°.˚:*.´+°.•#
27
+
28
def load_synthdata(path: str) -> pd.DataFrame:
    """Reads a tab-separated table and renames its columns to seq0, seq1, ...

    The first column of the file becomes the index of the returned frame;
    the remaining columns are renamed positionally.
    """
    table = pd.read_csv(path, sep="\t", index_col=0)
    table.columns = ["seq" + str(k) for k, _ in enumerate(table.columns)]
    return table
36
+
37
def get_wuhan_ref() -> SeqRecord:
    """Returns the first record of the FASTA file at WUHAN_REF_PATH.

    The reference sequence is known to be the first entry of that file, so
    parsing stops after one record. Yields None if the file has no records.
    """
    with open(WUHAN_REF_PATH, "r") as handle:
        return next(SeqIO.parse(handle, "fasta"), None)
44
+
45
def create_synthetic_sequence(mut_num: int, wuhan_ref: SeqRecord) -> str:
    """Creates a synthetic sequence by mutating the reference sequence.

    ``mut_num`` distinct positions of ``wuhan_ref.seq`` are picked at random
    (without replacement) and each one is replaced by a randomly chosen
    nucleotide from NUCLEOTIDES that differs from the original character.

    :param mut_num: number of positions to mutate.
    :param wuhan_ref: record whose ``seq`` is used as the template.
    :return: the mutated sequence as a plain string.
    :raises ValueError: raised by NumPy if ``mut_num`` exceeds the sequence length.
    """
    bases = list(wuhan_ref.seq)
    positions = np.random.choice(len(bases), mut_num, replace=False)

    for pos in positions:
        # Sort the candidates: the iteration order of a set of strings
        # depends on per-process hash randomization, which would make the
        # output irreproducible even when the NumPy RNG is seeded.
        candidates = sorted(NUCLEOTIDES - {bases[pos]})
        bases[pos] = np.random.choice(candidates)

    return "".join(bases)
57
+
58
+
59
def create_synthetic_sequences(output_path: str, synthdata: pd.DataFrame, wuhan_ref: SeqRecord) -> None:
    """Writes a synthetic dataset as ``<output_path>.fasta`` and ``<output_path>.tsv``.

    The reference record is written first in both files (with the fixed date
    2019-12-29 in the TSV). Then, for every cell of ``synthdata``, one
    synthetic sequence is generated with that cell's value as its number of
    mutations; its id is ``<column>_<row index>`` and its date is looked up
    in DATE_MAP by the row index.
    """
    fasta_path = f"{output_path}.fasta"
    meta_path = f"{output_path}.tsv"

    with open(fasta_path, "w") as fasta, open(meta_path, "w") as meta:
        fasta.write(f">{wuhan_ref.id}\n{wuhan_ref.seq}\n")
        meta.write(f"id\tdate\n{wuhan_ref.id}\t2019-12-29\n")

        for week, counts in synthdata.iterrows():
            for seq_name, n_mut in counts.items():
                record_id = f"{seq_name}_{week}"
                sequence = create_synthetic_sequence(n_mut, wuhan_ref)
                fasta.write(f">{record_id}\n{sequence}\n")
                date = DATE_MAP[week].strftime('%Y-%m-%d')
                meta.write(f"{record_id}\t{date}\n")
70
+
71
def process_test5_files():
    """Process all .txt files in test5 subdirectories and generate corresponding .fasta and .tsv files.

    Every directory directly under TEST5_BASE_PATH is scanned; for each
    ``<name>.txt`` found, ``<name>.fasta`` and ``<name>.tsv`` are written
    next to it. Entries of TEST5_BASE_PATH that are not directories are
    skipped.
    """
    suffix = ".txt"
    wuhan_ref = get_wuhan_ref()

    for subdir in os.listdir(TEST5_BASE_PATH):
        subdir_path = f"{TEST5_BASE_PATH}/{subdir}"
        # Stray files at the top level (e.g. .DS_Store) would make
        # os.listdir raise NotADirectoryError.
        if not os.path.isdir(subdir_path):
            continue

        for txt_file in os.listdir(subdir_path):
            if not txt_file.endswith(suffix):
                continue

            # Slice the suffix off. str.strip(".txt") would instead remove
            # any of the characters '.', 't', 'x' from BOTH ends, turning
            # e.g. "test.txt" into "es".
            name = txt_file[:-len(suffix)]
            create_synthetic_sequences(
                f"{subdir_path}/{name}",
                load_synthdata(f"{subdir_path}/{txt_file}"),
                wuhan_ref
            )
93
+
94
+
95
+ #´:°•.°+.*•´.*:˚.°*.˚•´.°:°•.°•.*•´.*:˚.°*.˚•´.°:°•.°+.*•´.*:#
96
+ # MAIN #
97
+ #.•°:°.´+˚.*°.˚:*.´•*.+°.•°:´*.´•*.•°.•°:°.´:•˚°.*°.˚:*.´+°.•#
98
+
99
def main():
    """Entry point: regenerates every synthetic dataset under the test5 tree,
    reporting on stdout before and after.
    """
    print("Processing all .txt files in test5 subdirectories...")
    process_test5_files()
    print("All synthetic datasets created successfully.")


# Run only when executed as a script, not on import.
if __name__ == "__main__":
    main()