pg-sui 0.2.0__py3-none-any.whl → 1.6.14.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/METADATA +101 -79
  2. pg_sui-1.6.14.dev9.dist-info/RECORD +81 -0
  3. {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/WHEEL +1 -1
  4. pg_sui-1.6.14.dev9.dist-info/entry_points.txt +4 -0
  5. {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info/licenses}/LICENSE +0 -0
  6. pg_sui-1.6.14.dev9.dist-info/top_level.txt +1 -0
  7. pgsui/__init__.py +35 -54
  8. pgsui/_version.py +34 -0
  9. pgsui/cli.py +909 -0
  10. pgsui/data_processing/__init__.py +0 -0
  11. pgsui/data_processing/config.py +565 -0
  12. pgsui/data_processing/containers.py +1424 -0
  13. pgsui/data_processing/transformers.py +557 -907
  14. pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
  15. pgsui/electron/app/__main__.py +5 -0
  16. pgsui/electron/app/extra-resources/.gitkeep +1 -0
  17. pgsui/electron/app/icons/icons/1024x1024.png +0 -0
  18. pgsui/electron/app/icons/icons/128x128.png +0 -0
  19. pgsui/electron/app/icons/icons/16x16.png +0 -0
  20. pgsui/electron/app/icons/icons/24x24.png +0 -0
  21. pgsui/electron/app/icons/icons/256x256.png +0 -0
  22. pgsui/electron/app/icons/icons/32x32.png +0 -0
  23. pgsui/electron/app/icons/icons/48x48.png +0 -0
  24. pgsui/electron/app/icons/icons/512x512.png +0 -0
  25. pgsui/electron/app/icons/icons/64x64.png +0 -0
  26. pgsui/electron/app/icons/icons/icon.icns +0 -0
  27. pgsui/electron/app/icons/icons/icon.ico +0 -0
  28. pgsui/electron/app/main.js +227 -0
  29. pgsui/electron/app/package-lock.json +6894 -0
  30. pgsui/electron/app/package.json +51 -0
  31. pgsui/electron/app/preload.js +15 -0
  32. pgsui/electron/app/server.py +157 -0
  33. pgsui/electron/app/ui/logo.png +0 -0
  34. pgsui/electron/app/ui/renderer.js +131 -0
  35. pgsui/electron/app/ui/styles.css +59 -0
  36. pgsui/electron/app/ui/ui_shim.js +72 -0
  37. pgsui/electron/bootstrap.py +43 -0
  38. pgsui/electron/launch.py +57 -0
  39. pgsui/electron/package.json +14 -0
  40. pgsui/example_data/__init__.py +0 -0
  41. pgsui/example_data/phylip_files/__init__.py +0 -0
  42. pgsui/example_data/phylip_files/test.phy +0 -0
  43. pgsui/example_data/popmaps/__init__.py +0 -0
  44. pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
  45. pgsui/example_data/structure_files/__init__.py +0 -0
  46. pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
  47. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
  48. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
  49. pgsui/impute/__init__.py +0 -0
  50. pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
  51. pgsui/impute/deterministic/imputers/mode.py +844 -0
  52. pgsui/impute/deterministic/imputers/nmf.py +221 -0
  53. pgsui/impute/deterministic/imputers/phylo.py +973 -0
  54. pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
  55. pgsui/impute/supervised/__init__.py +0 -0
  56. pgsui/impute/supervised/base.py +343 -0
  57. pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
  58. pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
  59. pgsui/impute/supervised/imputers/random_forest.py +291 -0
  60. pgsui/impute/unsupervised/__init__.py +0 -0
  61. pgsui/impute/unsupervised/base.py +1118 -0
  62. pgsui/impute/unsupervised/callbacks.py +92 -262
  63. {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
  64. pgsui/impute/unsupervised/imputers/autoencoder.py +1285 -0
  65. pgsui/impute/unsupervised/imputers/nlpca.py +1554 -0
  66. pgsui/impute/unsupervised/imputers/ubp.py +1575 -0
  67. pgsui/impute/unsupervised/imputers/vae.py +1228 -0
  68. pgsui/impute/unsupervised/loss_functions.py +261 -0
  69. pgsui/impute/unsupervised/models/__init__.py +0 -0
  70. pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
  71. pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
  72. pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
  73. pgsui/impute/unsupervised/models/vae_model.py +269 -630
  74. pgsui/impute/unsupervised/nn_scorers.py +255 -0
  75. pgsui/utils/__init__.py +0 -0
  76. pgsui/utils/classification_viz.py +608 -0
  77. pgsui/utils/logging_utils.py +22 -0
  78. pgsui/utils/misc.py +35 -480
  79. pgsui/utils/plotting.py +996 -829
  80. pgsui/utils/pretty_metrics.py +290 -0
  81. pgsui/utils/scorers.py +213 -666
  82. pg_sui-0.2.0.dist-info/RECORD +0 -75
  83. pg_sui-0.2.0.dist-info/top_level.txt +0 -3
  84. pgsui/example_data/phylip_files/test_n10.phy +0 -118
  85. pgsui/example_data/phylip_files/test_n100.phy +0 -118
  86. pgsui/example_data/phylip_files/test_n2.phy +0 -118
  87. pgsui/example_data/phylip_files/test_n500.phy +0 -118
  88. pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
  89. pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
  90. pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
  91. pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
  92. pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
  93. pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
  94. pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
  95. pgsui/example_data/trees/test.iqtree +0 -376
  96. pgsui/example_data/trees/test.qmat +0 -5
  97. pgsui/example_data/trees/test.rate +0 -2033
  98. pgsui/example_data/trees/test.tre +0 -1
  99. pgsui/example_data/trees/test_n10.rate +0 -19
  100. pgsui/example_data/trees/test_n100.rate +0 -109
  101. pgsui/example_data/trees/test_n500.rate +0 -509
  102. pgsui/example_data/trees/test_siterates.txt +0 -2024
  103. pgsui/example_data/trees/test_siterates_n10.txt +0 -10
  104. pgsui/example_data/trees/test_siterates_n100.txt +0 -100
  105. pgsui/example_data/trees/test_siterates_n500.txt +0 -500
  106. pgsui/example_data/vcf_files/test.vcf +0 -244
  107. pgsui/example_data/vcf_files/test.vcf.gz +0 -0
  108. pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
  109. pgsui/impute/estimators.py +0 -1268
  110. pgsui/impute/impute.py +0 -1463
  111. pgsui/impute/simple_imputers.py +0 -1431
  112. pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
  113. pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
  114. pgsui/impute/unsupervised/keras_classifiers.py +0 -697
  115. pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
  116. pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
  117. pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
  118. pgsui/pg_sui.py +0 -261
  119. pgsui/utils/sequence_tools.py +0 -407
  120. simulation/sim_benchmarks.py +0 -333
  121. simulation/sim_treeparams.py +0 -475
  122. test/__init__.py +0 -0
  123. test/pg_sui_simtest.py +0 -215
  124. test/pg_sui_testing.py +0 -523
  125. test/test.py +0 -151
  126. test/test_pgsui.py +0 -374
  127. test/test_tkc.py +0 -185
@@ -1,333 +0,0 @@
1
- #!/usr/bin/env python
2
- import sys
3
- import os
4
- import subprocess
5
- import errno
6
-
7
- import toytree
8
- import toyplot.pdf
9
- import pyvolve
10
- import copy
11
- import random
12
- import re
13
- import numpy as np
14
- import pandas as pd
15
-
16
- import matplotlib.pyplot as plt
17
-
18
def main():
    """
    Using pyvolve and toytree to simulate data for PG-SUI

    Generates random data matrices w/ varying dimensions
    for the purposes of runtime benchmarking

    For every sample count a random unit-height tree is drawn with toytree.
    For every locus count, one gene alignment per locus is simulated along
    that tree with pyvolve, SNP(s) are sampled from each alignment, and the
    concatenated SNP matrix is written to
    "<base>_<model>_base-snps-concat.fasta" in the working directory.
    IQ-TREE is optionally run on the gene alignments and/or the SNP matrix.

    All settings are hard-coded at the top of the function.
    """
    seed = 553423
    random.seed(seed)
    # Model parameters and SNP sampling draw from numpy's global RNG, so it
    # must be seeded as well or the benchmark inputs differ on every run.
    np.random.seed(seed)

    num_samples_range = [10, 100, 1000]
    num_loci_range = [100, 1000, 10000]
    loc_length = 100
    write_gene_alignments = False
    make_gene_trees = False
    make_guidetrees = True  # set to true to run IQTREE on simulated SNP matrices
    keep_all = False  # set to true to keep ALL iqtree outputs
    keep_report = True  # set to true to keep .iqtree files
    get_siterates = True  # set to true to infer site-specific rates in IQTREE
    snps_per_locus = 1
    iqtree_bin = "iqtree2"
    iq_procs = 2
    outgroup = "r0"

    ###################

    if get_siterates and not make_guidetrees:
        print("ERROR: can't set get_siterates=True and make_guidetrees=False")
        print("Setting make_guidetrees=True and proceeding...")
        make_guidetrees = True

    for num_samples in num_samples_range:
        tobj = toytree.rtree.unittree(
            ntips=num_samples,
            treeheight=1.0,
            random_names=False,
            seed=random.randint(1, (sys.maxsize * 2 + 1)),
        )
        guidetree = tobj.write(tree_format=5)

        for num_loci in num_loci_range:
            print("num_samples:", num_samples)
            print("num_loci:", num_loci)

            base = "benchmark_" + "i" + str(num_samples) + "_l" + str(num_loci)
            tobj.write((base + "_guidetree.tre"))

            # One growing list of SNP calls per tip of the guide tree.
            data = {ind: [] for ind in tobj.get_tip_labels()}

            my_tree = pyvolve.read_tree(tree=guidetree)

            # Only the rate-heterogeneous model is benchmarked; add "gtr"
            # to the list to also simulate without rate heterogeneity.
            for model in ["gtrgamma"]:
                model_outpath = base + "_" + model

                for locus in range(num_loci):
                    print(locus)
                    # Random, normalised equilibrium base frequencies.
                    f = np.random.random(4)
                    f /= f.sum()
                    parameters = {
                        "mu": {
                            "AC": np.random.uniform(low=0.0, high=1.0),
                            "AG": np.random.uniform(low=0.0, high=1.0),
                            "AT": np.random.uniform(low=0.0, high=1.0),
                            "CG": np.random.uniform(low=0.0, high=1.0),
                            "CT": np.random.uniform(low=0.0, high=1.0),
                            "GT": np.random.uniform(low=0.0, high=1.0),
                        },
                        "state_freqs": [f[0], f[1], f[2], f[3]],
                    }
                    if model == "gtr":
                        # GTR model, without rate heterogeneity
                        my_model = pyvolve.Model("nucleotide", parameters)
                    else:
                        my_model = pyvolve.Model(
                            "nucleotide",
                            parameters,
                            rate_factors=[
                                np.random.uniform(low=0.1, high=0.7, size=1),
                                np.random.uniform(low=0.5, high=1.2, size=1),
                                np.random.uniform(low=1.0, high=1.8, size=1),
                                np.random.uniform(low=1.5, high=5.0, size=1),
                            ],
                            rate_probs=[0.4, 0.3, 0.2, 0.1],
                        )

                    if write_gene_alignments:
                        fasta_outpath = "full_alignments/"
                        if not os.path.exists(fasta_outpath):
                            os.mkdir(fasta_outpath)
                    else:
                        fasta_outpath = model_outpath
                    fastaout = (
                        fasta_outpath
                        + model_outpath
                        + "_loc"
                        + str(locus)
                        + "_gene-alignment.fasta"
                    )

                    # sample a gene alignment
                    loc = sample_locus(
                        my_tree, my_model, loc_length, snps_per_locus, fastaout
                    )

                    # NOTE(review): per-locus post-processing is applied only
                    # to loci that simulated successfully -- confirm this
                    # matches the intended control flow.
                    if loc:
                        # sample SNP(s) from gene alignment
                        sampled = sample_snp(
                            read_fasta(fastaout), loc_length, snps_per_locus
                        )
                        if sampled is not None:
                            data = add_locus(data, sampled)

                        if not write_gene_alignments:
                            os.remove(fastaout)
                            if make_gene_trees:
                                print("ERROR: Can't make gene trees when write_gene_alignments = False")
                        elif make_gene_trees:
                            run_iqtree(
                                fastaout,
                                iqtree_path=iqtree_bin,
                                keep_all=keep_all,
                                keep_report=keep_report,
                                rates=get_siterates,
                                procs=iq_procs,
                            )
                            reroot_tree(
                                tree=(fastaout + ".treefile"),
                                rooted=(fastaout + ".rooted.tre"),
                                outgroup_wildcard=outgroup,
                            )

                # write full SNP alignment & generate tree
                all_snp_out = model_outpath + "_base-snps-concat.fasta"
                write_fasta(data, all_snp_out)
                if make_guidetrees:
                    run_iqtree(
                        all_snp_out,
                        iqtree_path=iqtree_bin,
                        keep_all=keep_all,
                        keep_report=keep_report,
                        rates=get_siterates,
                        procs=iq_procs,
                    )
                    reroot_tree(
                        tree=(all_snp_out + ".treefile"),
                        rooted=(all_snp_out + ".rooted.tre"),
                        outgroup_wildcard=outgroup,
                    )
149
-
150
def reroot_tree(tree, rooted="out.rooted.tre", outgroup_wildcard="out"):
    """Root a tree on its outgroup and write the result to ``rooted``.

    Args:
        tree: Tree input accepted by ``toytree.tree``.
        rooted: Output path for the written tree.
        outgroup_wildcard: Wildcard passed to ``root`` to select the
            outgroup tips.

    Returns:
        The rooted tree on success. If rooting or writing fails, the
        tree as loaded is written to ``rooted`` instead and None is
        returned.
    """
    loaded = toytree.tree(tree)
    try:
        rerooted = loaded.root(wildcard=outgroup_wildcard)
        rerooted.write(rooted, tree_format=5)
    except Exception:
        # Best effort: fall back to writing the tree without rerooting.
        loaded.write(rooted, tree_format=5)
        return None
    return rerooted
159
-
160
-
161
def hybridization(dat, prob=0.1, source=None, target=None):
    """Copy randomly chosen sites from source individuals into targets.

    For every target individual, ``int(len(sequence) * prob)`` site
    indices are drawn without replacement; at each drawn site the
    target's call is overwritten with the call of a randomly chosen
    source individual at the same site.

    Args:
        dat: Mapping of individual name -> sequence (list or string of
            calls). Sequences are assumed to be aligned (equal length).
        prob: Fraction of sites to replace in each target.
        source: Names that may donate calls; defaults to all individuals.
        target: Names that receive calls; defaults to all individuals.

    Returns:
        A new dict mapping each individual to a new list of calls. The
        input mapping and its sequences are left untouched.
    """
    if source is None:
        source = list(dat)
    if target is None:
        target = list(dat)

    # Work on per-individual copies so the caller's sequences are not
    # modified through aliasing; this also makes immutable (string)
    # sequences editable.
    new_dat = {individual: list(calls) for individual, calls in dat.items()}

    for target_individual in target:
        aln_len = len(new_dat[target_individual])
        num = int(aln_len * prob)
        snp_indices = np.random.choice(aln_len, size=num, replace=False)
        for index in snp_indices:
            source_ind = np.random.choice(source, size=1)[0]
            new_dat[target_individual][index] = new_dat[source_ind][index]
    return new_dat
180
-
181
def run_iqtree(aln,
               iqtree_path="iqtree",
               keep_all=False,
               keep_report=False,
               outgroup=None,
               rates=False,
               procs=4):
    """Run IQ-TREE on an alignment and tidy up its auxiliary output files.

    Args:
        aln: Path (string) of the alignment file; IQ-TREE output files
            are expected next to it as ``aln + <suffix>``.
        iqtree_path: IQ-TREE executable to invoke.
        keep_all: If False, the .bionj, .ckp.gz, .log, .mldist and
            .uniqueseq.phy outputs are deleted after the run.
        keep_report: If False (and ``keep_all`` is False), the .iqtree
            report is deleted as well.
        outgroup: Optional outgroup name passed to IQ-TREE via ``-o``.
        rates: If True, request site-specific rates (``--mlrate``).
        procs: Thread count passed via ``-T``.

    Returns:
        ``aln + ".treefile"``. The file is not checked for existence; a
        warning is printed to stderr when IQ-TREE exits non-zero.
    """
    cmd = [iqtree_path,
           "-s", str(aln),
           "-m", "GTR+I*G4",
           "-redo",
           "-T", str(procs)]
    if outgroup is not None:
        cmd.extend(["-o", str(outgroup)])
    if rates:
        # IQ-TREE 2 option; use -wsr (NOT --mlrate) if using iq-tree 1.6.x
        cmd.append("--mlrate")

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        # Report the failure rather than silently returning a path to a
        # tree file that was probably never written. Kept non-fatal so a
        # single failed run does not abort a long batch.
        print("WARNING: " + str(iqtree_path) + " exited with status "
              + str(result.returncode) + " for " + str(aln),
              file=sys.stderr)
        if result.stderr:
            print(result.stderr.strip(), file=sys.stderr)

    if not keep_all:
        # delete everything except treefile
        for suffix in (".bionj", ".ckp.gz", ".log", ".mldist",
                       ".uniqueseq.phy"):
            silentremove((aln + suffix))
        # NOTE(review): the report is only removed when keep_all is False,
        # in line with keep_all meaning "keep ALL outputs" -- confirm.
        if not keep_report:
            silentremove((aln + ".iqtree"))
    return (aln + ".treefile")
219
-
220
def add_locus(d, new):
    """Append the calls of one sampled locus to every sample in ``d``.

    Args:
        d: Mapping of sample name -> list of calls; extended in place.
        new: Mapping of sample name -> iterable of calls for the new
            locus. Must contain every key of ``d``.

    Returns:
        The same ``d`` object, after extension.
    """
    for name, calls in d.items():
        calls.extend(new[name])
    return d
225
-
226
def write_fasta(seqs, fas):
    """Write sequences to ``fas`` in FASTA format.

    Args:
        seqs: Mapping of record name -> iterable of strings; the pieces
            are joined without a separator to form one sequence line.
        fas: Output path; overwritten if it exists.
    """
    # The context manager closes the handle on exit.
    with open(fas, 'w') as handle:
        for label, residues in seqs.items():
            handle.write(">" + str(label) + "\n")
            handle.write("".join(residues) + "\n")
235
-
236
def read_fasta(fasta):
    """Read a FASTA file into a dict of record name -> sequence string.

    Blank lines are skipped and multi-line sequences are concatenated.
    The record name is everything after ">" on the header line. Text
    appearing before the first header is ignored.

    Args:
        fasta: Path of the FASTA file to read.

    Returns:
        Dict mapping each record name to its full sequence. An empty
        file yields an empty dict.
    """
    data = dict()
    sample = None
    chunks = []
    with open(fasta, "r") as fin:
        for line in fin:
            line = line.strip()
            if not line:  # If blank line.
                continue
            if line[0] == ">":
                # Compare against None, not truthiness, so a record with
                # an empty name is still stored.
                if sample is not None:
                    data[sample] = "".join(chunks)
                sample = line[1:]
                chunks = []
            elif sample is not None:
                # Collect pieces and join once instead of repeated
                # string concatenation.
                chunks.append(line)
    if sample is not None:
        data[sample] = "".join(chunks)
    return data
255
-
256
def sample_snp(aln_dict, aln_len, snps_per_locus=1):
    """Sample variable sites (SNPs) from an alignment.

    A site counts as variable when at least one sample differs from the
    first sample at that position.

    Args:
        aln_dict: Mapping of sample name -> aligned sequence.
        aln_len: Number of alignment columns to scan.
        snps_per_locus: Number of variable sites to draw. When more are
            requested than exist, all variable sites are returned.

    Returns:
        None when no site is variable. Otherwise a dict keyed by sample
        name: when ``aln_len`` is 1 or exactly one site is variable, each
        value is the single call at that site; otherwise each value is a
        list of the calls at the randomly drawn sites (same site order
        for every sample).
    """
    if aln_len == 1:
        return {sample: seq[0] for sample, seq in aln_dict.items()}

    seqs = list(aln_dict.values())
    snp_indices = [
        i for i in range(aln_len)
        if any(seq[i] != seqs[0][i] for seq in seqs[1:])
    ]

    if not snp_indices:
        return None
    if len(snp_indices) == 1:
        # Only one candidate: take it for every sample, no draw needed.
        only = snp_indices[0]
        return {sample: seq[only] for sample, seq in aln_dict.items()}

    # Sampling without replacement cannot return more sites than exist,
    # so cap the request instead of letting numpy raise ValueError.
    n_draw = min(snps_per_locus, len(snp_indices))
    sampled_indices = np.random.choice(snp_indices, size=n_draw, replace=False)
    return {
        sample: [seq[i] for i in sampled_indices]
        for sample, seq in aln_dict.items()
    }
287
-
288
def sample_locus(tree, model, gene_len=1000, num_snps=1, out="out.fasta"):
    """Simulate one locus with pyvolve and write it to ``out`` as FASTA.

    Args:
        tree: Tree passed to ``pyvolve.Evolver``.
        model: Model (or models) passed to ``pyvolve.Partition``.
        gene_len: Size of the simulated partition.
        num_snps: Accepted but not used by this function.
        out: Path of the FASTA file to write; no rate or info files are
            requested.

    Returns:
        True if the simulation ran without raising, False otherwise.
    """
    try:
        partition = pyvolve.Partition(models=model, size=gene_len)
        evolver = pyvolve.Evolver(partitions=partition, tree=tree)
        evolver(seqfile=out,
                seqfmt="fasta",
                ratefile=False,
                infofile=False)
    except Exception:
        # Any failure is reported to the caller as a plain False.
        return False
    return True
299
-
300
def silentremove(filename):
    """Delete ``filename``, ignoring the case where it does not exist.

    Any other OS-level failure is propagated to the caller.
    """
    try:
        os.remove(filename)
    except FileNotFoundError:
        # Nothing to delete; every other OSError still propagates.
        pass
306
-
307
def get_tree_tips(tree):
    """Split a tree string into its non-empty labels.

    The string is split on spaces, commas, parentheses and semicolons,
    and empty fragments are discarded. Anything attached to a label
    without one of those separators stays part of the label.

    Args:
        tree: Tree string to split.

    Returns:
        List of the non-empty fragments, in order of appearance.
    """
    # Raw string: parentheses need no escaping inside a character class,
    # and the previous "\(" / "\)" were invalid string escape sequences.
    tips = re.split(r'[ ,();]', tree)
    return [i for i in tips if i]
310
-
311
def basic_tree_plot(tree, out="out.pdf"):
    """Draw ``tree`` with a fixed style and render the canvas to a PDF.

    Args:
        tree: Object exposing a ``draw`` method that returns a
            (canvas, axes, mark) triple.
        out: Path of the PDF file to write.
    """
    edge_style = {
        "stroke": toytree.colors[0],
        "stroke-width": 1,
    }
    # Only the canvas is needed for rendering.
    canvas, _axes, _mark = tree.draw(
        width=400,
        height=600,
        edge_type="p",
        edge_style=edge_style,
        tip_labels_align=True,
        tip_labels_style={"font-size": "5px"},
        node_labels=False,
        tip_labels=True,
    )

    toyplot.pdf.render(canvas, out)
331
-
332
- if __name__ == "__main__":
333
- main()