pg-sui 1.0.2.1__py3-none-any.whl → 1.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pg-sui might be problematic. Click here for more details.

Files changed (112) hide show
  1. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/METADATA +51 -70
  2. pg_sui-1.6.8.dist-info/RECORD +78 -0
  3. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/WHEEL +1 -1
  4. pg_sui-1.6.8.dist-info/entry_points.txt +4 -0
  5. pg_sui-1.6.8.dist-info/top_level.txt +1 -0
  6. pgsui/__init__.py +35 -54
  7. pgsui/_version.py +34 -0
  8. pgsui/cli.py +635 -0
  9. pgsui/data_processing/config.py +576 -0
  10. pgsui/data_processing/containers.py +1782 -0
  11. pgsui/data_processing/transformers.py +121 -1103
  12. pgsui/electron/app/__main__.py +5 -0
  13. pgsui/electron/app/icons/icons/1024x1024.png +0 -0
  14. pgsui/electron/app/icons/icons/128x128.png +0 -0
  15. pgsui/electron/app/icons/icons/16x16.png +0 -0
  16. pgsui/electron/app/icons/icons/24x24.png +0 -0
  17. pgsui/electron/app/icons/icons/256x256.png +0 -0
  18. pgsui/electron/app/icons/icons/32x32.png +0 -0
  19. pgsui/electron/app/icons/icons/48x48.png +0 -0
  20. pgsui/electron/app/icons/icons/512x512.png +0 -0
  21. pgsui/electron/app/icons/icons/64x64.png +0 -0
  22. pgsui/electron/app/icons/icons/icon.icns +0 -0
  23. pgsui/electron/app/icons/icons/icon.ico +0 -0
  24. pgsui/electron/app/main.js +189 -0
  25. pgsui/electron/app/package-lock.json +6893 -0
  26. pgsui/electron/app/package.json +50 -0
  27. pgsui/electron/app/preload.js +15 -0
  28. pgsui/electron/app/server.py +146 -0
  29. pgsui/electron/app/ui/logo.png +0 -0
  30. pgsui/electron/app/ui/renderer.js +130 -0
  31. pgsui/electron/app/ui/styles.css +59 -0
  32. pgsui/electron/app/ui/ui_shim.js +72 -0
  33. pgsui/electron/bootstrap.py +43 -0
  34. pgsui/electron/launch.py +59 -0
  35. pgsui/electron/package.json +14 -0
  36. pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
  37. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
  38. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
  39. pgsui/impute/deterministic/imputers/allele_freq.py +691 -0
  40. pgsui/impute/deterministic/imputers/mode.py +679 -0
  41. pgsui/impute/deterministic/imputers/nmf.py +221 -0
  42. pgsui/impute/deterministic/imputers/phylo.py +971 -0
  43. pgsui/impute/deterministic/imputers/ref_allele.py +530 -0
  44. pgsui/impute/supervised/base.py +339 -0
  45. pgsui/impute/supervised/imputers/hist_gradient_boosting.py +293 -0
  46. pgsui/impute/supervised/imputers/random_forest.py +287 -0
  47. pgsui/impute/unsupervised/base.py +924 -0
  48. pgsui/impute/unsupervised/callbacks.py +89 -263
  49. pgsui/impute/unsupervised/imputers/autoencoder.py +972 -0
  50. pgsui/impute/unsupervised/imputers/nlpca.py +1264 -0
  51. pgsui/impute/unsupervised/imputers/ubp.py +1288 -0
  52. pgsui/impute/unsupervised/imputers/vae.py +957 -0
  53. pgsui/impute/unsupervised/loss_functions.py +158 -0
  54. pgsui/impute/unsupervised/models/autoencoder_model.py +208 -558
  55. pgsui/impute/unsupervised/models/nlpca_model.py +149 -468
  56. pgsui/impute/unsupervised/models/ubp_model.py +198 -1317
  57. pgsui/impute/unsupervised/models/vae_model.py +259 -618
  58. pgsui/impute/unsupervised/nn_scorers.py +215 -0
  59. pgsui/utils/classification_viz.py +591 -0
  60. pgsui/utils/misc.py +35 -480
  61. pgsui/utils/plotting.py +514 -824
  62. pgsui/utils/scorers.py +212 -438
  63. pg_sui-1.0.2.1.dist-info/RECORD +0 -75
  64. pg_sui-1.0.2.1.dist-info/top_level.txt +0 -3
  65. pgsui/example_data/phylip_files/test_n10.phy +0 -118
  66. pgsui/example_data/phylip_files/test_n100.phy +0 -118
  67. pgsui/example_data/phylip_files/test_n2.phy +0 -118
  68. pgsui/example_data/phylip_files/test_n500.phy +0 -118
  69. pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
  70. pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
  71. pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
  72. pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
  73. pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
  74. pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
  75. pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
  76. pgsui/example_data/trees/test.iqtree +0 -376
  77. pgsui/example_data/trees/test.qmat +0 -5
  78. pgsui/example_data/trees/test.rate +0 -2033
  79. pgsui/example_data/trees/test.tre +0 -1
  80. pgsui/example_data/trees/test_n10.rate +0 -19
  81. pgsui/example_data/trees/test_n100.rate +0 -109
  82. pgsui/example_data/trees/test_n500.rate +0 -509
  83. pgsui/example_data/trees/test_siterates.txt +0 -2024
  84. pgsui/example_data/trees/test_siterates_n10.txt +0 -10
  85. pgsui/example_data/trees/test_siterates_n100.txt +0 -100
  86. pgsui/example_data/trees/test_siterates_n500.txt +0 -500
  87. pgsui/example_data/vcf_files/test.vcf +0 -244
  88. pgsui/example_data/vcf_files/test.vcf.gz +0 -0
  89. pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
  90. pgsui/impute/estimators.py +0 -735
  91. pgsui/impute/impute.py +0 -1486
  92. pgsui/impute/simple_imputers.py +0 -1439
  93. pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -785
  94. pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1027
  95. pgsui/impute/unsupervised/keras_classifiers.py +0 -702
  96. pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
  97. pgsui/impute/unsupervised/neural_network_imputers.py +0 -1424
  98. pgsui/impute/unsupervised/neural_network_methods.py +0 -1549
  99. pgsui/pg_sui.py +0 -261
  100. pgsui/utils/sequence_tools.py +0 -407
  101. simulation/sim_benchmarks.py +0 -333
  102. simulation/sim_treeparams.py +0 -475
  103. test/__init__.py +0 -0
  104. test/pg_sui_simtest.py +0 -215
  105. test/pg_sui_testing.py +0 -523
  106. test/test.py +0 -297
  107. test/test_pgsui.py +0 -374
  108. test/test_tkc.py +0 -214
  109. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info/licenses}/LICENSE +0 -0
  110. /pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
  111. /pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
  112. {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
@@ -1,333 +0,0 @@
1
- #!/usr/bin/env python
2
- import sys
3
- import os
4
- import subprocess
5
- import errno
6
-
7
- import toytree
8
- import toyplot.pdf
9
- import pyvolve
10
- import copy
11
- import random
12
- import re
13
- import numpy as np
14
- import pandas as pd
15
-
16
- import matplotlib.pyplot as plt
17
-
18
def main():
    """
    Using pyvolve and toytree to simulate data for PG-SUI

    Generates random data matrices w/ varying dimensions
    for the purposes of runtime benchmarking

    For each (num_samples, num_loci) combination a random guide tree is
    drawn with toytree, one alignment per locus is simulated along it with
    pyvolve, SNPs are sampled from each alignment with sample_snp(), and the
    concatenated SNP matrix is written to
    "<base>_<model>_base-snps-concat.fasta". When make_guidetrees is set,
    IQ-TREE is run on that matrix and the result is passed to reroot_tree().

    All settings are the hard-coded locals at the top of the function.
    """
    seed = 553423
    random.seed(seed)
    # The substitution-model parameters below are drawn from numpy's global
    # RNG, which random.seed() does not touch; seed it too so that a fixed
    # seed actually yields reproducible simulations.
    np.random.seed(seed)

    num_samples_range = [10, 100, 1000]
    num_loci_range = [100, 1000, 10000]
    loc_length = 100
    write_gene_alignments = False
    make_gene_trees = False
    make_guidetrees = True  # set to true to run IQTREE on simulated SNP matrices
    keep_all = False  # set to true to keep ALL iqtree outputs
    keep_report = True  # set to true to keep .iqtree files
    get_siterates = True  # set to true to infer site-specific rates in IQTREE
    snps_per_locus = 1
    iqtree_bin = "iqtree2"
    iq_procs = 2

    ###################

    if get_siterates and not make_guidetrees:
        print("ERROR: can't set get_siterates=True and make_guidetrees=False")
        print("Setting make_guidetrees=True and proceeding...")
        make_guidetrees = True

    for num_samples in num_samples_range:
        outgroup = "r0"
        tobj = toytree.rtree.unittree(
            ntips=num_samples,
            treeheight=1.0,
            random_names=False,
            seed=random.randint(1, (sys.maxsize * 2 + 1)),
        )
        guidetree = tobj.write(tree_format=5)
        for num_loci in num_loci_range:
            print("num_samples:", num_samples)
            print("num_loci:", num_loci)

            base = "benchmark_" + "i" + str(num_samples) + "_l" + str(num_loci)
            tobj.write((base + "_guidetree.tre"))

            # One growing list of SNP calls per tip of the guide tree.
            data = dict()
            for ind in tobj.get_tip_labels():
                data[ind] = list()

            my_tree = pyvolve.read_tree(tree=guidetree)

            # for model in ["gtr","gtrgamma"]:
            for model in ["gtrgamma"]:
                model_outpath = base + "_" + model

                for locus in range(num_loci):
                    print(locus)
                    # Random base frequencies, normalised to sum to 1.
                    f = np.random.random(4)
                    f /= f.sum()
                    parameters = {
                        "mu": {
                            "AC": np.random.uniform(low=0.0, high=1.0),
                            "AG": np.random.uniform(low=0.0, high=1.0),
                            "AT": np.random.uniform(low=0.0, high=1.0),
                            "CG": np.random.uniform(low=0.0, high=1.0),
                            "CT": np.random.uniform(low=0.0, high=1.0),
                            "GT": np.random.uniform(low=0.0, high=1.0),
                        },
                        "state_freqs": [f[0], f[1], f[2], f[3]],
                    }
                    if model == "gtr":
                        # GTR model, without rate heterogeneity
                        my_model = pyvolve.Model("nucleotide", parameters)
                    else:
                        my_model = pyvolve.Model(
                            "nucleotide",
                            parameters,
                            rate_factors=[
                                np.random.uniform(low=0.1, high=0.7, size=1),
                                np.random.uniform(low=0.5, high=1.2, size=1),
                                np.random.uniform(low=1.0, high=1.8, size=1),
                                np.random.uniform(low=1.5, high=5.0, size=1),
                            ],
                            rate_probs=[0.4, 0.3, 0.2, 0.1],
                        )
                    if write_gene_alignments:
                        fasta_outpath = "full_alignments/"
                        os.makedirs(fasta_outpath, exist_ok=True)
                    else:
                        # The alignment is only a scratch file in this mode;
                        # the prefix is kept as in the original, so the
                        # model path appears twice in the file name.
                        fasta_outpath = model_outpath
                    fastaout = (
                        fasta_outpath
                        + model_outpath
                        + "_loc"
                        + str(locus)
                        + "_gene-alignment.fasta"
                    )
                    # sample a gene alignment
                    loc = sample_locus(
                        my_tree, my_model, loc_length, snps_per_locus, fastaout
                    )

                    if loc:
                        # sample SNP(s) from gene alignment
                        sampled = sample_snp(
                            read_fasta(fastaout), loc_length, snps_per_locus
                        )
                        if sampled is not None:
                            data = add_locus(data, sampled)

                        # NOTE(review): the nesting of this branch under
                        # `if loc:` is reconstructed from a listing that lost
                        # its indentation -- confirm against the repository.
                        if not write_gene_alignments:
                            os.remove(fastaout)
                            if make_gene_trees:
                                print("ERROR: Can't make gene trees when write_gene_alignments = False")
                        elif make_gene_trees:
                            run_iqtree(
                                fastaout,
                                iqtree_path=iqtree_bin,
                                keep_all=keep_all,
                                keep_report=keep_report,
                                rates=get_siterates,
                                procs=iq_procs,
                            )
                            reroot_tree(
                                tree=(fastaout + ".treefile"),
                                rooted=(fastaout + ".rooted.tre"),
                                outgroup_wildcard=outgroup,
                            )

                # write full SNP alignment & generate tree
                all_snp_out = model_outpath + "_base-snps-concat.fasta"
                write_fasta(data, all_snp_out)
                if make_guidetrees:
                    run_iqtree(
                        all_snp_out,
                        iqtree_path=iqtree_bin,
                        keep_all=keep_all,
                        keep_report=keep_report,
                        rates=get_siterates,
                        procs=iq_procs,
                    )
                    reroot_tree(
                        tree=(all_snp_out + ".treefile"),
                        rooted=(all_snp_out + ".rooted.tre"),
                        outgroup_wildcard=outgroup,
                    )
149
-
150
def reroot_tree(tree, rooted="out.rooted.tre", outgroup_wildcard="out"):
    """Reroot a tree on an outgroup wildcard and write it to ``rooted``.

    Args:
        tree: Tree input handed straight to ``toytree.tree``.
        rooted: Output path for the written tree (tree_format=5).
        outgroup_wildcard: Wildcard passed to the tree's ``root`` method.

    Returns:
        The rerooted tree object, or None when rooting or writing the
        rerooted tree failed. In the failure case the tree as loaded is
        written to ``rooted`` instead.
    """
    loaded = toytree.tree(tree)
    try:
        rerooted = loaded.root(wildcard=outgroup_wildcard)
        rerooted.write(rooted, tree_format=5)
    except Exception:
        # Best effort: still leave a tree file at `rooted` for downstream use.
        loaded.write(rooted, tree_format=5)
        return None
    return rerooted
159
-
160
-
161
def hybridization(dat, prob=0.1, source=None, target=None):
    """Copy randomly chosen sites from source individuals into targets.

    For every individual in ``target``, ``int(aln_len * prob)`` distinct site
    indices are drawn; at each drawn index the target's value is replaced by
    the value held at that index by an individual picked at random from
    ``source``. Values are read from the working copy, so a source that was
    itself modified earlier in the same call contributes its modified value.

    Args:
        dat: Mapping of individual -> sequence (list or string). It is not
            modified. Sequences are assumed to share one length; the length
            of the last one is used.
        prob: Fraction of sites to replace per target individual.
        source: Individuals to copy from; defaults to every key of ``dat``.
        target: Individuals to copy into; defaults to every key of ``dat``.

    Returns:
        A new dict mapping each individual to a new list of site values.
    """
    # Copy each sequence: the original stored references to the caller's
    # lists, so the input alignment was silently edited in place.
    new_dat = {individual: list(seq) for individual, seq in dat.items()}
    if not new_dat:
        return new_dat

    if source is None:
        source = list(dat)
    if target is None:
        target = list(dat)

    aln_len = len(list(new_dat.values())[-1])
    all_indices = list(range(aln_len))
    num = int(aln_len * prob)

    for target_individual in target:
        snp_indices = np.random.choice(all_indices, size=num, replace=False)
        for index in snp_indices:
            source_ind = np.random.choice(source, size=1)[0]
            new_dat[target_individual][index] = new_dat[source_ind][index]
    return new_dat
180
-
181
def run_iqtree(aln,
               iqtree_path="iqtree",
               keep_all=False,
               keep_report=False,
               outgroup=None,
               rates=False,
               procs=4):
    """Run IQ-TREE on an alignment and return the expected tree file path.

    Args:
        aln: Path of the alignment file (used as the output prefix too).
        iqtree_path: Executable to invoke.
        keep_all: When False, auxiliary IQ-TREE outputs are deleted.
        keep_report: When False (and keep_all is False), the ".iqtree"
            report is deleted as well.
        outgroup: Optional value passed with "-o".
        rates: When true, "--mlrate" is added to the command.
        procs: Value passed with "-T".

    Returns:
        ``aln + ".treefile"``. The exit status of the command is not
        inspected, so the file is not guaranteed to exist.
    """
    command = [
        iqtree_path,
        "-s", str(aln),
        "-m", "GTR+I*G4",
        "-redo",
        "-T", str(procs),
    ]
    if outgroup is not None:
        command.extend(["-o", str(outgroup)])
    if rates:
        # NOTE(review): the original notes say older IQ-TREE 1.6.x releases
        # need a different flag (-wst / -wsr) instead of --mlrate -- confirm
        # against the installed version.
        command.append("--mlrate")

    # Output is captured only to keep the console quiet; it is not used.
    subprocess.run(command, capture_output=True, text=True)

    if not keep_all:
        # delete everything except treefile
        for suffix in (".bionj", ".ckp.gz", ".log", ".mldist", ".uniqueseq.phy"):
            silentremove(aln + suffix)
        if not keep_report:
            silentremove(aln + ".iqtree")
    return aln + ".treefile"
219
-
220
def add_locus(d, new):
    """Append the calls of one locus to every sample's running list.

    Args:
        d: Mapping of sample -> list of calls; extended in place.
        new: Mapping of sample -> iterable of calls for the new locus. Every
            key of ``d`` must be present.

    Returns:
        The same ``d`` object, after extension.
    """
    for sample, calls in d.items():
        calls.extend(new[sample])
    return d
225
-
226
def write_fasta(seqs, fas):
    """Write sequences to ``fas`` in FASTA format, one line per sequence.

    Args:
        seqs: Mapping of name -> iterable of strings; the pieces are joined
            without a separator to form the sequence line.
        fas: Output file path (overwritten).
    """
    with open(fas, 'w') as handle:
        for label, pieces in seqs.items():
            header = ">" + str(label) + "\n"
            body = "".join(pieces) + "\n"
            handle.write(header)
            handle.write(body)
235
-
236
def read_fasta(fasta):
    """Read a FASTA file into a dict of header -> sequence string.

    The header is the full text after ">" on the header line. Sequence lines
    are stripped and concatenated; blank lines are skipped. Text that appears
    before the first header is ignored.

    Args:
        fasta: Path of the FASTA file.

    Returns:
        Dict mapping each header to its sequence. A file without any header
        yields an empty dict (the original returned ``{None: ""}``).
    """
    records = dict()
    name = None
    chunks = []
    with open(fasta, "r") as fin:
        for raw in fin:
            line = raw.strip()
            if not line:  # If blank line.
                continue
            if line[0] == ">":
                # `is not None` rather than truthiness, so that a record
                # whose header is a bare ">" is not dropped.
                if name is not None:
                    records[name] = "".join(chunks)
                name = line[1:]
                chunks = []
            elif name is not None:
                chunks.append(line)
    if name is not None:
        records[name] = "".join(chunks)
    return records
255
-
256
def sample_snp(aln_dict, aln_len, snps_per_locus=1):
    """Pick variable site(s) from an alignment.

    A site is variable when at least two samples differ at that index.

    Args:
        aln_dict: Mapping of sample -> sequence (indexable, at least
            ``aln_len`` long).
        aln_len: Number of leading sites to scan.
        snps_per_locus: Number of variable sites to draw when more than one
            exists. Capped at the number of variable sites, where the
            original raised ValueError from the sampler.

    Returns:
        None when no site is variable. Otherwise a dict keyed by sample:
        - ``aln_len == 1``: the single site of each sample, without any
          variability check;
        - exactly one variable site: that site's value for each sample;
        - otherwise: a list of the values at the randomly drawn sites, in
          the same order for every sample.
    """
    if aln_len == 1:
        return {sample: seq[0] for sample, seq in aln_dict.items()}

    sequences = list(aln_dict.values())
    snp_indices = [
        site for site in range(aln_len)
        if len({seq[site] for seq in sequences}) > 1
    ]

    if not snp_indices:
        return None
    if len(snp_indices) == 1:
        # sample them all
        only = snp_indices[0]
        return {sample: seq[only] for sample, seq in aln_dict.items()}

    # Sampling without replacement cannot return more items than exist.
    n_draw = min(snps_per_locus, len(snp_indices))
    sampled_indices = np.random.choice(snp_indices, size=n_draw, replace=False)
    return {
        sample: [seq[site] for site in sampled_indices]
        for sample, seq in aln_dict.items()
    }
287
-
288
def sample_locus(tree, model, gene_len=1000, num_snps=1, out="out.fasta"):
    """Simulate one alignment with pyvolve and write it as FASTA.

    Args:
        tree: Tree passed to ``pyvolve.Evolver``.
        model: Model(s) passed to ``pyvolve.Partition``.
        gene_len: Partition size.
        num_snps: Accepted for interface compatibility; not used here.
        out: Path of the FASTA file to write.

    Returns:
        True when the simulation ran without raising, False on any
        exception (the exception itself is discarded).
    """
    try:
        partition = pyvolve.Partition(models=model, size=gene_len)
        evolver = pyvolve.Evolver(partitions=partition, tree=tree)
        # Rate and info side files are switched off; only `out` is written.
        evolver(seqfile=out, seqfmt="fasta", ratefile=False, infofile=False)
    except Exception:
        return False
    return True
299
-
300
def silentremove(filename):
    """Delete ``filename``, ignoring the case where it does not exist.

    Raises:
        OSError: for any failure other than "no such file or directory".
    """
    try:
        os.remove(filename)
    except OSError as err:
        # errno.ENOENT = no such file or directory: nothing to clean up.
        if err.errno == errno.ENOENT:
            return
        raise
306
-
307
def get_tree_tips(tree):
    """Split a tree string on spaces, commas, parentheses and semicolons.

    Args:
        tree: Tree text (e.g. a Newick string).

    Returns:
        List of the non-empty tokens left after splitting. Anything attached
        to a label without one of those separators stays part of the token.
    """
    # Raw string: "\(" in a normal string literal is an invalid escape
    # sequence; inside a character class the parentheses need no escaping.
    tips = re.split(r"[ ,();]", tree)
    return [tip for tip in tips if tip]
310
-
311
def basic_tree_plot(tree, out="out.pdf"):
    """Draw ``tree`` with a fixed style and render the canvas to a PDF.

    Args:
        tree: Object exposing a ``draw`` method that returns a
            (canvas, axes, mark) triple.
        out: Path of the PDF to write.
    """
    edge_style = {
        "stroke": toytree.colors[0],
        "stroke-width": 1,
    }
    draw_options = dict(
        edge_type="p",
        edge_style=edge_style,
        tip_labels_align=True,
        tip_labels_style={"font-size": "5px"},
        node_labels=False,
        tip_labels=True,
    )

    # Only the canvas is needed for rendering.
    canvas, _axes, _mark = tree.draw(width=400, height=600, **draw_options)
    toyplot.pdf.render(canvas, out)
331
-
332
- if __name__ == "__main__":
333
- main()