biosak 1.123.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. biosak-1.123.7/BioSAK/BLCA_op_parser.py +87 -0
  2. biosak-1.123.7/BioSAK/BioSAK_config.py +33 -0
  3. biosak-1.123.7/BioSAK/COG2020.py +666 -0
  4. biosak-1.123.7/BioSAK/COG2024.py +662 -0
  5. biosak-1.123.7/BioSAK/COG_boxplot_last1row.R +104 -0
  6. biosak-1.123.7/BioSAK/COG_boxplot_last2row.R +223 -0
  7. biosak-1.123.7/BioSAK/CheckM.py +136 -0
  8. biosak-1.123.7/BioSAK/Combine_KEGG_COG.py +220 -0
  9. biosak-1.123.7/BioSAK/Combine_KEGG_arCOG.py +233 -0
  10. biosak-1.123.7/BioSAK/DendroDF.py +25 -0
  11. biosak-1.123.7/BioSAK/DnaFeaturesViewer.py +373 -0
  12. biosak-1.123.7/BioSAK/FastaSplitler_by_num.py +76 -0
  13. biosak-1.123.7/BioSAK/FastaSplitler_by_size.py +92 -0
  14. biosak-1.123.7/BioSAK/FasterqDump.py +81 -0
  15. biosak-1.123.7/BioSAK/GTDB_for_BLCA.py +64 -0
  16. biosak-1.123.7/BioSAK/GenBank.py +167 -0
  17. biosak-1.123.7/BioSAK/Gene2Ctg.py +49 -0
  18. biosak-1.123.7/BioSAK/KEGG.py +813 -0
  19. biosak-1.123.7/BioSAK/KEGG_boxplot_last1row.R +101 -0
  20. biosak-1.123.7/BioSAK/KEGG_get_eukaryotic_kos.py +19 -0
  21. biosak-1.123.7/BioSAK/KeepRemovingTmp.py +90 -0
  22. biosak-1.123.7/BioSAK/MeanMappingDepth.py +73 -0
  23. biosak-1.123.7/BioSAK/MetaBiosample.py +96 -0
  24. biosak-1.123.7/BioSAK/MetaCHIP_phylo.hmm +22125 -0
  25. biosak-1.123.7/BioSAK/MetaCyc_reactions_with_ec.txt +11013 -0
  26. biosak-1.123.7/BioSAK/NetEnzymes.py +410 -0
  27. biosak-1.123.7/BioSAK/Prodigal.py +263 -0
  28. biosak-1.123.7/BioSAK/Reads_simulator.py +144 -0
  29. biosak-1.123.7/BioSAK/RunGraphMB.py +98 -0
  30. biosak-1.123.7/BioSAK/SILVA_for_BLCA.py +93 -0
  31. biosak-1.123.7/BioSAK/SankeyTaxon.py +167 -0
  32. biosak-1.123.7/BioSAK/SubsampleLongReads.py +80 -0
  33. biosak-1.123.7/BioSAK/Tax4Fun2IndOTU.py +31 -0
  34. biosak-1.123.7/BioSAK/UNITE_for_BLCA.py +69 -0
  35. biosak-1.123.7/BioSAK/Usearch16S.py +463 -0
  36. biosak-1.123.7/BioSAK/VERSION +365 -0
  37. biosak-1.123.7/BioSAK/VisBlastOp.py +23 -0
  38. biosak-1.123.7/BioSAK/VisGeneFlk.py +227 -0
  39. biosak-1.123.7/BioSAK/__init__.py +0 -0
  40. biosak-1.123.7/BioSAK/abd.backup.py +439 -0
  41. biosak-1.123.7/BioSAK/abd.py +567 -0
  42. biosak-1.123.7/BioSAK/abd_16s_amplicon.py +75 -0
  43. biosak-1.123.7/BioSAK/abd_16s_metagenome.py +75 -0
  44. biosak-1.123.7/BioSAK/abd_mask.py +207 -0
  45. biosak-1.123.7/BioSAK/abd_rename_and_combine.py +45 -0
  46. biosak-1.123.7/BioSAK/abund.py +145 -0
  47. biosak-1.123.7/BioSAK/add_desc.py +78 -0
  48. biosak-1.123.7/BioSAK/arCOG.py +654 -0
  49. biosak-1.123.7/BioSAK/bam2reads.py +185 -0
  50. biosak-1.123.7/BioSAK/blast.py +112 -0
  51. biosak-1.123.7/BioSAK/blca.py +185 -0
  52. biosak-1.123.7/BioSAK/blca_generate_abundance_table.py +108 -0
  53. biosak-1.123.7/BioSAK/blca_main.py +417 -0
  54. biosak-1.123.7/BioSAK/blca_subset_db_acc.py +226 -0
  55. biosak-1.123.7/BioSAK/blca_subset_db_gg.py +163 -0
  56. biosak-1.123.7/BioSAK/boxplot.py +82 -0
  57. biosak-1.123.7/BioSAK/boxplot_last1row.R +93 -0
  58. biosak-1.123.7/BioSAK/boxplot_matrix_COG.py +136 -0
  59. biosak-1.123.7/BioSAK/boxplot_matrix_COG_backup.py +127 -0
  60. biosak-1.123.7/BioSAK/boxplot_matrix_KEGG.py +184 -0
  61. biosak-1.123.7/BioSAK/boxplot_matrix_dbCAN.py +152 -0
  62. biosak-1.123.7/BioSAK/cat_fa.py +49 -0
  63. biosak-1.123.7/BioSAK/cdb.py +75 -0
  64. biosak-1.123.7/BioSAK/cdb_to_itol_piechart.py +150 -0
  65. biosak-1.123.7/BioSAK/cdd2cog.pl +414 -0
  66. biosak-1.123.7/BioSAK/checkm_marker.py +42 -0
  67. biosak-1.123.7/BioSAK/combine_fun_stats.py +83 -0
  68. biosak-1.123.7/BioSAK/combine_low_abd_otu.py +170 -0
  69. biosak-1.123.7/BioSAK/compare_sets.py +52 -0
  70. biosak-1.123.7/BioSAK/count_num.py +33 -0
  71. biosak-1.123.7/BioSAK/cross_link_seqs.py +136 -0
  72. biosak-1.123.7/BioSAK/dbCAN.py +468 -0
  73. biosak-1.123.7/BioSAK/download_GenBank_genome_subset_prokaryotes_csv.py +38 -0
  74. biosak-1.123.7/BioSAK/enrich.py +341 -0
  75. biosak-1.123.7/BioSAK/exe_cmds.py +38 -0
  76. biosak-1.123.7/BioSAK/ezaai2mat.py +51 -0
  77. biosak-1.123.7/BioSAK/fa2id.py +43 -0
  78. biosak-1.123.7/BioSAK/fastaai.py +85 -0
  79. biosak-1.123.7/BioSAK/format_converter.py +192 -0
  80. biosak-1.123.7/BioSAK/fq2fa.py +32 -0
  81. biosak-1.123.7/BioSAK/gapseq.py +121 -0
  82. biosak-1.123.7/BioSAK/gbk2faa.py +97 -0
  83. biosak-1.123.7/BioSAK/gbk2ffn.py +105 -0
  84. biosak-1.123.7/BioSAK/gbk2fna.py +88 -0
  85. biosak-1.123.7/BioSAK/gbk2gff.py +212 -0
  86. biosak-1.123.7/BioSAK/gbk_to_ffn_faa.py +106 -0
  87. biosak-1.123.7/BioSAK/gc.py +73 -0
  88. biosak-1.123.7/BioSAK/get_EC_from_ko_stats_D.py +22 -0
  89. biosak-1.123.7/BioSAK/get_GTDB_taxon_gnm.py +164 -0
  90. biosak-1.123.7/BioSAK/get_MAG_reads_long.py +125 -0
  91. biosak-1.123.7/BioSAK/get_Pfam_hmms.py +109 -0
  92. biosak-1.123.7/BioSAK/get_TopHits_taxonomy.py +52 -0
  93. biosak-1.123.7/BioSAK/get_aa_composition.py +66 -0
  94. biosak-1.123.7/BioSAK/get_abd1_mask.py +150 -0
  95. biosak-1.123.7/BioSAK/get_abd3_stats.py +143 -0
  96. biosak-1.123.7/BioSAK/get_bin_abundance copy.py +277 -0
  97. biosak-1.123.7/BioSAK/get_data_matrix.py +59 -0
  98. biosak-1.123.7/BioSAK/get_eu_otu.py +160 -0
  99. biosak-1.123.7/BioSAK/get_gene_depth.py +121 -0
  100. biosak-1.123.7/BioSAK/get_genome_GTDB.py +91 -0
  101. biosak-1.123.7/BioSAK/get_genome_NCBI.py +160 -0
  102. biosak-1.123.7/BioSAK/get_genome_NCBI_v1.py +216 -0
  103. biosak-1.123.7/BioSAK/get_genome_NCBI_v2.py +237 -0
  104. biosak-1.123.7/BioSAK/get_gnm_size.py +85 -0
  105. biosak-1.123.7/BioSAK/get_ko_gene_seqs.py +134 -0
  106. biosak-1.123.7/BioSAK/get_krona_plot.py +45 -0
  107. biosak-1.123.7/BioSAK/get_reads_from_sam.py +47 -0
  108. biosak-1.123.7/BioSAK/get_reads_id_in_sam.py +28 -0
  109. biosak-1.123.7/BioSAK/get_single_page_web.py +74 -0
  110. biosak-1.123.7/BioSAK/get_top_hit.py +104 -0
  111. biosak-1.123.7/BioSAK/get_total_length.py +38 -0
  112. biosak-1.123.7/BioSAK/gff2chrom.py +50 -0
  113. biosak-1.123.7/BioSAK/global_functions.py +158 -0
  114. biosak-1.123.7/BioSAK/hpc3.py +96 -0
  115. biosak-1.123.7/BioSAK/js_cmds.py +93 -0
  116. biosak-1.123.7/BioSAK/js_hpc3.py +86 -0
  117. biosak-1.123.7/BioSAK/keep_best_hit.py +56 -0
  118. biosak-1.123.7/BioSAK/ko00001.keg +50796 -0
  119. biosak-1.123.7/BioSAK/koala.py +57 -0
  120. biosak-1.123.7/BioSAK/link_16S_MAG.py +647 -0
  121. biosak-1.123.7/BioSAK/link_16s.py +4789 -0
  122. biosak-1.123.7/BioSAK/magabund.py +275 -0
  123. biosak-1.123.7/BioSAK/magabund2.py +462 -0
  124. biosak-1.123.7/BioSAK/manipulator_fasta.py +55 -0
  125. biosak-1.123.7/BioSAK/manipulator_msa.py +0 -0
  126. biosak-1.123.7/BioSAK/manipulator_newick.py +78 -0
  127. biosak-1.123.7/BioSAK/manipulator_sam.py +0 -0
  128. biosak-1.123.7/BioSAK/mann_whitney_u.py +56 -0
  129. biosak-1.123.7/BioSAK/mean_MAG_cov.py +74 -0
  130. biosak-1.123.7/BioSAK/merge_df.py +51 -0
  131. biosak-1.123.7/BioSAK/merge_seq.py +66 -0
  132. biosak-1.123.7/BioSAK/metaAssembly.py +214 -0
  133. biosak-1.123.7/BioSAK/metabat2concoct.py +42 -0
  134. biosak-1.123.7/BioSAK/metabat2maxbin.py +51 -0
  135. biosak-1.123.7/BioSAK/metadata.py +67 -0
  136. biosak-1.123.7/BioSAK/mmseqs.py +70 -0
  137. biosak-1.123.7/BioSAK/msa_to_distance_matrix.py +0 -0
  138. biosak-1.123.7/BioSAK/ncbi_dataset.py +51 -0
  139. biosak-1.123.7/BioSAK/odp.py +0 -0
  140. biosak-1.123.7/BioSAK/parse_MetaCyc_RxnDB.py +76 -0
  141. biosak-1.123.7/BioSAK/parse_mmseqs_tsv.py +48 -0
  142. biosak-1.123.7/BioSAK/plot_mag.py +207 -0
  143. biosak-1.123.7/BioSAK/plot_sam_depth.py +214 -0
  144. biosak-1.123.7/BioSAK/plot_tree.R +51 -0
  145. biosak-1.123.7/BioSAK/prefix_file.py +65 -0
  146. biosak-1.123.7/BioSAK/prefix_seq_by_file_name.py +73 -0
  147. biosak-1.123.7/BioSAK/prokka.py +111 -0
  148. biosak-1.123.7/BioSAK/reads2bam.py +115 -0
  149. biosak-1.123.7/BioSAK/rename_df_row.py +67 -0
  150. biosak-1.123.7/BioSAK/rename_reads_for_Reago.py +62 -0
  151. biosak-1.123.7/BioSAK/rename_seq.py +166 -0
  152. biosak-1.123.7/BioSAK/ribbon.py +325 -0
  153. biosak-1.123.7/BioSAK/rm_low_abd_otu.py +73 -0
  154. biosak-1.123.7/BioSAK/rm_low_depth_sample.py +41 -0
  155. biosak-1.123.7/BioSAK/sam2bam.py +62 -0
  156. biosak-1.123.7/BioSAK/sampling_GTDB_gnms.py +144 -0
  157. biosak-1.123.7/BioSAK/sankey.R +65 -0
  158. biosak-1.123.7/BioSAK/sankey.py +107 -0
  159. biosak-1.123.7/BioSAK/select_seq.py +88 -0
  160. biosak-1.123.7/BioSAK/sep_reads_by_barcode.py +81 -0
  161. biosak-1.123.7/BioSAK/slice_seq.py +69 -0
  162. biosak-1.123.7/BioSAK/split_fasta.py +99 -0
  163. biosak-1.123.7/BioSAK/split_folder.py +66 -0
  164. biosak-1.123.7/BioSAK/split_sam.py +135 -0
  165. biosak-1.123.7/BioSAK/sra.py +116 -0
  166. biosak-1.123.7/BioSAK/srun.py +34 -0
  167. biosak-1.123.7/BioSAK/statsTaxa.py +59 -0
  168. biosak-1.123.7/BioSAK/stats_arcog.py +100 -0
  169. biosak-1.123.7/BioSAK/stats_blast_hit.py +182 -0
  170. biosak-1.123.7/BioSAK/stats_cog2024.py +100 -0
  171. biosak-1.123.7/BioSAK/stats_ko.py +185 -0
  172. biosak-1.123.7/BioSAK/submitHPC.py +39 -0
  173. biosak-1.123.7/BioSAK/subset_GTDB_meta.py +58 -0
  174. biosak-1.123.7/BioSAK/subset_df.py +156 -0
  175. biosak-1.123.7/BioSAK/subset_tree.py +192 -0
  176. biosak-1.123.7/BioSAK/taxdump.py +116 -0
  177. biosak-1.123.7/BioSAK/tmp_1.py +48 -0
  178. biosak-1.123.7/BioSAK/tmp_2.py +34 -0
  179. biosak-1.123.7/BioSAK/tmp_3.py +23 -0
  180. biosak-1.123.7/BioSAK/tmp_4.py +31 -0
  181. biosak-1.123.7/BioSAK/tmp_5.py +39 -0
  182. biosak-1.123.7/BioSAK/top_16S_hits.py +188 -0
  183. biosak-1.123.7/BioSAK/top_hits_in_a_group.py +60 -0
  184. biosak-1.123.7/BioSAK/transpose.py +32 -0
  185. biosak-1.123.7/BioSAK/trim.py +75 -0
  186. biosak-1.123.7/BioSAK/usearch_uc.py +59 -0
  187. biosak-1.123.7/BioSAK/vis_color_scheme.py +82 -0
  188. biosak-1.123.7/BioSAK/wilcox.py +41 -0
  189. biosak-1.123.7/LICENSE +674 -0
  190. biosak-1.123.7/MANIFEST.in +10 -0
  191. biosak-1.123.7/PKG-INFO +23 -0
  192. biosak-1.123.7/README.md +178 -0
  193. biosak-1.123.7/bin/BioSAK +1367 -0
  194. biosak-1.123.7/biosak.egg-info/PKG-INFO +23 -0
  195. biosak-1.123.7/biosak.egg-info/SOURCES.txt +198 -0
  196. biosak-1.123.7/biosak.egg-info/dependency_links.txt +1 -0
  197. biosak-1.123.7/biosak.egg-info/requires.txt +11 -0
  198. biosak-1.123.7/biosak.egg-info/top_level.txt +1 -0
  199. biosak-1.123.7/setup.cfg +4 -0
  200. biosak-1.123.7/setup.py +29 -0
@@ -0,0 +1,87 @@
1
+ import argparse
2
+ from BioSAK.global_functions import sep_path_basename_ext
3
+
4
+
5
# Usage banner with an example command line; it is handed to
# argparse.ArgumentParser(usage=...) when this module is run as a script.
BLCA_op_parser_usage = '''
========== BLCA_op_parser example commands ==========

BioSAK BLCA_op_parser -in OTUs.fasta.blca.out

=====================================================
'''
12
+
13
+
14
def _format_blca_taxon(taxon_blca_raw):
    """Convert one raw BLCA taxonomy string into its two reformatted variants.

    Args:
        taxon_blca_raw: second tab-separated column of a BLCA output line.

    Returns:
        A tuple ``(with_confidence, with_rank_prefix)``. Both elements are
        ';'-joined lineages with spaces replaced by underscores; the first
        looks like ``Name(conf)``, the second like ``r__Name``. The literal
        string 'Unclassified' is returned unchanged for both.
    """
    if taxon_blca_raw == 'Unclassified':
        return 'Unclassified', 'Unclassified'

    # The last ';'-separated field of every chunk selects the rank prefix of
    # the name at the start of that same chunk. The mapping looks shifted by
    # one rank on purpose: presumably the input has the shape
    # 'superkingdom:Name;conf;phylum:Name;conf;...', so after splitting on ':'
    # each chunk ends with the label of the NEXT rank (and the final chunk
    # ends with an empty field) -- TODO confirm against real BLCA output.
    rank_prefix_by_trailing_label = {
        'phylum': 'd',
        'class': 'p',
        'order': 'c',
        'family': 'o',
        'genus': 'f',
        'species': 'g',
        '': 's',
    }

    formatted_taxon_list_with_num = []
    formatted_taxon_list_no_num = []
    # Drop everything before the first ':' (the leading rank label).
    for each_str in taxon_blca_raw.strip().split(':')[1:]:
        each_str_split = each_str.split(';')

        # Unknown trailing labels yield an empty prefix, i.e. '__Name'.
        current_rank = rank_prefix_by_trailing_label.get(each_str_split[-1], '')

        # Only the first five characters of the confidence value are kept.
        formatted_taxon_list_with_num.append('%s(%s)' % (each_str_split[0], each_str_split[1][:5]))
        formatted_taxon_list_no_num.append('%s__%s' % (current_rank, each_str_split[0]))

    formatted_taxon_str_with_num = ';'.join(formatted_taxon_list_with_num)
    formatted_taxon_str_no_num = ';'.join(formatted_taxon_list_no_num)

    return (formatted_taxon_str_with_num.replace(' ', '_'),
            formatted_taxon_str_no_num.replace(' ', '_'))


def BLCA_op_parser(args):
    """Reformat a BLCA output file into two tab-separated taxonomy tables.

    Two files are written next to the input, named after its basename:
    ``<basename>_reformatted_1.txt`` (taxon names followed by their confidence
    in parentheses) and ``<basename>_reformatted_2.txt`` (taxon names carrying
    a one-letter rank prefix). Each output line is ``<sequence id>\\t<lineage>``.

    Args:
        args: dict of parsed command-line options; ``args['in']`` is the path
            to the BLCA output file (sequence id and taxonomy separated by a
            tab on every line).

    Raises:
        IndexError: if a non-blank line has no second tab-separated column,
            or a taxonomy chunk lacks a confidence field.
    """
    blca_output = args['in']

    file_in_path, file_in_basename, file_in_ext = sep_path_basename_ext(blca_output)
    output_file_1 = '%s/%s_reformatted_1.txt' % (file_in_path, file_in_basename)
    output_file_2 = '%s/%s_reformatted_2.txt' % (file_in_path, file_in_basename)

    # read in input file; a repeated sequence id keeps its first position in
    # the output but takes the taxonomy of its last occurrence
    s16_taxon_blca_dict = {}
    with open(blca_output) as blca_output_handle:
        for each_16s_taxon in blca_output_handle:
            each_16s_taxon_stripped = each_16s_taxon.strip()
            # tolerate blank lines (e.g. a trailing newline at end of file)
            if not each_16s_taxon_stripped:
                continue
            each_16s_taxon_split = each_16s_taxon_stripped.split('\t')
            s16_taxon_blca_dict[each_16s_taxon_split[0]] = each_16s_taxon_split[1]

    # both output files are closed even if formatting or writing fails
    with open(output_file_1, 'w') as output_file_1_handle, \
            open(output_file_2, 'w') as output_file_2_handle:
        for each_16s, taxon_blca_raw in s16_taxon_blca_dict.items():
            taxon_with_num, taxon_no_num = _format_blca_taxon(taxon_blca_raw)
            output_file_1_handle.write('%s\t%s\n' % (each_16s, taxon_with_num))
            output_file_2_handle.write('%s\t%s\n' % (each_16s, taxon_no_num))
80
+
81
+
82
if __name__ == '__main__':

    # Command-line interface: the module's usage banner replaces the
    # auto-generated argparse usage line.
    cli = argparse.ArgumentParser(usage=BLCA_op_parser_usage)
    cli.add_argument('-in', required=True, help='BLCA output')

    # The worker takes a plain dict keyed by option name, so convert the
    # parsed namespace before handing it over.
    BLCA_op_parser(vars(cli.parse_args()))
@@ -0,0 +1,33 @@
1
+ import os
2
+
3
# Directory that holds this config file. Bundled resources are looked up
# relative to it, so lookups do not depend on the current working directory.
pwd_config_file = os.path.realpath(__file__)
# os.path.dirname rather than splitting on '/': the manual split returns an
# empty string whenever the resolved path does not use '/' as its separator.
config_file_path = os.path.dirname(pwd_config_file)

# Executables are given as bare command names and therefore resolved through
# PATH; replace a value with a full path to pin a specific installation.
# Resource entries point at files expected to sit next to this config file.
# NOTE(review): this module does not check that those files exist -- confirm
# that each referenced file is actually shipped before relying on its entry.
config_dict = {
    'config_file_path': config_file_path,

    # external executables
    'prodigal': 'prodigal',
    'hmmsearch': 'hmmsearch',
    'hmmfetch': 'hmmfetch',
    'hmmalign': 'hmmalign',
    'hmmstat': 'hmmstat',
    'mafft': 'mafft',
    'bowtie2': 'bowtie2',
    'bowtie2_build': 'bowtie2-build',
    'blastp': 'blastp',
    'blastn': 'blastn',
    'makeblastdb': 'makeblastdb',
    'fasttree': 'FastTree',

    # files located in the same directory as this config file
    'ranger_mac': os.path.join(config_file_path, 'Ranger-DTL-Dated.mac'),
    'ranger_linux': os.path.join(config_file_path, 'Ranger-DTL-Dated.linux'),
    'path_to_hmm': os.path.join(config_file_path, 'MetaCHIP_phylo.hmm'),
    'circos_HGT_R': os.path.join(config_file_path, 'MetaCHIP_circos_HGT.R'),
    'label_tree_R': os.path.join(config_file_path, 'label_tree.R'),
    'cdd2cog_perl': os.path.join(config_file_path, 'cdd2cog.pl'),
    'get_sankey_plot_R': os.path.join(config_file_path, 'sankey.R'),
    'compare_trees_R': os.path.join(config_file_path, 'compare_trees.R'),
    'PhyloBiAssoc_R': os.path.join(config_file_path, 'PhyloBiAssoc.R'),
    'ko00001_keg': os.path.join(config_file_path, 'ko00001.keg'),
    'MetaCyc_rxns_with_ec': os.path.join(config_file_path, 'MetaCyc_reactions_with_ec.txt'),
}
33
+