biosak 1.123.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biosak-1.123.7/BioSAK/BLCA_op_parser.py +87 -0
- biosak-1.123.7/BioSAK/BioSAK_config.py +33 -0
- biosak-1.123.7/BioSAK/COG2020.py +666 -0
- biosak-1.123.7/BioSAK/COG2024.py +662 -0
- biosak-1.123.7/BioSAK/COG_boxplot_last1row.R +104 -0
- biosak-1.123.7/BioSAK/COG_boxplot_last2row.R +223 -0
- biosak-1.123.7/BioSAK/CheckM.py +136 -0
- biosak-1.123.7/BioSAK/Combine_KEGG_COG.py +220 -0
- biosak-1.123.7/BioSAK/Combine_KEGG_arCOG.py +233 -0
- biosak-1.123.7/BioSAK/DendroDF.py +25 -0
- biosak-1.123.7/BioSAK/DnaFeaturesViewer.py +373 -0
- biosak-1.123.7/BioSAK/FastaSplitler_by_num.py +76 -0
- biosak-1.123.7/BioSAK/FastaSplitler_by_size.py +92 -0
- biosak-1.123.7/BioSAK/FasterqDump.py +81 -0
- biosak-1.123.7/BioSAK/GTDB_for_BLCA.py +64 -0
- biosak-1.123.7/BioSAK/GenBank.py +167 -0
- biosak-1.123.7/BioSAK/Gene2Ctg.py +49 -0
- biosak-1.123.7/BioSAK/KEGG.py +813 -0
- biosak-1.123.7/BioSAK/KEGG_boxplot_last1row.R +101 -0
- biosak-1.123.7/BioSAK/KEGG_get_eukaryotic_kos.py +19 -0
- biosak-1.123.7/BioSAK/KeepRemovingTmp.py +90 -0
- biosak-1.123.7/BioSAK/MeanMappingDepth.py +73 -0
- biosak-1.123.7/BioSAK/MetaBiosample.py +96 -0
- biosak-1.123.7/BioSAK/MetaCHIP_phylo.hmm +22125 -0
- biosak-1.123.7/BioSAK/MetaCyc_reactions_with_ec.txt +11013 -0
- biosak-1.123.7/BioSAK/NetEnzymes.py +410 -0
- biosak-1.123.7/BioSAK/Prodigal.py +263 -0
- biosak-1.123.7/BioSAK/Reads_simulator.py +144 -0
- biosak-1.123.7/BioSAK/RunGraphMB.py +98 -0
- biosak-1.123.7/BioSAK/SILVA_for_BLCA.py +93 -0
- biosak-1.123.7/BioSAK/SankeyTaxon.py +167 -0
- biosak-1.123.7/BioSAK/SubsampleLongReads.py +80 -0
- biosak-1.123.7/BioSAK/Tax4Fun2IndOTU.py +31 -0
- biosak-1.123.7/BioSAK/UNITE_for_BLCA.py +69 -0
- biosak-1.123.7/BioSAK/Usearch16S.py +463 -0
- biosak-1.123.7/BioSAK/VERSION +365 -0
- biosak-1.123.7/BioSAK/VisBlastOp.py +23 -0
- biosak-1.123.7/BioSAK/VisGeneFlk.py +227 -0
- biosak-1.123.7/BioSAK/__init__.py +0 -0
- biosak-1.123.7/BioSAK/abd.backup.py +439 -0
- biosak-1.123.7/BioSAK/abd.py +567 -0
- biosak-1.123.7/BioSAK/abd_16s_amplicon.py +75 -0
- biosak-1.123.7/BioSAK/abd_16s_metagenome.py +75 -0
- biosak-1.123.7/BioSAK/abd_mask.py +207 -0
- biosak-1.123.7/BioSAK/abd_rename_and_combine.py +45 -0
- biosak-1.123.7/BioSAK/abund.py +145 -0
- biosak-1.123.7/BioSAK/add_desc.py +78 -0
- biosak-1.123.7/BioSAK/arCOG.py +654 -0
- biosak-1.123.7/BioSAK/bam2reads.py +185 -0
- biosak-1.123.7/BioSAK/blast.py +112 -0
- biosak-1.123.7/BioSAK/blca.py +185 -0
- biosak-1.123.7/BioSAK/blca_generate_abundance_table.py +108 -0
- biosak-1.123.7/BioSAK/blca_main.py +417 -0
- biosak-1.123.7/BioSAK/blca_subset_db_acc.py +226 -0
- biosak-1.123.7/BioSAK/blca_subset_db_gg.py +163 -0
- biosak-1.123.7/BioSAK/boxplot.py +82 -0
- biosak-1.123.7/BioSAK/boxplot_last1row.R +93 -0
- biosak-1.123.7/BioSAK/boxplot_matrix_COG.py +136 -0
- biosak-1.123.7/BioSAK/boxplot_matrix_COG_backup.py +127 -0
- biosak-1.123.7/BioSAK/boxplot_matrix_KEGG.py +184 -0
- biosak-1.123.7/BioSAK/boxplot_matrix_dbCAN.py +152 -0
- biosak-1.123.7/BioSAK/cat_fa.py +49 -0
- biosak-1.123.7/BioSAK/cdb.py +75 -0
- biosak-1.123.7/BioSAK/cdb_to_itol_piechart.py +150 -0
- biosak-1.123.7/BioSAK/cdd2cog.pl +414 -0
- biosak-1.123.7/BioSAK/checkm_marker.py +42 -0
- biosak-1.123.7/BioSAK/combine_fun_stats.py +83 -0
- biosak-1.123.7/BioSAK/combine_low_abd_otu.py +170 -0
- biosak-1.123.7/BioSAK/compare_sets.py +52 -0
- biosak-1.123.7/BioSAK/count_num.py +33 -0
- biosak-1.123.7/BioSAK/cross_link_seqs.py +136 -0
- biosak-1.123.7/BioSAK/dbCAN.py +468 -0
- biosak-1.123.7/BioSAK/download_GenBank_genome_subset_prokaryotes_csv.py +38 -0
- biosak-1.123.7/BioSAK/enrich.py +341 -0
- biosak-1.123.7/BioSAK/exe_cmds.py +38 -0
- biosak-1.123.7/BioSAK/ezaai2mat.py +51 -0
- biosak-1.123.7/BioSAK/fa2id.py +43 -0
- biosak-1.123.7/BioSAK/fastaai.py +85 -0
- biosak-1.123.7/BioSAK/format_converter.py +192 -0
- biosak-1.123.7/BioSAK/fq2fa.py +32 -0
- biosak-1.123.7/BioSAK/gapseq.py +121 -0
- biosak-1.123.7/BioSAK/gbk2faa.py +97 -0
- biosak-1.123.7/BioSAK/gbk2ffn.py +105 -0
- biosak-1.123.7/BioSAK/gbk2fna.py +88 -0
- biosak-1.123.7/BioSAK/gbk2gff.py +212 -0
- biosak-1.123.7/BioSAK/gbk_to_ffn_faa.py +106 -0
- biosak-1.123.7/BioSAK/gc.py +73 -0
- biosak-1.123.7/BioSAK/get_EC_from_ko_stats_D.py +22 -0
- biosak-1.123.7/BioSAK/get_GTDB_taxon_gnm.py +164 -0
- biosak-1.123.7/BioSAK/get_MAG_reads_long.py +125 -0
- biosak-1.123.7/BioSAK/get_Pfam_hmms.py +109 -0
- biosak-1.123.7/BioSAK/get_TopHits_taxonomy.py +52 -0
- biosak-1.123.7/BioSAK/get_aa_composition.py +66 -0
- biosak-1.123.7/BioSAK/get_abd1_mask.py +150 -0
- biosak-1.123.7/BioSAK/get_abd3_stats.py +143 -0
- biosak-1.123.7/BioSAK/get_bin_abundance copy.py +277 -0
- biosak-1.123.7/BioSAK/get_data_matrix.py +59 -0
- biosak-1.123.7/BioSAK/get_eu_otu.py +160 -0
- biosak-1.123.7/BioSAK/get_gene_depth.py +121 -0
- biosak-1.123.7/BioSAK/get_genome_GTDB.py +91 -0
- biosak-1.123.7/BioSAK/get_genome_NCBI.py +160 -0
- biosak-1.123.7/BioSAK/get_genome_NCBI_v1.py +216 -0
- biosak-1.123.7/BioSAK/get_genome_NCBI_v2.py +237 -0
- biosak-1.123.7/BioSAK/get_gnm_size.py +85 -0
- biosak-1.123.7/BioSAK/get_ko_gene_seqs.py +134 -0
- biosak-1.123.7/BioSAK/get_krona_plot.py +45 -0
- biosak-1.123.7/BioSAK/get_reads_from_sam.py +47 -0
- biosak-1.123.7/BioSAK/get_reads_id_in_sam.py +28 -0
- biosak-1.123.7/BioSAK/get_single_page_web.py +74 -0
- biosak-1.123.7/BioSAK/get_top_hit.py +104 -0
- biosak-1.123.7/BioSAK/get_total_length.py +38 -0
- biosak-1.123.7/BioSAK/gff2chrom.py +50 -0
- biosak-1.123.7/BioSAK/global_functions.py +158 -0
- biosak-1.123.7/BioSAK/hpc3.py +96 -0
- biosak-1.123.7/BioSAK/js_cmds.py +93 -0
- biosak-1.123.7/BioSAK/js_hpc3.py +86 -0
- biosak-1.123.7/BioSAK/keep_best_hit.py +56 -0
- biosak-1.123.7/BioSAK/ko00001.keg +50796 -0
- biosak-1.123.7/BioSAK/koala.py +57 -0
- biosak-1.123.7/BioSAK/link_16S_MAG.py +647 -0
- biosak-1.123.7/BioSAK/link_16s.py +4789 -0
- biosak-1.123.7/BioSAK/magabund.py +275 -0
- biosak-1.123.7/BioSAK/magabund2.py +462 -0
- biosak-1.123.7/BioSAK/manipulator_fasta.py +55 -0
- biosak-1.123.7/BioSAK/manipulator_msa.py +0 -0
- biosak-1.123.7/BioSAK/manipulator_newick.py +78 -0
- biosak-1.123.7/BioSAK/manipulator_sam.py +0 -0
- biosak-1.123.7/BioSAK/mann_whitney_u.py +56 -0
- biosak-1.123.7/BioSAK/mean_MAG_cov.py +74 -0
- biosak-1.123.7/BioSAK/merge_df.py +51 -0
- biosak-1.123.7/BioSAK/merge_seq.py +66 -0
- biosak-1.123.7/BioSAK/metaAssembly.py +214 -0
- biosak-1.123.7/BioSAK/metabat2concoct.py +42 -0
- biosak-1.123.7/BioSAK/metabat2maxbin.py +51 -0
- biosak-1.123.7/BioSAK/metadata.py +67 -0
- biosak-1.123.7/BioSAK/mmseqs.py +70 -0
- biosak-1.123.7/BioSAK/msa_to_distance_matrix.py +0 -0
- biosak-1.123.7/BioSAK/ncbi_dataset.py +51 -0
- biosak-1.123.7/BioSAK/odp.py +0 -0
- biosak-1.123.7/BioSAK/parse_MetaCyc_RxnDB.py +76 -0
- biosak-1.123.7/BioSAK/parse_mmseqs_tsv.py +48 -0
- biosak-1.123.7/BioSAK/plot_mag.py +207 -0
- biosak-1.123.7/BioSAK/plot_sam_depth.py +214 -0
- biosak-1.123.7/BioSAK/plot_tree.R +51 -0
- biosak-1.123.7/BioSAK/prefix_file.py +65 -0
- biosak-1.123.7/BioSAK/prefix_seq_by_file_name.py +73 -0
- biosak-1.123.7/BioSAK/prokka.py +111 -0
- biosak-1.123.7/BioSAK/reads2bam.py +115 -0
- biosak-1.123.7/BioSAK/rename_df_row.py +67 -0
- biosak-1.123.7/BioSAK/rename_reads_for_Reago.py +62 -0
- biosak-1.123.7/BioSAK/rename_seq.py +166 -0
- biosak-1.123.7/BioSAK/ribbon.py +325 -0
- biosak-1.123.7/BioSAK/rm_low_abd_otu.py +73 -0
- biosak-1.123.7/BioSAK/rm_low_depth_sample.py +41 -0
- biosak-1.123.7/BioSAK/sam2bam.py +62 -0
- biosak-1.123.7/BioSAK/sampling_GTDB_gnms.py +144 -0
- biosak-1.123.7/BioSAK/sankey.R +65 -0
- biosak-1.123.7/BioSAK/sankey.py +107 -0
- biosak-1.123.7/BioSAK/select_seq.py +88 -0
- biosak-1.123.7/BioSAK/sep_reads_by_barcode.py +81 -0
- biosak-1.123.7/BioSAK/slice_seq.py +69 -0
- biosak-1.123.7/BioSAK/split_fasta.py +99 -0
- biosak-1.123.7/BioSAK/split_folder.py +66 -0
- biosak-1.123.7/BioSAK/split_sam.py +135 -0
- biosak-1.123.7/BioSAK/sra.py +116 -0
- biosak-1.123.7/BioSAK/srun.py +34 -0
- biosak-1.123.7/BioSAK/statsTaxa.py +59 -0
- biosak-1.123.7/BioSAK/stats_arcog.py +100 -0
- biosak-1.123.7/BioSAK/stats_blast_hit.py +182 -0
- biosak-1.123.7/BioSAK/stats_cog2024.py +100 -0
- biosak-1.123.7/BioSAK/stats_ko.py +185 -0
- biosak-1.123.7/BioSAK/submitHPC.py +39 -0
- biosak-1.123.7/BioSAK/subset_GTDB_meta.py +58 -0
- biosak-1.123.7/BioSAK/subset_df.py +156 -0
- biosak-1.123.7/BioSAK/subset_tree.py +192 -0
- biosak-1.123.7/BioSAK/taxdump.py +116 -0
- biosak-1.123.7/BioSAK/tmp_1.py +48 -0
- biosak-1.123.7/BioSAK/tmp_2.py +34 -0
- biosak-1.123.7/BioSAK/tmp_3.py +23 -0
- biosak-1.123.7/BioSAK/tmp_4.py +31 -0
- biosak-1.123.7/BioSAK/tmp_5.py +39 -0
- biosak-1.123.7/BioSAK/top_16S_hits.py +188 -0
- biosak-1.123.7/BioSAK/top_hits_in_a_group.py +60 -0
- biosak-1.123.7/BioSAK/transpose.py +32 -0
- biosak-1.123.7/BioSAK/trim.py +75 -0
- biosak-1.123.7/BioSAK/usearch_uc.py +59 -0
- biosak-1.123.7/BioSAK/vis_color_scheme.py +82 -0
- biosak-1.123.7/BioSAK/wilcox.py +41 -0
- biosak-1.123.7/LICENSE +674 -0
- biosak-1.123.7/MANIFEST.in +10 -0
- biosak-1.123.7/PKG-INFO +23 -0
- biosak-1.123.7/README.md +178 -0
- biosak-1.123.7/bin/BioSAK +1367 -0
- biosak-1.123.7/biosak.egg-info/PKG-INFO +23 -0
- biosak-1.123.7/biosak.egg-info/SOURCES.txt +198 -0
- biosak-1.123.7/biosak.egg-info/dependency_links.txt +1 -0
- biosak-1.123.7/biosak.egg-info/requires.txt +11 -0
- biosak-1.123.7/biosak.egg-info/top_level.txt +1 -0
- biosak-1.123.7/setup.cfg +4 -0
- biosak-1.123.7/setup.py +29 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from BioSAK.global_functions import sep_path_basename_ext
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# Usage banner passed to argparse when this module is run as a script.
BLCA_op_parser_usage = (
    '\n'
    '========== BLCA_op_parser example commands ==========\n'
    '\n'
    'BioSAK BLCA_op_parser -in OTUs.fasta.blca.out\n'
    '\n'
    '=====================================================\n'
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def BLCA_op_parser(args):
    """Rewrite a BLCA output table as two simplified taxonomy tables.

    The input is read as tab-separated lines: sequence ID in the first
    column, taxonomy in the second. Blank lines are skipped. A taxonomy equal
    to 'Unclassified' is passed through unchanged. Any other taxonomy is
    split on ':', the first piece is dropped, and every remaining piece is
    split on ';' into a taxon name, a confidence value and a trailing label.
    (Presumably the BLCA layout is 'rank:name;confidence;rank:name;...', so
    the trailing label is the name of the next rank -- TODO confirm against
    real BLCA output.)

    Two files are written, named from the directory and basename returned by
    sep_path_basename_ext for the input file:

    * <basename>_reformatted_1.txt: 'name(confidence)' entries joined by ';',
      with the confidence truncated to its first five characters.
    * <basename>_reformatted_2.txt: 'x__name' entries joined by ';', where x
      is a one-letter rank prefix (empty if the trailing label is unknown).

    Spaces in both taxonomy strings are replaced with underscores. If a
    sequence ID occurs more than once, the last occurrence wins.

    Args:
        args: dict of parsed options; only args['in'] (path to the BLCA
            output file) is read.

    Raises:
        IndexError: if a non-blank line has no second column, or a taxonomy
            piece has no confidence field.
    """
    blca_output = args['in']

    file_in_path, file_in_basename, _ = sep_path_basename_ext(blca_output)
    output_file_1 = '%s/%s_reformatted_1.txt' % (file_in_path, file_in_basename)
    output_file_2 = '%s/%s_reformatted_2.txt' % (file_in_path, file_in_basename)

    # Each piece ends with the label that precedes the NEXT taxon, hence a
    # trailing 'phylum' marks the current entry as domain level ('d'), and an
    # empty trailing label (end of the string) marks it as species ('s').
    rank_prefix_by_trailing_label = {
        'phylum': 'd',
        'class': 'p',
        'order': 'c',
        'family': 'o',
        'genus': 'f',
        'species': 'g',
        '': 's',
    }

    # read in input file; the context manager closes it even on errors
    s16_taxon_blca_dict = {}
    with open(blca_output) as blca_output_handle:
        for each_16s_taxon in blca_output_handle:
            each_16s_taxon_split = each_16s_taxon.strip().split('\t')
            if each_16s_taxon_split == ['']:
                # blank line (e.g. at the end of the file): nothing to parse
                continue
            s16_taxon_blca_dict[each_16s_taxon_split[0]] = each_16s_taxon_split[1]

    # Format everything before opening the outputs, so a malformed record
    # does not leave half-written result files behind.
    taxon_dict_formatted_with_num = {}
    taxon_dict_formatted_no_num = {}
    for each_16s, taxon_blca_raw in s16_taxon_blca_dict.items():
        formatted_taxon_str_with_num = 'Unclassified'
        formatted_taxon_str_no_num = 'Unclassified'
        if taxon_blca_raw != 'Unclassified':
            formatted_taxon_list_with_num = []
            formatted_taxon_list_no_num = []
            # [1:] drops the text before the first ':'
            for each_str in taxon_blca_raw.strip().split(':')[1:]:
                each_str_split = each_str.split(';')
                current_rank = rank_prefix_by_trailing_label.get(each_str_split[-1], '')
                formatted_taxon_list_with_num.append(
                    '%s(%s)' % (each_str_split[0], each_str_split[1][:5]))
                formatted_taxon_list_no_num.append(
                    '%s__%s' % (current_rank, each_str_split[0]))

            formatted_taxon_str_with_num = ';'.join(formatted_taxon_list_with_num)
            formatted_taxon_str_no_num = ';'.join(formatted_taxon_list_no_num)

        taxon_dict_formatted_with_num[each_16s] = formatted_taxon_str_with_num.replace(' ', '_')
        taxon_dict_formatted_no_num[each_16s] = formatted_taxon_str_no_num.replace(' ', '_')

    with open(output_file_1, 'w') as output_file_1_handle, \
            open(output_file_2, 'w') as output_file_2_handle:
        for each_seq in taxon_dict_formatted_with_num:
            output_file_1_handle.write('%s\t%s\n' % (each_seq, taxon_dict_formatted_with_num[each_seq]))
            output_file_2_handle.write('%s\t%s\n' % (each_seq, taxon_dict_formatted_no_num[each_seq]))
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
if __name__ == '__main__':

    # Script entry point: '-in' (the BLCA output file) is the only option;
    # the parsed namespace is handed over as a plain dict.
    blca_arg_parser = argparse.ArgumentParser(usage=BLCA_op_parser_usage)
    blca_arg_parser.add_argument('-in', required=True, help='BLCA output')
    BLCA_op_parser(vars(blca_arg_parser.parse_args()))
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
# extract path to the config file
|
|
4
|
+
pwd_config_file = os.path.realpath(__file__)
# os.path.dirname rather than splitting on '/': the manual split returns an
# empty string when the path separator is not '/' or when the file sits
# directly under the root directory.
config_file_path = os.path.dirname(pwd_config_file)

# specify full path to corresponding executables at the right side of colon
config_dict = {
    'config_file_path': config_file_path,

    # external executables
    'prodigal': 'prodigal',
    'hmmsearch': 'hmmsearch',
    'hmmfetch': 'hmmfetch',
    'hmmalign': 'hmmalign',
    'hmmstat': 'hmmstat',
    'mafft': 'mafft',
    'bowtie2': 'bowtie2',
    'bowtie2_build': 'bowtie2-build',
    'blastp': 'blastp',
    'blastn': 'blastn',
    'makeblastdb': 'makeblastdb',
    'fasttree': 'FastTree',

    # files addressed relative to the directory holding this config module
    # NOTE(review): the Ranger-DTL binaries, MetaCHIP_circos_HGT.R,
    # label_tree.R, compare_trees.R and PhyloBiAssoc.R do not appear to ship
    # alongside this module -- confirm before relying on these entries.
    'ranger_mac': os.path.join(config_file_path, 'Ranger-DTL-Dated.mac'),
    'ranger_linux': os.path.join(config_file_path, 'Ranger-DTL-Dated.linux'),
    'path_to_hmm': os.path.join(config_file_path, 'MetaCHIP_phylo.hmm'),
    'circos_HGT_R': os.path.join(config_file_path, 'MetaCHIP_circos_HGT.R'),
    'label_tree_R': os.path.join(config_file_path, 'label_tree.R'),
    'cdd2cog_perl': os.path.join(config_file_path, 'cdd2cog.pl'),
    'get_sankey_plot_R': os.path.join(config_file_path, 'sankey.R'),
    'compare_trees_R': os.path.join(config_file_path, 'compare_trees.R'),
    'PhyloBiAssoc_R': os.path.join(config_file_path, 'PhyloBiAssoc.R'),
    'ko00001_keg': os.path.join(config_file_path, 'ko00001.keg'),
    'MetaCyc_rxns_with_ec': os.path.join(config_file_path, 'MetaCyc_reactions_with_ec.txt'),
}
|
|
33
|
+
|