offtracker 2.7.10__zip → 2.10.1__zip

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {offtracker-2.7.10 → offtracker-2.10.1}/PKG-INFO +64 -20
  2. offtracker-2.7.10/offtracker.egg-info/PKG-INFO → offtracker-2.10.1/README.md +221 -189
  3. {offtracker-2.7.10 → offtracker-2.10.1}/offtracker/X_offplot.py +13 -2
  4. {offtracker-2.7.10 → offtracker-2.10.1}/offtracker/X_sequence.py +113 -7
  5. {offtracker-2.7.10 → offtracker-2.10.1}/offtracker/_version.py +9 -2
  6. offtracker-2.10.1/offtracker/snakefile/Snakefile_QC.smk +66 -0
  7. offtracker-2.10.1/offtracker/snakefile/Snakefile_offtracker.smk +249 -0
  8. offtracker-2.7.10/offtracker/mapping/1.1_bed2fr_v4.5.py → offtracker-2.10.1/offtracker/utility/1.1_bed2fr.py +6 -4
  9. offtracker-2.7.10/README.md → offtracker-2.10.1/offtracker.egg-info/PKG-INFO +233 -177
  10. offtracker-2.10.1/offtracker.egg-info/SOURCES.txt +28 -0
  11. {offtracker-2.7.10 → offtracker-2.10.1}/scripts/offtracker_analysis.py +10 -3
  12. offtracker-2.10.1/scripts/offtracker_candidates.py +318 -0
  13. {offtracker-2.7.10 → offtracker-2.10.1}/scripts/offtracker_config.py +28 -44
  14. offtracker-2.10.1/scripts/offtracker_qc.py +62 -0
  15. {offtracker-2.7.10 → offtracker-2.10.1}/setup.py +5 -4
  16. offtracker-2.7.10/offtracker/mapping/Snakefile_offtracker +0 -245
  17. offtracker-2.7.10/offtracker.egg-info/SOURCES.txt +0 -26
  18. offtracker-2.7.10/scripts/offtracker_candidates.py +0 -307
  19. {offtracker-2.7.10 → offtracker-2.10.1}/LICENSE.txt +0 -0
  20. {offtracker-2.7.10 → offtracker-2.10.1}/MANIFEST.in +0 -0
  21. {offtracker-2.7.10 → offtracker-2.10.1}/offtracker/X_offtracker.py +0 -0
  22. {offtracker-2.7.10 → offtracker-2.10.1}/offtracker/__init__.py +0 -0
  23. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.1/offtracker/utility}/1.3_bdg_normalize_v4.0.py +0 -0
  24. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.1/offtracker/utility}/bedGraphToBigWig +0 -0
  25. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.1/offtracker/utility}/hg38.chrom.sizes +0 -0
  26. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.1/offtracker/utility}/mm10.chrom.sizes +0 -0
  27. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.1/offtracker/utility}/offtracker_blacklist_hg38.merged.bed +0 -0
  28. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.1/offtracker/utility}/offtracker_blacklist_mm10.merged.bed +0 -0
  29. {offtracker-2.7.10 → offtracker-2.10.1}/offtracker.egg-info/dependency_links.txt +0 -0
  30. {offtracker-2.7.10 → offtracker-2.10.1}/offtracker.egg-info/requires.txt +0 -0
  31. {offtracker-2.7.10 → offtracker-2.10.1}/offtracker.egg-info/top_level.txt +0 -0
  32. {offtracker-2.7.10 → offtracker-2.10.1}/scripts/offtracker_plot.py +0 -0
  33. {offtracker-2.7.10 → offtracker-2.10.1}/setup.cfg +0 -0

offtracker-2.10.1/scripts/offtracker_candidates.py
@@ -0,0 +1,318 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ # 2023.10.27. v2.0: centers on the target_location midpoint, so the pct calculation is dropped
+ # 2023.12.06. v2.1: adds cleavage_site inference, fixes deletion misalignment, centers on the cleavage_site
+ # 2025.04.25. fix a letter-case issue
+ # 2025.06.11. reorder the code that skips already-existing candidates
+
+ import os,sys,re,time
+ from itertools import product, permutations
+
+ if sys.version_info < (3,0):
+     import platform
+     raise Exception(f'python3 is needed, while running {platform.python_version()} now')
+
+ import offtracker
+ import offtracker.X_sequence as xseq
+ script_dir = os.path.abspath(os.path.dirname(offtracker.__file__))
+ script_folder= os.path.join(script_dir, 'utility')
+
+ import argparse
+ import pandas as pd
+ import pybedtools
+ import multiprocessing as mp
+ from Bio.Blast.Applications import NcbiblastnCommandline
+
+ def main():
+     parser = argparse.ArgumentParser()
+     parser.description='Generate candidate regions by sgRNA sequence'
+     parser.add_argument('--sgrna' , type=str, required=True, help='One sgRNA sequence without PAM' )
+     parser.add_argument('--pam' , type=str, required=True, help='The protospacer adjacent motif' )
+     parser.add_argument('--pam_location', type=str, default='downstream', help='Upstream or downstream, default is downstream (Cas9)' )
+     parser.add_argument('--name' , type=str, required=True, help='custom name of the sgRNA' )
+     parser.add_argument('-r','--ref' , type=str, required=True, help='The fasta file of reference genome')
+     parser.add_argument('-b','--blastdb', type=str, required=True, help='blast database')
+     parser.add_argument('-o','--outdir' , type=str, required=True, help='The output folder')
+     parser.add_argument('-g','--genome' , type=str, default='hg38', help='File of chromosome sizes, or "hg38", "mm10" ')
+     parser.add_argument('-t','--thread' , type=int, default=4, help='Number of threads for parallel computing')
+     parser.add_argument('--quick_mode' , action='store_true', help='BLAST faster but less candidates.')
+
+     args = parser.parse_args()
+
+
+     if (args.genome == 'hg38') or (args.genome == 'mm10'):
+         dir_chrom_sizes = os.path.join(script_folder, f'{args.genome}.chrom.sizes')
+     else:
+         dir_chrom_sizes = args.genome
+
+     sgRNA_name = args.name
+     sgRNA_seq = args.sgrna
+     PAM = args.pam
+     PAM_loc = args.pam_location.lower()
+     n_threads = args.thread
+     dir_output = args.outdir
+     if not os.path.exists(dir_output):
+         os.makedirs(dir_output)
+     dir_ref_fa = args.ref
+     blast_db = args.blastdb
+     quick_mode = args.quick_mode
+
+     # parameters for alignment
+     half_width = 100
+     pct_params = 1.0
+     frag_len= half_width*2
+     dir_df_candidate = os.path.join(dir_output, f'df_candidate_{sgRNA_name}.csv')
+     if os.path.isfile(dir_df_candidate):
+         print(f'{dir_df_candidate} exists, skipped.')
+         return 'skipped'
+
+     sgRNA_seq = sgRNA_seq.upper()
+     PAM = PAM.upper()
+     dir_sgRNA_fasta = os.path.join(dir_output, f'{sgRNA_name}_PAM.fasta')
+     dir_sgRNA_blast = os.path.join(dir_output, f'{sgRNA_name}_PAM.blast')
+     dir_sgRNA_bed = os.path.join(dir_output, f'{sgRNA_name}_PAM.bed')
+
+     if PAM_loc == 'downstream':
+         possible_sgRNA_PAM = list(product([sgRNA_seq],xseq.possible_seq(PAM)))
+     elif PAM_loc == 'upstream':
+         possible_sgRNA_PAM = list(product(xseq.possible_seq(PAM),[sgRNA_seq]))
+     else:
+         raise Exception(f'PAM_location should be "upstream" or "downstream", while {PAM_loc} is given.')
+     possible_sgRNA_PAM = [''.join(combination) for combination in possible_sgRNA_PAM]
+     n_seq = len(possible_sgRNA_PAM)
+
+     ID = pd.Series(['seq']*n_seq) + pd.Series(range(1,n_seq+1)).astype(str)
+     df_sgRNA_PAM = pd.DataFrame({'ID':ID,'sequence':possible_sgRNA_PAM})
+     xseq.write_fasta(df_sgRNA_PAM, dir_sgRNA_fasta)
+
+     #########
+     # BLAST #
+     #########
+     if os.path.isfile(dir_sgRNA_blast):
+         print(f'{dir_sgRNA_blast} exists, skipped.')
+     else:
+         if quick_mode:
+             print('Using quick mode for BLAST')
+             blastx_cline = NcbiblastnCommandline(query=dir_sgRNA_fasta, task='blastn-short',out=dir_sgRNA_blast,
+                                                  db=blast_db, evalue=10000,outfmt=6, num_threads=n_threads,
+                                                  gapopen=4, gapextend=2, reward=2, word_size=5, dust='no', soft_masking=False)
+         else:
+             blastx_cline = NcbiblastnCommandline(query=dir_sgRNA_fasta, task='blastn-short',out=dir_sgRNA_blast,
+                                                  db=blast_db, evalue=10000,outfmt=6, num_threads=n_threads,
+                                                  gapopen=4, gapextend=2, reward=2, word_size=4, dust='no', soft_masking=False)
+         print(f'BLAST for candidate off-target sites of {sgRNA_name}.')
+         blastx_cline()
+         print(f'BLAST finished.')
+
+     ##############
+     # Output bed #
+     ##############
+
+     blast_regions = pd.read_csv(dir_sgRNA_blast, sep='\t',header=None)
+     blast_regions.columns = ['query acc.','chr','% identity','alignment length','mismatches','gap opens','q. start','q. end','st','ed','evalue','bit score']
+     blast_regions = blast_regions[blast_regions.evalue<10000]
+
+     # reverse strand
+     blast_regions['reverse'] = (blast_regions['st']>blast_regions['ed']).astype(int)
+     blast_regions_f = blast_regions[blast_regions.reverse==0].copy()
+     blast_regions_r = blast_regions[blast_regions.reverse==1].copy()
+     temp = blast_regions_r['st'].copy()
+     blast_regions_r['st'] = blast_regions_r['ed']
+     blast_regions_r['ed'] = temp
+     blast_regions = pd.concat([blast_regions_f, blast_regions_r])
+     # sort and add location
+     blast_regions = blast_regions.sort_values('evalue').reset_index(drop=True)
+     blast_regions['location']=blast_regions['chr'].str[:] + ':' + blast_regions['st'].astype(str).str[:] + '-' + blast_regions['ed'].astype(str).str[:]
+     blast_regions = blast_regions.drop_duplicates(subset='location').copy()
+
+     # filter by alignment length
+     len_sgRNA=len(sgRNA_seq)
+     min_len = len_sgRNA-8
+     blast_regions = blast_regions[blast_regions['alignment length']>=min_len].copy().reset_index(drop=True)
+     blast_regions = blast_regions.reindex(columns = ['chr', 'st', 'ed' , 'query acc.', '% identity', 'alignment length', 'mismatches',
+                                                      'gap opens', 'q. start', 'q. end', 'evalue', 'bit score', 'reverse', 'location'] )
+
+     # write a bed file for the subsequent alignment score calculation
+     blast_regions_bed = blast_regions[['chr','st','ed']]
+     xseq.write_bed(blast_regions_bed, dir_sgRNA_bed)
+     # sort the bed file without merging intervals
+     a = pybedtools.BedTool(dir_sgRNA_bed)
+     a.sort(g=dir_chrom_sizes).saveas( dir_sgRNA_bed )
+     print(f'Output {sgRNA_name}_PAM.bed')
+
+
+     ###################
+     # alignment score #
+     ###################
+
+     #########
+     # read the blast bed
+     #########
+     common_chr = pd.Series(['chr']*23).str[:] + pd.Series(range(23)).astype(str).str[:]
+     common_chr = pd.concat([common_chr, pd.Series(['chrX','chrY'])]).to_numpy()
+
+     bed_short = xseq.X_readbed(dir_sgRNA_bed)
+     bed_short = bed_short[bed_short['chr'].isin(common_chr)].copy()
+     bed_short['midpoint'] = ((bed_short['st'] + bed_short['ed'])/2).astype(int)
+     bed_short['st'] = bed_short['midpoint'] - half_width
+     bed_short['ed'] = bed_short['midpoint'] + half_width
+     bed_short.loc[bed_short['st']<0,'st']=0
+     bed_short = bed_short.drop_duplicates()
+
+     #########
+     # take the genomic sequence half_width on either side of the ed of each bed_f site
+     #########
+
+     temp_bed = os.path.join(dir_output, 'temp.bed')
+     xseq.write_bed(bed_short.iloc[:,:3], temp_bed)
+     a = pybedtools.BedTool(temp_bed)
+     fasta = pybedtools.example_filename(dir_ref_fa)
+     a = a.sequence(fi=fasta)
+     with open(a.seqfn) as f:
+         fasta = {}
+         for line in f:
+             line = line.strip() # strip the trailing newline
+             if line[0] == '>':
+                 header = line[1:]
+             else:
+                 sequence = line
+                 fasta[header] = fasta.get(header,'') + sequence
+
+     # when pybedtools reports a location chrA:X-Y, X is shifted 1 bp to the left
+
+     #########
+     # local alignment
+     #########
+     # build the DNA_matrix
+     mismatch_score = 0.01
+     base_codes = list(xseq.ambiguous_nt.keys())
+     all_base_pairs = list(permutations(base_codes,2)) + [(x,x) for x in base_codes]
+     DNA_matrix = {x : xseq.get_base_score(*x, mismatch_score=mismatch_score) for x in all_base_pairs}
+     # append the PAM
+     if PAM_loc == 'downstream':
+         sgRNA_PAM_fw = sgRNA_seq + PAM
+     else:
+         sgRNA_PAM_fw = PAM + sgRNA_seq
+
+     sgRNA_PAM_rv = xseq.reverse_complement(sgRNA_PAM_fw)
+
+     list_args_fw=[]
+     list_args_rv=[]
+     for a_key, a_seq in fasta.items():
+         # 2025.04.25 fix a letter-case issue
+         a_seq = re.sub('[^ATCG]','N',a_seq.upper())
+         list_args_fw.append( [a_key, sgRNA_PAM_fw, a_seq, frag_len, DNA_matrix, mismatch_score] )
+         list_args_rv.append( [a_key, sgRNA_PAM_rv, a_seq, frag_len, DNA_matrix, mismatch_score] )
+     st = time.time()
+     with mp.Pool(n_threads) as p:
+         list_align_forward = p.starmap(xseq.sgRNA_alignment, list_args_fw)
+     ed = time.time()
+     print('align_forward:{:.2f}'.format(ed-st))
+     st = time.time()
+     with mp.Pool(n_threads) as p:
+         list_align_reverse = p.starmap(xseq.sgRNA_alignment, list_args_rv)
+     ed = time.time()
+     print('align_reverse:{:.2f}'.format(ed-st))
+     #
+     df_align_forward = pd.DataFrame(list_align_forward, columns= ['fw_score','fw_pct','fw_target','fw_location','fw_deletion','fw_insertion','fw_mismatch'])
+     df_align_reverse = pd.DataFrame(list_align_reverse, columns= ['rv_score','rv_pct','rv_target','rv_location','rv_deletion','rv_insertion','rv_mismatch'])
+     df_align_reverse['rv_target'] = df_align_reverse['rv_target'].apply(xseq.reverse_complement)
+     df_candidate = pd.concat([df_align_forward,df_align_reverse],axis=1)
+     df_candidate['location'] = fasta.keys()
+     df_candidate['alignment_score'] = df_candidate[['fw_score','rv_score']].max(axis=1)
+     #df_candidate['fw_score_2'] = df_candidate['fw_score']*(pct_params-df_candidate['fw_pct'].abs())
+     #df_candidate['rv_score_2'] = df_candidate['rv_score']*(pct_params-df_candidate['rv_pct'].abs())
+     #df_candidate['best_seq_score'] = df_candidate[['fw_score_2', 'rv_score_2']].max(axis=1)
+     #df_candidate['best_strand'] = df_candidate[['fw_score_2', 'rv_score_2']].idxmax(axis='columns').replace({'fw_score_2':'+', 'rv_score_2':'-'})
+     #df_candidate.loc[df_candidate['fw_score_2']==df_candidate['rv_score_2'],'best_strand']='equal_score'
+     df_candidate['best_seq_score'] = df_candidate[['fw_score', 'rv_score']].max(axis=1)
+     df_candidate['best_strand'] = df_candidate[['fw_score', 'rv_score']].idxmax(axis='columns').replace({'fw_score':'+', 'rv_score':'-'})
+     df_candidate.loc[df_candidate['fw_score']==df_candidate['rv_score'],'best_strand']='equal_score'
+
+     # GG check
+     # 2023.12.05 add cleavage_site inference
+     list_best_target = []
+     list_best_location = []
+     list_cleavage_site = []
+     list_delete = []
+     list_insert = []
+     list_mismat = []
+     list_GG = []
+     for a_row in df_candidate.iterrows():
+         if a_row[1]['best_strand']=='+':
+             list_best_target.append(a_row[1]['fw_target'])
+             list_best_location.append(a_row[1]['fw_location'])
+             list_cleavage_site.append(int(a_row[1]['fw_location'].split('-')[1]) - 6)
+             list_delete.append(a_row[1]['fw_deletion'])
+             list_insert.append(a_row[1]['fw_insertion'])
+             list_mismat.append(a_row[1]['fw_mismatch'])
+             if a_row[1]['fw_target'][-2:]=='GG':
+                 list_GG.append('OK')
+             else:
+                 list_GG.append('NO')
+         elif a_row[1]['best_strand']=='-':
+             list_best_target.append(a_row[1]['rv_target'])
+             list_best_location.append(a_row[1]['rv_location'])
+             list_cleavage_site.append(int(a_row[1]['rv_location'].split('-')[0].split(':')[1]) + 5)
+             list_delete.append(a_row[1]['rv_deletion'])
+             list_insert.append(a_row[1]['rv_insertion'])
+             list_mismat.append(a_row[1]['rv_mismatch'])
+             if a_row[1]['rv_target'][-2:]=='GG':
+                 list_GG.append('OK')
+             else:
+                 list_GG.append('NO')
+         else:
+             if a_row[1]['fw_target'][-2:]=='GG':
+                 list_best_target.append(a_row[1]['fw_target'])
+                 list_best_location.append(a_row[1]['fw_location'])
+                 list_cleavage_site.append(int(a_row[1]['fw_location'].split('-')[1]) - 6)
+                 list_delete.append(a_row[1]['fw_deletion'])
+                 list_insert.append(a_row[1]['fw_insertion'])
+                 list_mismat.append(a_row[1]['fw_mismatch'])
+                 list_GG.append('OK_same_score')
+             # if no GG is found, check the reverse complement
+             elif a_row[1]['rv_target'][-2:]=='GG':
+                 list_best_target.append(a_row[1]['rv_target'])
+                 list_best_location.append(a_row[1]['rv_location'])
+                 list_cleavage_site.append(int(a_row[1]['rv_location'].split('-')[0].split(':')[1]) + 5)
+                 list_delete.append(a_row[1]['rv_deletion'])
+                 list_insert.append(a_row[1]['rv_insertion'])
+                 list_mismat.append(a_row[1]['rv_mismatch'])
+                 list_GG.append('OK_same_score')
+             else:
+                 list_best_target.append(a_row[1]['fw_target'])
+                 list_best_location.append(a_row[1]['fw_location'])
+                 list_cleavage_site.append(int(a_row[1]['fw_location'].split('-')[1]) - 6)
+                 list_delete.append(a_row[1]['fw_deletion'])
+                 list_insert.append(a_row[1]['fw_insertion'])
+                 list_mismat.append(a_row[1]['fw_mismatch'])
+                 list_GG.append('NO_same_score')
+     # record in df_candidate
+     df_candidate['deletion'] = list_delete
+     df_candidate['insertion'] = list_insert
+     df_candidate['mismatch'] = list_mismat
+     df_candidate['GG'] = list_GG
+     df_candidate['best_target'] = list_best_target
+     df_candidate['target_location'] = list_best_location
+     df_candidate['cleavage_site'] = list_cleavage_site
+
+     # 2.0 update the format
+     df_candidate = df_candidate.drop_duplicates(subset=['target_location']).reset_index(drop=True)
+     df_candidate = pd.concat([xseq.bedfmt(df_candidate['target_location']), df_candidate],axis=1)
+     # df_candidate['midpoint'] = ((df_candidate['ed'] + df_candidate['st'])/2).astype(int)
+     df_candidate = xseq.add_ID(df_candidate, midpoint='cleavage_site')
+
+     df_candidate.to_csv(dir_df_candidate)
+     print(f'Output df_candidate_{sgRNA_name}.csv')
+     os.remove(temp_bed)
+
+     return 'Done!'
+
+
+ if __name__ == '__main__' :
+     result = main()
+     print(result)
+
+
+
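
The new scripts/offtracker_candidates.py above enumerates every concrete spacer+PAM string (expanding ambiguity codes in the PAM), writes them as seq1..seqN FASTA records, and BLASTs them with blastn-short. Below is a minimal, self-contained sketch of that expansion step only; expand_ambiguous and the example spacer are illustrative stand-ins, not offtracker's xseq.possible_seq.

    # Simplified illustration of the spacer+PAM expansion performed before BLAST.
    # Not the packaged implementation: expand_ambiguous stands in for xseq.possible_seq.
    from itertools import product

    IUPAC = {'A': 'A', 'C': 'C', 'G': 'G', 'T': 'T',
             'R': 'AG', 'Y': 'CT', 'S': 'CG', 'W': 'AT', 'K': 'GT', 'M': 'AC',
             'B': 'CGT', 'D': 'AGT', 'H': 'ACT', 'V': 'ACG', 'N': 'ACGT'}

    def expand_ambiguous(seq):
        """Return every concrete sequence matching an IUPAC-ambiguous string."""
        return [''.join(p) for p in product(*(IUPAC[b] for b in seq.upper()))]

    sgrna = 'GACCCCCTCCACCCCGCCTC'          # arbitrary example spacer, no PAM
    queries = [sgrna + pam for pam in expand_ambiguous('NGG')]  # Cas9: PAM downstream
    for i, q in enumerate(queries, start=1):
        print(f'>seq{i}')                   # FASTA records like those fed to blastn-short
        print(q)
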
{offtracker-2.7.10 → offtracker-2.10.1}/scripts/offtracker_config.py
@@ -1,20 +1,22 @@
  #!/usr/bin/env python
  # -*- coding: utf-8 -*-

- # 2023.08.11. v1.1 adding a option for not normalizing the bw file
+ # 2023.08.11. adding a option for not normalizing the bw file
+ # 2025.05.22. refine the structure
+ # 2025.06.05. add the ignore_chr option; by default only common chromosomes are kept, used by 1.1_bed2fr.py

  import argparse
  import os, glob, yaml
  import pandas as pd
  import shutil, re
  import offtracker
+ import offtracker.X_sequence as xseq
  script_dir = os.path.abspath(os.path.dirname(offtracker.__file__))
- script_folder= os.path.join(script_dir, 'mapping')
- os.chmod( os.path.join(script_folder, 'bedGraphToBigWig'), 0o755)
+ utility_dir = os.path.join(script_dir, 'utility')

  ###
  parser = argparse.ArgumentParser()
- parser.description='Mapping fastq files of Track-seq.'
+ parser.description='Mapping fastq files of Tracking-seq.'
  parser.add_argument('-f','--folder', type=str, required=True, help='Directory of the input folder' )
  parser.add_argument('-r','--ref' , type=str, required=True, help='The fasta file of reference genome')
  parser.add_argument('-i','--index' , type=str, required=True, help='The index file of chromap')
@@ -25,12 +27,13 @@ parser.add_argument('-t','--thread', type=int, default=4, help='Number of t
  parser.add_argument('--blacklist' , type=str, default='same', help='Blacklist of genome regions in bed format. "none" for no filter')
  parser.add_argument('--binsize' , type=str, default=100, help='Bin size for calculating bw residue')
  parser.add_argument('--normalize' , type=str, default='True', help='Whether to normalize the BigWig file. "True" or "False"')
+ parser.add_argument('--ignore_chr' , action='store_true', help='If not set, only chr1-chr22, chrX, chrY, chrM will be analyzed.')

- args = parser.parse_args()

+ args = parser.parse_args()

  if (args.genome == 'hg38') or (args.genome == 'mm10'):
-     dir_chrom_sizes = os.path.join(script_folder, f'{args.genome}.chrom.sizes')
+     dir_chrom_sizes = os.path.join(utility_dir, f'{args.genome}.chrom.sizes')
  else:
      dir_chrom_sizes = args.genome

@@ -42,7 +45,7 @@ if args.blacklist == 'same':
      args.blacklist = args.genome

  if (args.blacklist == 'hg38') or (args.blacklist == 'mm10'):
-     blacklist = os.path.join(script_folder, f'offtracker_blacklist_{args.blacklist}.merged.bed')
+     blacklist = os.path.join(utility_dir, f'offtracker_blacklist_{args.blacklist}.merged.bed')
  else:
      blacklist = args.blacklist

@@ -52,59 +55,40 @@ else:
  if not os.path.exists(args.outdir):
      os.makedirs(args.outdir)

- gz_R2 = []
- for fastq in ['*2.*fq','*2.*fastq','*2.*fq.gz','*2.*fastq.gz']:
-     fq_files = glob.glob( os.path.join(args.folder, args.subfolder*'*/', fastq ) )
-     print('{} {} samples detected'.format( len(fq_files), fastq[4:] ) )
-     gz_R2.extend( fq_files )
-
- gz_R2.sort()
- gz_R2 = pd.Series(gz_R2)
- suffix = gz_R2.str.extract('(fastq.*|fq.*)',expand=False)
- prefix = gz_R2.str.extract('(.*)(?:.fq|.fastq)',expand=False)
-
- nametype = None
- for a_type in ['_trimmed_2', '_2_val_2','_R2_val_2','_R2','_2']:
-     len_type = len(a_type)
-     if prefix[0][-len_type:] == a_type:
-         nametype = a_type
-         sample_dir = prefix.str[:-len_type]
-         break
-
- if nametype is None:
-     # pattern search, may be buggy
-     # find "_R2." or "_2." in prefix[0]
-     pattern = re.compile(r'(_R2\.|_2\.)')
-     m = pattern.search(prefix[0])
-     if m:
-         nametype = prefix[0][m.span()[0]:]
-         len_type = len(nametype)
-         sample_dir = prefix.str[:-len_type]
-
- assert nametype is not None, 'No fastq detected or the file name is invaild!'
-
- sample_name = sample_dir.apply(os.path.basename)
+ if args.ignore_chr:
+     args.ignore_chr = '--ignore_chr'
+ else:
+     args.ignore_chr = ''
+
+ # search all fastq/fastq.gz/fq/fq.gz files in the n-th level subfolders of the input folder
+ sample_names, files_R1, files_R2 = xseq.detect_fastq(args.folder, n_subfolder=args.subfolder, NGS_type=args.NGS_type)
+
+ assert not isinstance(sample_names, str), 'No fastq file is detected!'

  dict_yaml = {
-     'suffix':suffix[0],
-     'sample':dict(zip(sample_name,sample_dir)),
+     # fastq info
+     'files_R1':dict(zip(sample_names,files_R1)),
+     'files_R2':dict(zip(sample_names,files_R2)), # for single-end data, files_R2=[] and the result is automatically {}
+     'NGS_type':args.NGS_type,
+     # input/output folders
      'input_dir':args.folder,
      'output_dir':args.outdir,
+     # run parameters
      'thread':args.thread,
      'index':args.index,
      'fasta':args.ref,
      'binsize':args.binsize,
      'blacklist':blacklist,
-     'nametype':nametype,
      'genomelen':dir_chrom_sizes,
      'normalize':args.normalize,
-     'script_folder':script_folder
+     'utility_dir':utility_dir,
+     'ignore_chr':args.ignore_chr,
  }

  with open( os.path.join(args.outdir,'config.yaml'), 'w') as outfile:
      yaml.dump(dict_yaml, outfile, default_flow_style=False)

- snakefile = os.path.join(script_dir, 'mapping/Snakefile_offtracker')
+ snakefile = os.path.join(script_dir, 'snakefile/Snakefile_offtracker.smk')
  shutil.copy(snakefile, os.path.join(args.outdir,'Snakefile'))

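
Both offtracker_config.py (above) and the new offtracker_qc.py (below) now delegate fastq discovery to xseq.detect_fastq, which returns sample names plus matched R1/R2 file lists and apparently returns a string when nothing is found (hence the isinstance assert). A rough, hypothetical stand-in with that contract might look like the sketch below; the real helper in offtracker/X_sequence.py may differ in every detail.

    # Hypothetical sketch of the contract offtracker_config.py relies on.
    # NOT the packaged xseq.detect_fastq; the file-name conventions are assumptions.
    import glob, os

    def detect_fastq(folder, n_subfolder=0, NGS_type='paired-end'):
        pattern = os.path.join(folder, *(['*'] * n_subfolder), '*')
        fastqs = sorted(f for f in glob.glob(pattern)
                        if f.endswith(('.fq', '.fastq', '.fq.gz', '.fastq.gz')))
        if not fastqs:
            return 'no fastq detected', [], []      # caller asserts on a str return
        if NGS_type == 'single-end':
            names = [os.path.basename(f).split('.')[0] for f in fastqs]
            return names, fastqs, []
        files_R1 = [f for f in fastqs if '_R1' in f or '_1.' in f]
        files_R2 = [f for f in fastqs if '_R2' in f or '_2.' in f]
        names = [os.path.basename(f).split('_')[0] for f in files_R1]
        return names, files_R1, files_R2
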
 
offtracker-2.10.1/scripts/offtracker_qc.py
@@ -0,0 +1,62 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ THIS_VERSION = '0.4.1'
+
+ import argparse
+ import os, glob, yaml
+ import pandas as pd
+ import shutil, re
+ import offtracker
+ import offtracker.X_sequence as xseq
+
+ script_dir = os.path.abspath(os.path.dirname(offtracker.__file__))
+ utility_dir = os.path.join(script_dir, 'utility')
+ os.chmod( os.path.join(utility_dir, 'bedGraphToBigWig'), 0o755)
+
+ ###
+ parser = argparse.ArgumentParser()
+ parser.description=f'xbulk_qc v{THIS_VERSION}. QC and trim fastq files.'
+ parser.add_argument('-f','--folder', type=str, required=True, help='Directory of the input folder' )
+ parser.add_argument('-o','--outdir', type=str, default='same', help='The output folder')
+ parser.add_argument('--subfolder' , type=int, default=0, help='subfolder level')
+ parser.add_argument('-t','--thread', type=int, default=8, help='Number of threads to be used')
+ parser.add_argument('--NGS_type' , type=str, default='paired-end', help='paired-end or single-end')
+
+ args = parser.parse_args()
+
+ # automatic argument adjustment and error reporting
+ if args.outdir == 'same':
+     args.outdir = os.path.join(args.folder,'Trimmed_data')
+     if not os.path.exists( args.outdir ):
+         os.makedirs( args.outdir )
+ else:
+     if not os.path.exists(args.outdir):
+         os.makedirs(args.outdir)
+
+ # search all fastq/fastq.gz/fq/fq.gz files in the n-th level subfolders of the input folder
+ sample_names, files_R1, files_R2 = xseq.detect_fastq(args.folder, n_subfolder=args.subfolder, NGS_type=args.NGS_type)
+
+ assert not isinstance(sample_names, str), 'No fastq file is detected!'
+
+ dict_yaml = {
+     # fastq info
+     'files_R1':dict(zip(sample_names,files_R1)),
+     'files_R2':dict(zip(sample_names,files_R2)), # for single-end data, files_R2=[] and the result is automatically {}
+     'NGS_type':args.NGS_type,
+     # input/output folders
+     'input_dir':args.folder,
+     'output_dir':args.outdir,
+     # run parameters
+     'thread':args.thread,
+     'utility_dir':utility_dir
+ }
+
+
+ with open( os.path.join(args.outdir,'config.yaml'), 'w', encoding='utf-8') as outfile:
+     yaml.dump(dict_yaml, outfile, default_flow_style=False)
+
+ snakefile = os.path.join(script_dir, 'snakefile/Snakefile_QC.smk')
+ shutil.copy(snakefile, os.path.join(args.outdir,'Snakefile'))
+
+
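
For reference, the config.yaml that offtracker_qc.py writes is simply the yaml.dump of dict_yaml above. A minimal illustration follows; the keys mirror the script, while the sample name and paths are made up.

    # Illustration only: keys mirror dict_yaml in offtracker_qc.py, values are hypothetical.
    import yaml

    dict_yaml = {
        'files_R1': {'sampleA': '/data/sampleA_R1.fq.gz'},   # hypothetical sample/path
        'files_R2': {'sampleA': '/data/sampleA_R2.fq.gz'},
        'NGS_type': 'paired-end',
        'input_dir': '/data',
        'output_dir': '/data/Trimmed_data',
        'thread': 8,
        'utility_dir': '/path/to/offtracker/utility',
    }
    print(yaml.dump(dict_yaml, default_flow_style=False))
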
{offtracker-2.7.10 → offtracker-2.10.1}/setup.py
@@ -11,7 +11,7 @@ from setuptools import find_packages, setup, Command
  NAME = 'offtracker'
  DESCRIPTION = 'Tracking-seq data analysis'
  AUTHOR = 'Runda Xu'
- EMAIL = 'runda.xu@foxmail.com'
+ EMAIL = 'xrd18@tsinghua.org.cn'
  URL = 'https://github.com/Lan-lab/offtracker'
  REQUIRES_PYTHON = '>=3.6.0'

@@ -47,9 +47,10 @@ setup(
      author_email=EMAIL,
      url=URL,
      python_requires=REQUIRES_PYTHON,
-     packages=find_packages(),
-     package_data={'offtracker': ['mapping/*']},
-     scripts = ['scripts/offtracker_config.py',
+     packages=['offtracker'],
+     package_data={'offtracker': ['snakefile/*','utility/*']},
+     scripts = ['scripts/offtracker_qc.py',
+                'scripts/offtracker_config.py',
                 'scripts/offtracker_candidates.py',
                 'scripts/offtracker_analysis.py',
                 'scripts/offtracker_plot.py'],