offtracker 2.7.10__zip → 2.10.0__zip

This diff shows the contents of publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.
Files changed (33)
  1. {offtracker-2.7.10/offtracker.egg-info → offtracker-2.10.0}/PKG-INFO +62 -18
  2. {offtracker-2.7.10 → offtracker-2.10.0}/README.md +62 -18
  3. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker/X_offplot.py +13 -2
  4. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker/X_sequence.py +113 -7
  5. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker/_version.py +8 -2
  6. offtracker-2.10.0/offtracker/snakefile/Snakefile_QC.smk +66 -0
  7. offtracker-2.10.0/offtracker/snakefile/Snakefile_offtracker.smk +249 -0
  8. offtracker-2.7.10/offtracker/mapping/1.1_bed2fr_v4.5.py → offtracker-2.10.0/offtracker/utility/1.1_bed2fr.py +6 -4
  9. {offtracker-2.7.10 → offtracker-2.10.0/offtracker.egg-info}/PKG-INFO +62 -18
  10. offtracker-2.10.0/offtracker.egg-info/SOURCES.txt +28 -0
  11. {offtracker-2.7.10 → offtracker-2.10.0}/scripts/offtracker_analysis.py +10 -3
  12. offtracker-2.10.0/scripts/offtracker_candidates.py +318 -0
  13. {offtracker-2.7.10 → offtracker-2.10.0}/scripts/offtracker_config.py +28 -44
  14. offtracker-2.10.0/scripts/offtracker_qc.py +62 -0
  15. {offtracker-2.7.10 → offtracker-2.10.0}/setup.py +5 -4
  16. offtracker-2.7.10/offtracker/mapping/Snakefile_offtracker +0 -245
  17. offtracker-2.7.10/offtracker.egg-info/SOURCES.txt +0 -26
  18. offtracker-2.7.10/scripts/offtracker_candidates.py +0 -307
  19. {offtracker-2.7.10 → offtracker-2.10.0}/LICENSE.txt +0 -0
  20. {offtracker-2.7.10 → offtracker-2.10.0}/MANIFEST.in +0 -0
  21. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker/X_offtracker.py +0 -0
  22. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker/__init__.py +0 -0
  23. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/1.3_bdg_normalize_v4.0.py +0 -0
  24. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/bedGraphToBigWig +0 -0
  25. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/hg38.chrom.sizes +0 -0
  26. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/mm10.chrom.sizes +0 -0
  27. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/offtracker_blacklist_hg38.merged.bed +0 -0
  28. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/offtracker_blacklist_mm10.merged.bed +0 -0
  29. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker.egg-info/dependency_links.txt +0 -0
  30. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker.egg-info/requires.txt +0 -0
  31. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker.egg-info/top_level.txt +0 -0
  32. {offtracker-2.7.10 → offtracker-2.10.0}/scripts/offtracker_plot.py +0 -0
  33. {offtracker-2.7.10 → offtracker-2.10.0}/setup.cfg +0 -0
offtracker-2.7.10/offtracker/mapping/Snakefile_offtracker (deleted)
@@ -1,245 +0,0 @@
- # 2023.08.11. adding an option for not normalizing the bw file
- # 2024.01.23. add --fixedStep to bigwigCompare for not merging neighbouring bins with equal values.
-
- configfile: "config.yaml"
-
- _threads = config["thread"]
- BinSize = str(config["binsize"])
- normalize = config["normalize"]
- output_dir = config["output_dir"]
- nametype = config["nametype"]
- suffix = config["suffix"]
- name1 = nametype.replace('2','1') + '.' + suffix
- name2 = nametype + '.' + suffix
-
- import os
-
- if normalize == "True":
-     rule all:
-         input:
-             expand( os.path.join(output_dir,"{sample}.fw.bed"), sample=config["sample"] ),
-             expand( os.path.join(output_dir,"{sample}.rv.bed"), sample=config["sample"] ),
-             expand( os.path.join(output_dir,"{sample}.fw.scaled.bw"), sample=config["sample"] ),
-             expand( os.path.join(output_dir,"{sample}.rv.scaled.bw"), sample=config["sample"] ),
-             expand( os.path.join(output_dir,"{sample}." + BinSize + ".add.bdg"),sample=config["sample"] ),
- elif normalize == "False":
-     rule all:
-         input:
-             expand( os.path.join(output_dir,"{sample}.fw.bed"), sample=config["sample"] ),
-             expand( os.path.join(output_dir,"{sample}.rv.bed"), sample=config["sample"] ),
-             expand( os.path.join(output_dir,"{sample}.fw.raw.bw"), sample=config["sample"] ),
-             expand( os.path.join(output_dir,"{sample}.rv.raw.bw"), sample=config["sample"] ),
- else:
-     raise ValueError('Please provide "True" or "False" for "--normalize" when running offtracker_config.py')
-
-
- rule chromap:
-     input:
-         R1= lambda w: config["sample"][w.sample] + name1,
-         R2= lambda w: config["sample"][w.sample] + name2
-     threads:
-         _threads
-     params:
-         index=config["index"],
-         fasta=config["fasta"]
-     output:
-         temp(os.path.join(output_dir,"{sample}.chromapx.bed"))
-     shell:
-         """
-         chromap -l 3000 --low-mem --BED --remove-pcr-duplicates \
-         --min-read-length 10 --allocate-multi-mappings \
-         -x {params.index} -r {params.fasta} -t {threads} -1 {input.R1} -2 {input.R2} -o {output}
-         """
-
- if config["blacklist"] != 'none':
-     rule remove_blacklist:
-         input:
-             os.path.join(output_dir,"{sample}.chromapx.bed")
-         threads:
-             _threads
-         params:
-             blacklist=config["blacklist"]
-         output:
-             temp(os.path.join(output_dir,"{sample}.filtered.bed"))
-         shell:
-             "bedtools intersect -a {input} -b {params.blacklist} -v > {output}"
-
-     rule bed2fr:
-         input:
-             os.path.join(output_dir,"{sample}.filtered.bed")
-         threads:
-             _threads
-         params:
-             dir_script=config["script_folder"]
-         output:
-             fw=os.path.join(output_dir,"{sample}.fw.bed"),
-             rv=os.path.join(output_dir,"{sample}.rv.bed")
-         shell:
-             "python {params.dir_script}/1.1_bed2fr_v4.5.py -b {input}"
- else:
-     rule bed2fr:
-         input:
-             os.path.join(output_dir,"{sample}.chromapx.bed")
-         threads:
-             _threads
-         params:
-             dir_script=config["script_folder"]
-         output:
-             fw=os.path.join(output_dir,"{sample}.fw.bed"),
-             rv=os.path.join(output_dir,"{sample}.rv.bed")
-         shell:
-             "python {params.dir_script}/1.1_bed2fr_v4.5.py -b {input}"
-
- rule bed2bdg_fw:
-     input:
-         os.path.join(output_dir,"{sample}.fw.bed")
-     threads:
-         _threads
-     params:
-         gl=config["genomelen"]
-     output:
-         temp(os.path.join(output_dir,"{sample}.fw.bdg"))
-     shell:
-         "bedtools genomecov -bg -i {input} -g {params.gl} > {output}"
-
- rule bed2bdg_rv:
-     input:
-         os.path.join(output_dir,"{sample}.rv.bed")
-     threads:
-         _threads
-     params:
-         gl=config["genomelen"]
-     output:
-         temp(os.path.join(output_dir,"{sample}.rv.bdg"))
-     shell:
-         "bedtools genomecov -bg -i {input} -g {params.gl} > {output}"
-
- rule bdg_sort_fw:
-     input:
-         fw=os.path.join(output_dir,"{sample}.fw.bdg")
-     threads:
-         _threads
-     output:
-         temp(os.path.join(output_dir,"{sample}.fw.sorted.bdg"))
-     shell:
-         "bedtools sort -i {input.fw} > {output}"
-
- rule bdg_sort_rv:
-     input:
-         rv=os.path.join(output_dir,"{sample}.rv.bdg")
-     threads:
-         _threads
-     output:
-         temp(os.path.join(output_dir,"{sample}.rv.sorted.bdg"))
-     shell:
-         "bedtools sort -i {input.rv} > {output}"
-
- if normalize == "True":
-     rule bdg_normalize_fw:
-         input:
-             bdg=os.path.join(output_dir,"{sample}.fw.sorted.bdg"),
-             bed=os.path.join(output_dir,"{sample}.fw.bed")
-         threads:
-             _threads
-         params:
-             dir_script=config["script_folder"]
-         output:
-             temp(os.path.join(output_dir,"{sample}.fw.scaled.bdg"))
-         shell:
-             "python {params.dir_script}/1.3_bdg_normalize_v4.0.py --bdg {input.bdg} --bed {input.bed}"
-
-     rule bdg_normalize_rv:
-         input:
-             bdg=os.path.join(output_dir,"{sample}.rv.sorted.bdg"),
-             bed=os.path.join(output_dir,"{sample}.rv.bed")
-         threads:
-             _threads
-         params:
-             dir_script=config["script_folder"]
-         output:
-             temp(os.path.join(output_dir,"{sample}.rv.scaled.bdg"))
-         shell:
-             "python {params.dir_script}/1.3_bdg_normalize_v4.0.py --bdg {input.bdg} --bed {input.bed}"
-
-     rule bdg2bw_fw:
-         input:
-             os.path.join(output_dir,"{sample}.fw.scaled.bdg")
-         threads:
-             _threads
-         params:
-             gl=config["genomelen"],
-             dir_script=config["script_folder"]
-         output:
-             os.path.join(output_dir,"{sample}.fw.scaled.bw")
-         shell:
-             "{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"
-
-     rule bdg2bw_rv:
-         input:
-             os.path.join(output_dir,"{sample}.rv.scaled.bdg")
-         threads:
-             _threads
-         params:
-             gl=config["genomelen"],
-             dir_script=config["script_folder"]
-         output:
-             os.path.join(output_dir,"{sample}.rv.scaled.bw")
-         shell:
-             "{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"
-
-     rule bwAdd:
-         input:
-             fw=os.path.join(output_dir,"{sample}.fw.scaled.bw"),
-             rv=os.path.join(output_dir,"{sample}.rv.scaled.bw")
-         threads:
-             _threads
-         output:
-             os.path.join(output_dir,"{sample}." + BinSize + ".add.bdg")
-         shell:
-             """
-             bigwigCompare --binSize {BinSize} -p {threads} --verbose -o {output} \
-             --outFileFormat bedgraph --fixedStep \
-             --bigwig1 {input.fw} \
-             --bigwig2 {input.rv} \
-             --operation add
-             """
- else:
-     rule bdg_reverse_rv:
-         input:
-             os.path.join(output_dir,"{sample}.rv.sorted.bdg")
-         threads:
-             _threads
-         output:
-             temp(os.path.join(output_dir,"{sample}.rv.sorted_r.bdg"))
-         shell:
-             "awk -F '\t' -v OFS='\t' '{{$4=-$4; print}}' {input} > {output}"
-
-     rule bdg2bw_fw:
-         input:
-             os.path.join(output_dir,"{sample}.fw.sorted.bdg")
-         threads:
-             _threads
-         params:
-             gl=config["genomelen"],
-             dir_script=config["script_folder"]
-         output:
-             os.path.join(output_dir,"{sample}.fw.raw.bw")
-         shell:
-             "{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"
-
-     rule bdg2bw_rv:
-         input:
-             os.path.join(output_dir,"{sample}.rv.sorted_r.bdg")
-         threads:
-             _threads
-         params:
-             gl=config["genomelen"],
-             dir_script=config["script_folder"]
-         output:
-             os.path.join(output_dir,"{sample}.rv.raw.bw")
-         shell:
-             "{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"
-
-
-
-
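For context, the deleted Snakefile_offtracker above reads all of its settings from the config.yaml written by offtracker_config.py. Below is a minimal sketch of that configuration, shown as the equivalent Python mapping; the key names are taken from the rules above, and every value is illustrative only.

    # Keys consumed by the deleted Snakefile_offtracker; all values below are hypothetical examples.
    config = {
        "thread": 4,                                # _threads, passed to every rule
        "binsize": 100,                             # bin size for bigwigCompare in rule bwAdd
        "normalize": "True",                        # the workflow expects the literal string "True" or "False"
        "output_dir": "./offtracker_output",
        "nametype": "_R2",                          # combined with suffix to build the R1/R2 file names
        "suffix": "fastq.gz",
        "sample": {"sample1": "/path/to/sample1"},  # sample name -> file prefix used by rule chromap
        "index": "/path/to/chromap_index",
        "fasta": "/path/to/genome.fa",
        "blacklist": "none",                        # or a BED file of regions to exclude
        "script_folder": "/path/to/offtracker/mapping",
        "genomelen": "/path/to/hg38.chrom.sizes",
    }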
offtracker-2.7.10/offtracker.egg-info/SOURCES.txt (deleted)
@@ -1,26 +0,0 @@
- LICENSE.txt
- MANIFEST.in
- README.md
- setup.py
- offtracker/X_offplot.py
- offtracker/X_offtracker.py
- offtracker/X_sequence.py
- offtracker/__init__.py
- offtracker/_version.py
- offtracker.egg-info/PKG-INFO
- offtracker.egg-info/SOURCES.txt
- offtracker.egg-info/dependency_links.txt
- offtracker.egg-info/requires.txt
- offtracker.egg-info/top_level.txt
- offtracker/mapping/1.1_bed2fr_v4.5.py
- offtracker/mapping/1.3_bdg_normalize_v4.0.py
- offtracker/mapping/Snakefile_offtracker
- offtracker/mapping/bedGraphToBigWig
- offtracker/mapping/hg38.chrom.sizes
- offtracker/mapping/mm10.chrom.sizes
- offtracker/mapping/offtracker_blacklist_hg38.merged.bed
- offtracker/mapping/offtracker_blacklist_mm10.merged.bed
- scripts/offtracker_analysis.py
- scripts/offtracker_candidates.py
- scripts/offtracker_config.py
- scripts/offtracker_plot.py
offtracker-2.7.10/scripts/offtracker_candidates.py (deleted)
@@ -1,307 +0,0 @@
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
-
- # 2023.10.27. v2.0: centers on the target_location midpoint, so the pct calculation is dropped
- # 2023.12.06. v2.1: adds cleavage_site inference, fixes the deletion offset, and centers on the cleavage_site
- import os,sys,re,time
- from itertools import product
-
- if sys.version_info < (3,0):
-     import platform
-     raise Exception(f'python3 is needed, while running {platform.python_version()} now')
-
- import offtracker
- import offtracker.X_sequence as xseq
- script_dir = os.path.abspath(os.path.dirname(offtracker.__file__))
- script_folder= os.path.join(script_dir, 'mapping')
-
- import argparse
- import pandas as pd
- import pybedtools
- import multiprocessing as mp
- from Bio.Blast.Applications import NcbiblastnCommandline
-
- def main():
-     parser = argparse.ArgumentParser()
-     parser.description='Generate candidate regions by sgRNA sequence'
-     parser.add_argument('--sgrna' , type=str, required=True, help='sgRNA sequence without PAM' )
-     parser.add_argument('--pam' , type=str, required=True, help='The protospacer adjacent motif' )
-     parser.add_argument('--name' , type=str, required=True, help='custom name of the sgRNA' )
-     parser.add_argument('-r','--ref' , type=str, required=True, help='The fasta file of reference genome')
-     parser.add_argument('-b','--blastdb', type=str, required=True, help='blast database')
-     parser.add_argument('-o','--outdir' , type=str, required=True, help='The output folder')
-     parser.add_argument('-g','--genome' , type=str, default='hg38', help='File of chromosome sizes, or "hg38", "mm10" ')
-     parser.add_argument('-t','--thread' , type=int, default=4, help='Number of threads for parallel computing')
-     parser.add_argument('--quick_mode' , action='store_true', help='BLAST faster but less candidates.')
-
-     args = parser.parse_args()
-
-
-     if (args.genome == 'hg38') or (args.genome == 'mm10'):
-         dir_chrom_sizes = os.path.join(script_folder, f'{args.genome}.chrom.sizes')
-     else:
-         dir_chrom_sizes = args.genome
-
-     sgRNA_name = args.name
-     sgRNA_seq = args.sgrna
-     PAM = args.pam
-     n_threads = args.thread
-     dir_output = args.outdir
-     if not os.path.exists(dir_output):
-         os.makedirs(dir_output)
-     dir_ref_fa = args.ref
-     blast_db = args.blastdb
-     quick_mode = args.quick_mode
-
-     # parameters for alignment
-     half_width = 100
-     pct_params = 1.0
-     frag_len= half_width*2
-     dir_df_candidate = os.path.join(dir_output, f'df_candidate_{sgRNA_name}.csv')
-
-
-     sgRNA_seq = sgRNA_seq.upper()
-     PAM = PAM.upper()
-     dir_sgRNA_fasta = os.path.join(dir_output, f'{sgRNA_name}_PAM.fasta')
-     dir_sgRNA_blast = os.path.join(dir_output, f'{sgRNA_name}_PAM.blast')
-     dir_sgRNA_bed = os.path.join(dir_output, f'{sgRNA_name}_PAM.bed')
-
-
-     possible_sgRNA_PAM = list(product([sgRNA_seq],xseq.possible_seq(PAM)))
-     possible_sgRNA_PAM = [''.join(combination) for combination in possible_sgRNA_PAM]
-     n_seq = len(possible_sgRNA_PAM)
-
-     ID = pd.Series(['seq']*n_seq) + pd.Series(range(1,n_seq+1)).astype(str)
-     df_sgRNA_PAM = pd.DataFrame({'ID':ID,'sequence':possible_sgRNA_PAM})
-     xseq.write_fasta(df_sgRNA_PAM, dir_sgRNA_fasta)
-
-
-
-     #########
-     # BLAST #
-     #########
-     if os.path.isfile(dir_sgRNA_blast):
-         print(f'{dir_sgRNA_blast} exists, skipped.')
-     else:
-         if quick_mode:
-             print('Using quick mode for BLAST')
-             blastx_cline = NcbiblastnCommandline(query=dir_sgRNA_fasta, task='blastn-short',out=dir_sgRNA_blast,
-                                                  db=blast_db, evalue=10000,outfmt=6, num_threads=n_threads,
-                                                  gapopen=4, gapextend=2, reward=2, word_size=5, dust='no', soft_masking=False)
-         else:
-             blastx_cline = NcbiblastnCommandline(query=dir_sgRNA_fasta, task='blastn-short',out=dir_sgRNA_blast,
-                                                  db=blast_db, evalue=10000,outfmt=6, num_threads=n_threads,
-                                                  gapopen=4, gapextend=2, reward=2, word_size=4, dust='no', soft_masking=False)
-         print(f'BLAST for candidate off-target sites of {sgRNA_name}.')
-         blastx_cline()
-         print(f'BLAST finished.')
-
-     ##############
-     # Output bed #
-     ##############
-
-     blast_regions = pd.read_csv(dir_sgRNA_blast, sep='\t',header=None)
-     blast_regions.columns = ['query acc.','chr','% identity','alignment length','mismatches','gap opens','q. start','q. end','st','ed','evalue','bit score']
-     blast_regions = blast_regions[blast_regions.evalue<10000]
-
-     # reverse strand
-     blast_regions['reverse'] = (blast_regions['st']>blast_regions['ed']).astype(int)
-     blast_regions_f = blast_regions[blast_regions.reverse==0].copy()
-     blast_regions_r = blast_regions[blast_regions.reverse==1].copy()
-     temp = blast_regions_r['st'].copy()
-     blast_regions_r['st'] = blast_regions_r['ed']
-     blast_regions_r['ed'] = temp
-     blast_regions = pd.concat([blast_regions_f, blast_regions_r])
-     # sort and add location
-     blast_regions = blast_regions.sort_values('evalue').reset_index(drop=True)
-     blast_regions['location']=blast_regions['chr'].str[:] + ':' + blast_regions['st'].astype(str).str[:] + '-' + blast_regions['ed'].astype(str).str[:]
-     blast_regions = blast_regions.drop_duplicates(subset='location').copy()
-
-     # filter by alignment length
-     len_sgRNA=len(sgRNA_seq)
-     min_len = len_sgRNA-8
-     blast_regions = blast_regions[blast_regions['alignment length']>=min_len].copy().reset_index(drop=True)
-     blast_regions = blast_regions.reindex(columns = ['chr', 'st', 'ed' , 'query acc.', '% identity', 'alignment length', 'mismatches',
-                                                      'gap opens', 'q. start', 'q. end', 'evalue', 'bit score', 'reverse', 'location'] )
-
-     # write the bed file used for the downstream alignment score calculation
-     blast_regions_bed = blast_regions[['chr','st','ed']]
-     xseq.write_bed(blast_regions_bed, dir_sgRNA_bed)
-     # sort the bed file without merging intervals
-     a = pybedtools.BedTool(dir_sgRNA_bed)
-     a.sort(g=dir_chrom_sizes).saveas( dir_sgRNA_bed )
-     print(f'Output {sgRNA_name}_PAM.bed')
-
-
-     ###################
-     # alignment score #
-     ###################
-     if os.path.isfile(dir_df_candidate):
-         print(f'{dir_df_candidate} exists, skipped.')
-     else:
-         #########
-         # load the blast bed
-         #########
-         common_chr = pd.Series(['chr']*23).str[:] + pd.Series(range(23)).astype(str).str[:]
-         common_chr = pd.concat([common_chr, pd.Series(['chrX','chrY'])]).to_numpy()
-
-         bed_short = xseq.X_readbed(dir_sgRNA_bed)
-         bed_short = bed_short[bed_short['chr'].isin(common_chr)].copy()
-         bed_short['midpoint'] = ((bed_short['st'] + bed_short['ed'])/2).astype(int)
-         bed_short['st'] = bed_short['midpoint'] - half_width
-         bed_short['ed'] = bed_short['midpoint'] + half_width
-         bed_short.loc[bed_short['st']<0,'st']=0
-         bed_short = bed_short.drop_duplicates()
-
-         #########
-         # extract the genomic sequence within half_width on either side of each bed_f site
-         #########
-
-         temp_bed = os.path.join(dir_output, 'temp.bed')
-         xseq.write_bed(bed_short.iloc[:,:3], temp_bed)
-         a = pybedtools.BedTool(temp_bed)
-         fasta = pybedtools.example_filename(dir_ref_fa)
-         a = a.sequence(fi=fasta)
-         with open(a.seqfn) as f:
-             fasta = {}
-             for line in f:
-                 line = line.strip() # strip the trailing newline
-                 if line[0] == '>':
-                     header = line[1:]
-                 else:
-                     sequence = line
-                     fasta[header] = fasta.get(header,'') + sequence
-
-         # when pybedtools reports a location as chrA:X-Y, X is shifted 1 bp to the left
-
-         #########
-         # local alignment
-         #########
-         DNA_matrix = {('A','A'): 2, ('A','T'):0.01, ('A','C'):0.01, ('A','G'):0.01, ('A','N'):0.01,
-                       ('T','T'): 2, ('T','A'):0.01, ('T','C'):0.01, ('T','G'):0.01, ('T','N'):0.01,
-                       ('G','G'): 2, ('G','A'):0.01, ('G','C'):0.01, ('G','T'):0.01, ('G','N'):0.01,
-                       ('C','C'): 2, ('C','A'):0.01, ('C','G'):0.01, ('C','T'):0.01, ('C','N'):0.01,
-                       ('N','N'): 2, ('N','C'):2, ('N','A'): 2, ('N','G'): 2, ('N','T'): 2}
-         mismatch_score = 0.01
-         # append the PAM
-         sgRNA_PAM_fw = sgRNA_seq + PAM
-         sgRNA_PAM_rv = xseq.reverse_complement(sgRNA_PAM_fw)
-
-         list_args_fw=[]
-         list_args_rv=[]
-         for a_key in fasta.keys():
-             seq = re.sub('[^ATCG]','N',fasta[a_key])
-             list_args_fw.append( [a_key, sgRNA_PAM_fw, seq, frag_len, DNA_matrix, mismatch_score] )
-             list_args_rv.append( [a_key, sgRNA_PAM_rv, seq, frag_len, DNA_matrix, mismatch_score] )
-         st = time.time()
-         with mp.Pool(n_threads) as p:
-             list_align_forward = p.starmap(xseq.sgRNA_alignment, list_args_fw)
-         ed = time.time()
-         print('align_forward:{:.2f}'.format(ed-st))
-         st = time.time()
-         with mp.Pool(n_threads) as p:
-             list_align_reverse = p.starmap(xseq.sgRNA_alignment, list_args_rv)
-         ed = time.time()
-         print('align_reverse:{:.2f}'.format(ed-st))
-         #
-         df_align_forward = pd.DataFrame(list_align_forward, columns= ['fw_score','fw_pct','fw_target','fw_location','fw_deletion','fw_insertion','fw_mismatch'])
-         df_align_reverse = pd.DataFrame(list_align_reverse, columns= ['rv_score','rv_pct','rv_target','rv_location','rv_deletion','rv_insertion','rv_mismatch'])
-         df_align_reverse['rv_target'] = df_align_reverse['rv_target'].apply(xseq.reverse_complement)
-         df_candidate = pd.concat([df_align_forward,df_align_reverse],axis=1)
-         df_candidate['location'] = fasta.keys()
-         df_candidate['alignment_score'] = df_candidate[['fw_score','rv_score']].max(axis=1)
-         #df_candidate['fw_score_2'] = df_candidate['fw_score']*(pct_params-df_candidate['fw_pct'].abs())
-         #df_candidate['rv_score_2'] = df_candidate['rv_score']*(pct_params-df_candidate['rv_pct'].abs())
-         #df_candidate['best_seq_score'] = df_candidate[['fw_score_2', 'rv_score_2']].max(axis=1)
-         #df_candidate['best_strand'] = df_candidate[['fw_score_2', 'rv_score_2']].idxmax(axis='columns').replace({'fw_score_2':'+', 'rv_score_2':'-'})
-         #df_candidate.loc[df_candidate['fw_score_2']==df_candidate['rv_score_2'],'best_strand']='equal_score'
-         df_candidate['best_seq_score'] = df_candidate[['fw_score', 'rv_score']].max(axis=1)
-         df_candidate['best_strand'] = df_candidate[['fw_score', 'rv_score']].idxmax(axis='columns').replace({'fw_score':'+', 'rv_score':'-'})
-         df_candidate.loc[df_candidate['fw_score']==df_candidate['rv_score'],'best_strand']='equal_score'
-
-         # GG check
-         # 2023.12.05 added cleavage_site inference
-         list_best_target = []
-         list_best_location = []
-         list_cleavage_site = []
-         list_delete = []
-         list_insert = []
-         list_mismat = []
-         list_GG = []
-         for a_row in df_candidate.iterrows():
-             if a_row[1]['best_strand']=='+':
-                 list_best_target.append(a_row[1]['fw_target'])
-                 list_best_location.append(a_row[1]['fw_location'])
-                 list_cleavage_site.append(int(a_row[1]['fw_location'].split('-')[1]) - 6)
-                 list_delete.append(a_row[1]['fw_deletion'])
-                 list_insert.append(a_row[1]['fw_insertion'])
-                 list_mismat.append(a_row[1]['fw_mismatch'])
-                 if a_row[1]['fw_target'][-2:]=='GG':
-                     list_GG.append('OK')
-                 else:
-                     list_GG.append('NO')
-             elif a_row[1]['best_strand']=='-':
-                 list_best_target.append(a_row[1]['rv_target'])
-                 list_best_location.append(a_row[1]['rv_location'])
-                 list_cleavage_site.append(int(a_row[1]['rv_location'].split('-')[0].split(':')[1]) + 5)
-                 list_delete.append(a_row[1]['rv_deletion'])
-                 list_insert.append(a_row[1]['rv_insertion'])
-                 list_mismat.append(a_row[1]['rv_mismatch'])
-                 if a_row[1]['rv_target'][-2:]=='GG':
-                     list_GG.append('OK')
-                 else:
-                     list_GG.append('NO')
-             else:
-                 if a_row[1]['fw_target'][-2:]=='GG':
-                     list_best_target.append(a_row[1]['fw_target'])
-                     list_best_location.append(a_row[1]['fw_location'])
-                     list_cleavage_site.append(int(a_row[1]['fw_location'].split('-')[1]) - 6)
-                     list_delete.append(a_row[1]['fw_deletion'])
-                     list_insert.append(a_row[1]['fw_insertion'])
-                     list_mismat.append(a_row[1]['fw_mismatch'])
-                     list_GG.append('OK_same_score')
-                 # if no GG is found, check the reverse complement
-                 elif a_row[1]['rv_target'][-2:]=='GG':
-                     list_best_target.append(a_row[1]['rv_target'])
-                     list_best_location.append(a_row[1]['rv_location'])
-                     list_cleavage_site.append(int(a_row[1]['rv_location'].split('-')[0].split(':')[1]) + 5)
-                     list_delete.append(a_row[1]['rv_deletion'])
-                     list_insert.append(a_row[1]['rv_insertion'])
-                     list_mismat.append(a_row[1]['rv_mismatch'])
-                     list_GG.append('OK_same_score')
-                 else:
-                     list_best_target.append(a_row[1]['fw_target'])
-                     list_best_location.append(a_row[1]['fw_location'])
-                     list_cleavage_site.append(int(a_row[1]['fw_location'].split('-')[1]) - 6)
-                     list_delete.append(a_row[1]['fw_deletion'])
-                     list_insert.append(a_row[1]['fw_insertion'])
-                     list_mismat.append(a_row[1]['fw_mismatch'])
-                     list_GG.append('NO_same_score')
-         # record into df_candidate
-         df_candidate['deletion'] = list_delete
-         df_candidate['insertion'] = list_insert
-         df_candidate['mismatch'] = list_mismat
-         df_candidate['GG'] = list_GG
-         df_candidate['best_target'] = list_best_target
-         df_candidate['target_location'] = list_best_location
-         df_candidate['cleavage_site'] = list_cleavage_site
-
-         # 2.0 format update
-         df_candidate = df_candidate.drop_duplicates(subset=['target_location']).reset_index(drop=True)
-         df_candidate = pd.concat([xseq.bedfmt(df_candidate['target_location']), df_candidate],axis=1)
-         # df_candidate['midpoint'] = ((df_candidate['ed'] + df_candidate['st'])/2).astype(int)
-         df_candidate = xseq.add_ID(df_candidate, midpoint='cleavage_site')
-
-         df_candidate.to_csv(dir_df_candidate)
-         print(f'Output df_candidate_{sgRNA_name}.csv')
-         os.remove(temp_bed)
-
-     return 'Done!'
-
-
- if __name__ == '__main__' :
-     result = main()
-     print(result)
-
-
-
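The cleavage_site values recorded above follow a fixed offset from the aligned location string: end - 6 on the '+' strand and start + 5 on the '-' strand, presumably placing the cut 3 bp upstream of the NGG PAM. A compact restatement of that rule (a hypothetical helper for illustration, not a function the package exports):

    def infer_cleavage_site(location, strand):
        """Mirror of the deleted script's rule; location is a 'chr:start-end' string."""
        start, end = location.split(':')[1].split('-')
        if strand == '+':
            return int(end) - 6   # fw_location end minus 6
        return int(start) + 5     # rv_location start plus 5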