offtracker 2.7.8__zip → 2.10.0__zip

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. offtracker-2.10.0/PKG-INFO +233 -0
  2. offtracker-2.10.0/README.md +221 -0
  3. {offtracker-2.7.8 → offtracker-2.10.0}/offtracker/X_offplot.py +37 -8
  4. {offtracker-2.7.8 → offtracker-2.10.0}/offtracker/X_sequence.py +113 -7
  5. offtracker-2.10.0/offtracker/_version.py +36 -0
  6. offtracker-2.10.0/offtracker/snakefile/Snakefile_QC.smk +66 -0
  7. offtracker-2.10.0/offtracker/snakefile/Snakefile_offtracker.smk +249 -0
  8. offtracker-2.7.8/offtracker/mapping/1.1_bed2fr_v4.5.py → offtracker-2.10.0/offtracker/utility/1.1_bed2fr.py +6 -4
  9. offtracker-2.10.0/offtracker.egg-info/PKG-INFO +233 -0
  10. offtracker-2.10.0/offtracker.egg-info/SOURCES.txt +28 -0
  11. {offtracker-2.7.8 → offtracker-2.10.0}/scripts/offtracker_analysis.py +20 -5
  12. offtracker-2.10.0/scripts/offtracker_candidates.py +318 -0
  13. {offtracker-2.7.8 → offtracker-2.10.0}/scripts/offtracker_config.py +28 -44
  14. offtracker-2.10.0/scripts/offtracker_plot.py +39 -0
  15. offtracker-2.10.0/scripts/offtracker_qc.py +62 -0
  16. {offtracker-2.7.8 → offtracker-2.10.0}/setup.py +8 -4
  17. offtracker-2.7.8/PKG-INFO +0 -146
  18. offtracker-2.7.8/README.md +0 -134
  19. offtracker-2.7.8/offtracker/_version.py +0 -28
  20. offtracker-2.7.8/offtracker/mapping/Snakefile_offtracker +0 -245
  21. offtracker-2.7.8/offtracker.egg-info/PKG-INFO +0 -146
  22. offtracker-2.7.8/offtracker.egg-info/SOURCES.txt +0 -25
  23. offtracker-2.7.8/scripts/offtracker_candidates.py +0 -307
  24. {offtracker-2.7.8 → offtracker-2.10.0}/LICENSE.txt +0 -0
  25. {offtracker-2.7.8 → offtracker-2.10.0}/MANIFEST.in +0 -0
  26. {offtracker-2.7.8 → offtracker-2.10.0}/offtracker/X_offtracker.py +0 -0
  27. {offtracker-2.7.8 → offtracker-2.10.0}/offtracker/__init__.py +0 -0
  28. {offtracker-2.7.8/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/1.3_bdg_normalize_v4.0.py +0 -0
  29. {offtracker-2.7.8/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/bedGraphToBigWig +0 -0
  30. {offtracker-2.7.8/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/hg38.chrom.sizes +0 -0
  31. {offtracker-2.7.8/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/mm10.chrom.sizes +0 -0
  32. {offtracker-2.7.8/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/offtracker_blacklist_hg38.merged.bed +0 -0
  33. {offtracker-2.7.8/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/offtracker_blacklist_mm10.merged.bed +0 -0
  34. {offtracker-2.7.8 → offtracker-2.10.0}/offtracker.egg-info/dependency_links.txt +0 -0
  35. {offtracker-2.7.8 → offtracker-2.10.0}/offtracker.egg-info/requires.txt +0 -0
  36. {offtracker-2.7.8 → offtracker-2.10.0}/offtracker.egg-info/top_level.txt +0 -0
  37. {offtracker-2.7.8 → offtracker-2.10.0}/setup.cfg +0 -0
@@ -3,6 +3,7 @@ import math
3
3
  import pandas as pd
4
4
  from itertools import product
5
5
  import numpy as np
6
+ import os, glob
6
7
 
7
8
  ambiguous_nt = {'A': ['A'],
8
9
  'T': ['T'],
@@ -19,7 +20,7 @@ ambiguous_nt = {'A': ['A'],
19
20
  'H': ['A', 'C', 'T'],
20
21
  'D': ['A', 'G', 'T'],
21
22
  'B': ['C', 'G', 'T'],
22
- 'N': ['A', 'T', 'C', 'G']}
23
+ 'N': ['A', 'C', 'G', 'T']}
23
24
 
24
25
  def is_seq_valid(sequence, extra=True, ambiguous_nt=ambiguous_nt):
25
26
  if extra:
@@ -43,12 +44,24 @@ def possible_seq(sequence):
43
44
  raise KeyError(f'Unvalid character \'{valid_check}\' in sequence')
44
45
  return sequences
45
46
 
47
# supports degenerate (IUPAC) base pairs
def get_base_score(base1, base2, exact_score=2, partial_match=2, mismatch_score=0.01, nt_map=None):
    """Return an alignment score for a pair of IUPAC nucleotide codes.

    Parameters
    ----------
    base1, base2 : str
        Single IUPAC nucleotide codes (keys of ``nt_map``).
    exact_score : numeric
        Score when both codes denote exactly the same base set.
    partial_match : numeric
        Score when one code's base set is a subset of the other's
        (e.g. 'A' vs 'N').
    mismatch_score : numeric
        Score when neither base set contains the other.
    nt_map : dict or None
        Mapping from IUPAC code to list of concrete bases; defaults to the
        module-level ``ambiguous_nt`` (same convention as ``is_seq_valid``).
    """
    if nt_map is None:
        nt_map = ambiguous_nt
    set1 = set(nt_map[base1])
    set2 = set(nt_map[base2])
    if set1 == set2:
        return exact_score
    # Order-independent subset test. The previous implementation compared
    # np.union1d(...) against the raw lists, which silently returned a
    # mismatch whenever the lists in ambiguous_nt were not pre-sorted.
    if set1 <= set2 or set2 <= set1:
        return partial_match
    return mismatch_score
57
+
58
+
46
59
def complement(seq):
    """Return the complement of a nucleotide sequence.

    Handles the four canonical bases, IUPAC ambiguity codes and the gap
    character '-'. Raises KeyError for any other character.
    """
    pairs = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N', '-': '-',
             'M': 'K', 'R': 'Y', 'W': 'W', 'S': 'S', 'Y': 'R', 'K': 'M',
             'V': 'B', 'H': 'D', 'D': 'H', 'B': 'V'}
    return ''.join(pairs[base] for base in seq)
53
66
 
54
67
  def reverse(seq):
@@ -100,14 +113,107 @@ def add_ID(df, chr_col=0, midpoint='cleavage_site'):#, midpoint='midpoint'):
100
113
  df.loc[point_tail>=500,'ID_2'] = df[chr_col_name] + ':' + (point_head+1).astype(str)
101
114
  return df
102
115
 
116
+
117
+
118
def detect_fastq(folder, n_subfolder, NGS_type='paired-end'):
    """
    Search the n-th-level subfolders of ``folder`` for fastq/fastq.gz/fq/fq.gz files.

    paired-end mode : files matching *2.fq / *2.fastq (+ .gz) are treated as R2
                      and the matching R1 file is verified to exist.
    single-end mode : every fastq/fastq.gz/fq/fq.gz file is treated as single-end.

    Avoid extra characters between "2." and "fq/fastq" (e.g. 2.trimmed.fq.gz):
    the wildcard may then be fooled by other "2."s in the file name.
    Do not use dots in sample names; separate features with "_" and use "-"
    inside a feature, e.g. sample_day-hour_type_batch_rep_1.fq.gz

    Input
    ----------
    folder : root directory
    n_subfolder : subfolder depth to search

    Parameter
    ----------
    NGS_type : 'paired-end' or 'single-end'

    Output
    ----------
    sample_names : detected sample names
    files_R1 : full paths of the R1 files
    files_R2 : full paths of the R2 files

    """
    if NGS_type == 'paired-end':
        print('paired-end mode')
        files_R2 = []
        # four supported file extensions; paths include the search root
        for fastq in ['*2.fq','*2.fastq','*2.fq.gz','*2.fastq.gz']:
            fq_files = glob.glob( os.path.join(folder, n_subfolder*'*/', fastq ) )
            print(f'{len(fq_files)} {fastq[2:]} samples detected')
            files_R2.extend( fq_files )
        #
        if len(files_R2) > 0:
            files_R2 = pd.Series(files_R2).sort_values().reset_index(drop=True)
            # split the file names
            # (raw strings + escaped dots: the patterns match a literal "."
            # and no longer emit invalid-escape-sequence warnings)
            suffix = files_R2.str.extract(r'(\.fastq.*|\.fq.*)',expand=False)
            prefix = files_R2.str.extract(r'(.*)(?:\.fq|\.fastq)',expand=False)
            # split prefix further into sample_dir (the real sample name) and
            # nametype (a common suffix); five sample-name suffixes are supported
            nametype = []
            sample_dir = []
            for a_prefix in prefix:
                for a_type in ['_trimmed_2', '_2_val_2','_R2_val_2','_R2','_2']:
                    len_type = len(a_type)
                    if a_prefix[-len_type:] == a_type:
                        nametype.append(a_type)
                        sample_dir.append(a_prefix[:-len_type])
                        break
            assert len(nametype) == len(files_R2), 'The file name pattern is invalid!'
            nametype = pd.Series(nametype)
            sample_dir = pd.Series(sample_dir)
            # derive the R1 names from the R2 files and check that they exist
            files_R1 = sample_dir + nametype.str.replace('2','1') + suffix
            for i in range(len(files_R1)):
                assert os.path.exists(files_R1[i]), f'{files_R1[i]} not found!'
            sample_names = sample_dir.apply(os.path.basename)
        else:
            print('No paired-end samples detected!')
            sample_names = 'no sample'
            files_R1 = []

    elif NGS_type == 'single-end':
        print('single-end mode')
        files_R1 = []
        files_R2 = [] # placeholder
        # four supported file extensions; paths include the search root
        for fastq in ['*.fq','*.fastq','*.fq.gz','*.fastq.gz']:
            fq_files = glob.glob( os.path.join(folder, n_subfolder*'*/', fastq ) )
            print(f'{len(fq_files)} {fastq[1:]} samples detected')
            files_R1.extend( fq_files )
        # reset_index keeps positional alignment consistent with the
        # paired-end branch
        files_R1 = pd.Series(files_R1).sort_values().reset_index(drop=True)
        #
        if len(files_R1) > 0:
            # split the file names
            suffix = files_R1.str.extract(r'(\.fastq.*|\.fq.*)',expand=False)
            prefix = files_R1.str.extract(r'(.*)(?:\.fq|\.fastq)',expand=False)
            # in single-end mode every prefix is taken as a sample name
            sample_names = prefix.apply(os.path.basename)
        else:
            print('No single-end samples detected!')
            sample_names = 'no sample'
            files_R1 = []

    return sample_names, files_R1, files_R2
207
+
208
+
103
209
  def sgRNA_alignment(a_key, sgRNA, seq, frag_len, DNA_matrix=None, mismatch_score = 0.01, return_align=False):
104
210
  from Bio import pairwise2
105
211
  import numpy as np
106
212
  if DNA_matrix is None:
107
- DNA_matrix = {('A','A'): 2, ('A','T'):0.01, ('A','C'):0.01, ('A','G'):0.01, ('A','N'):0.01,
108
- ('T','T'): 2, ('T','A'):0.01, ('T','C'):0.01, ('T','G'):0.01, ('T','N'):0.01,
109
- ('G','G'): 2, ('G','A'):0.01, ('G','C'):0.01, ('G','T'):0.01, ('G','N'):0.01,
110
- ('C','C'): 2, ('C','A'):0.01, ('C','G'):0.01, ('C','T'):0.01, ('C','N'):0.01,
213
+ DNA_matrix = {('A','A'): 2, ('A','T'):0.01, ('A','C'):0.01, ('A','G'):0.01, ('A','N'):2,
214
+ ('T','T'): 2, ('T','A'):0.01, ('T','C'):0.01, ('T','G'):0.01, ('T','N'):2,
215
+ ('G','G'): 2, ('G','A'):0.01, ('G','C'):0.01, ('G','T'):0.01, ('G','N'):2,
216
+ ('C','C'): 2, ('C','A'):0.01, ('C','G'):0.01, ('C','T'):0.01, ('C','N'):2,
111
217
  ('N','N'): 2, ('N','C'):2, ('N','A'): 2, ('N','G'): 2, ('N','T'): 2}
112
218
  # a_key 是 pybedtools 得到的位置 chrA:X-Y 而 X 数字会往左多1bp
113
219
  alignments = pairwise2.align.localds( sgRNA, seq, DNA_matrix, -2, -2, penalize_extend_when_opening=False)
@@ -0,0 +1,36 @@
1
__version__ = "2.10.0"
# 2023.08.11. v1.1.0 adding a option for not normalizing the bw file
# 2023.10.26. v1.9.0 prerelease for v2.0
# 2023.10.27. v2.0.0 major update, fine-tuning still pending
# 2023.10.28. v2.1.0 bug fixes; added computation of signal length
# 2023.10.28. v2.2.0 bug fixes; changed the signal-length algorithm
# 2023.10.29. v2.3.0 added overall signal calculation
# 2023.11.01. v2.3.1 added a signal_only option
# 2023.11.02. v2.3.2 changed the order of computing sample signal and group mean
# 2023.11.04. v2.3.3 fixed a sorting error when normalizing the overall score
# 2023.11.05. v2.3.4 fixed wrong column selection when detecting one-sided signal overflow
# 2023.11.13. v2.3.5 minor tweak of the track score
# 2023.12.05. v2.3.6 candidates now include the cleavage site; fixed misalignment when the alignment contains a deletion
# 2023.12.05. v2.3.7 use cleavage site instead of midpoint # migration not finished yet
# 2023.12.07. v2.3.8 df_score gains separate df_exp / df_ctr columns; fixed a bug when df_ctr is absent; track score uses proximal
# 2023.12.09. v2.4.0 to balance proximal and overall, add an overall-signal bonus when the normalized overall signal exceeds 2
# 2023.12.09. v2.5.0 trying a new weighting position
# 2023.12.10. v2.6.0 added the trackseq v4 branch, i.e. consider positive_pct within a Region to avoid short, sharp signals
# 2023.12.10. v2.6.1 some non-specific signals are very large; a large negative value in the control group could yield a false high signal after subtraction, so negative values are clipped
# 2023.12.30. v2.7.0 added the X_offplot module for plotting
# 2023.12.31. v2.7.1 control negative-value clip changed from -5 to -1 to further reduce false positives; overall is no longer added
# 2024.01.01. v2.7.2 weights changed to proximal + pct = 1 + 1; anti-overflow false-positive threshold changed from <0 to <=0
# 2024.01.02. v2.7.3 default flank regions changed to 1000 2000 3000 5000; control clipping, previously applied to the final score, is now applied per value (default CtrClip=-0.5) before re-scoring
# 2024.01.03. v2.7.4 updated blacklist.bed
# 2024.01.04. v2.7.5 updated the hg38 blacklist.bed
# 2024.01.12. v2.7.6 minor bug fixes; output fdr changed to <0.05.
# 2024.01.23. v2.7.7 Snakefile_offtracker: add --fixedStep to bigwigCompare for not merging neighbouring bins with equal values.
# 2024.02.01. v2.7.8 gradually adding X_offplot.py features
# 2024.06.02. v2.7.9 added offtracker_plot.py
# 2024.06.03. v2.7.10 bug fixes; offtable adds a threshold = 2 boundary
# 2024.06.04. v2.7.11 readme updates
# 2024.11.19. v2.7.12 offtracker_candidates.py: new --pam_location option (upstream or downstream) for non-Cas9 cases
# 2025.04.25. v2.8.0 fixed a bug where offtracker candidates converted lower-case sequences to N
# 2025.05.22. v2.9.0 refactored part of the code structure
# 2025.06.05. v2.10.0 added a QC module; records with negative scores are kept and shown in red when plotting; added "--ignore_chr" to skip common-chromosome filtering
36
+
@@ -0,0 +1,66 @@
1
# Change log:
# 2022.05.04. v1.0: first working version, fastp + multiqc
# 2024.01.17. v2.0: refactored to match the X_NGS framework

# parameters
configfile: "config.yaml"

### config['files_R1'] and config['files_R2'] are dicts

# # fastq information
_files_R1 = config['files_R1'] # dict, keyed by sample
_files_R2 = config['files_R2'] # dict, keyed by sample
# # input/output folders
# config['input_dir']
_output_dir = config["output_dir"]
# # run parameters
_thread = config['thread']
# config['utility_dir']

import os

############################
# conditional output_files #
############################
output_HT = expand( os.path.join(_output_dir,"{sample}_fastp.html"), sample=_files_R1)
output_JS = expand( os.path.join(_output_dir,"{sample}_fastp.json"), sample=_files_R1)
output_MQC = os.path.join(_output_dir,"MultiQC_Report_Raw.html")
output_R1 = expand( os.path.join(_output_dir,"{sample}_trimmed_1.fq.gz"), sample=_files_R1) # iterating a dict yields its keys
output_R2 = expand( os.path.join(_output_dir,"{sample}_trimmed_2.fq.gz"), sample=_files_R1)

output_files = output_HT + output_JS + [output_MQC] + output_R1 + output_R2

rule all:
    input:
        output_files

#######################
## fastp and multiQC ##
#######################
rule QCtrim:
    input:
        R1=lambda w: _files_R1[w.sample],
        R2=lambda w: _files_R2[w.sample]
    threads:
        _thread
    output:
        R1=os.path.join(_output_dir,"{sample}_trimmed_1.fq.gz"),
        R2=os.path.join(_output_dir,"{sample}_trimmed_2.fq.gz"),
        HT=os.path.join(_output_dir,"{sample}_fastp.html"),
        JS=os.path.join(_output_dir,"{sample}_fastp.json")
    shell:
        # fix: write the fastp reports to the declared outputs; the previous
        # "-h {wildcards.sample}_fastp.html" wrote to the working directory,
        # so the rule failed whenever it differed from _output_dir
        """
        fastp -i {input.R1} -I {input.R2} -o {output.R1} -O {output.R2} \
        -h {output.HT} -j {output.JS} \
        --length_required 10 --thread {threads} --detect_adapter_for_pe --disable_quality_filtering
        """

rule multiqc:
    input:
        expand( os.path.join(_output_dir,"{sample}_fastp.html"), sample=_files_R1 )
    threads:
        _thread
    output:
        os.path.join(_output_dir,"MultiQC_Report_Raw.html")
    shell:
        "multiqc {_output_dir} -n MultiQC_Report_Raw --outdir {_output_dir}"
@@ -0,0 +1,249 @@
1
# 2023.08.11. adding a option for not normalizing the bw file
# 2024.01.23. add --fixedStep to bigwigCompare for not merging neighbouring bins with equal values.
# 2025.05.22. refine the structure

configfile: "config.yaml"

# # fastq information
_files_R1 = config['files_R1'] # dict, keyed by sample
_files_R2 = config['files_R2'] # dict, keyed by sample
# # run parameters
_output_dir = config["output_dir"]
# fix: every rule below references "_threads"; this was previously assigned
# to "_thread", which made Snakemake raise a NameError while parsing
_threads = config['thread']
_BinSize = str(config["binsize"])
_normalize = config["normalize"]


import os

if _normalize == "True":
    rule all:
        input:
            expand( os.path.join(_output_dir,"{sample}.fw.bed"), sample=_files_R1 ),
            expand( os.path.join(_output_dir,"{sample}.rv.bed"), sample=_files_R1 ),
            expand( os.path.join(_output_dir,"{sample}.fw.scaled.bw"), sample=_files_R1 ),
            expand( os.path.join(_output_dir,"{sample}.rv.scaled.bw"), sample=_files_R1 ),
            expand( os.path.join(_output_dir,"{sample}." + _BinSize + ".add.bdg"),sample=_files_R1 ),
elif _normalize == "False":
    rule all:
        input:
            expand( os.path.join(_output_dir,"{sample}.fw.bed"), sample=_files_R1 ),
            expand( os.path.join(_output_dir,"{sample}.rv.bed"), sample=_files_R1 ),
            expand( os.path.join(_output_dir,"{sample}.fw.raw.bw"), sample=_files_R1 ),
            expand( os.path.join(_output_dir,"{sample}.rv.raw.bw"), sample=_files_R1 ),
else:
    raise ValueError('Please provide "True" or "False" for "--normalize" when running offtracker_config.py')


rule chromap:
    input:
        R1=lambda w: _files_R1[w.sample],
        R2=lambda w: _files_R2[w.sample]
    threads:
        _threads
    params:
        index=config["index"],
        fasta=config["fasta"]
    output:
        temp(os.path.join(_output_dir,"{sample}.chromapx.bed"))
    shell:
        """
        chromap -l 3000 --low-mem --BED --remove-pcr-duplicates \
        --min-read-length 10 --allocate-multi-mappings \
        -x {params.index} -r {params.fasta} -t {threads} -1 {input.R1} -2 {input.R2} -o {output}
        """

if config["blacklist"] != 'none':
    rule remove_blacklist:
        input:
            os.path.join(_output_dir,"{sample}.chromapx.bed")
        threads:
            _threads
        params:
            blacklist=config["blacklist"]
        output:
            temp(os.path.join(_output_dir,"{sample}.filtered.bed"))
        shell:
            "bedtools intersect -a {input} -b {params.blacklist} -v > {output}"

    rule bed2fr:
        input:
            os.path.join(_output_dir,"{sample}.filtered.bed")
        threads:
            _threads
        params:
            dir_script=config["utility_dir"],
            ignore_chr=config["ignore_chr"],
        output:
            fw=os.path.join(_output_dir,"{sample}.fw.bed"),
            rv=os.path.join(_output_dir,"{sample}.rv.bed")
        shell:
            "python {params.dir_script}/1.1_bed2fr.py -b {input} {params.ignore_chr}"
else:
    rule bed2fr:
        input:
            os.path.join(_output_dir,"{sample}.chromapx.bed")
        threads:
            _threads
        params:
            dir_script=config["utility_dir"],
            ignore_chr=config["ignore_chr"],
        output:
            fw=os.path.join(_output_dir,"{sample}.fw.bed"),
            rv=os.path.join(_output_dir,"{sample}.rv.bed")
        shell:
            "python {params.dir_script}/1.1_bed2fr.py -b {input} {params.ignore_chr}"

rule bed2bdg_fw:
    input:
        os.path.join(_output_dir,"{sample}.fw.bed")
    threads:
        _threads
    params:
        gl=config["genomelen"]
    output:
        temp(os.path.join(_output_dir,"{sample}.fw.bdg"))
    shell:
        "bedtools genomecov -bg -i {input} -g {params.gl} > {output}"

rule bed2bdg_rv:
    input:
        os.path.join(_output_dir,"{sample}.rv.bed")
    threads:
        _threads
    params:
        gl=config["genomelen"]
    output:
        temp(os.path.join(_output_dir,"{sample}.rv.bdg"))
    shell:
        "bedtools genomecov -bg -i {input} -g {params.gl} > {output}"

rule bdg_sort_fw:
    input:
        fw=os.path.join(_output_dir,"{sample}.fw.bdg")
    threads:
        _threads
    output:
        temp(os.path.join(_output_dir,"{sample}.fw.sorted.bdg"))
    shell:
        "bedtools sort -i {input.fw} > {output}"

rule bdg_sort_rv:
    input:
        rv=os.path.join(_output_dir,"{sample}.rv.bdg")
    threads:
        _threads
    output:
        temp(os.path.join(_output_dir,"{sample}.rv.sorted.bdg"))
    shell:
        "bedtools sort -i {input.rv} > {output}"

if _normalize == "True":
    rule bdg_normalize_fw:
        input:
            bdg=os.path.join(_output_dir,"{sample}.fw.sorted.bdg"),
            bed=os.path.join(_output_dir,"{sample}.fw.bed")
        threads:
            _threads
        params:
            dir_script=config["utility_dir"]
        output:
            temp(os.path.join(_output_dir,"{sample}.fw.scaled.bdg"))
        shell:
            "python {params.dir_script}/1.3_bdg_normalize_v4.0.py --bdg {input.bdg} --bed {input.bed}"

    rule bdg_normalize_rv:
        input:
            bdg=os.path.join(_output_dir,"{sample}.rv.sorted.bdg"),
            bed=os.path.join(_output_dir,"{sample}.rv.bed")
        threads:
            _threads
        params:
            dir_script=config["utility_dir"]
        output:
            temp(os.path.join(_output_dir,"{sample}.rv.scaled.bdg"))
        shell:
            "python {params.dir_script}/1.3_bdg_normalize_v4.0.py --bdg {input.bdg} --bed {input.bed}"

    rule bdg2bw_fw:
        input:
            os.path.join(_output_dir,"{sample}.fw.scaled.bdg")
        threads:
            _threads
        params:
            gl=config["genomelen"],
            dir_script=config["utility_dir"]
        output:
            os.path.join(_output_dir,"{sample}.fw.scaled.bw")
        shell:
            "{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"

    rule bdg2bw_rv:
        input:
            os.path.join(_output_dir,"{sample}.rv.scaled.bdg")
        threads:
            _threads
        params:
            gl=config["genomelen"],
            dir_script=config["utility_dir"]
        output:
            os.path.join(_output_dir,"{sample}.rv.scaled.bw")
        shell:
            "{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"

    rule bwAdd:
        input:
            fw=os.path.join(_output_dir,"{sample}.fw.scaled.bw"),
            rv=os.path.join(_output_dir,"{sample}.rv.scaled.bw")
        threads:
            _threads
        output:
            os.path.join(_output_dir,"{sample}." + _BinSize + ".add.bdg")
        shell:
            """
            bigwigCompare --binSize {_BinSize} -p {threads} --verbose -o {output} \
            --outFileFormat bedgraph --fixedStep \
            --bigwig1 {input.fw} \
            --bigwig2 {input.rv} \
            --operation add
            """
else:
    rule bdg_reverse_rv:
        input:
            os.path.join(_output_dir,"{sample}.rv.sorted.bdg")
        threads:
            _threads
        output:
            temp(os.path.join(_output_dir,"{sample}.rv.sorted_r.bdg"))
        shell:
            "awk -F '\t' -v OFS='\t' '{{$4=-$4; print}}' {input} > {output}"

    rule bdg2bw_fw:
        input:
            os.path.join(_output_dir,"{sample}.fw.sorted.bdg")
        threads:
            _threads
        params:
            gl=config["genomelen"],
            dir_script=config["utility_dir"]
        output:
            os.path.join(_output_dir,"{sample}.fw.raw.bw")
        shell:
            "{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"

    rule bdg2bw_rv:
        input:
            os.path.join(_output_dir,"{sample}.rv.sorted_r.bdg")
        threads:
            _threads
        params:
            gl=config["genomelen"],
            dir_script=config["utility_dir"]
        output:
            os.path.join(_output_dir,"{sample}.rv.raw.bw")
        shell:
            "{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"
246
+
247
+
248
+
249
+
@@ -8,19 +8,21 @@ parser.description='这算一个小彩蛋'
8
8
  # 2022.10.21. v3.0: 文件名长度 chromap -> filtered
9
9
  # 2022.10.26. v4.0: f,r 改成 fw,rv
10
10
  # 2022.01.11. v4.5: 只取 common chromosomes (chr1-chr22, chrX, chrY, chrM)
11
+ # 2025.06.05. v5.0: 增加 ignore_chr 选项,默认只取 common chromosomes
11
12
 
12
13
  # 单文件处理脚本,配合snakemake使用
13
14
 
14
15
  parser.add_argument("-b", "--bed", type=str, metavar="dir_bed" , required=True, help="dir of bed file")
16
+ parser.add_argument('--ignore_chr', action='store_true', help='If not set, only chr1-chr22, chrX, chrY, chrM will be analyzed.')
15
17
 
16
18
  args = parser.parse_args()
17
19
 
18
20
  bed_file = pd.read_csv( args.bed, sep='\t', header=None)
19
21
 
20
- common_chr = pd.Series(['chr']*22).str[:] + pd.Series(range(1,23)).astype(str).str[:]
21
- common_chr = pd.concat([common_chr, pd.Series(['chrX','chrY','chrM'])]).to_numpy()
22
-
23
- bed_file = bed_file[bed_file[0].isin(common_chr)]
22
+ if not args.ignore_chr:
23
+ common_chr = pd.Series(['chr']*22).str[:] + pd.Series(range(1,23)).astype(str).str[:]
24
+ common_chr = pd.concat([common_chr, pd.Series(['chrX','chrY','chrM'])]).to_numpy()
25
+ bed_file = bed_file[bed_file[0].isin(common_chr)]
24
26
 
25
27
  bed_f = bed_file[bed_file[5]=='+']
26
28
  bed_r = bed_file[bed_file[5]=='-']