offtracker 2.10.10__zip → 2.11.0__zip

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {offtracker-2.10.10/offtracker.egg-info → offtracker-2.11.0}/PKG-INFO +1 -1
  2. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker/X_offplot.py +15 -4
  3. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker/_version.py +4 -2
  4. {offtracker-2.10.10 → offtracker-2.11.0/offtracker.egg-info}/PKG-INFO +1 -1
  5. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker.egg-info/SOURCES.txt +0 -1
  6. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker.egg-info/requires.txt +1 -0
  7. {offtracker-2.10.10 → offtracker-2.11.0}/scripts/offtracker_candidates.py +40 -31
  8. offtracker-2.11.0/scripts/offtracker_config.py +107 -0
  9. offtracker-2.11.0/scripts/offtracker_qc.py +73 -0
  10. {offtracker-2.10.10 → offtracker-2.11.0}/setup.py +2 -3
  11. offtracker-2.10.10/scripts/offtracker_config.py +0 -97
  12. offtracker-2.10.10/scripts/offtracker_qc.py +0 -63
  13. {offtracker-2.10.10 → offtracker-2.11.0}/LICENSE.txt +0 -0
  14. {offtracker-2.10.10 → offtracker-2.11.0}/MANIFEST.in +0 -0
  15. {offtracker-2.10.10 → offtracker-2.11.0}/README.md +0 -0
  16. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker/X_offtracker.py +0 -0
  17. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker/X_sequence.py +0 -0
  18. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker/__init__.py +0 -0
  19. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker/snakefile/Snakefile_QC.smk +0 -0
  20. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker/snakefile/Snakefile_offtracker.smk +0 -0
  21. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker/utility/1.1_bed2fr.py +0 -0
  22. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker/utility/1.3_bdg_normalize_v4.0.py +0 -0
  23. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker/utility/bedGraphToBigWig +0 -0
  24. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker/utility/hg38.chrom.sizes +0 -0
  25. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker/utility/mm10.chrom.sizes +0 -0
  26. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker/utility/offtracker_blacklist_hg38.merged.bed +0 -0
  27. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker/utility/offtracker_blacklist_mm10.merged.bed +0 -0
  28. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker.egg-info/dependency_links.txt +0 -0
  29. {offtracker-2.10.10 → offtracker-2.11.0}/offtracker.egg-info/top_level.txt +0 -0
  30. {offtracker-2.10.10 → offtracker-2.11.0}/scripts/offtracker_analysis.py +0 -0
  31. {offtracker-2.10.10 → offtracker-2.11.0}/scripts/offtracker_init.py +0 -0
  32. {offtracker-2.10.10 → offtracker-2.11.0}/scripts/offtracker_plot.py +0 -0
  33. {offtracker-2.10.10 → offtracker-2.11.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: offtracker
3
- Version: 2.10.10
3
+ Version: 2.11.0
4
4
  Summary: Tracking-seq data analysis
5
5
  Home-page: https://github.com/Lan-lab/offtracker
6
6
  Author: Runda Xu
@@ -5,10 +5,21 @@ import matplotlib.pyplot as plt
5
5
  import matplotlib.patches as patches
6
6
  from matplotlib import rcParams
7
7
  # 和用 plt.rcParams or matplotlib.rcParams 是一样的
8
- dict_rc = {
9
- 'pdf.fonttype': 42,
10
- 'font.family': ['Arial']
11
- }
8
+ import sys
9
+ if sys.platform[:3] == 'win':
10
+ dict_rc = {
11
+ 'pdf.fonttype': 42,
12
+ 'font.family': ['Arial']
13
+ }
14
+ elif sys.platform[:5] == 'linux':
15
+ dict_rc = {
16
+ 'pdf.fonttype': 42,
17
+ 'font.family': ['Arial']
18
+ }
19
+ else:
20
+ dict_rc = {
21
+ 'pdf.fonttype': 42,
22
+ }
12
23
  rcParams.update(dict_rc)
13
24
 
14
25
  # 2024.06.03. offtable 添加 threshold 分界线,默认为 None,常用的是 2
@@ -1,4 +1,4 @@
1
- __version__ = "2.10.10"
1
+ __version__ = "2.11.0"
2
2
  # 2023.08.11. v1.1.0 adding a option for not normalizing the bw file
3
3
  # 2023.10.26. v1.9.0 prerelease for v2.0
4
4
  # 2023.10.27. v2.0.0 大更新,还没微调
@@ -36,4 +36,6 @@ __version__ = "2.10.10"
36
36
  # 2025.06.17. v2.10.7 修复翻新代码结构导致的bug
37
37
  # 2025.06.27. v2.10.8 将 chmod 放在了 setup.py 里
38
38
  # 2025.06.28. v2.10.9 现在 pip 都是从 wheel 安装,不再运行 setup.py,所以增加一个 offtracker_init.py
39
- # 2025.06.28. v2.10.10 直接塞 script 里试试
39
+ # 2025.06.28. v2.10.10 直接塞 script 里试试
40
+ # 2025.06.28. v2.10.11 回滚到2.10.9外加修正
41
+ # 2025.07.02. v2.11.0 基于 blast 的缺陷更新 candidates
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: offtracker
3
- Version: 2.10.10
3
+ Version: 2.11.0
4
4
  Summary: Tracking-seq data analysis
5
5
  Home-page: https://github.com/Lan-lab/offtracker
6
6
  Author: Runda Xu
@@ -12,7 +12,6 @@ offtracker.egg-info/SOURCES.txt
12
12
  offtracker.egg-info/dependency_links.txt
13
13
  offtracker.egg-info/requires.txt
14
14
  offtracker.egg-info/top_level.txt
15
- offtracker/utility/bedGraphToBigWig
16
15
  offtracker/snakefile/Snakefile_QC.smk
17
16
  offtracker/snakefile/Snakefile_offtracker.smk
18
17
  offtracker/utility/1.1_bed2fr.py
@@ -1,4 +1,5 @@
1
1
  pandas
2
+ polars>=1.19.0
2
3
  numpy
3
4
  biopython<=1.85
4
5
  pybedtools
@@ -20,6 +20,7 @@ script_folder= os.path.join(script_dir, 'utility')
20
20
 
21
21
  import argparse
22
22
  import pandas as pd
23
+ import polars as pl
23
24
  import pybedtools
24
25
  import multiprocessing as mp
25
26
  from Bio.Blast.Applications import NcbiblastnCommandline
@@ -89,18 +90,20 @@ def main():
89
90
  #########
90
91
  # BLAST #
91
92
  #########
93
+ # 2025.07.02 基于 blast 的缺陷更新
94
+
92
95
  if os.path.isfile(dir_sgRNA_blast):
93
96
  print(f'{dir_sgRNA_blast} exists, skipped.')
94
97
  else:
95
98
  if quick_mode:
96
99
  print('Using quick mode for BLAST')
97
100
  blastx_cline = NcbiblastnCommandline(query=dir_sgRNA_fasta, task='blastn-short',out=dir_sgRNA_blast,
98
- db=blast_db, evalue=10000,outfmt=6, num_threads=n_threads,
99
- gapopen=4, gapextend=2, reward=2, word_size=5, dust='no', soft_masking=False)
101
+ db=blast_db, evalue=100000,outfmt=6, num_threads=n_threads,
102
+ gapopen=4, gapextend=2, reward=2, word_size=6, dust='no', soft_masking=False)
100
103
  else:
101
104
  blastx_cline = NcbiblastnCommandline(query=dir_sgRNA_fasta, task='blastn-short',out=dir_sgRNA_blast,
102
- db=blast_db, evalue=10000,outfmt=6, num_threads=n_threads,
103
- gapopen=4, gapextend=2, reward=2, word_size=4, dust='no', soft_masking=False)
105
+ db=blast_db, evalue=100000,outfmt=6, num_threads=n_threads,
106
+ gapopen=4, gapextend=2, reward=2, word_size=5, dust='no', soft_masking=False)
104
107
  print(f'BLAST for candidate off-target sites of {sgRNA_name}.')
105
108
  blastx_cline()
106
109
  print(f'BLAST finished.')
@@ -109,33 +112,39 @@ def main():
109
112
  # Output bed #
110
113
  ##############
111
114
 
112
- blast_regions = pd.read_csv(dir_sgRNA_blast, sep='\t',header=None)
115
+ # 2025.07.02 基于 blast 的缺陷更新
116
+ len_sgRNA = len(sgRNA_seq)
117
+ blast_regions = pl.read_csv(dir_sgRNA_blast, separator='\t',has_header=False)
113
118
  blast_regions.columns = ['query acc.','chr','% identity','alignment length','mismatches','gap opens','q. start','q. end','st','ed','evalue','bit score']
114
- blast_regions = blast_regions[blast_regions.evalue<10000]
115
-
116
- # reverse strand
117
- blast_regions['reverse'] = (blast_regions['st']>blast_regions['ed']).astype(int)
118
- blast_regions_f = blast_regions[blast_regions.reverse==0].copy()
119
- blast_regions_r = blast_regions[blast_regions.reverse==1].copy()
120
- temp = blast_regions_r['st'].copy()
121
- blast_regions_r['st'] = blast_regions_r['ed']
122
- blast_regions_r['ed'] = temp
123
- blast_regions = pd.concat([blast_regions_f, blast_regions_r])
124
- # sort and add location
125
- blast_regions = blast_regions.sort_values('evalue').reset_index(drop=True)
126
- blast_regions['location']=blast_regions['chr'].str[:] + ':' + blast_regions['st'].astype(str).str[:] + '-' + blast_regions['ed'].astype(str).str[:]
127
- blast_regions = blast_regions.drop_duplicates(subset='location').copy()
128
-
129
- # alignment length 筛选
130
- len_sgRNA=len(sgRNA_seq)
131
- min_len = len_sgRNA-8
132
- blast_regions = blast_regions[blast_regions['alignment length']>=min_len].copy().reset_index(drop=True)
133
- blast_regions = blast_regions.reindex(columns = ['chr', 'st', 'ed' , 'query acc.', '% identity', 'alignment length', 'mismatches',
134
- 'gap opens', 'q. start', 'q. end', 'evalue', 'bit score', 'reverse', 'location'] )
135
-
119
+
120
+ # reverse strand
121
+ blast_regions = blast_regions.with_columns((pl.col('st') > pl.col('ed')).cast(pl.Int8).alias('reverse'))
122
+ blast_regions_f = blast_regions.filter(pl.col('reverse') == 0)
123
+ blast_regions_r = blast_regions.filter(pl.col('reverse') == 1)
124
+ blast_regions_r = blast_regions_r.with_columns([
125
+ pl.col('ed').alias('st'),
126
+ pl.col('st').alias('ed')
127
+ ])
128
+ blast_regions = pl.concat([blast_regions_f, blast_regions_r])
129
+
130
+ # add location
131
+ blast_regions = blast_regions.with_column(
132
+ (pl.col('chr') + ':' + pl.col('st').cast(str) + '-' + pl.col('ed').cast(str)).alias('location')
133
+ )
134
+ # filter, sort, dedup
135
+ blast_regions = blast_regions.with_columns(mis=(len_sgRNA - 1 - pl.col('q. end')+pl.col('q. start')+pl.col('mismatches')+pl.col('gap opens')).cast(pl.Int8))
136
+ blast_regions = blast_regions.with_columns(mis2=(len_sgRNA - pl.col('alignment length')*pl.col('% identity')/100).round().cast(pl.Int8))
137
+ blast_regions = blast_regions.filter((pl.col('mis')<8)|(pl.col('mis2')<8))
138
+ blast_regions = blast_regions.sort('mis').unique('location',keep='first', maintain_order=True)
139
+ blast_regions = blast_regions.select([
140
+ 'chr', 'st', 'ed', 'query acc.', '% identity', 'alignment length', 'mismatches',
141
+ 'gap opens', 'q. start', 'q. end', 'evalue', 'bit score', 'reverse', 'location', 'mis', 'mis2'
142
+ ])
143
+
136
144
  # 输出 bed 用于后续 alignment score 计算
137
- blast_regions_bed = blast_regions[['chr','st','ed']]
138
- xseq.write_bed(blast_regions_bed, dir_sgRNA_bed)
145
+ blast_regions_bed = blast_regions.select(['chr', 'st', 'ed'])
146
+ blast_regions_bed.write_csv(dir_sgRNA_bed, separator='\t', has_header=False)
147
+
139
148
  # 对 bed 进行排序但不合并
140
149
  a = pybedtools.BedTool(dir_sgRNA_bed)
141
150
  a.sort(g=dir_chrom_sizes).saveas( dir_sgRNA_bed )
@@ -155,10 +164,10 @@ def main():
155
164
  bed_short = xseq.X_readbed(dir_sgRNA_bed)
156
165
  bed_short = bed_short[bed_short['chr'].isin(common_chr)].copy()
157
166
  bed_short['midpoint'] = ((bed_short['st'] + bed_short['ed'])/2).astype(int)
158
- bed_short['st'] = bed_short['midpoint'] - half_width
167
+ bed_short['st'] = bed_short['midpoint'] - half_width
159
168
  bed_short['ed'] = bed_short['midpoint'] + half_width
160
169
  bed_short.loc[bed_short['st']<0,'st']=0
161
- bed_short = bed_short.drop_duplicates()
170
+ bed_short = bed_short.drop_duplicates()
162
171
 
163
172
  #########
164
173
  # 根据 bed_f 位点 ed 前后 half_width 取基因组序列
@@ -0,0 +1,107 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # 2023.08.11. adding a option for not normalizing the bw file
5
+ # 2025.05.22. refine the structure
6
+ # 2025.06.05. 增加 ignore_chr 选项,默认只取 common chromosomes,用于 1.1_bed2fr.py
7
+
8
+ import argparse
9
+ import os, glob, yaml
10
+ import pandas as pd
11
+ import shutil, re
12
+ import offtracker
13
+ import offtracker.X_sequence as xseq
14
+ script_dir = os.path.abspath(os.path.dirname(offtracker.__file__))
15
+ utility_dir = os.path.join(script_dir, 'utility')
16
+ file_path = os.path.join(utility_dir, 'bedGraphToBigWig')
17
+ file_stat = os.stat(file_path)
18
+ file_mode = oct(file_stat.st_mode & 0o777)
19
+ if file_mode != '0o755':
20
+ try:
21
+ os.chmod( os.path.join(utility_dir, 'bedGraphToBigWig'), 0o755)
22
+ except:
23
+ print('offtracker may be installed in root but not initialized. Please run "offtracker_init.py" with root permission first.')
24
+
25
+ ###
26
+ def main():
27
+ parser = argparse.ArgumentParser()
28
+ parser.description='Mapping fastq files of Tracking-seq.'
29
+ parser.add_argument('-f','--folder', type=str, required=True, help='Directory of the input folder' )
30
+ parser.add_argument('-r','--ref' , type=str, required=True, help='The fasta file of reference genome')
31
+ parser.add_argument('-i','--index' , type=str, required=True, help='The index file of chromap')
32
+ parser.add_argument('-g','--genome', type=str, required=True, help='File of chromosome sizes, or "hg38", "mm10" ')
33
+ parser.add_argument('-o','--outdir', type=str, default='same', help='The output folder')
34
+ parser.add_argument('--subfolder' , type=int, default=0, help='subfolder level')
35
+ parser.add_argument('-t','--thread', type=int, default=4, help='Number of threads to be used')
36
+ parser.add_argument('--blacklist' , type=str, default='same', help='Blacklist of genome regions in bed format. "none" for no filter')
37
+ parser.add_argument('--binsize' , type=str, default=100, help='Bin size for calculating bw residue')
38
+ parser.add_argument('--normalize' , type=str, default='True', help='Whether to normalize the BigWig file. "True" or "False"')
39
+ parser.add_argument('--ignore_chr' , action='store_true', help='If not set, only chr1-chr22, chrX, chrY, chrM will be analyzed.')
40
+
41
+
42
+ args = parser.parse_args()
43
+
44
+ if (args.genome == 'hg38') or (args.genome == 'mm10'):
45
+ dir_chrom_sizes = os.path.join(utility_dir, f'{args.genome}.chrom.sizes')
46
+ else:
47
+ dir_chrom_sizes = args.genome
48
+
49
+ if (args.normalize != 'True') & (args.normalize != 'False'):
50
+ raise ValueError('Please provide "True" or "False" for "--normalize"')
51
+
52
+ if args.blacklist == 'same':
53
+ assert ((args.genome == 'hg38') or (args.genome == 'mm10')), 'Please provide blacklist file, or "--blacklist none" to skip'
54
+ args.blacklist = args.genome
55
+
56
+ if (args.blacklist == 'hg38') or (args.blacklist == 'mm10'):
57
+ blacklist = os.path.join(utility_dir, f'offtracker_blacklist_{args.blacklist}.merged.bed')
58
+ else:
59
+ blacklist = args.blacklist
60
+
61
+ if args.outdir == 'same':
62
+ args.outdir = args.folder
63
+ else:
64
+ if not os.path.exists(args.outdir):
65
+ os.makedirs(args.outdir)
66
+
67
+ if args.ignore_chr:
68
+ args.ignore_chr = '--ignore_chr'
69
+ else:
70
+ args.ignore_chr = ''
71
+
72
+ # 搜索 folder 的 n级子目录下的所有 fastq/fastq.gz/fq/fq.gz 文件
73
+ sample_names, files_R1, files_R2 = xseq.detect_fastq(args.folder, n_subfolder=args.subfolder)
74
+
75
+ assert not isinstance(sample_names, str), 'No fastq file is detected!'
76
+
77
+ dict_yaml = {
78
+ # fastq 信息
79
+ 'files_R1':dict(zip(sample_names,files_R1)),
80
+ 'files_R2':dict(zip(sample_names,files_R2)), # 单端 files_R2=[] 结果会自动为 {}
81
+ # 输入输出文件夹
82
+ 'input_dir':args.folder,
83
+ 'output_dir':args.outdir,
84
+ # 运行参数
85
+ 'thread':args.thread,
86
+ 'index':args.index,
87
+ 'fasta':args.ref,
88
+ 'binsize':args.binsize,
89
+ 'blacklist':blacklist,
90
+ 'genomelen':dir_chrom_sizes,
91
+ 'normalize':args.normalize,
92
+ 'utility_dir':utility_dir,
93
+ 'ignore_chr':args.ignore_chr,
94
+ }
95
+
96
+ with open( os.path.join(args.outdir,'config.yaml'), 'w') as outfile:
97
+ yaml.dump(dict_yaml, outfile, default_flow_style=False)
98
+
99
+ snakefile = os.path.join(script_dir, 'snakefile/Snakefile_offtracker.smk')
100
+ shutil.copy(snakefile, os.path.join(args.outdir,'Snakefile'))
101
+
102
+ return 'config_main finished'
103
+
104
+ if __name__ == '__main__' :
105
+ result = main()
106
+ print(result)
107
+
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ THIS_VERSION = '0.4.1'
5
+
6
+ import argparse
7
+ import os, glob, yaml
8
+ import pandas as pd
9
+ import shutil, re
10
+ import offtracker
11
+ import offtracker.X_sequence as xseq
12
+
13
+ script_dir = os.path.abspath(os.path.dirname(offtracker.__file__))
14
+ utility_dir = os.path.join(script_dir, 'utility')
15
+ file_path = os.path.join(utility_dir, 'bedGraphToBigWig')
16
+ file_stat = os.stat(file_path)
17
+ file_mode = oct(file_stat.st_mode & 0o777)
18
+ if file_mode != '0o755':
19
+ try:
20
+ os.chmod( os.path.join(utility_dir, 'bedGraphToBigWig'), 0o755)
21
+ except:
22
+ print('offtracker may be installed in root but not initialized. Please run "offtracker_init.py" with root permission first.')
23
+
24
+ ###
25
+ def main():
26
+ parser = argparse.ArgumentParser()
27
+ parser.description=f'xbulk_qc v{THIS_VERSION}. QC and trim fastq files.'
28
+ parser.add_argument('-f','--folder', type=str, required=True, help='Directory of the input folder' )
29
+ parser.add_argument('-o','--outdir', type=str, default='same', help='The output folder')
30
+ parser.add_argument('--subfolder' , type=int, default=0, help='subfolder level')
31
+ parser.add_argument('-t','--thread', type=int, default=8, help='Number of threads to be used')
32
+
33
+ args = parser.parse_args()
34
+
35
+ # 自动化的参数调整和报错
36
+ if args.outdir == 'same':
37
+ args.outdir = os.path.join(args.folder,'Trimmed_data')
38
+ if not os.path.exists( args.outdir ):
39
+ os.makedirs( args.outdir )
40
+ else:
41
+ if not os.path.exists(args.outdir):
42
+ os.makedirs(args.outdir)
43
+
44
+ # 搜索 folder 的 n级子目录下的所有 fastq/fastq.gz/fq/fq.gz 文件
45
+ sample_names, files_R1, files_R2 = xseq.detect_fastq(args.folder, n_subfolder=args.subfolder)
46
+
47
+ assert not isinstance(sample_names, str), 'No fastq file is detected!'
48
+
49
+ dict_yaml = {
50
+ # fastq 信息
51
+ 'files_R1':dict(zip(sample_names,files_R1)),
52
+ 'files_R2':dict(zip(sample_names,files_R2)), # 单端 files_R2=[] 结果会自动为 {}
53
+ # 输入输出文件夹
54
+ 'input_dir':args.folder,
55
+ 'output_dir':args.outdir,
56
+ # 运行参数
57
+ 'thread':args.thread,
58
+ 'utility_dir':utility_dir
59
+ }
60
+
61
+
62
+ with open( os.path.join(args.outdir,'config.yaml'), 'w', encoding='utf-8') as outfile:
63
+ yaml.dump(dict_yaml, outfile, default_flow_style=False)
64
+
65
+ snakefile = os.path.join(script_dir, 'snakefile/Snakefile_QC.smk')
66
+ shutil.copy(snakefile, os.path.join(args.outdir,'Snakefile'))
67
+
68
+ return 'config_qc finished'
69
+
70
+ if __name__ == '__main__' :
71
+ result = main()
72
+ print(result)
73
+
@@ -26,7 +26,7 @@ with open(os.path.join(here, package_folder, '_version.py'),'r',encoding='utf-8'
26
26
 
27
27
  # requirements
28
28
  REQUIRED = [
29
- 'pandas', 'numpy', 'biopython<=1.85', 'pybedtools', 'pyyaml',
29
+ 'pandas', 'polars>=1.19.0', 'numpy', 'biopython<=1.85', 'pybedtools', 'pyyaml',
30
30
  ]
31
31
  ## pybedtools may be not supported in Windows
32
32
 
@@ -49,8 +49,7 @@ setup(
49
49
  python_requires=REQUIRES_PYTHON,
50
50
  packages=['offtracker'],
51
51
  package_data={'offtracker': ['snakefile/*','utility/*']},
52
- scripts = [ 'offtracker/utility/bedGraphToBigWig',
53
- 'scripts/offtracker_init.py',
52
+ scripts = [ 'scripts/offtracker_init.py',
54
53
  'scripts/offtracker_qc.py',
55
54
  'scripts/offtracker_config.py',
56
55
  'scripts/offtracker_candidates.py',
@@ -1,97 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
-
4
- # 2023.08.11. adding a option for not normalizing the bw file
5
- # 2025.05.22. refine the structure
6
- # 2025.06.05. 增加 ignore_chr 选项,默认只取 common chromosomes,用于 1.1_bed2fr.py
7
-
8
- import argparse
9
- import os, glob, yaml
10
- import pandas as pd
11
- import shutil, re
12
- import offtracker
13
- import offtracker.X_sequence as xseq
14
- script_dir = os.path.abspath(os.path.dirname(offtracker.__file__))
15
- utility_dir = os.path.join(script_dir, 'utility')
16
- # try:
17
- # os.chmod( os.path.join(utility_dir, 'bedGraphToBigWig'), 0o755)
18
- # except:
19
- # print('offtracker may be installed in root but not initialized. Please run "offtracker_init.py" with root permission first.')
20
-
21
- ###
22
- parser = argparse.ArgumentParser()
23
- parser.description='Mapping fastq files of Tracking-seq.'
24
- parser.add_argument('-f','--folder', type=str, required=True, help='Directory of the input folder' )
25
- parser.add_argument('-r','--ref' , type=str, required=True, help='The fasta file of reference genome')
26
- parser.add_argument('-i','--index' , type=str, required=True, help='The index file of chromap')
27
- parser.add_argument('-g','--genome', type=str, required=True, help='File of chromosome sizes, or "hg38", "mm10" ')
28
- parser.add_argument('-o','--outdir', type=str, default='same', help='The output folder')
29
- parser.add_argument('--subfolder' , type=int, default=0, help='subfolder level')
30
- parser.add_argument('-t','--thread', type=int, default=4, help='Number of threads to be used')
31
- parser.add_argument('--blacklist' , type=str, default='same', help='Blacklist of genome regions in bed format. "none" for no filter')
32
- parser.add_argument('--binsize' , type=str, default=100, help='Bin size for calculating bw residue')
33
- parser.add_argument('--normalize' , type=str, default='True', help='Whether to normalize the BigWig file. "True" or "False"')
34
- parser.add_argument('--ignore_chr' , action='store_true', help='If not set, only chr1-chr22, chrX, chrY, chrM will be analyzed.')
35
-
36
-
37
- args = parser.parse_args()
38
-
39
- if (args.genome == 'hg38') or (args.genome == 'mm10'):
40
- dir_chrom_sizes = os.path.join(utility_dir, f'{args.genome}.chrom.sizes')
41
- else:
42
- dir_chrom_sizes = args.genome
43
-
44
- if (args.normalize != 'True') & (args.normalize != 'False'):
45
- raise ValueError('Please provide "True" or "False" for "--normalize"')
46
-
47
- if args.blacklist == 'same':
48
- assert ((args.genome == 'hg38') or (args.genome == 'mm10')), 'Please provide blacklist file, or "--blacklist none" to skip'
49
- args.blacklist = args.genome
50
-
51
- if (args.blacklist == 'hg38') or (args.blacklist == 'mm10'):
52
- blacklist = os.path.join(utility_dir, f'offtracker_blacklist_{args.blacklist}.merged.bed')
53
- else:
54
- blacklist = args.blacklist
55
-
56
- if args.outdir == 'same':
57
- args.outdir = args.folder
58
- else:
59
- if not os.path.exists(args.outdir):
60
- os.makedirs(args.outdir)
61
-
62
- if args.ignore_chr:
63
- args.ignore_chr = '--ignore_chr'
64
- else:
65
- args.ignore_chr = ''
66
-
67
- # 搜索 folder 的 n级子目录下的所有 fastq/fastq.gz/fq/fq.gz 文件
68
- sample_names, files_R1, files_R2 = xseq.detect_fastq(args.folder, n_subfolder=args.subfolder)
69
-
70
- assert not isinstance(sample_names, str), 'No fastq file is detected!'
71
-
72
- dict_yaml = {
73
- # fastq 信息
74
- 'files_R1':dict(zip(sample_names,files_R1)),
75
- 'files_R2':dict(zip(sample_names,files_R2)), # 单端 files_R2=[] 结果会自动为 {}
76
- # 输入输出文件夹
77
- 'input_dir':args.folder,
78
- 'output_dir':args.outdir,
79
- # 运行参数
80
- 'thread':args.thread,
81
- 'index':args.index,
82
- 'fasta':args.ref,
83
- 'binsize':args.binsize,
84
- 'blacklist':blacklist,
85
- 'genomelen':dir_chrom_sizes,
86
- 'normalize':args.normalize,
87
- 'utility_dir':utility_dir,
88
- 'ignore_chr':args.ignore_chr,
89
- }
90
-
91
- with open( os.path.join(args.outdir,'config.yaml'), 'w') as outfile:
92
- yaml.dump(dict_yaml, outfile, default_flow_style=False)
93
-
94
- snakefile = os.path.join(script_dir, 'snakefile/Snakefile_offtracker.smk')
95
- shutil.copy(snakefile, os.path.join(args.outdir,'Snakefile'))
96
-
97
-
@@ -1,63 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
-
4
- THIS_VERSION = '0.4.1'
5
-
6
- import argparse
7
- import os, glob, yaml
8
- import pandas as pd
9
- import shutil, re
10
- import offtracker
11
- import offtracker.X_sequence as xseq
12
-
13
- script_dir = os.path.abspath(os.path.dirname(offtracker.__file__))
14
- utility_dir = os.path.join(script_dir, 'utility')
15
- # try:
16
- # os.chmod( os.path.join(utility_dir, 'bedGraphToBigWig'), 0o755)
17
- # except:
18
- # print('offtracker may be installed in root but not initialized. Please run "offtracker_init.py" with root permission first.')
19
-
20
- ###
21
- parser = argparse.ArgumentParser()
22
- parser.description=f'xbulk_qc v{THIS_VERSION}. QC and trim fastq files.'
23
- parser.add_argument('-f','--folder', type=str, required=True, help='Directory of the input folder' )
24
- parser.add_argument('-o','--outdir', type=str, default='same', help='The output folder')
25
- parser.add_argument('--subfolder' , type=int, default=0, help='subfolder level')
26
- parser.add_argument('-t','--thread', type=int, default=8, help='Number of threads to be used')
27
-
28
- args = parser.parse_args()
29
-
30
- # 自动化的参数调整和报错
31
- if args.outdir == 'same':
32
- args.outdir = os.path.join(args.folder,'Trimmed_data')
33
- if not os.path.exists( args.outdir ):
34
- os.makedirs( args.outdir )
35
- else:
36
- if not os.path.exists(args.outdir):
37
- os.makedirs(args.outdir)
38
-
39
- # 搜索 folder 的 n级子目录下的所有 fastq/fastq.gz/fq/fq.gz 文件
40
- sample_names, files_R1, files_R2 = xseq.detect_fastq(args.folder, n_subfolder=args.subfolder)
41
-
42
- assert not isinstance(sample_names, str), 'No fastq file is detected!'
43
-
44
- dict_yaml = {
45
- # fastq 信息
46
- 'files_R1':dict(zip(sample_names,files_R1)),
47
- 'files_R2':dict(zip(sample_names,files_R2)), # 单端 files_R2=[] 结果会自动为 {}
48
- # 输入输出文件夹
49
- 'input_dir':args.folder,
50
- 'output_dir':args.outdir,
51
- # 运行参数
52
- 'thread':args.thread,
53
- 'utility_dir':utility_dir
54
- }
55
-
56
-
57
- with open( os.path.join(args.outdir,'config.yaml'), 'w', encoding='utf-8') as outfile:
58
- yaml.dump(dict_yaml, outfile, default_flow_style=False)
59
-
60
- snakefile = os.path.join(script_dir, 'snakefile/Snakefile_QC.smk')
61
- shutil.copy(snakefile, os.path.join(args.outdir,'Snakefile'))
62
-
63
-
File without changes
File without changes
File without changes
File without changes