PyPI - offtracker - Versions diffs - 2.10.11__zip → 2.11.0__zip - Mend

offtracker 2.10.11zip → 2.11.0zip

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (31)

{offtracker-2.10.11/offtracker.egg-info → offtracker-2.11.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: offtracker
-Version: 2.10.11
+Version: 2.11.0
 Summary: Tracking-seq data analysis
 Home-page: https://github.com/Lan-lab/offtracker
 Author: Runda Xu

{offtracker-2.10.11 → offtracker-2.11.0}/offtracker/X_offplot.py RENAMED Viewed

@@ -5,10 +5,21 @@ import matplotlib.pyplot as plt
 import matplotlib.patches as patches
 from matplotlib import rcParams
 # 和用 plt.rcParams or matplotlib.rcParams 是一样的
-dict_rc = {
-    'pdf.fonttype': 42,
-    'font.family': ['Arial']
-}
+import sys
+if sys.platform[:3] == 'win':
+    dict_rc = {
+        'pdf.fonttype': 42,
+        'font.family': ['Arial']
+    }
+elif sys.platform[:5] == 'linux':
+    dict_rc = {
+        'pdf.fonttype': 42,
+        'font.family': ['Arial']
+    }
+else:
+    dict_rc = {
+        'pdf.fonttype': 42,
+    }
 rcParams.update(dict_rc)
 # 2024.06.03. offtable 添加 threshold 分界线，默认为 None，常用的是 2

{offtracker-2.10.11 → offtracker-2.11.0}/offtracker/_version.py RENAMED Viewed

@@ -1,4 +1,4 @@
-__version__ = "2.10.11"
+__version__ = "2.11.0"
 # 2023.08.11. v1.1.0	adding a option for not normalizing the bw file
 # 2023.10.26. v1.9.0	prerelease for v2.0
 # 2023.10.27. v2.0.0	大更新，还没微调
@@ -37,4 +37,5 @@ __version__ = "2.10.11"
 # 2025.06.27. v2.10.8   将 chmod 放在了 setup.py 里
 # 2025.06.28. v2.10.9   现在 pip 都是从 wheel 安装，不再运行 setup.py，所以增加一个 offtracker_init.py
 # 2025.06.28. v2.10.10  直接塞 script 里试试
-# 2025.06.28. v2.10.11  回滚到2.10.9外加修正
+# 2025.06.28. v2.10.11  回滚到2.10.9外加修正
+# 2025.07.02. v2.11.0  基于 blast 的缺陷更新 candidates

{offtracker-2.10.11 → offtracker-2.11.0/offtracker.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: offtracker
-Version: 2.10.11
+Version: 2.11.0
 Summary: Tracking-seq data analysis
 Home-page: https://github.com/Lan-lab/offtracker
 Author: Runda Xu

{offtracker-2.10.11 → offtracker-2.11.0}/offtracker.egg-info/requires.txt RENAMED Viewed

@@ -1,4 +1,5 @@
 pandas
+polars>=1.19.0
 numpy
 biopython<=1.85
 pybedtools

{offtracker-2.10.11 → offtracker-2.11.0}/scripts/offtracker_candidates.py RENAMED Viewed

@@ -20,6 +20,7 @@ script_folder= os.path.join(script_dir, 'utility')
 import argparse
 import pandas as pd
+import polars as pl
 import pybedtools
 import multiprocessing as mp
 from Bio.Blast.Applications import NcbiblastnCommandline
@@ -89,18 +90,20 @@ def main():
     #########
     # BLAST #
     #########
+    # 2025.07.02 基于 blast 的缺陷更新
     if os.path.isfile(dir_sgRNA_blast):
         print(f'{dir_sgRNA_blast} exists, skipped.')
     else:
         if quick_mode:
             print('Using quick mode for BLAST')
             blastx_cline = NcbiblastnCommandline(query=dir_sgRNA_fasta, task='blastn-short',out=dir_sgRNA_blast,
-                                                db=blast_db, evalue=10000,outfmt=6, num_threads=n_threads,
-                                                gapopen=4, gapextend=2, reward=2, word_size=5, dust='no', soft_masking=False)
+                                                db=blast_db, evalue=100000,outfmt=6, num_threads=n_threads,
+                                                gapopen=4, gapextend=2, reward=2, word_size=6, dust='no', soft_masking=False)
         else:
             blastx_cline = NcbiblastnCommandline(query=dir_sgRNA_fasta, task='blastn-short',out=dir_sgRNA_blast,
-                                                db=blast_db, evalue=10000,outfmt=6, num_threads=n_threads,
-                                                gapopen=4, gapextend=2, reward=2, word_size=4, dust='no', soft_masking=False)
+                                                db=blast_db, evalue=100000,outfmt=6, num_threads=n_threads,
+                                                gapopen=4, gapextend=2, reward=2, word_size=5, dust='no', soft_masking=False)
         print(f'BLAST for candidate off-target sites of {sgRNA_name}.')
         blastx_cline()
         print(f'BLAST finished.')
@@ -109,33 +112,39 @@ def main():
     # Output bed #
     ##############
-    blast_regions = pd.read_csv(dir_sgRNA_blast, sep='\t',header=None)
+    # 2025.07.02 基于 blast 的缺陷更新
+    len_sgRNA = len(sgRNA_seq)
+    blast_regions = pl.read_csv(dir_sgRNA_blast, separator='\t',has_header=False)
     blast_regions.columns = ['query acc.','chr','% identity','alignment length','mismatches','gap opens','q. start','q. end','st','ed','evalue','bit score']
-    blast_regions = blast_regions[blast_regions.evalue<10000]
-    # reverse strand
-    blast_regions['reverse'] = (blast_regions['st']>blast_regions['ed']).astype(int)
-    blast_regions_f = blast_regions[blast_regions.reverse==0].copy()
-    blast_regions_r = blast_regions[blast_regions.reverse==1].copy()
-    temp = blast_regions_r['st'].copy()
-    blast_regions_r['st'] = blast_regions_r['ed']
-    blast_regions_r['ed'] = temp
-    blast_regions = pd.concat([blast_regions_f, blast_regions_r])
-    # sort and add location
-    blast_regions = blast_regions.sort_values('evalue').reset_index(drop=True)
-    blast_regions['location']=blast_regions['chr'].str[:] + ':' + blast_regions['st'].astype(str).str[:] + '-' + blast_regions['ed'].astype(str).str[:]
-    blast_regions = blast_regions.drop_duplicates(subset='location').copy()
-    # alignment length 筛选
-    len_sgRNA=len(sgRNA_seq)
-    min_len = len_sgRNA-8
-    blast_regions = blast_regions[blast_regions['alignment length']>=min_len].copy().reset_index(drop=True)
-    blast_regions = blast_regions.reindex(columns = ['chr', 'st', 'ed' , 'query acc.', '% identity', 'alignment length', 'mismatches',
-        'gap opens', 'q. start', 'q. end', 'evalue', 'bit score', 'reverse', 'location'] )
+    # reverse strand
+    blast_regions = blast_regions.with_columns((pl.col('st') > pl.col('ed')).cast(pl.Int8).alias('reverse'))
+    blast_regions_f = blast_regions.filter(pl.col('reverse') == 0)
+    blast_regions_r = blast_regions.filter(pl.col('reverse') == 1)
+    blast_regions_r = blast_regions_r.with_columns([
+        pl.col('ed').alias('st'),
+        pl.col('st').alias('ed')
+    ])
+    blast_regions = pl.concat([blast_regions_f, blast_regions_r])
+    # add location
+    blast_regions = blast_regions.with_columns(  # plural: DataFrame.with_column does not exist in polars >= 1.x
+        (pl.col('chr') + ':' + pl.col('st').cast(str) + '-' + pl.col('ed').cast(str)).alias('location')
+    )
+    # filter, sort, dedup
+    blast_regions = blast_regions.with_columns(mis=(len_sgRNA - 1 - pl.col('q. end')+pl.col('q. start')+pl.col('mismatches')+pl.col('gap opens')).cast(pl.Int8))
+    blast_regions = blast_regions.with_columns(mis2=(len_sgRNA - pl.col('alignment length')*pl.col('% identity')/100).round().cast(pl.Int8))
+    blast_regions = blast_regions.filter((pl.col('mis')<8)|(pl.col('mis2')<8))
+    blast_regions = blast_regions.sort('mis').unique('location',keep='first', maintain_order=True)
+    blast_regions = blast_regions.select([
+        'chr', 'st', 'ed', 'query acc.', '% identity', 'alignment length', 'mismatches',
+        'gap opens', 'q. start', 'q. end', 'evalue', 'bit score', 'reverse', 'location', 'mis', 'mis2'
+    ])
     # 输出 bed 用于后续 alignment score 计算
-    blast_regions_bed = blast_regions[['chr','st','ed']]
-    xseq.write_bed(blast_regions_bed, dir_sgRNA_bed)
+    blast_regions_bed = blast_regions.select(['chr', 'st', 'ed'])
+    blast_regions_bed.write_csv(dir_sgRNA_bed, separator='\t', has_header=False)
     # 对 bed 进行排序但不合并
     a = pybedtools.BedTool(dir_sgRNA_bed)
     a.sort(g=dir_chrom_sizes).saveas( dir_sgRNA_bed )
@@ -155,10 +164,10 @@ def main():
     bed_short = xseq.X_readbed(dir_sgRNA_bed)
     bed_short = bed_short[bed_short['chr'].isin(common_chr)].copy()
     bed_short['midpoint'] = ((bed_short['st'] + bed_short['ed'])/2).astype(int)
-    bed_short['st'] = bed_short['midpoint'] - half_width
+    bed_short['st'] = bed_short['midpoint'] - half_width
     bed_short['ed'] = bed_short['midpoint'] + half_width
     bed_short.loc[bed_short['st']<0,'st']=0
-    bed_short = bed_short.drop_duplicates()
+    bed_short = bed_short.drop_duplicates()
     #########
     # 根据 bed_f 位点 ed 前后 half_width 取基因组序列

{offtracker-2.10.11 → offtracker-2.11.0}/setup.py RENAMED Viewed

@@ -26,7 +26,7 @@ with open(os.path.join(here, package_folder, '_version.py'),'r',encoding='utf-8'
 # requirements
 REQUIRED = [
-   'pandas', 'numpy', 'biopython<=1.85', 'pybedtools', 'pyyaml',
+   'pandas', 'polars>=1.19.0', 'numpy', 'biopython<=1.85', 'pybedtools', 'pyyaml',
 ]
 ## pybedtools may be not supported in Windows