RiboParser 0.2.2__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {riboparser-0.2.2 → riboparser-0.2.5}/PKG-INFO +283 -15
- {riboparser-0.2.2 → riboparser-0.2.5}/README.md +282 -14
- {riboparser-0.2.2 → riboparser-0.2.5}/RiboParser.egg-info/PKG-INFO +283 -15
- {riboparser-0.2.2 → riboparser-0.2.5}/RiboParser.egg-info/SOURCES.txt +22 -10
- {riboparser-0.2.2 → riboparser-0.2.5}/RiboParser.egg-info/entry_points.txt +3 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/pyproject.toml +19 -1
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/__init__.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/bedgraph/bg2meta.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/bedgraph/rpm_smooth.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/bowtie/merge_bwt_log.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/fa_gc_sum.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/fa_len_flt.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/fa_len_sum.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/fa_split.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/line_feed.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/nt2aa.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/rand_seq.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/retrieve_seq.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/revs.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/fq2fa.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/fq2txt.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/fq_len_flt.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/fq_len_sum.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/fq_length.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/fq_split.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/fq_trim.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/phred_quality.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/simulate_fastq.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_cdt.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_coverage.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_cst.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_digestion.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_dst_list.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_length.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_metagene.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_occupancy.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_odd_ratio.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_offset.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_offset_detail.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_offset_end.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_pausing.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_period.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_quant.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_saturation.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/oligo/get_overlap_seq.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/oligo/get_tissue_freq.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/oligo/get_win_seq.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/ribocode/ribocode_bed_format.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/ribotish/ribotish_format.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/rsem/merge_rsem.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/unix/dos2unix.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/__init__.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/data/RiboParser.py +5 -5
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/ArgsParser.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Bam2Wig.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/BamFilter.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/CDT.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/CST.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Codon.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Coefficient_of_Variation.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Coverage.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Cumulative_CoV.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Density.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Digestion.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Ensembl_Ref.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/GenePred.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/MetaCodon.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Metaplot.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Occupancy.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Odd_Ratio.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Offset.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Offset_RSBM.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Pausing.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Percentage.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Periodicity.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Quality.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Quant.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/RNA.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/RPFs.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Retrieve.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Ribo.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Shift.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Shuffle.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/__init__.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/riboparser.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rna_Density.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rna_Offset.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Bam2bw.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Bam_Filter.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_CDT.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_CST.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Check.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_CoV.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Corr.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Coverage.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Cumulative_CoV.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Density.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Digest.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Geneplot.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Merge.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Meta_Codon.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Metaplot.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Occupancy.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Odd_Ratio.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Offset.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Offset_RSBM.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Pausing.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Percent.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Periodicity.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Quant.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Reference.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Retrieve.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Shift.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Shuffle.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_end.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/serp/Properties.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/serp/SeRP.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/serp_overlap.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/serp_peak.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/serp_properties.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/smorf/__init__.py +1 -1
- riboparser-0.2.5/utils/smorf/smorf_classifier.py +101 -0
- riboparser-0.2.2/utils/smorf/coordinate.py → riboparser-0.2.5/utils/smorf/smorf_coordinate.py +2 -2
- riboparser-0.2.5/utils/smorf/smorf_filter.py +257 -0
- riboparser-0.2.2/utils/smorf/genepred.py → riboparser-0.2.5/utils/smorf/smorf_genepred.py +1 -1
- riboparser-0.2.5/utils/smorf/smorf_kozak.py +470 -0
- riboparser-0.2.5/utils/smorf/smorf_overlap.py +234 -0
- riboparser-0.2.5/utils/smorf/smorf_pipeline.py +287 -0
- riboparser-0.2.5/utils/smorf/smorf_riboseq_constants.py +50 -0
- riboparser-0.2.5/utils/smorf/smorf_riboseq_density.py +185 -0
- riboparser-0.2.5/utils/smorf/smorf_riboseq_integrate.py +462 -0
- riboparser-0.2.5/utils/smorf/smorf_riboseq_io.py +318 -0
- riboparser-0.2.5/utils/smorf/smorf_riboseq_metrics.py +289 -0
- riboparser-0.2.5/utils/smorf/smorf_riboseq_pipeline.py +218 -0
- riboparser-0.2.5/utils/smorf/smorf_riboseq_profile.py +97 -0
- riboparser-0.2.2/utils/smorf/scanner.py → riboparser-0.2.5/utils/smorf/smorf_scanner.py +3 -3
- riboparser-0.2.2/utils/smorf/writer.py → riboparser-0.2.5/utils/smorf/smorf_writer.py +1 -1
- riboparser-0.2.5/utils/smorf_evidence.py +98 -0
- riboparser-0.2.5/utils/smorf_filter.py +322 -0
- riboparser-0.2.5/utils/smorf_integrate.py +78 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/smorf_scanner.py +9 -0
- riboparser-0.2.2/utils/smorf/classifier.py +0 -76
- riboparser-0.2.2/utils/smorf/overlap.py +0 -76
- riboparser-0.2.2/utils/smorf/pipeline.py +0 -158
- {riboparser-0.2.2 → riboparser-0.2.5}/RiboParser.egg-info/dependency_links.txt +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/RiboParser.egg-info/requires.txt +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/RiboParser.egg-info/top_level.txt +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/bedgraph/__init__.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/bowtie/__init__.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/__init__.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/__init__.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/__init__.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/oligo/__init__.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/ribocode/__init__.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/ribotish/__init__.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/rsem/__init__.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/scripts/unix/__init__.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/setup.cfg +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/EndSite.py +0 -0
- {riboparser-0.2.2 → riboparser-0.2.5}/utils/serp/__init__.py +0 -0
- /riboparser-0.2.2/utils/smorf/fasta.py → /riboparser-0.2.5/utils/smorf/smorf_fasta.py +0 -0
- /riboparser-0.2.2/utils/smorf/models.py → /riboparser-0.2.5/utils/smorf/smorf_models.py +0 -0
- /riboparser-0.2.2/utils/smorf/sequence.py → /riboparser-0.2.5/utils/smorf/smorf_sequence.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: RiboParser
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.5
|
|
4
4
|
Summary: A pipeline for ribosome profiling data analysis
|
|
5
5
|
Author-email: Ren Shuchao <rensc0718@163.com>
|
|
6
6
|
License-Expression: GPL-3.0-or-later
|
|
@@ -2433,8 +2433,276 @@ RIBO_AAA.png # Metaplot of AAA codon
|
|
|
2433
2433
|
```
|
|
2434
2434
|
|
|
2435
2435
|
|
|
2436
|
-
## 6.
|
|
2437
|
-
### 6.1
|
|
2436
|
+
## 6. smORF identification
|
|
2437
|
+
### 6.1 Transcriptome-wide ORF scanning
|
|
2438
|
+
|
|
2439
|
+
1. Explanation of `smorf_scanner`
|
|
2440
|
+
|
|
2441
|
+
Use the entire transcriptome sequences to scan all potential ORFs from scratch, outputting both known and novel ORFs.
|
|
2442
|
+
|
|
2443
|
+
```bash
|
|
2444
|
+
$ smorf_scanner -h
|
|
2445
|
+
|
|
2446
|
+
usage: smorf_scanner
|
|
2447
|
+
[-h] -g GENOME -a ANNOTATION [-o OUT_PREFIX] [--orf-prefix ORF_PREFIX] [--start-codons START_CODONS]
|
|
2448
|
+
[--min-aa MIN_AA] [--max-aa MAX_AA] [--scan-strand {sense,antisense,both}] [--kozak-up KOZAK_UP]
|
|
2449
|
+
[--kozak-down KOZAK_DOWN] [-t THREADS] [--mark-overlap] [--remove-discarded] [--include-stop]
|
|
2450
|
+
|
|
2451
|
+
Scan transcript-centric smORFs from genome FASTA and genePred annotation.
|
|
2452
|
+
|
|
2453
|
+
options:
|
|
2454
|
+
-h, --help show this help message and exit
|
|
2455
|
+
-g, --genome GENOME Input genome FASTA file.
|
|
2456
|
+
-a, --annotation ANNOTATION
|
|
2457
|
+
Input genePred annotation file.
|
|
2458
|
+
-o, --out-prefix OUT_PREFIX
|
|
2459
|
+
Output prefix.
|
|
2460
|
+
--orf-prefix ORF_PREFIX
|
|
2461
|
+
Prefix for ORF IDs.
|
|
2462
|
+
--start-codons START_CODONS
|
|
2463
|
+
Comma-separated start codons, such as ATG,CTG,GTG,TTG.
|
|
2464
|
+
--min-aa MIN_AA Minimum ORF length in amino acids.
|
|
2465
|
+
--max-aa MAX_AA Maximum ORF length in amino acids.
|
|
2466
|
+
--scan-strand {sense,antisense,both}
|
|
2467
|
+
Scan sense, antisense, or both strands.
|
|
2468
|
+
--kozak-up KOZAK_UP Number of upstream nucleotides for Kozak sequence.
|
|
2469
|
+
--kozak-down KOZAK_DOWN
|
|
2470
|
+
Number of downstream nucleotides after start codon for Kozak sequence.
|
|
2471
|
+
-t, --threads THREADS
|
|
2472
|
+
Number of worker processes for parallel ORF scanning.
|
|
2473
|
+
--mark-overlap Mark nested or overlapping ORFs.
|
|
2474
|
+
--remove-discarded Remove same-frame internal ORFs.
|
|
2475
|
+
--include-stop Keep stop codon symbol in peptide sequence.
|
|
2476
|
+
|
|
2477
|
+
# example
|
|
2478
|
+
smorf_scanner \
|
|
2479
|
+
--genome ../genome/GCF_mine_genomic.fna \
|
|
2480
|
+
--annotation ../norm/mine.genepred \
|
|
2481
|
+
--out-prefix mine \
|
|
2482
|
+
--start-codons ATG \
|
|
2483
|
+
--min-aa 8 \
|
|
2484
|
+
--max-aa 10000 \
|
|
2485
|
+
--scan-strand both \
|
|
2486
|
+
--kozak-up 6 \
|
|
2487
|
+
--kozak-down 6 \
|
|
2488
|
+
--mark-overlap \
|
|
2489
|
+
--threads 20
|
|
2490
|
+
|
|
2491
|
+
```
|
|
2492
|
+
|
|
2493
|
+
|
|
2494
|
+
2. Use `smorf_filter` to filter scanned ORFs
|
|
2495
|
+
|
|
2496
|
+
The transcriptome-wide scan relies only on open reading frame positions, so its results contain many false positives that need to be filtered out.
|
|
2497
|
+
At the same time, some smORFs can be selected for study.
|
|
2498
|
+
|
|
2499
|
+
```bash
|
|
2500
|
+
$ smorf_filter -h
|
|
2501
|
+
usage: smorf_filter
|
|
2502
|
+
[-h] -i INPUT [-o OUT_PREFIX] [--keep-start-codons KEEP_START_CODONS] [--min-aa MIN_AA] [--max-aa MAX_AA]
|
|
2503
|
+
[--keep-categories KEEP_CATEGORIES] [--remove-categories REMOVE_CATEGORIES] [--keep-antisense] [--keep-secondary]
|
|
2504
|
+
[--keep-partial] [--kozak-mode {none,annotated,builtin,pwm,sequence}]
|
|
2505
|
+
[--builtin-kozak {arabidopsis,drosophila,maize,plant,rice,terrestrial_plant,vertebrate,yeast}]
|
|
2506
|
+
[--kozak-pwm KOZAK_PWM] [--kozak-seq KOZAK_SEQ] [--annotated-categories ANNOTATED_CATEGORIES]
|
|
2507
|
+
[--min-annotated-kozak MIN_ANNOTATED_KOZAK]
|
|
2508
|
+
[--fallback-builtin-kozak {arabidopsis,drosophila,maize,plant,rice,terrestrial_plant,vertebrate,yeast}]
|
|
2509
|
+
[--no-kozak-fallback] [--min-kozak-pwm-score MIN_KOZAK_PWM_SCORE] [--export-kozak-pwm EXPORT_KOZAK_PWM]
|
|
2510
|
+
[--list-builtin-kozak]
|
|
2511
|
+
|
|
2512
|
+
Filter ORF.message.txt using rule-based criteria and Kozak PWM scoring.
|
|
2513
|
+
|
|
2514
|
+
options:
|
|
2515
|
+
-h, --help show this help message and exit
|
|
2516
|
+
-i, --input INPUT Input ORF.message.txt file.
|
|
2517
|
+
-o, --out-prefix OUT_PREFIX
|
|
2518
|
+
Output prefix. (default: ORF.filtered).
|
|
2519
|
+
--keep-start-codons KEEP_START_CODONS
|
|
2520
|
+
Comma-separated start codons to keep. (default: ATG,CTG,GTG,TTG).
|
|
2521
|
+
--min-aa MIN_AA Minimum ORF peptide length. (default: 8).
|
|
2522
|
+
--max-aa MAX_AA Maximum ORF peptide length. (default: 10000).
|
|
2523
|
+
--keep-categories KEEP_CATEGORIES
|
|
2524
|
+
Comma-separated ORF categories to keep. (default:
|
|
2525
|
+
uORF,dORF,lncORF,iORF,emORF,overlap_uORF,overlap_dORF,other_ORF,annotated_ORF).
|
|
2526
|
+
--remove-categories REMOVE_CATEGORIES
|
|
2527
|
+
Comma-separated ORF categories to remove. (default: same_frame_iORF,antisense_ORF).
|
|
2528
|
+
--keep-antisense Keep antisense ORFs. (default: False).
|
|
2529
|
+
--keep-secondary Keep secondary ORFs. (default: False).
|
|
2530
|
+
--keep-partial Keep incomplete ORFs. (default: False).
|
|
2531
|
+
--kozak-mode {none,annotated,builtin,pwm,sequence}
|
|
2532
|
+
Kozak PWM mode. (default: annotated).
|
|
2533
|
+
--builtin-kozak {arabidopsis,drosophila,maize,plant,rice,terrestrial_plant,vertebrate,yeast}
|
|
2534
|
+
Built-in Kozak PWM name. (default: plant).
|
|
2535
|
+
--kozak-pwm KOZAK_PWM
|
|
2536
|
+
Custom Kozak PWM matrix file. (default: None).
|
|
2537
|
+
--kozak-seq KOZAK_SEQ
|
|
2538
|
+
Aligned Kozak sequence file used to build PWM. (default: None).
|
|
2539
|
+
--annotated-categories ANNOTATED_CATEGORIES
|
|
2540
|
+
Categories used to build annotated ORF Kozak PWM. (default: annotated_ORF).
|
|
2541
|
+
--min-annotated-kozak MIN_ANNOTATED_KOZAK
|
|
2542
|
+
Minimum annotated ORF Kozak sequences required to build PWM. (default: 100).
|
|
2543
|
+
--fallback-builtin-kozak {arabidopsis,drosophila,maize,plant,rice,terrestrial_plant,vertebrate,yeast}
|
|
2544
|
+
Fallback built-in Kozak PWM if annotated mode fails. (default: plant).
|
|
2545
|
+
--no-kozak-fallback Do not fallback to built-in PWM if annotated PWM construction fails. (default: False).
|
|
2546
|
+
--min-kozak-pwm-score MIN_KOZAK_PWM_SCORE
|
|
2547
|
+
Minimum normalized Kozak PWM score. Range: 0-1. (default: 0.0).
|
|
2548
|
+
--export-kozak-pwm EXPORT_KOZAK_PWM
|
|
2549
|
+
Export loaded or constructed Kozak PWM. (default: None).
|
|
2550
|
+
--list-builtin-kozak List built-in Kozak PWM models and exit.
|
|
2551
|
+
|
|
2552
|
+
# example
|
|
2553
|
+
smorf_filter \
|
|
2554
|
+
-i mine.message.txt \
|
|
2555
|
+
-o mine.reliable \
|
|
2556
|
+
--min-aa 8 \
|
|
2557
|
+
--max-aa 10000 \
|
|
2558
|
+
--kozak-mode annotated \
|
|
2559
|
+
--keep-categories uORF,dORF,lncORF,overlap_uORF,overlap_dORF
|
|
2560
|
+
|
|
2561
|
+
```
|
|
2562
|
+
|
|
2563
|
+
3. Check smORF translation with `smorf_evidence`
|
|
2564
|
+
|
|
2565
|
+
In general, stably expressed smORFs can be sensitively captured by Ribo-seq, so Ribo-seq data can be used to validate smORFs and identify reliable ones. Multiple datasets are supported to detect smORFs that are consistently present over different time points.
|
|
2566
|
+
|
|
2567
|
+
```bash
|
|
2568
|
+
$ smorf_evidence -h
|
|
2569
|
+
usage: smorf_evidence
|
|
2570
|
+
[-h] -i ORF_TABLE -o OUTPUT [--genepred GENEPRED] [--chrom-sizes CHROM_SIZES] [--density-list DENSITY_LIST]
|
|
2571
|
+
[--density-plus DENSITY_PLUS] [--density-minus DENSITY_MINUS] [--density DENSITY] [--sample SAMPLE]
|
|
2572
|
+
[--density-format {auto,wig,bedgraph}] [--coord-mode {0based-half-open,1based-closed}]
|
|
2573
|
+
[--post-stop-codons POST_STOP_CODONS] [--pseudocount PSEUDOCOUNT] [--min-rpf-sum MIN_RPF_SUM]
|
|
2574
|
+
[--min-covered-codon MIN_COVERED_CODON] [--min-coverage-ratio MIN_COVERAGE_RATIO]
|
|
2575
|
+
[--strong-periodicity STRONG_PERIODICITY] [--moderate-periodicity MODERATE_PERIODICITY]
|
|
2576
|
+
[--strong-start-pause STRONG_START_PAUSE] [--moderate-start-pause MODERATE_START_PAUSE]
|
|
2577
|
+
[--strong-stop-pause STRONG_STOP_PAUSE] [--moderate-stop-pause MODERATE_STOP_PAUSE]
|
|
2578
|
+
[--strong-release STRONG_RELEASE] [--moderate-release MODERATE_RELEASE]
|
|
2579
|
+
[--uniform-coverage-ratio UNIFORM_COVERAGE_RATIO] [--uniform-gini UNIFORM_GINI]
|
|
2580
|
+
[--uniform-max-to-mean UNIFORM_MAX_TO_MEAN] [--skewed-max-to-mean SKEWED_MAX_TO_MEAN]
|
|
2581
|
+
[--skewed-top-fraction SKEWED_TOP_FRACTION] [--disperse-coverage-ratio DISPERSE_COVERAGE_RATIO]
|
|
2582
|
+
[--progress-every PROGRESS_EVERY] [--keep-no-evidence]
|
|
2583
|
+
|
|
2584
|
+
Evaluate smORF translation evidence using Ribo-seq P-site density.
|
|
2585
|
+
|
|
2586
|
+
options:
|
|
2587
|
+
-h, --help show this help message and exit
|
|
2588
|
+
-i, --orf-table ORF_TABLE
|
|
2589
|
+
Filtered smORF table from smorf_filter.
|
|
2590
|
+
-o, --output OUTPUT Output evidence table in TSV format.
|
|
2591
|
+
--genepred GENEPRED Optional genePred file for ORF exon blocks. (default: None)
|
|
2592
|
+
--chrom-sizes CHROM_SIZES
|
|
2593
|
+
Optional two-column chromosome size file. If not provided, chromosome sizes will be inferred from density files.
|
|
2594
|
+
(default: None)
|
|
2595
|
+
--density-list DENSITY_LIST
|
|
2596
|
+
TSV with columns: sample, strand, path, optional format. (default: None)
|
|
2597
|
+
--density-plus DENSITY_PLUS
|
|
2598
|
+
Plus-strand P-site density file. (default: None)
|
|
2599
|
+
--density-minus DENSITY_MINUS
|
|
2600
|
+
Minus-strand P-site density file. (default: None)
|
|
2601
|
+
--density DENSITY Unstranded P-site density file. (default: None)
|
|
2602
|
+
--sample SAMPLE Sample name for direct density input. (default: sample1)
|
|
2603
|
+
--density-format {auto,wig,bedgraph}
|
|
2604
|
+
Density file format. (default: auto)
|
|
2605
|
+
--coord-mode {0based-half-open,1based-closed}
|
|
2606
|
+
Coordinate mode for ORF table and genePred-like blocks. (default: 0based-half-open)
|
|
2607
|
+
--post-stop-codons POST_STOP_CODONS
|
|
2608
|
+
Number of codons after stop codon used for release signal. (default: 10)
|
|
2609
|
+
--pseudocount PSEUDOCOUNT
|
|
2610
|
+
Pseudocount for ratio calculation. (default: 0.1)
|
|
2611
|
+
--min-rpf-sum MIN_RPF_SUM
|
|
2612
|
+
Minimum ORF-level RPF sum for evidence scoring. (default: 3.0)
|
|
2613
|
+
--min-covered-codon MIN_COVERED_CODON
|
|
2614
|
+
Minimum covered codon count. (default: 2)
|
|
2615
|
+
--min-coverage-ratio MIN_COVERAGE_RATIO
|
|
2616
|
+
Minimum nucleotide-level coverage ratio. (default: 0.1)
|
|
2617
|
+
--strong-periodicity STRONG_PERIODICITY
|
|
2618
|
+
Frame-0 ratio threshold for strong periodicity. (default: 0.7)
|
|
2619
|
+
--moderate-periodicity MODERATE_PERIODICITY
|
|
2620
|
+
Frame-0 ratio threshold for moderate periodicity. (default: 0.55)
|
|
2621
|
+
--strong-start-pause STRONG_START_PAUSE
|
|
2622
|
+
Start pausing ratio threshold for strong signal. (default: 1.5)
|
|
2623
|
+
--moderate-start-pause MODERATE_START_PAUSE
|
|
2624
|
+
Start pausing ratio threshold for moderate signal. (default: 1.2)
|
|
2625
|
+
--strong-stop-pause STRONG_STOP_PAUSE
|
|
2626
|
+
Pre-stop pausing ratio threshold for strong signal. (default: 1.5)
|
|
2627
|
+
--moderate-stop-pause MODERATE_STOP_PAUSE
|
|
2628
|
+
Pre-stop pausing ratio threshold for moderate signal. (default: 1.2)
|
|
2629
|
+
--strong-release STRONG_RELEASE
|
|
2630
|
+
Release ratio threshold for strong signal. (default: 3.0)
|
|
2631
|
+
--moderate-release MODERATE_RELEASE
|
|
2632
|
+
Release ratio threshold for moderate signal. (default: 1.5)
|
|
2633
|
+
--uniform-coverage-ratio UNIFORM_COVERAGE_RATIO
|
|
2634
|
+
Coverage ratio threshold for Uniform shape. (default: 0.4)
|
|
2635
|
+
--uniform-gini UNIFORM_GINI
|
|
2636
|
+
Gini threshold for Uniform shape. (default: 0.5)
|
|
2637
|
+
--uniform-max-to-mean UNIFORM_MAX_TO_MEAN
|
|
2638
|
+
Max/mean threshold for Uniform shape. (default: 5.0)
|
|
2639
|
+
--skewed-max-to-mean SKEWED_MAX_TO_MEAN
|
|
2640
|
+
Max/mean threshold for Skewed shape. (default: 10.0)
|
|
2641
|
+
--skewed-top-fraction SKEWED_TOP_FRACTION
|
|
2642
|
+
Top 10 percent density fraction threshold for Skewed shape. (default: 0.7)
|
|
2643
|
+
--disperse-coverage-ratio DISPERSE_COVERAGE_RATIO
|
|
2644
|
+
Coverage ratio threshold below which coverage is Disperse. (default: 0.2)
|
|
2645
|
+
--progress-every PROGRESS_EVERY
|
|
2646
|
+
Print progress every N ORFs per chromosome. (default: 10000)
|
|
2647
|
+
--keep-no-evidence Keep ORFs with no RPF evidence in output. (default: False)
|
|
2648
|
+
|
|
2649
|
+
|
|
2650
|
+
# example
|
|
2651
|
+
smorf_evidence \
|
|
2652
|
+
-i mine.reliable.passed.message.txt \
|
|
2653
|
+
--genepred mine.genePred \
|
|
2654
|
+
--density-list ribo.bedgraph.list \
|
|
2655
|
+
-o mine.smorf.riboseq_evidence.txt
|
|
2656
|
+
|
|
2657
|
+
# cat ribo.bedgraph.list
|
|
2658
|
+
# sample strand path format
|
|
2659
|
+
# ribo1 + /project/mine/ribo/bedgraph/ribo1_plus.rpf.bedgraph bedgraph
|
|
2660
|
+
# ribo1 - /project/mine/ribo/bedgraph/ribo1_minus.rpf.bedgraph bedgraph
|
|
2661
|
+
# ribo2 + /project/mine/ribo/bedgraph/ribo2_plus.rpf.bedgraph bedgraph
|
|
2662
|
+
# ribo2 - /project/mine/ribo/bedgraph/ribo2_minus.rpf.bedgraph bedgraph
|
|
2663
|
+
|
|
2664
|
+
# these bedgraph files are generated by `rpf_Bam2bw`
|
|
2665
|
+
|
|
2666
|
+
```
|
|
2667
|
+
|
|
2668
|
+
4. Use `smorf_integrate` to integrate all high-confidence smORFs
|
|
2669
|
+
|
|
2670
|
+
Combine results filtered by different sample Ribo-seq data into one comprehensive table.
|
|
2671
|
+
You can then filter the output table and the annotation file from the first scanning step, and use RiboParser's rpf module for complete quality control and quantification analysis.
|
|
2672
|
+
|
|
2673
|
+
```bash
|
|
2674
|
+
$ smorf_integrate -h
|
|
2675
|
+
usage: smorf_integrate
|
|
2676
|
+
[-h] -i INPUT [--output-matrix OUTPUT_MATRIX] [--output-integrated OUTPUT_INTEGRATED]
|
|
2677
|
+
[--capture-labels CAPTURE_LABELS] [--pass-labels PASS_LABELS] [--excellent-min-samples EXCELLENT_MIN_SAMPLES]
|
|
2678
|
+
|
|
2679
|
+
Integrate long-format smORF Ribo-seq evidence into matrices and ORF-level summary tables.
|
|
2680
|
+
|
|
2681
|
+
options:
|
|
2682
|
+
-h, --help show this help message and exit
|
|
2683
|
+
-i, --input INPUT Long-format smORF Ribo-seq evidence table generated by smorf_evidence.py.
|
|
2684
|
+
--output-matrix OUTPUT_MATRIX
|
|
2685
|
+
Output ORF-by-sample matrix table for frame density and selected sample metrics. (default: None)
|
|
2686
|
+
--output-integrated OUTPUT_INTEGRATED
|
|
2687
|
+
Output integrated ORF-level evidence table. (default: None)
|
|
2688
|
+
--capture-labels CAPTURE_LABELS
|
|
2689
|
+
Translation evidence labels used to define captured samples. (default: LowConfidence,MediumConfidence,HighConfidence)
|
|
2690
|
+
--pass-labels PASS_LABELS
|
|
2691
|
+
Translation evidence labels used to define reliable/pass samples. (default: MediumConfidence,HighConfidence)
|
|
2692
|
+
--excellent-min-samples EXCELLENT_MIN_SAMPLES
|
|
2693
|
+
Minimum number of pass samples required to mark an ORF as Excellent. (default: 2)
|
|
2694
|
+
|
|
2695
|
+
# example
|
|
2696
|
+
smorf_integrate \
|
|
2697
|
+
-i mine.smorf.riboseq_evidence.txt \
|
|
2698
|
+
--output-matrix mine.smorf.frame_density_matrix.txt \
|
|
2699
|
+
--output-integrated mine.smorf.integrated_evidence.txt
|
|
2700
|
+
|
|
2701
|
+
```
|
|
2702
|
+
|
|
2703
|
+
|
|
2704
|
+
## 7. Other toolkits
|
|
2705
|
+
### 7.1 Data shuffling
|
|
2438
2706
|
|
|
2439
2707
|
Some analysis processes require randomly assigned data for control,
|
|
2440
2708
|
so a step is added here to reshuffle the RPFs density file.
|
|
@@ -2504,7 +2772,7 @@ RIBO_shuffle.txt # Shuffled RPFs density file
|
|
|
2504
2772
|
```
|
|
2505
2773
|
|
|
2506
2774
|
|
|
2507
|
-
###
|
|
2775
|
+
### 7.2 Retrieve and format the gene density
|
|
2508
2776
|
|
|
2509
2777
|
In many cases, it is necessary to perform some additional operations on the gene set in the RPFs density file,
|
|
2510
2778
|
such as filtering, RPM standardization, long and wide data format conversion, etc.
|
|
@@ -2582,7 +2850,7 @@ RIBO.log
|
|
|
2582
2850
|
RIBO_retrieve.txt
|
|
2583
2851
|
```
|
|
2584
2852
|
|
|
2585
|
-
###
|
|
2853
|
+
### 7.3 Filter the frame shifting genes
|
|
2586
2854
|
|
|
2587
2855
|
A frameshift in translation occurs when the ribosome shifts by one or more nucleotides in the mRNA sequence, causing a misreading of the codons.
|
|
2588
2856
|
This results in a completely altered amino acid sequence downstream of the shift,
|
|
@@ -2649,9 +2917,9 @@ RIBO_SRR1944912_gene_frame_shift.txt
|
|
|
2649
2917
|
```
|
|
2650
2918
|
|
|
2651
2919
|
|
|
2652
|
-
##
|
|
2920
|
+
## 8. one step for pipeline
|
|
2653
2921
|
|
|
2654
|
-
###
|
|
2922
|
+
### 8.0 Prepare the directories and design file for your project
|
|
2655
2923
|
|
|
2656
2924
|
1. create the directories to store the raw-data and results
|
|
2657
2925
|
|
|
@@ -2682,7 +2950,7 @@ SRR1944917 ncs2d_ribo_YPD
|
|
|
2682
2950
|
```
|
|
2683
2951
|
|
|
2684
2952
|
|
|
2685
|
-
###
|
|
2953
|
+
### 8.1 run_step1.sh
|
|
2686
2954
|
|
|
2687
2955
|
This step is used for constructing the database, which is essential for the alignment of reads and subsequent analysis using `RiboParser`.
|
|
2688
2956
|
|
|
@@ -2694,7 +2962,7 @@ This step is suitable for most genome and gene annotation files derived from `NC
|
|
|
2694
2962
|
$ nohup sh run_step1.sh &
|
|
2695
2963
|
```
|
|
2696
2964
|
|
|
2697
|
-
###
|
|
2965
|
+
### 8.2 run_step2.sh
|
|
2698
2966
|
|
|
2699
2967
|
This step is used for analyzing `RNA-seq` data, including data cleaning,
|
|
2700
2968
|
alignment, and expression quantification.
|
|
@@ -2706,7 +2974,7 @@ method used in your project!
|
|
|
2706
2974
|
$ nohup sh run_step2.sh &
|
|
2707
2975
|
```
|
|
2708
2976
|
|
|
2709
|
-
###
|
|
2977
|
+
### 8.3 run_step3.sh
|
|
2710
2978
|
|
|
2711
2979
|
This step is used for analyzing `Ribo-seq` data, including data cleaning,
|
|
2712
2980
|
alignment, and expression quantification.
|
|
@@ -2718,7 +2986,7 @@ sequencing method used in your project!
|
|
|
2718
2986
|
$ nohup sh run_step3.sh &
|
|
2719
2987
|
```
|
|
2720
2988
|
|
|
2721
|
-
###
|
|
2989
|
+
### 8.4 run_step4.sh
|
|
2722
2990
|
|
|
2723
2991
|
This step is used for analyzing `RNA-seq` data, utilizing `RiboParser` to check the
|
|
2724
2992
|
sequencing quality of the `RNA-seq` data and prepare formatted files for subsequent
|
|
@@ -2731,7 +2999,7 @@ modified according to the files defined for your project!
|
|
|
2731
2999
|
$ nohup sh run_step4.sh &
|
|
2732
3000
|
```
|
|
2733
3001
|
|
|
2734
|
-
###
|
|
3002
|
+
### 8.5 run_step5.sh
|
|
2735
3003
|
|
|
2736
3004
|
This step is used for analyzing `Ribo-seq` data, utilizing `RiboParser` to check the
|
|
2737
3005
|
sequencing quality of the `Ribo-seq` data.
|
|
@@ -2744,7 +3012,7 @@ $ nohup sh run_step5.sh &
|
|
|
2744
3012
|
```
|
|
2745
3013
|
|
|
2746
3014
|
|
|
2747
|
-
##
|
|
3015
|
+
## 9. Computational performance of the RiboParser
|
|
2748
3016
|
We assessed the workflow on a CentOS 7 system using 12 threads, with RNA-seq and Ribo-seq data from three different species (S. cerevisiae, M. musculus, and H. sapiens).
|
|
2749
3017
|
|
|
2750
3018
|
| | | | | | | | | | | |
|
|
@@ -2772,7 +3040,7 @@ Optimal Configuration
|
|
|
2772
3040
|
- Storage: ≥ 512 GB NVMe SSD for rapid I/O and 2 TB HDD (SATA III)
|
|
2773
3041
|
|
|
2774
3042
|
|
|
2775
|
-
##
|
|
3043
|
+
## 10. Contribution
|
|
2776
3044
|
|
|
2777
3045
|
Thanks for all the open source tools used in the process.
|
|
2778
3046
|
|
|
@@ -2783,6 +3051,6 @@ Contribute to our open-source project by submitting questions and code.
|
|
|
2783
3051
|
Contact `rensc0718@163.com` for more information.
|
|
2784
3052
|
|
|
2785
3053
|
|
|
2786
|
-
##
|
|
3054
|
+
## 11. License
|
|
2787
3055
|
|
|
2788
3056
|
GPL License.
|