RiboParser 0.2.2__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. {riboparser-0.2.2 → riboparser-0.2.5}/PKG-INFO +283 -15
  2. {riboparser-0.2.2 → riboparser-0.2.5}/README.md +282 -14
  3. {riboparser-0.2.2 → riboparser-0.2.5}/RiboParser.egg-info/PKG-INFO +283 -15
  4. {riboparser-0.2.2 → riboparser-0.2.5}/RiboParser.egg-info/SOURCES.txt +22 -10
  5. {riboparser-0.2.2 → riboparser-0.2.5}/RiboParser.egg-info/entry_points.txt +3 -0
  6. {riboparser-0.2.2 → riboparser-0.2.5}/pyproject.toml +19 -1
  7. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/__init__.py +0 -0
  8. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/bedgraph/bg2meta.py +0 -0
  9. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/bedgraph/rpm_smooth.py +0 -0
  10. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/bowtie/merge_bwt_log.py +0 -0
  11. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/fa_gc_sum.py +0 -0
  12. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/fa_len_flt.py +0 -0
  13. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/fa_len_sum.py +0 -0
  14. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/fa_split.py +0 -0
  15. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/line_feed.py +0 -0
  16. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/nt2aa.py +0 -0
  17. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/rand_seq.py +0 -0
  18. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/retrieve_seq.py +0 -0
  19. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/revs.py +0 -0
  20. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/fq2fa.py +0 -0
  21. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/fq2txt.py +0 -0
  22. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/fq_len_flt.py +0 -0
  23. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/fq_len_sum.py +0 -0
  24. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/fq_length.py +0 -0
  25. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/fq_split.py +0 -0
  26. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/fq_trim.py +0 -0
  27. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/phred_quality.py +0 -0
  28. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/simulate_fastq.py +0 -0
  29. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_cdt.py +0 -0
  30. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_coverage.py +0 -0
  31. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_cst.py +0 -0
  32. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_digestion.py +0 -0
  33. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_dst_list.py +0 -0
  34. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_length.py +0 -0
  35. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_metagene.py +0 -0
  36. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_occupancy.py +0 -0
  37. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_odd_ratio.py +0 -0
  38. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_offset.py +0 -0
  39. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_offset_detail.py +0 -0
  40. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_offset_end.py +0 -0
  41. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_pausing.py +0 -0
  42. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_period.py +0 -0
  43. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_quant.py +0 -0
  44. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/merge_saturation.py +0 -0
  45. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/oligo/get_overlap_seq.py +0 -0
  46. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/oligo/get_tissue_freq.py +0 -0
  47. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/oligo/get_win_seq.py +0 -0
  48. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/ribocode/ribocode_bed_format.py +0 -0
  49. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/ribotish/ribotish_format.py +0 -0
  50. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/rsem/merge_rsem.py +0 -0
  51. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/unix/dos2unix.py +0 -0
  52. {riboparser-0.2.2 → riboparser-0.2.5}/utils/__init__.py +0 -0
  53. {riboparser-0.2.2 → riboparser-0.2.5}/utils/data/RiboParser.py +5 -5
  54. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/ArgsParser.py +0 -0
  55. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Bam2Wig.py +0 -0
  56. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/BamFilter.py +0 -0
  57. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/CDT.py +0 -0
  58. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/CST.py +0 -0
  59. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Codon.py +0 -0
  60. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Coefficient_of_Variation.py +0 -0
  61. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Coverage.py +0 -0
  62. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Cumulative_CoV.py +0 -0
  63. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Density.py +0 -0
  64. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Digestion.py +0 -0
  65. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Ensembl_Ref.py +0 -0
  66. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/GenePred.py +0 -0
  67. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/MetaCodon.py +0 -0
  68. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Metaplot.py +0 -0
  69. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Occupancy.py +0 -0
  70. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Odd_Ratio.py +0 -0
  71. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Offset.py +0 -0
  72. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Offset_RSBM.py +0 -0
  73. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Pausing.py +0 -0
  74. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Percentage.py +0 -0
  75. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Periodicity.py +0 -0
  76. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Quality.py +0 -0
  77. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Quant.py +0 -0
  78. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/RNA.py +0 -0
  79. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/RPFs.py +0 -0
  80. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Retrieve.py +0 -0
  81. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Ribo.py +0 -0
  82. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Shift.py +0 -0
  83. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/Shuffle.py +0 -0
  84. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/__init__.py +0 -0
  85. {riboparser-0.2.2 → riboparser-0.2.5}/utils/riboparser.py +0 -0
  86. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rna_Density.py +0 -0
  87. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rna_Offset.py +0 -0
  88. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Bam2bw.py +0 -0
  89. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Bam_Filter.py +0 -0
  90. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_CDT.py +0 -0
  91. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_CST.py +0 -0
  92. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Check.py +0 -0
  93. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_CoV.py +0 -0
  94. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Corr.py +0 -0
  95. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Coverage.py +0 -0
  96. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Cumulative_CoV.py +0 -0
  97. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Density.py +0 -0
  98. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Digest.py +0 -0
  99. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Geneplot.py +0 -0
  100. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Merge.py +0 -0
  101. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Meta_Codon.py +0 -0
  102. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Metaplot.py +0 -0
  103. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Occupancy.py +0 -0
  104. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Odd_Ratio.py +0 -0
  105. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Offset.py +0 -0
  106. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Offset_RSBM.py +0 -0
  107. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Pausing.py +0 -0
  108. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Percent.py +0 -0
  109. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Periodicity.py +0 -0
  110. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Quant.py +0 -0
  111. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Reference.py +0 -0
  112. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Retrieve.py +0 -0
  113. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Shift.py +0 -0
  114. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_Shuffle.py +0 -0
  115. {riboparser-0.2.2 → riboparser-0.2.5}/utils/rpf_end.py +0 -0
  116. {riboparser-0.2.2 → riboparser-0.2.5}/utils/serp/Properties.py +0 -0
  117. {riboparser-0.2.2 → riboparser-0.2.5}/utils/serp/SeRP.py +0 -0
  118. {riboparser-0.2.2 → riboparser-0.2.5}/utils/serp_overlap.py +0 -0
  119. {riboparser-0.2.2 → riboparser-0.2.5}/utils/serp_peak.py +0 -0
  120. {riboparser-0.2.2 → riboparser-0.2.5}/utils/serp_properties.py +0 -0
  121. {riboparser-0.2.2 → riboparser-0.2.5}/utils/smorf/__init__.py +1 -1
  122. riboparser-0.2.5/utils/smorf/smorf_classifier.py +101 -0
  123. riboparser-0.2.2/utils/smorf/coordinate.py → riboparser-0.2.5/utils/smorf/smorf_coordinate.py +2 -2
  124. riboparser-0.2.5/utils/smorf/smorf_filter.py +257 -0
  125. riboparser-0.2.2/utils/smorf/genepred.py → riboparser-0.2.5/utils/smorf/smorf_genepred.py +1 -1
  126. riboparser-0.2.5/utils/smorf/smorf_kozak.py +470 -0
  127. riboparser-0.2.5/utils/smorf/smorf_overlap.py +234 -0
  128. riboparser-0.2.5/utils/smorf/smorf_pipeline.py +287 -0
  129. riboparser-0.2.5/utils/smorf/smorf_riboseq_constants.py +50 -0
  130. riboparser-0.2.5/utils/smorf/smorf_riboseq_density.py +185 -0
  131. riboparser-0.2.5/utils/smorf/smorf_riboseq_integrate.py +462 -0
  132. riboparser-0.2.5/utils/smorf/smorf_riboseq_io.py +318 -0
  133. riboparser-0.2.5/utils/smorf/smorf_riboseq_metrics.py +289 -0
  134. riboparser-0.2.5/utils/smorf/smorf_riboseq_pipeline.py +218 -0
  135. riboparser-0.2.5/utils/smorf/smorf_riboseq_profile.py +97 -0
  136. riboparser-0.2.2/utils/smorf/scanner.py → riboparser-0.2.5/utils/smorf/smorf_scanner.py +3 -3
  137. riboparser-0.2.2/utils/smorf/writer.py → riboparser-0.2.5/utils/smorf/smorf_writer.py +1 -1
  138. riboparser-0.2.5/utils/smorf_evidence.py +98 -0
  139. riboparser-0.2.5/utils/smorf_filter.py +322 -0
  140. riboparser-0.2.5/utils/smorf_integrate.py +78 -0
  141. {riboparser-0.2.2 → riboparser-0.2.5}/utils/smorf_scanner.py +9 -0
  142. riboparser-0.2.2/utils/smorf/classifier.py +0 -76
  143. riboparser-0.2.2/utils/smorf/overlap.py +0 -76
  144. riboparser-0.2.2/utils/smorf/pipeline.py +0 -158
  145. {riboparser-0.2.2 → riboparser-0.2.5}/RiboParser.egg-info/dependency_links.txt +0 -0
  146. {riboparser-0.2.2 → riboparser-0.2.5}/RiboParser.egg-info/requires.txt +0 -0
  147. {riboparser-0.2.2 → riboparser-0.2.5}/RiboParser.egg-info/top_level.txt +0 -0
  148. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/bedgraph/__init__.py +0 -0
  149. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/bowtie/__init__.py +0 -0
  150. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fasta/__init__.py +0 -0
  151. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/fastq/__init__.py +0 -0
  152. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/merge_ribo/__init__.py +0 -0
  153. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/oligo/__init__.py +0 -0
  154. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/ribocode/__init__.py +0 -0
  155. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/ribotish/__init__.py +0 -0
  156. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/rsem/__init__.py +0 -0
  157. {riboparser-0.2.2 → riboparser-0.2.5}/scripts/unix/__init__.py +0 -0
  158. {riboparser-0.2.2 → riboparser-0.2.5}/setup.cfg +0 -0
  159. {riboparser-0.2.2 → riboparser-0.2.5}/utils/ribo/EndSite.py +0 -0
  160. {riboparser-0.2.2 → riboparser-0.2.5}/utils/serp/__init__.py +0 -0
  161. /riboparser-0.2.2/utils/smorf/fasta.py → /riboparser-0.2.5/utils/smorf/smorf_fasta.py +0 -0
  162. /riboparser-0.2.2/utils/smorf/models.py → /riboparser-0.2.5/utils/smorf/smorf_models.py +0 -0
  163. /riboparser-0.2.2/utils/smorf/sequence.py → /riboparser-0.2.5/utils/smorf/smorf_sequence.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: RiboParser
3
- Version: 0.2.2
3
+ Version: 0.2.5
4
4
  Summary: A pipeline for ribosome profiling data analysis
5
5
  Author-email: Ren Shuchao <rensc0718@163.com>
6
6
  License-Expression: GPL-3.0-or-later
@@ -2433,8 +2433,276 @@ RIBO_AAA.png # Metaplot of AAA codon
2433
2433
  ```
2434
2434
 
2435
2435
 
2436
- ## 6. Other toolkits
2437
- ### 6.1 Data shuffling
2436
+ ## 6. smORF identification
2437
+ ### 6.1 Transcriptome-wide ORF scanning
2438
+
2439
+ 1. Explanation of `smorf_scanner`
2440
+
2441
+ Use the entire transcriptome sequences to scan all potential ORFs from scratch, outputting both known and novel ORFs.
2442
+
2443
+ ```bash
2444
+ $ smorf_scanner -h
2445
+
2446
+ usage: smorf_scanner
2447
+ [-h] -g GENOME -a ANNOTATION [-o OUT_PREFIX] [--orf-prefix ORF_PREFIX] [--start-codons START_CODONS]
2448
+ [--min-aa MIN_AA] [--max-aa MAX_AA] [--scan-strand {sense,antisense,both}] [--kozak-up KOZAK_UP]
2449
+ [--kozak-down KOZAK_DOWN] [-t THREADS] [--mark-overlap] [--remove-discarded] [--include-stop]
2450
+
2451
+ Scan transcript-centric smORFs from genome FASTA and genePred annotation.
2452
+
2453
+ options:
2454
+ -h, --help show this help message and exit
2455
+ -g, --genome GENOME Input genome FASTA file.
2456
+ -a, --annotation ANNOTATION
2457
+ Input genePred annotation file.
2458
+ -o, --out-prefix OUT_PREFIX
2459
+ Output prefix.
2460
+ --orf-prefix ORF_PREFIX
2461
+ Prefix for ORF IDs.
2462
+ --start-codons START_CODONS
2463
+ Comma-separated start codons, such as ATG,CTG,GTG,TTG.
2464
+ --min-aa MIN_AA Minimum ORF length in amino acids.
2465
+ --max-aa MAX_AA Maximum ORF length in amino acids.
2466
+ --scan-strand {sense,antisense,both}
2467
+ Scan sense, antisense, or both strands.
2468
+ --kozak-up KOZAK_UP Number of upstream nucleotides for Kozak sequence.
2469
+ --kozak-down KOZAK_DOWN
2470
+ Number of downstream nucleotides after start codon for Kozak sequence.
2471
+ -t, --threads THREADS
2472
+ Number of worker processes for parallel ORF scanning.
2473
+ --mark-overlap Mark nested or overlapping ORFs.
2474
+ --remove-discarded Remove same-frame internal ORFs.
2475
+ --include-stop Keep stop codon symbol in peptide sequence.
2476
+
2477
+ # example
2478
+ smorf_scanner \
2479
+ --genome ../genome/GCF_mine_genomic.fna \
2480
+ --annotation ../norm/mine.genepred \
2481
+ --out-prefix mine \
2482
+ --start-codons ATG \
2483
+ --min-aa 8 \
2484
+ --max-aa 10000 \
2485
+ --scan-strand both \
2486
+ --kozak-up 6 \
2487
+ --kozak-down 6 \
2488
+ --mark-overlap \
2489
+ --threads 20
2490
+
2491
+ ```
2492
+
2493
+
2494
+ 2. Use `smorf_filter` to filter scanned ORFs
2495
+
2496
+ The transcriptome-wide scan results are based only on open reading frame positions, so they contain many false positives that need to be filtered out.
2497
+ The same filtering step can also be used to select specific smORF categories for further study.
2498
+
2499
+ ```bash
2500
+ $ smorf_filter -h
2501
+ usage: smorf_filter
2502
+ [-h] -i INPUT [-o OUT_PREFIX] [--keep-start-codons KEEP_START_CODONS] [--min-aa MIN_AA] [--max-aa MAX_AA]
2503
+ [--keep-categories KEEP_CATEGORIES] [--remove-categories REMOVE_CATEGORIES] [--keep-antisense] [--keep-secondary]
2504
+ [--keep-partial] [--kozak-mode {none,annotated,builtin,pwm,sequence}]
2505
+ [--builtin-kozak {arabidopsis,drosophila,maize,plant,rice,terrestrial_plant,vertebrate,yeast}]
2506
+ [--kozak-pwm KOZAK_PWM] [--kozak-seq KOZAK_SEQ] [--annotated-categories ANNOTATED_CATEGORIES]
2507
+ [--min-annotated-kozak MIN_ANNOTATED_KOZAK]
2508
+ [--fallback-builtin-kozak {arabidopsis,drosophila,maize,plant,rice,terrestrial_plant,vertebrate,yeast}]
2509
+ [--no-kozak-fallback] [--min-kozak-pwm-score MIN_KOZAK_PWM_SCORE] [--export-kozak-pwm EXPORT_KOZAK_PWM]
2510
+ [--list-builtin-kozak]
2511
+
2512
+ Filter ORF.message.txt using rule-based criteria and Kozak PWM scoring.
2513
+
2514
+ options:
2515
+ -h, --help show this help message and exit
2516
+ -i, --input INPUT Input ORF.message.txt file.
2517
+ -o, --out-prefix OUT_PREFIX
2518
+ Output prefix. (default: ORF.filtered).
2519
+ --keep-start-codons KEEP_START_CODONS
2520
+ Comma-separated start codons to keep. (default: ATG,CTG,GTG,TTG).
2521
+ --min-aa MIN_AA Minimum ORF peptide length. (default: 8).
2522
+ --max-aa MAX_AA Maximum ORF peptide length. (default: 10000).
2523
+ --keep-categories KEEP_CATEGORIES
2524
+ Comma-separated ORF categories to keep. (default:
2525
+ uORF,dORF,lncORF,iORF,emORF,overlap_uORF,overlap_dORF,other_ORF,annotated_ORF).
2526
+ --remove-categories REMOVE_CATEGORIES
2527
+ Comma-separated ORF categories to remove. (default: same_frame_iORF,antisense_ORF).
2528
+ --keep-antisense Keep antisense ORFs. (default: False).
2529
+ --keep-secondary Keep secondary ORFs. (default: False).
2530
+ --keep-partial Keep incomplete ORFs. (default: False).
2531
+ --kozak-mode {none,annotated,builtin,pwm,sequence}
2532
+ Kozak PWM mode. (default: annotated).
2533
+ --builtin-kozak {arabidopsis,drosophila,maize,plant,rice,terrestrial_plant,vertebrate,yeast}
2534
+ Built-in Kozak PWM name. (default: plant).
2535
+ --kozak-pwm KOZAK_PWM
2536
+ Custom Kozak PWM matrix file. (default: None).
2537
+ --kozak-seq KOZAK_SEQ
2538
+ Aligned Kozak sequence file used to build PWM. (default: None).
2539
+ --annotated-categories ANNOTATED_CATEGORIES
2540
+ Categories used to build annotated ORF Kozak PWM. (default: annotated_ORF).
2541
+ --min-annotated-kozak MIN_ANNOTATED_KOZAK
2542
+ Minimum annotated ORF Kozak sequences required to build PWM. (default: 100).
2543
+ --fallback-builtin-kozak {arabidopsis,drosophila,maize,plant,rice,terrestrial_plant,vertebrate,yeast}
2544
+ Fallback built-in Kozak PWM if annotated mode fails. (default: plant).
2545
+ --no-kozak-fallback Do not fallback to built-in PWM if annotated PWM construction fails. (default: False).
2546
+ --min-kozak-pwm-score MIN_KOZAK_PWM_SCORE
2547
+ Minimum normalized Kozak PWM score. Range: 0-1. (default: 0.0).
2548
+ --export-kozak-pwm EXPORT_KOZAK_PWM
2549
+ Export loaded or constructed Kozak PWM. (default: None).
2550
+ --list-builtin-kozak List built-in Kozak PWM models and exit.
2551
+
2552
+ # example
2553
+ smorf_filter \
2554
+ -i mine.message.txt \
2555
+ -o mine.reliable \
2556
+ --min-aa 8 \
2557
+ --max-aa 10000 \
2558
+ --kozak-mode annotated \
2559
+ --keep-categories uORF,dORF,lncORF,overlap_uORF,overlap_dORF
2560
+
2561
+ ```
2562
+
2563
+ 3. Check smORF translation with `smorf_evidence`
2564
+
2565
+ In general, stably expressed smORFs can be sensitively captured by Ribo-seq, so Ribo-seq data can be used to validate smORFs and identify reliable ones. Multiple datasets are supported to detect smORFs that are consistently present over different time points.
2566
+
2567
+ ```bash
2568
+ $ smorf_evidence -h
2569
+ usage: smorf_evidence
2570
+ [-h] -i ORF_TABLE -o OUTPUT [--genepred GENEPRED] [--chrom-sizes CHROM_SIZES] [--density-list DENSITY_LIST]
2571
+ [--density-plus DENSITY_PLUS] [--density-minus DENSITY_MINUS] [--density DENSITY] [--sample SAMPLE]
2572
+ [--density-format {auto,wig,bedgraph}] [--coord-mode {0based-half-open,1based-closed}]
2573
+ [--post-stop-codons POST_STOP_CODONS] [--pseudocount PSEUDOCOUNT] [--min-rpf-sum MIN_RPF_SUM]
2574
+ [--min-covered-codon MIN_COVERED_CODON] [--min-coverage-ratio MIN_COVERAGE_RATIO]
2575
+ [--strong-periodicity STRONG_PERIODICITY] [--moderate-periodicity MODERATE_PERIODICITY]
2576
+ [--strong-start-pause STRONG_START_PAUSE] [--moderate-start-pause MODERATE_START_PAUSE]
2577
+ [--strong-stop-pause STRONG_STOP_PAUSE] [--moderate-stop-pause MODERATE_STOP_PAUSE]
2578
+ [--strong-release STRONG_RELEASE] [--moderate-release MODERATE_RELEASE]
2579
+ [--uniform-coverage-ratio UNIFORM_COVERAGE_RATIO] [--uniform-gini UNIFORM_GINI]
2580
+ [--uniform-max-to-mean UNIFORM_MAX_TO_MEAN] [--skewed-max-to-mean SKEWED_MAX_TO_MEAN]
2581
+ [--skewed-top-fraction SKEWED_TOP_FRACTION] [--disperse-coverage-ratio DISPERSE_COVERAGE_RATIO]
2582
+ [--progress-every PROGRESS_EVERY] [--keep-no-evidence]
2583
+
2584
+ Evaluate smORF translation evidence using Ribo-seq P-site density.
2585
+
2586
+ options:
2587
+ -h, --help show this help message and exit
2588
+ -i, --orf-table ORF_TABLE
2589
+ Filtered smORF table from smorf_filter.
2590
+ -o, --output OUTPUT Output evidence table in TSV format.
2591
+ --genepred GENEPRED Optional genePred file for ORF exon blocks. (default: None)
2592
+ --chrom-sizes CHROM_SIZES
2593
+ Optional two-column chromosome size file. If not provided, chromosome sizes will be inferred from density files.
2594
+ (default: None)
2595
+ --density-list DENSITY_LIST
2596
+ TSV with columns: sample, strand, path, optional format. (default: None)
2597
+ --density-plus DENSITY_PLUS
2598
+ Plus-strand P-site density file. (default: None)
2599
+ --density-minus DENSITY_MINUS
2600
+ Minus-strand P-site density file. (default: None)
2601
+ --density DENSITY Unstranded P-site density file. (default: None)
2602
+ --sample SAMPLE Sample name for direct density input. (default: sample1)
2603
+ --density-format {auto,wig,bedgraph}
2604
+ Density file format. (default: auto)
2605
+ --coord-mode {0based-half-open,1based-closed}
2606
+ Coordinate mode for ORF table and genePred-like blocks. (default: 0based-half-open)
2607
+ --post-stop-codons POST_STOP_CODONS
2608
+ Number of codons after stop codon used for release signal. (default: 10)
2609
+ --pseudocount PSEUDOCOUNT
2610
+ Pseudocount for ratio calculation. (default: 0.1)
2611
+ --min-rpf-sum MIN_RPF_SUM
2612
+ Minimum ORF-level RPF sum for evidence scoring. (default: 3.0)
2613
+ --min-covered-codon MIN_COVERED_CODON
2614
+ Minimum covered codon count. (default: 2)
2615
+ --min-coverage-ratio MIN_COVERAGE_RATIO
2616
+ Minimum nucleotide-level coverage ratio. (default: 0.1)
2617
+ --strong-periodicity STRONG_PERIODICITY
2618
+ Frame-0 ratio threshold for strong periodicity. (default: 0.7)
2619
+ --moderate-periodicity MODERATE_PERIODICITY
2620
+ Frame-0 ratio threshold for moderate periodicity. (default: 0.55)
2621
+ --strong-start-pause STRONG_START_PAUSE
2622
+ Start pausing ratio threshold for strong signal. (default: 1.5)
2623
+ --moderate-start-pause MODERATE_START_PAUSE
2624
+ Start pausing ratio threshold for moderate signal. (default: 1.2)
2625
+ --strong-stop-pause STRONG_STOP_PAUSE
2626
+ Pre-stop pausing ratio threshold for strong signal. (default: 1.5)
2627
+ --moderate-stop-pause MODERATE_STOP_PAUSE
2628
+ Pre-stop pausing ratio threshold for moderate signal. (default: 1.2)
2629
+ --strong-release STRONG_RELEASE
2630
+ Release ratio threshold for strong signal. (default: 3.0)
2631
+ --moderate-release MODERATE_RELEASE
2632
+ Release ratio threshold for moderate signal. (default: 1.5)
2633
+ --uniform-coverage-ratio UNIFORM_COVERAGE_RATIO
2634
+ Coverage ratio threshold for Uniform shape. (default: 0.4)
2635
+ --uniform-gini UNIFORM_GINI
2636
+ Gini threshold for Uniform shape. (default: 0.5)
2637
+ --uniform-max-to-mean UNIFORM_MAX_TO_MEAN
2638
+ Max/mean threshold for Uniform shape. (default: 5.0)
2639
+ --skewed-max-to-mean SKEWED_MAX_TO_MEAN
2640
+ Max/mean threshold for Skewed shape. (default: 10.0)
2641
+ --skewed-top-fraction SKEWED_TOP_FRACTION
2642
+ Top 10 percent density fraction threshold for Skewed shape. (default: 0.7)
2643
+ --disperse-coverage-ratio DISPERSE_COVERAGE_RATIO
2644
+ Coverage ratio threshold below which coverage is Disperse. (default: 0.2)
2645
+ --progress-every PROGRESS_EVERY
2646
+ Print progress every N ORFs per chromosome. (default: 10000)
2647
+ --keep-no-evidence Keep ORFs with no RPF evidence in output. (default: False)
2648
+
2649
+
2650
+ # example
2651
+ smorf_evidence \
2652
+ -i mine.reliable.passed.message.txt \
2653
+ --genepred mine.genePred \
2654
+ --density-list ribo.bedgraph.list \
2655
+ -o mine.smorf.riboseq_evidence.txt
2656
+
2657
+ # cat ribo.bedgraph.list
2658
+ # sample strand path format
2659
+ # ribo1 + /project/mine/ribo/bedgraph/ribo1_plus.rpf.bedgraph bedgraph
2660
+ # ribo1 - /project/mine/ribo/bedgraph/ribo1_minus.rpf.bedgraph bedgraph
2661
+ # ribo2 + /project/mine/ribo/bedgraph/ribo2_plus.rpf.bedgraph bedgraph
2662
+ # ribo2 - /project/mine/ribo/bedgraph/ribo2_minus.rpf.bedgraph bedgraph
2663
+
2664
+ # these bedgraph files are generated by `rpf_Bam2bw`
2665
+
2666
+ ```
2667
+
2668
+ 4. Use `smorf_integrate` to integrate all high-confidence smORFs
2669
+
2670
+ Combine the evidence results obtained from different Ribo-seq samples into one comprehensive table.
2671
+ You can then filter the output table and the annotation file from the first scanning step, and use RiboParser's rpf module for complete quality control and quantification analysis.
2672
+
2673
+ ```bash
2674
+ $ smorf_integrate -h
2675
+ usage: smorf_integrate
2676
+ [-h] -i INPUT [--output-matrix OUTPUT_MATRIX] [--output-integrated OUTPUT_INTEGRATED]
2677
+ [--capture-labels CAPTURE_LABELS] [--pass-labels PASS_LABELS] [--excellent-min-samples EXCELLENT_MIN_SAMPLES]
2678
+
2679
+ Integrate long-format smORF Ribo-seq evidence into matrices and ORF-level summary tables.
2680
+
2681
+ options:
2682
+ -h, --help show this help message and exit
2683
+ -i, --input INPUT Long-format smORF Ribo-seq evidence table generated by smorf_evidence.py.
2684
+ --output-matrix OUTPUT_MATRIX
2685
+ Output ORF-by-sample matrix table for frame density and selected sample metrics. (default: None)
2686
+ --output-integrated OUTPUT_INTEGRATED
2687
+ Output integrated ORF-level evidence table. (default: None)
2688
+ --capture-labels CAPTURE_LABELS
2689
+ Translation evidence labels used to define captured samples. (default: LowConfidence,MediumConfidence,HighConfidence)
2690
+ --pass-labels PASS_LABELS
2691
+ Translation evidence labels used to define reliable/pass samples. (default: MediumConfidence,HighConfidence)
2692
+ --excellent-min-samples EXCELLENT_MIN_SAMPLES
2693
+ Minimum number of pass samples required to mark an ORF as Excellent. (default: 2)
2694
+
2695
+ # example
2696
+ smorf_integrate \
2697
+ -i mine.smorf.riboseq_evidence.txt \
2698
+ --output-matrix mine.smorf.frame_density_matrix.txt \
2699
+ --output-integrated mine.smorf.integrated_evidence.txt
2700
+
2701
+ ```
2702
+
2703
+
2704
+ ## 7. Other toolkits
2705
+ ### 7.1 Data shuffling
2438
2706
 
2439
2707
  Some analysis processes require randomly assigned data for control,
2440
2708
  so a step is added here to reshuffle the RPFs density file.
@@ -2504,7 +2772,7 @@ RIBO_shuffle.txt # Shuffled RPFs density file
2504
2772
  ```
2505
2773
 
2506
2774
 
2507
- ### 6.2 Retrieve and format the gene density
2775
+ ### 7.2 Retrieve and format the gene density
2508
2776
 
2509
2777
  In many cases, it is necessary to perform some additional operations on the gene set in the RPFs density file,
2510
2778
  such as filtering, RPM standardization, long and wide data format conversion, etc.
@@ -2582,7 +2850,7 @@ RIBO.log
2582
2850
  RIBO_retrieve.txt
2583
2851
  ```
2584
2852
 
2585
- ### 6.3 Filter the frame shifting genes
2853
+ ### 7.3 Filter the frame shifting genes
2586
2854
 
2587
2855
  A frameshift in translation occurs when the ribosome shifts by one or more nucleotides in the mRNA sequence, causing a misreading of the codons.
2588
2856
  This results in a completely altered amino acid sequence downstream of the shift,
@@ -2649,9 +2917,9 @@ RIBO_SRR1944912_gene_frame_shift.txt
2649
2917
  ```
2650
2918
 
2651
2919
 
2652
- ## 7. one step for pipeline
2920
+ ## 8. one step for pipeline
2653
2921
 
2654
- ### 7.0 Prepare the directories and design file for your project
2922
+ ### 8.0 Prepare the directories and design file for your project
2655
2923
 
2656
2924
  1. create the directories to store the raw-data and results
2657
2925
 
@@ -2682,7 +2950,7 @@ SRR1944917 ncs2d_ribo_YPD
2682
2950
  ```
2683
2951
 
2684
2952
 
2685
- ### 7.1 run_step1.sh
2953
+ ### 8.1 run_step1.sh
2686
2954
 
2687
2955
  This step is used for constructing the database, which is essential for the alignment of reads and subsequent analysis using `RiboParser`.
2688
2956
 
@@ -2694,7 +2962,7 @@ This step is suitable for most genome and gene annotation files derived from `NC
2694
2962
  $ nohup sh run_step1.sh &
2695
2963
  ```
2696
2964
 
2697
- ### 7.2 run_step2.sh
2965
+ ### 8.2 run_step2.sh
2698
2966
 
2699
2967
  This step is used for analyzing `RNA-seq` data, including data cleaning,
2700
2968
  alignment, and expression quantification.
@@ -2706,7 +2974,7 @@ method used in your project!
2706
2974
  $ nohup sh run_step2.sh &
2707
2975
  ```
2708
2976
 
2709
- ### 7.3 run_step3.sh
2977
+ ### 8.3 run_step3.sh
2710
2978
 
2711
2979
  This step is used for analyzing `Ribo-seq` data, including data cleaning,
2712
2980
  alignment, and expression quantification.
@@ -2718,7 +2986,7 @@ sequencing method used in your project!
2718
2986
  $ nohup sh run_step3.sh &
2719
2987
  ```
2720
2988
 
2721
- ### 7.4 run_step4.sh
2989
+ ### 8.4 run_step4.sh
2722
2990
 
2723
2991
  This step is used for analyzing `RNA-seq` data, utilizing `RiboParser` to check the
2724
2992
  sequencing quality of the `RNA-seq` data and prepare formatted files for subsequent
@@ -2731,7 +2999,7 @@ modified according to the files defined for your project!
2731
2999
  $ nohup sh run_step4.sh &
2732
3000
  ```
2733
3001
 
2734
- ### 7.5 run_step5.sh
3002
+ ### 8.5 run_step5.sh
2735
3003
 
2736
3004
  This step is used for analyzing `Ribo-seq` data, utilizing `RiboParser` to check the
2737
3005
  sequencing quality of the `Ribo-seq` data.
@@ -2744,7 +3012,7 @@ $ nohup sh run_step5.sh &
2744
3012
  ```
2745
3013
 
2746
3014
 
2747
- ## 8. Computational performance of the RiboParser
3015
+ ## 9. Computational performance of the RiboParser
2748
3016
  We assessed the workflow on a CentOS 7 system using 12 threads, with RNA-seq and Ribo-seq data from three different species (S. cerevisiae, M. musculus, and H. sapiens).
2749
3017
 
2750
3018
  | | | | | | | | | | | |
@@ -2772,7 +3040,7 @@ Optimal Configuration
2772
3040
  - Storage: ≥ 512 GB NVMe SSD for rapid I/O and 2 TB HDD (SATA III)
2773
3041
 
2774
3042
 
2775
- ## 9. Contribution
3043
+ ## 10. Contribution
2776
3044
 
2777
3045
  Thanks for all the open source tools used in the process.
2778
3046
 
@@ -2783,6 +3051,6 @@ Contribute to our open-source project by submitting questions and code.
2783
3051
  Contact `rensc0718@163.com` for more information.
2784
3052
 
2785
3053
 
2786
- ## 10. License
3054
+ ## 11. License
2787
3055
 
2788
3056
  GPL License.