RiboParser 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. {riboparser-0.2.1 → riboparser-0.2.2}/PKG-INFO +1 -1
  2. {riboparser-0.2.1 → riboparser-0.2.2}/RiboParser.egg-info/PKG-INFO +1 -1
  3. {riboparser-0.2.1 → riboparser-0.2.2}/RiboParser.egg-info/SOURCES.txt +0 -2
  4. {riboparser-0.2.1 → riboparser-0.2.2}/pyproject.toml +1 -1
  5. riboparser-0.2.2/utils/data/RiboParser.py +135 -0
  6. riboparser-0.2.1/utils/data/RiboParser.py +0 -184
  7. riboparser-0.2.1/utils/make_ensb_ref.py +0 -308
  8. riboparser-0.2.1/utils/make_ribo_ref.py +0 -39
  9. {riboparser-0.2.1 → riboparser-0.2.2}/README.md +0 -0
  10. {riboparser-0.2.1 → riboparser-0.2.2}/RiboParser.egg-info/dependency_links.txt +0 -0
  11. {riboparser-0.2.1 → riboparser-0.2.2}/RiboParser.egg-info/entry_points.txt +0 -0
  12. {riboparser-0.2.1 → riboparser-0.2.2}/RiboParser.egg-info/requires.txt +0 -0
  13. {riboparser-0.2.1 → riboparser-0.2.2}/RiboParser.egg-info/top_level.txt +0 -0
  14. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/__init__.py +0 -0
  15. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/bedgraph/__init__.py +0 -0
  16. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/bedgraph/bg2meta.py +0 -0
  17. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/bedgraph/rpm_smooth.py +0 -0
  18. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/bowtie/__init__.py +0 -0
  19. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/bowtie/merge_bwt_log.py +0 -0
  20. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fasta/__init__.py +0 -0
  21. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fasta/fa_gc_sum.py +0 -0
  22. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fasta/fa_len_flt.py +0 -0
  23. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fasta/fa_len_sum.py +0 -0
  24. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fasta/fa_split.py +0 -0
  25. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fasta/line_feed.py +0 -0
  26. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fasta/nt2aa.py +0 -0
  27. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fasta/rand_seq.py +0 -0
  28. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fasta/retrieve_seq.py +0 -0
  29. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fasta/revs.py +0 -0
  30. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fastq/__init__.py +0 -0
  31. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fastq/fq2fa.py +0 -0
  32. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fastq/fq2txt.py +0 -0
  33. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fastq/fq_len_flt.py +0 -0
  34. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fastq/fq_len_sum.py +0 -0
  35. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fastq/fq_length.py +0 -0
  36. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fastq/fq_split.py +0 -0
  37. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fastq/fq_trim.py +0 -0
  38. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fastq/phred_quality.py +0 -0
  39. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/fastq/simulate_fastq.py +0 -0
  40. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/__init__.py +0 -0
  41. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_cdt.py +0 -0
  42. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_coverage.py +0 -0
  43. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_cst.py +0 -0
  44. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_digestion.py +0 -0
  45. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_dst_list.py +0 -0
  46. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_length.py +0 -0
  47. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_metagene.py +0 -0
  48. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_occupancy.py +0 -0
  49. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_odd_ratio.py +0 -0
  50. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_offset.py +0 -0
  51. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_offset_detail.py +0 -0
  52. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_offset_end.py +0 -0
  53. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_pausing.py +0 -0
  54. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_period.py +0 -0
  55. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_quant.py +0 -0
  56. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/merge_ribo/merge_saturation.py +0 -0
  57. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/oligo/__init__.py +0 -0
  58. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/oligo/get_overlap_seq.py +0 -0
  59. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/oligo/get_tissue_freq.py +0 -0
  60. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/oligo/get_win_seq.py +0 -0
  61. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/ribocode/__init__.py +0 -0
  62. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/ribocode/ribocode_bed_format.py +0 -0
  63. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/ribotish/__init__.py +0 -0
  64. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/ribotish/ribotish_format.py +0 -0
  65. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/rsem/__init__.py +0 -0
  66. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/rsem/merge_rsem.py +0 -0
  67. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/unix/__init__.py +0 -0
  68. {riboparser-0.2.1 → riboparser-0.2.2}/scripts/unix/dos2unix.py +0 -0
  69. {riboparser-0.2.1 → riboparser-0.2.2}/setup.cfg +0 -0
  70. {riboparser-0.2.1 → riboparser-0.2.2}/utils/__init__.py +0 -0
  71. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/ArgsParser.py +0 -0
  72. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Bam2Wig.py +0 -0
  73. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/BamFilter.py +0 -0
  74. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/CDT.py +0 -0
  75. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/CST.py +0 -0
  76. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Codon.py +0 -0
  77. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Coefficient_of_Variation.py +0 -0
  78. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Coverage.py +0 -0
  79. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Cumulative_CoV.py +0 -0
  80. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Density.py +0 -0
  81. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Digestion.py +0 -0
  82. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/EndSite.py +0 -0
  83. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Ensembl_Ref.py +0 -0
  84. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/GenePred.py +0 -0
  85. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/MetaCodon.py +0 -0
  86. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Metaplot.py +0 -0
  87. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Occupancy.py +0 -0
  88. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Odd_Ratio.py +0 -0
  89. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Offset.py +0 -0
  90. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Offset_RSBM.py +0 -0
  91. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Pausing.py +0 -0
  92. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Percentage.py +0 -0
  93. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Periodicity.py +0 -0
  94. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Quality.py +0 -0
  95. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Quant.py +0 -0
  96. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/RNA.py +0 -0
  97. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/RPFs.py +0 -0
  98. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Retrieve.py +0 -0
  99. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Ribo.py +0 -0
  100. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Shift.py +0 -0
  101. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/Shuffle.py +0 -0
  102. {riboparser-0.2.1 → riboparser-0.2.2}/utils/ribo/__init__.py +0 -0
  103. {riboparser-0.2.1 → riboparser-0.2.2}/utils/riboparser.py +0 -0
  104. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rna_Density.py +0 -0
  105. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rna_Offset.py +0 -0
  106. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Bam2bw.py +0 -0
  107. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Bam_Filter.py +0 -0
  108. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_CDT.py +0 -0
  109. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_CST.py +0 -0
  110. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Check.py +0 -0
  111. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_CoV.py +0 -0
  112. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Corr.py +0 -0
  113. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Coverage.py +0 -0
  114. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Cumulative_CoV.py +0 -0
  115. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Density.py +0 -0
  116. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Digest.py +0 -0
  117. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Geneplot.py +0 -0
  118. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Merge.py +0 -0
  119. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Meta_Codon.py +0 -0
  120. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Metaplot.py +0 -0
  121. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Occupancy.py +0 -0
  122. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Odd_Ratio.py +0 -0
  123. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Offset.py +0 -0
  124. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Offset_RSBM.py +0 -0
  125. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Pausing.py +0 -0
  126. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Percent.py +0 -0
  127. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Periodicity.py +0 -0
  128. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Quant.py +0 -0
  129. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Reference.py +0 -0
  130. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Retrieve.py +0 -0
  131. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Shift.py +0 -0
  132. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_Shuffle.py +0 -0
  133. {riboparser-0.2.1 → riboparser-0.2.2}/utils/rpf_end.py +0 -0
  134. {riboparser-0.2.1 → riboparser-0.2.2}/utils/serp/Properties.py +0 -0
  135. {riboparser-0.2.1 → riboparser-0.2.2}/utils/serp/SeRP.py +0 -0
  136. {riboparser-0.2.1 → riboparser-0.2.2}/utils/serp/__init__.py +0 -0
  137. {riboparser-0.2.1 → riboparser-0.2.2}/utils/serp_overlap.py +0 -0
  138. {riboparser-0.2.1 → riboparser-0.2.2}/utils/serp_peak.py +0 -0
  139. {riboparser-0.2.1 → riboparser-0.2.2}/utils/serp_properties.py +0 -0
  140. {riboparser-0.2.1 → riboparser-0.2.2}/utils/smorf/__init__.py +0 -0
  141. {riboparser-0.2.1 → riboparser-0.2.2}/utils/smorf/classifier.py +0 -0
  142. {riboparser-0.2.1 → riboparser-0.2.2}/utils/smorf/coordinate.py +0 -0
  143. {riboparser-0.2.1 → riboparser-0.2.2}/utils/smorf/fasta.py +0 -0
  144. {riboparser-0.2.1 → riboparser-0.2.2}/utils/smorf/genepred.py +0 -0
  145. {riboparser-0.2.1 → riboparser-0.2.2}/utils/smorf/models.py +0 -0
  146. {riboparser-0.2.1 → riboparser-0.2.2}/utils/smorf/overlap.py +0 -0
  147. {riboparser-0.2.1 → riboparser-0.2.2}/utils/smorf/pipeline.py +0 -0
  148. {riboparser-0.2.1 → riboparser-0.2.2}/utils/smorf/scanner.py +0 -0
  149. {riboparser-0.2.1 → riboparser-0.2.2}/utils/smorf/sequence.py +0 -0
  150. {riboparser-0.2.1 → riboparser-0.2.2}/utils/smorf/writer.py +0 -0
  151. {riboparser-0.2.1 → riboparser-0.2.2}/utils/smorf_scanner.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: RiboParser
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: A pipeline for ribosome profiling data analysis
5
5
  Author-email: Ren Shuchao <rensc0718@163.com>
6
6
  License-Expression: GPL-3.0-or-later
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: RiboParser
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: A pipeline for ribosome profiling data analysis
5
5
  Author-email: Ren Shuchao <rensc0718@163.com>
6
6
  License-Expression: GPL-3.0-or-later
@@ -62,8 +62,6 @@ scripts/rsem/merge_rsem.py
62
62
  scripts/unix/__init__.py
63
63
  scripts/unix/dos2unix.py
64
64
  utils/__init__.py
65
- utils/make_ensb_ref.py
66
- utils/make_ribo_ref.py
67
65
  utils/riboparser.py
68
66
  utils/rna_Density.py
69
67
  utils/rna_Offset.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "RiboParser"
7
- version = "0.2.1"
7
+ version = "0.2.2"
8
8
  authors = [{ name = "Ren Shuchao", email = "rensc0718@163.com" }]
9
9
  description = "A pipeline for ribosome profiling data analysis"
10
10
  readme = "README.md"
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # @Project : riboParser
4
+ # @Script : riboparser.py
5
+
6
+
7
+ import pkg_resources
8
+
9
+
10
+ class RiboParserInfo:
11
+ try:
12
+ version = pkg_resources.get_distribution("RiboParser").version
13
+ except Exception:
14
+ version = "unknown"
15
+
16
+ update_date = "2026-05-21"
17
+ citation = (
18
+ '''
19
+ Shuchao Ren, Yinan Li, Zhipeng Zhou.
20
+ RiboParser/RiboShiny: An integrated platform for comprehensive analysis and visualization of ribo-seq data.
21
+ Journal of Genetics and Genomics (2025)
22
+ doi:10.1016/j.jgg.2025.04.010.
23
+ '''
24
+ )
25
+ required_packages = ["pandas", "polars", "numpy", "matplotlib-venn", "seqlogo",
26
+ "matplotlib", "seaborn", "biopython",
27
+ "scipy", "scikit-learn", "statsmodels",
28
+ "pysam", "joblib"]
29
+
30
+ @classmethod
31
+ def show_version(cls):
32
+ print(f"RiboParser version: {cls.version}")
33
+ print(f"Last update: {cls.update_date}")
34
+
35
+ @classmethod
36
+ def show_citation(cls):
37
+ print("Please cite:")
38
+ print(cls.citation)
39
+
40
+ @classmethod
41
+ def check_dependencies(cls):
42
+ missing = []
43
+ for pkg in cls.required_packages:
44
+ try:
45
+ pkg_resources.get_distribution(pkg)
46
+ except pkg_resources.DistributionNotFound:
47
+ missing.append(pkg)
48
+ if missing:
49
+ print(f"Missing dependencies: {', '.join(missing)}")
50
+ return False
51
+ else:
52
+ print(cls.required_packages)
53
+ print("All required dependencies are installed.")
54
+ return True
55
+
56
+ @classmethod
57
+ def check_package_modules(cls, module_type: str = "all"):
58
+ from pathlib import Path
59
+ import sys
60
+ import importlib
61
+
62
+ script_path = Path(__file__).resolve()
63
+
64
+ # Find project root
65
+ root = script_path.parent
66
+ for _ in range(10):
67
+ if any((root / name).exists() for name in ("pyproject.toml", "README.md", ".git", "utils", "scripts")):
68
+ break
69
+ if root.parent == root:
70
+ break
71
+ root = root.parent
72
+
73
+ # Make local modules importable
74
+ if str(root) not in sys.path:
75
+ sys.path.insert(0, str(root))
76
+
77
+ utils_dir = root / "utils"
78
+ scripts_dir = root / "scripts"
79
+
80
+ modules = {
81
+ "ribo": [],
82
+ "serp": [],
83
+ "smorf": [],
84
+ "scripts": []
85
+ }
86
+
87
+ def module_name_from_path(p: Path) -> str:
88
+ rel = p.relative_to(root)
89
+ return ".".join(rel.with_suffix("").parts)
90
+
91
+ def add_module(p: Path):
92
+ if p.name.startswith("_") or p.name == "__init__.py":
93
+ return
94
+
95
+ mod = module_name_from_path(p)
96
+ parts = p.relative_to(root).parts
97
+ stem = p.stem
98
+
99
+ if "smorf" in parts or stem.startswith("smorf_"):
100
+ modules["smorf"].append(mod)
101
+ elif "serp" in parts or stem.startswith("serp_"):
102
+ modules["serp"].append(mod)
103
+ elif "ribo" in parts or stem.startswith(("rpf_", "rna_")):
104
+ modules["ribo"].append(mod)
105
+ elif "scripts" in parts:
106
+ modules["scripts"].append(mod)
107
+
108
+ if utils_dir.exists():
109
+ for p in utils_dir.rglob("*.py"):
110
+ add_module(p)
111
+
112
+ if scripts_dir.exists():
113
+ for p in scripts_dir.rglob("*.py"):
114
+ add_module(p)
115
+
116
+ for key in modules:
117
+ modules[key] = sorted(set(modules[key]))
118
+
119
+ def try_import(module_name: str) -> bool:
120
+ try:
121
+ importlib.import_module(module_name)
122
+ return True
123
+ except Exception as e:
124
+ return False
125
+
126
+ show_keys = modules.keys() if module_type == "all" else [module_type]
127
+
128
+ for key in show_keys:
129
+ print(f"{key} modules:")
130
+ if modules.get(key):
131
+ for mod in modules[key]:
132
+ status = "[import OK]" if try_import(mod) else "[import FAILED]"
133
+ print(f" - {mod} {status}")
134
+ else:
135
+ print(" - (not found)")
@@ -1,184 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
- # @Project : riboParser
4
- # @Script : riboparser.py
5
-
6
-
7
- import pkg_resources
8
-
9
-
10
- class RiboParserInfo:
11
- try:
12
- version = pkg_resources.get_distribution("RiboParser").version
13
- except Exception:
14
- version = "unknown"
15
-
16
- update_date = "2024-06-10"
17
- citation = (
18
- '''
19
- Shuchao Ren, Yinan Li, Zhipeng Zhou.
20
- RiboParser/RiboShiny: An integrated platform for comprehensive analysis and visualization of ribo-seq data.
21
- Journal of Genetics and Genomics (2025)
22
- doi:10.1016/j.jgg.2025.04.010.
23
- '''
24
- )
25
- required_packages = ["pandas", "polars", "numpy", "matplotlib-venn", "seqlogo",
26
- "matplotlib", "seaborn", "biopython",
27
- "scipy", "scikit-learn", "statsmodels",
28
- "pysam", "joblib"]
29
-
30
- @classmethod
31
- def show_version(cls):
32
- print(f"RiboParser version: {cls.version}")
33
- print(f"Last update: {cls.update_date}")
34
-
35
- @classmethod
36
- def show_citation(cls):
37
- print("Please cite:")
38
- print(cls.citation)
39
-
40
- @classmethod
41
- def check_dependencies(cls):
42
- missing = []
43
- for pkg in cls.required_packages:
44
- try:
45
- pkg_resources.get_distribution(pkg)
46
- except pkg_resources.DistributionNotFound:
47
- missing.append(pkg)
48
- if missing:
49
- print(f"Missing dependencies: {', '.join(missing)}")
50
- return False
51
- else:
52
- print(cls.required_packages)
53
- print("All required dependencies are installed.")
54
- return True
55
-
56
- @classmethod
57
- def check_package_modules(cls):
58
-
59
- from pathlib import Path
60
-
61
- project_path = Path(__file__).resolve()
62
- # check the root directory(directory contains include pyproject.toml / README.md / .git)
63
- for _ in range(8):
64
- if any((project_path / name).exists() for name in ("pyproject.toml", "README.md", ".git")):
65
- break
66
- if project_path.parent == project_path:
67
- break
68
- project_path = project_path.parent
69
- root = project_path
70
-
71
- utils_dir = root / "utils"
72
- scripts_dir = root / "scripts"
73
-
74
- @staticmethod
75
- def module_name_from_path(p: Path):
76
- try:
77
- rel = p.relative_to(root)
78
- except Exception:
79
- rel = p
80
- return ".".join(rel.with_suffix("").parts)
81
-
82
- rpf = []
83
- serp = []
84
- smorf = []
85
- classes = []
86
- others = []
87
-
88
- if utils_dir.exists():
89
- for now_path in utils_dir.iterdir():
90
- if now_path.is_file() and now_path.suffix == ".py" and not now_path.name.startswith("_"):
91
- mod = module_name_from_path(now_path)
92
- name = now_path.stem
93
- if name.startswith("rpf_") or name.startswith("rna_"):
94
- rpf.append(mod)
95
- elif name.startswith("serp_"):
96
- serp.append(mod)
97
- elif name.startswith("smorf_"):
98
- smorf.append(mod)
99
- else:
100
- others.append(mod)
101
- elif now_path.is_dir():
102
- for sub in now_path.rglob("*.py"):
103
- if sub.name.startswith("_") or sub.name == "__init__.py":
104
- continue
105
- mod = module_name_from_path(sub)
106
- if sub.stem.startswith("rpf_"):
107
- rpf.append(mod)
108
- elif sub.stem.startswith("serp_"):
109
- serp.append(mod)
110
- elif sub.stem.startswith("smorf_"):
111
- smorf.append(mod)
112
- else:
113
- classes.append(mod)
114
-
115
- if scripts_dir.exists():
116
- for now_path in scripts_dir.rglob("*.py"):
117
- if now_path.name.startswith("_") or now_path.name == "__init__.py":
118
- continue
119
- mod = module_name_from_path(now_path)
120
- if now_path.stem.startswith("rpf_"):
121
- rpf.append(mod)
122
- elif now_path.stem.startswith("serp_"):
123
- serp.append(mod)
124
- elif now_path.stem.startswith("smorf_"):
125
- smorf.append(mod)
126
- else:
127
- others.append(mod)
128
-
129
- # sort and unique
130
- rpf = sorted(set(rpf))
131
- serp = sorted(set(serp))
132
- smorf = sorted(set(smorf))
133
- classes = sorted(set(classes))
134
- others = sorted(set(others))
135
-
136
- @staticmethod
137
- def try_import(module_name: str) -> bool:
138
- try:
139
- import importlib
140
-
141
- importlib.import_module(module_name)
142
- return True
143
- except Exception:
144
- return False
145
-
146
- print("RPF modules:")
147
- if rpf:
148
- for now_module in rpf:
149
- status = "[import OK]" if try_import(now_module) else "[import FAILED]"
150
- print(f" - {now_module} {status}")
151
- else:
152
- print(" - (not found)")
153
-
154
- print("SERP modules:")
155
- if serp:
156
- for now_module in serp:
157
- status = "[import OK]" if try_import(now_module) else "[import FAILED]"
158
- print(f" - {now_module} {status}")
159
- else:
160
- print(" - (not found)")
161
-
162
- print("smORF modules:")
163
- if smorf:
164
- for now_module in smorf:
165
- status = "[import OK]" if try_import(now_module) else "[import FAILED]"
166
- print(f" - {now_module} {status}")
167
- else:
168
- print(" - (not found)")
169
-
170
- print("Classes:")
171
- if classes:
172
- for now_module in classes:
173
- status = "[import OK]" if try_import(now_module) else "[import FAILED]"
174
- print(f" - {now_module} {status}")
175
- else:
176
- print(" - (not found)")
177
-
178
- print("Other scripts:")
179
- if others:
180
- for now_module in others:
181
- status = "[import OK]" if try_import(now_module) else "[import FAILED]"
182
- print(f" - {now_module} {status}")
183
- else:
184
- print(" - (not found)")
@@ -1,308 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
- # @Project : riboParser
4
- # @Script : make_ensb_ref.py
5
-
6
-
7
- import sys
8
-
9
- import numpy as np
10
- from Bio import SeqIO
11
-
12
- from utils.ribo import ArgsParser
13
- from utils.ribo.Ensembl_Ref import *
14
-
15
-
16
- def readline(record):
17
- chrom, source, feature, start, end, score, strand, phase, attr = record.split('\t')
18
-
19
- attr_dict = OrderedDict()
20
- for num, mess in enumerate(attr.strip(';').split(';')):
21
- idx, values = mess.strip().split(' "')
22
- attr_dict[idx] = values.strip('"')
23
-
24
- section = {"chrom": chrom, "source": source, "feature": feature,
25
- "start": int(start), "end": int(end), "strand": strand, "attr": attr,
26
- "attr_dict": attr_dict, "mess": record}
27
-
28
- return section
29
-
30
-
31
- def detect_cds_periodicity(gtf_filename):
32
- # CDS frame in gtf file could be fit the 3nt periodicity, commonly, the frame of CDS are Closed-interval
33
- # but some times the frame is Right-open-interval
34
- transcripts_dict = OrderedDict()
35
- now_mrna = 0
36
- with open(gtf_filename, 'r') as gtf_in:
37
- for line in gtf_in:
38
- record = line.strip()
39
- if not record or line.startswith('#'):
40
- continue
41
-
42
- section = readline(record)
43
- if section["attr_dict"]["gene_biotype"] == "protein_coding":
44
- if section["feature"] == "transcript":
45
- transcripts_dict[section["attr_dict"]["transcript_id"]] = np.array([0, 0, 0])
46
- now_mrna += 1
47
- if now_mrna >= 2000:
48
- sys.stdout.writelines("The first 1000 genes was used to detect the format of CDS position.\n")
49
- break
50
- elif section["feature"] == "CDS":
51
- cds_frame0 = section["end"] - section["start"]
52
- cds_frame1 = section["end"] - section["start"] + 1
53
- cds_frame2 = section["end"] - section["start"] - 1
54
- transcripts_dict[section["attr_dict"]["transcript_id"]] += [cds_frame0, cds_frame1, cds_frame2]
55
-
56
- cds_shift = 1
57
- cds_type = ["Right-open-interval", "Closed-interval", "Open-interval"]
58
- for frame in [0, 1, -1]:
59
- mrna_length_array = np.array(list(map(lambda length: length[frame], transcripts_dict.values())))
60
-
61
- cds_frame = mrna_length_array % 3
62
- out_frame_num = np.count_nonzero(cds_frame)
63
- if out_frame_num == 0:
64
- # print(transcripts_dict)
65
- sys.stdout.writelines("The frame type of CDS is {now_type}.\n".format(now_type=cds_type[frame]))
66
- cds_shift = frame
67
- break
68
- elif frame == -1:
69
- sys.stdout.writelines("Some CDS in GTF file does not fit to 3 nt periodicity.\n")
70
- cds_shift = 1
71
- else:
72
- continue
73
-
74
- return cds_shift
75
-
76
-
77
- def read_gtf(gtf_filename):
78
- title_list = []
79
- genes_dict = OrderedDict()
80
- transcripts_dict = OrderedDict()
81
- now_row = 0
82
-
83
- cds_shift = detect_cds_periodicity(gtf_filename)
84
-
85
- with open(gtf_filename, 'r') as gtf_in:
86
- for line in gtf_in:
87
- now_row += 1
88
- if now_row % 10000 == 0:
89
- sys.stdout.writelines("Rows: {number}\n".format(number=now_row))
90
-
91
- record = line.strip()
92
-
93
- # skip the '#' lines and blank lines
94
- if not record:
95
- continue
96
- if line.startswith('#'):
97
- title_list.append(record)
98
- continue
99
-
100
- section = readline(record)
101
-
102
- # merge the gene lines
103
- if section["feature"] == "gene":
104
- genes_dict[section["attr_dict"]["gene_id"]] = Gene(section)
105
-
106
- # merge the mRNA
107
- elif section["feature"] == "transcript":
108
- now_rna = Transcripts(section)
109
- transcripts_dict[now_rna.transcript_id] = now_rna
110
-
111
- # merge the exon, cds, start_codon, stop_codon
112
- elif section["feature"] in ["exon", "CDS", "start_codon", "stop_codon"]:
113
- trans_id = section["attr_dict"]["transcript_id"]
114
- transcripts_dict[trans_id].add_feature(section, cds_shift)
115
-
116
- # skip the other genes
117
- else:
118
- # sys.stdout.write("Skip: {row} ".format(row=record))
119
- continue
120
-
121
- sys.stdout.writelines("Rows: {number}\n".format(number=now_row))
122
-
123
- return title_list, genes_dict, transcripts_dict
124
-
125
-
126
- def gene_tree(utr_len, chroms_dict, transcripts_dict, genes_dict):
127
- for trans_id, trans_info in transcripts_dict.items():
128
-
129
- if trans_info.gene_id in genes_dict:
130
- if trans_info.transcript_biotype == "protein_coding":
131
- try:
132
- trans_info.add_utr(utr_len, chroms_dict)
133
- except IndexError:
134
- sys.stdout.write("IndexError: {gene}\n".format(gene=trans_info.gene_id))
135
- continue
136
-
137
- genes_dict[trans_info.gene_id].add_transcript(trans_info)
138
-
139
- else:
140
- raise KeyError("Error: {trans} not found in {gene}!".format(trans=trans_id, gene=trans_info.gene_id))
141
-
142
- return genes_dict
143
-
144
-
145
- def format_results(gene_mess):
146
- mrna_gtf = []
147
- gene_gtf = []
148
- mrna_txt = []
149
- mrna_region = []
150
-
151
- gene_gtf.append('\t'.join([gene_mess.chrom, gene_mess.source, gene_mess.feature, str(gene_mess.start),
152
- str(gene_mess.end), '.', gene_mess.strand, '.', gene_mess.attr]))
153
-
154
- if gene_mess.gene_type == "protein_coding":
155
- mrna_gtf.append('\t'.join([gene_mess.chrom, gene_mess.source, gene_mess.feature, str(gene_mess.start),
156
- str(gene_mess.end), '.', gene_mess.strand, '.', gene_mess.attr]))
157
-
158
- for trans_ids, trans_info in gene_mess.transcript.items():
159
- if trans_info.transcript_biotype == "protein_coding":
160
- # Determine whether the CDS length is an integer multiple of 3.
161
- if trans_info.cds_length % 3 != 0:
162
- sys.stdout.write("Warning! {gene} CDS length doesn't fit the 3nt periodicity. \n".format(gene=trans_ids))
163
- continue
164
-
165
- if gene_mess.rep_transcript == trans_info.transcript_id:
166
- rep_transcript = True
167
- else:
168
- rep_transcript = False
169
-
170
- mrna_txt.append('\t'.join([trans_info.chrom, gene_mess.gene_id, trans_info.gene_name, trans_info.transcript_id,
171
- str(trans_info.start), str(trans_info.end), str(trans_info.utr5), str(trans_info.cds_length),
172
- str(trans_info.utr3), trans_info.strand, str(rep_transcript), str(trans_info.modified)]))
173
- mrna_region.append([trans_ids, trans_info.chrom, trans_info.exons, trans_info.strand])
174
-
175
- if not trans_info.modified:
176
- mrna_gtf.extend(trans_info.mess)
177
- gene_gtf.extend(trans_info.mess)
178
-
179
- else:
180
- mrna_gtf.append('\t'.join([trans_info.chrom, trans_info.source, trans_info.feature, str(trans_info.start),
181
- str(trans_info.end), '.', trans_info.strand, '.', trans_info.attr]))
182
- gene_gtf.append('\t'.join([trans_info.chrom, trans_info.source, trans_info.feature, str(trans_info.start),
183
- str(trans_info.end), '.', trans_info.strand, '.', trans_info.attr]))
184
-
185
- for exon in trans_info.exon_feature:
186
- mrna_gtf.append('\t'.join([str(i) for i in exon]))
187
- gene_gtf.append('\t'.join([str(i) for i in exon]))
188
-
189
- for cds in trans_info.cds_feature:
190
- mrna_gtf.append('\t'.join([str(i) for i in cds]))
191
- gene_gtf.append('\t'.join([str(i) for i in cds]))
192
- else:
193
- gene_gtf.extend(trans_info.mess)
194
-
195
- return mrna_gtf, gene_gtf, mrna_txt, mrna_region
196
-
197
-
198
- def filter_genes(genes_dict, coding):
199
- filtered_gtf = []
200
- filtered_txt = []
201
- filtered_region = []
202
-
203
- if coding:
204
- for gene_name, gene_mess in genes_dict.items():
205
- mrna_gtf, gene_gtf, mrna_txt, mrna_region = format_results(gene_mess)
206
- filtered_gtf.extend(mrna_gtf)
207
- filtered_txt.extend(mrna_txt)
208
- filtered_region.extend(mrna_region)
209
-
210
- elif not coding:
211
- for gene_name, gene_mess in genes_dict.items():
212
- if gene_mess.gene_type == 'protein_coding':
213
- mrna_gtf, gene_gtf, mrna_txt, mrna_region = format_results(gene_mess)
214
- filtered_gtf.extend(gene_gtf)
215
- filtered_txt.extend(mrna_txt)
216
- filtered_region.extend(mrna_region)
217
- else:
218
- mrna_gtf, gene_gtf, mrna_txt, mrna_region = format_results(gene_mess)
219
- filtered_gtf.extend(gene_gtf)
220
-
221
- return filtered_gtf, filtered_txt, filtered_region
222
-
223
-
224
- def read_genome(genome):
225
- chroms_dict = OrderedDict()
226
-
227
- record = SeqIO.parse(genome, "fasta")
228
- for line in record:
229
- sys.stdout.writelines("import chromosome: {chrom}\n".format(chrom=line.id))
230
- chroms_dict[line.id] = Chrom(line)
231
-
232
- return chroms_dict
233
-
234
-
235
- def get_seq(chroms_dict, mrna_region):
236
- mrna_seq = OrderedDict()
237
-
238
- for transcript in mrna_region:
239
- transcript_seq = ''
240
-
241
- if transcript[-1] == "-":
242
- for exon in reversed(transcript[2]):
243
- exon_start, exon_end = exon[0], exon[1]
244
- transcript_seq += chroms_dict[transcript[1]].seq[exon_start - 1: exon_end]
245
- transcript_seq = transcript_seq.reverse_complement()
246
-
247
- else:
248
- for exon in transcript[2]:
249
- exon_start, exon_end = exon[0], exon[1]
250
- transcript_seq += chroms_dict[transcript[1]].seq[exon_start - 1: exon_end]
251
-
252
- mrna_seq[transcript[0]] = transcript_seq
253
-
254
- return mrna_seq
255
-
256
-
257
- def output_results(output_prefix, title_list, filtered_gtf, mrna_txt, mrna_seq):
258
- gtf_out_file = output_prefix + '.norm.gtf'
259
- with open(gtf_out_file, 'w') as gtf_out:
260
- for line in title_list:
261
- gtf_out.writelines(''.join(line) + '\n')
262
-
263
- for line in filtered_gtf:
264
- gtf_out.writelines(''.join(line) + '\n')
265
-
266
- txt_out_file = output_prefix + '.norm.txt'
267
- with open(txt_out_file, 'w') as txt_out:
268
- txt_out.writelines('\t'.join(["chromosome", "gene_id", "gene_name", "transcript_id", "start", "end", "utr5_length",
269
- "cds_length", "utr3_length", "strand", "rep_transcript", "modified"]) + '\n')
270
- for line in mrna_txt:
271
- txt_out.writelines(''.join(line) + '\n')
272
-
273
- seq_out_file = output_prefix + '.norm.fa'
274
- with open(seq_out_file, 'w') as seq_out:
275
- for mrna, sequence in mrna_seq.items():
276
- seq_out.writelines('\n'.join([">" + mrna, str(sequence)]) + '\n')
277
-
278
-
279
- def main():
280
- ArgsParser.now_time()
281
- sys.stdout.writelines('\nMake the gene annotation files.\n')
282
- sys.stdout.writelines('Step1: Checking the input Arguments.\n')
283
- args = ArgsParser.gtf_args_parser()
284
-
285
- sys.stdout.writelines('\nStep2: Import the gtf file.\n')
286
- title_list, genes_dict, transcripts_dict = read_gtf(args.transcript)
287
-
288
- sys.stdout.writelines('\nStep3: Import the genome file.\n')
289
- chroms_dict = read_genome(args.sequence)
290
-
291
- sys.stdout.writelines('\nStep4: Make the gene tree.\n')
292
- genes_dict = gene_tree(args.utr, chroms_dict, transcripts_dict, genes_dict)
293
-
294
- sys.stdout.writelines('\nStep5: Screening genes.\n')
295
- filtered_gtf, mrna_txt, mrna_region = filter_genes(genes_dict, args.coding)
296
-
297
- sys.stdout.writelines('\nStep6: Retrieve the mRNA sequence from genome.\n')
298
- mrna_seq = get_seq(chroms_dict, mrna_region)
299
-
300
- sys.stdout.writelines('\nStep7: Output the results.\n')
301
- output_results(args.output, title_list, filtered_gtf, mrna_txt, mrna_seq)
302
-
303
- sys.stdout.writelines('\nALL DONE!\n\n')
304
- ArgsParser.now_time()
305
-
306
-
307
- if __name__ == "__main__":
308
- main()