PyPI - RiboParser - Versions diffs - 0.2.1__tar.gz → 0.2.2__tar.gz - Mend

RiboParser 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (151) hide show

{riboparser-0.2.1 → riboparser-0.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: RiboParser
-Version: 0.2.1
+Version: 0.2.2
 Summary: A pipeline for ribosome profiling data analysis
 Author-email: Ren Shuchao <rensc0718@163.com>
 License-Expression: GPL-3.0-or-later

{riboparser-0.2.1 → riboparser-0.2.2}/RiboParser.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: RiboParser
-Version: 0.2.1
+Version: 0.2.2
 Summary: A pipeline for ribosome profiling data analysis
 Author-email: Ren Shuchao <rensc0718@163.com>
 License-Expression: GPL-3.0-or-later

{riboparser-0.2.1 → riboparser-0.2.2}/RiboParser.egg-info/SOURCES.txt RENAMED Viewed

@@ -62,8 +62,6 @@ scripts/rsem/merge_rsem.py
 scripts/unix/__init__.py
 scripts/unix/dos2unix.py
 utils/__init__.py
-utils/make_ensb_ref.py
-utils/make_ribo_ref.py
 utils/riboparser.py
 utils/rna_Density.py
 utils/rna_Offset.py

{riboparser-0.2.1 → riboparser-0.2.2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "RiboParser"
-version = "0.2.1"
+version = "0.2.2"
 authors = [{ name = "Ren Shuchao", email = "rensc0718@163.com" }]
 description = "A pipeline for ribosome profiling data analysis"
 readme = "README.md"

riboparser-0.2.2/utils/data/RiboParser.py ADDED Viewed

@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Project : riboParser
+# @Script  : riboparser.py
+import pkg_resources
+class RiboParserInfo:
+    try:
+        version = pkg_resources.get_distribution("RiboParser").version
+    except Exception:
+        version = "unknown"
+    update_date = "2026-05-21"
+    citation = (
+        '''
+        Shuchao Ren, Yinan Li, Zhipeng Zhou.
+        RiboParser/RiboShiny: An integrated platform for comprehensive analysis and visualization of ribo-seq data.
+        Journal of Genetics and Genomics (2025)
+        doi:10.1016/j.jgg.2025.04.010.
+        '''
+    )
+    required_packages = ["pandas", "polars", "numpy", "matplotlib-venn", "seqlogo",
+                         "matplotlib", "seaborn", "biopython",
+                         "scipy", "scikit-learn", "statsmodels",
+                         "pysam", "joblib"]
+    @classmethod
+    def show_version(cls):
+        print(f"RiboParser version: {cls.version}")
+        print(f"Last update: {cls.update_date}")
+    @classmethod
+    def show_citation(cls):
+        print("Please cite:")
+        print(cls.citation)
+    @classmethod
+    def check_dependencies(cls):
+        missing = []
+        for pkg in cls.required_packages:
+            try:
+                pkg_resources.get_distribution(pkg)
+            except pkg_resources.DistributionNotFound:
+                missing.append(pkg)
+        if missing:
+            print(f"Missing dependencies: {', '.join(missing)}")
+            return False
+        else:
+            print(cls.required_packages)
+        print("All required dependencies are installed.")
+        return True
+    @classmethod
+    def check_package_modules(cls, module_type: str = "all"):
+        from pathlib import Path
+        import sys
+        import importlib
+        script_path = Path(__file__).resolve()
+        # Find project root
+        root = script_path.parent
+        for _ in range(10):
+            if any((root / name).exists() for name in ("pyproject.toml", "README.md", ".git", "utils", "scripts")):
+                break
+            if root.parent == root:
+                break
+            root = root.parent
+        # Make local modules importable
+        if str(root) not in sys.path:
+            sys.path.insert(0, str(root))
+        utils_dir = root / "utils"
+        scripts_dir = root / "scripts"
+        modules = {
+            "ribo": [],
+            "serp": [],
+            "smorf": [],
+            "scripts": []
+        }
+        def module_name_from_path(p: Path) -> str:
+            rel = p.relative_to(root)
+            return ".".join(rel.with_suffix("").parts)
+        def add_module(p: Path):
+            if p.name.startswith("_") or p.name == "__init__.py":
+                return
+            mod = module_name_from_path(p)
+            parts = p.relative_to(root).parts
+            stem = p.stem
+            if "smorf" in parts or stem.startswith("smorf_"):
+                modules["smorf"].append(mod)
+            elif "serp" in parts or stem.startswith("serp_"):
+                modules["serp"].append(mod)
+            elif "ribo" in parts or stem.startswith(("rpf_", "rna_")):
+                modules["ribo"].append(mod)
+            elif "scripts" in parts:
+                modules["scripts"].append(mod)
+        if utils_dir.exists():
+            for p in utils_dir.rglob("*.py"):
+                add_module(p)
+        if scripts_dir.exists():
+            for p in scripts_dir.rglob("*.py"):
+                add_module(p)
+        for key in modules:
+            modules[key] = sorted(set(modules[key]))
+        def try_import(module_name: str) -> bool:
+            try:
+                importlib.import_module(module_name)
+                return True
+            except Exception as e:
+                return False
+        show_keys = modules.keys() if module_type == "all" else [module_type]
+        for key in show_keys:
+            print(f"{key} modules:")
+            if modules.get(key):
+                for mod in modules[key]:
+                    status = "[import OK]" if try_import(mod) else "[import FAILED]"
+                    print(f" - {mod} {status}")
+            else:
+                print(" - (not found)")

riboparser-0.2.1/utils/data/RiboParser.py DELETED Viewed

@@ -1,184 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Project : riboParser
-# @Script  : riboparser.py
-import pkg_resources
-class RiboParserInfo:
-    try:
-        version = pkg_resources.get_distribution("RiboParser").version
-    except Exception:
-        version = "unknown"
-    update_date = "2024-06-10"
-    citation = (
-        '''
-        Shuchao Ren, Yinan Li, Zhipeng Zhou.
-        RiboParser/RiboShiny: An integrated platform for comprehensive analysis and visualization of ribo-seq data.
-        Journal of Genetics and Genomics (2025)
-        doi:10.1016/j.jgg.2025.04.010.
-        '''
-    )
-    required_packages = ["pandas", "polars", "numpy", "matplotlib-venn", "seqlogo",
-                         "matplotlib", "seaborn", "biopython",
-                         "scipy", "scikit-learn", "statsmodels",
-                         "pysam", "joblib"]
-    @classmethod
-    def show_version(cls):
-        print(f"RiboParser version: {cls.version}")
-        print(f"Last update: {cls.update_date}")
-    @classmethod
-    def show_citation(cls):
-        print("Please cite:")
-        print(cls.citation)
-    @classmethod
-    def check_dependencies(cls):
-        missing = []
-        for pkg in cls.required_packages:
-            try:
-                pkg_resources.get_distribution(pkg)
-            except pkg_resources.DistributionNotFound:
-                missing.append(pkg)
-        if missing:
-            print(f"Missing dependencies: {', '.join(missing)}")
-            return False
-        else:
-            print(cls.required_packages)
-        print("All required dependencies are installed.")
-        return True
-    @classmethod
-    def check_package_modules(cls):
-        from pathlib import Path
-        project_path = Path(__file__).resolve()
-        # check the root directory（directory contains include pyproject.toml / README.md / .git）
-        for _ in range(8):
-            if any((project_path / name).exists() for name in ("pyproject.toml", "README.md", ".git")):
-                break
-            if project_path.parent == project_path:
-                break
-            project_path = project_path.parent
-        root = project_path
-        utils_dir = root / "utils"
-        scripts_dir = root / "scripts"
-        @staticmethod
-        def module_name_from_path(p: Path):
-            try:
-                rel = p.relative_to(root)
-            except Exception:
-                rel = p
-            return ".".join(rel.with_suffix("").parts)
-        rpf = []
-        serp = []
-        smorf = []
-        classes = []
-        others = []
-        if utils_dir.exists():
-            for now_path in utils_dir.iterdir():
-                if now_path.is_file() and now_path.suffix == ".py" and not now_path.name.startswith("_"):
-                    mod = module_name_from_path(now_path)
-                    name = now_path.stem
-                    if name.startswith("rpf_") or name.startswith("rna_"):
-                        rpf.append(mod)
-                    elif name.startswith("serp_"):
-                        serp.append(mod)
-                    elif name.startswith("smorf_"):
-                        smorf.append(mod)
-                    else:
-                        others.append(mod)
-                elif now_path.is_dir():
-                    for sub in now_path.rglob("*.py"):
-                        if sub.name.startswith("_") or sub.name == "__init__.py":
-                            continue
-                        mod = module_name_from_path(sub)
-                        if sub.stem.startswith("rpf_"):
-                            rpf.append(mod)
-                        elif sub.stem.startswith("serp_"):
-                            serp.append(mod)
-                        elif sub.stem.startswith("smorf_"):
-                            smorf.append(mod)
-                        else:
-                            classes.append(mod)
-        if scripts_dir.exists():
-            for now_path in scripts_dir.rglob("*.py"):
-                if now_path.name.startswith("_") or now_path.name == "__init__.py":
-                    continue
-                mod = module_name_from_path(now_path)
-                if now_path.stem.startswith("rpf_"):
-                    rpf.append(mod)
-                elif now_path.stem.startswith("serp_"):
-                    serp.append(mod)
-                elif now_path.stem.startswith("smorf_"):
-                    smorf.append(mod)
-                else:
-                    others.append(mod)
-        # sort and unique
-        rpf = sorted(set(rpf))
-        serp = sorted(set(serp))
-        smorf = sorted(set(smorf))
-        classes = sorted(set(classes))
-        others = sorted(set(others))
-        @staticmethod
-        def try_import(module_name: str) -> bool:
-            try:
-                import importlib
-                importlib.import_module(module_name)
-                return True
-            except Exception:
-                return False
-        print("RPF modules:")
-        if rpf:
-            for now_module in rpf:
-                status = "[import OK]" if try_import(now_module) else "[import FAILED]"
-                print(f" - {now_module} {status}")
-        else:
-            print(" - (not found)")
-        print("SERP modules:")
-        if serp:
-            for now_module in serp:
-                status = "[import OK]" if try_import(now_module) else "[import FAILED]"
-                print(f" - {now_module} {status}")
-        else:
-            print(" - (not found)")
-        print("smORF modules:")
-        if smorf:
-            for now_module in smorf:
-                status = "[import OK]" if try_import(now_module) else "[import FAILED]"
-                print(f" - {now_module} {status}")
-        else:
-            print(" - (not found)")
-        print("Classes:")
-        if classes:
-            for now_module in classes:
-                status = "[import OK]" if try_import(now_module) else "[import FAILED]"
-                print(f" - {now_module} {status}")
-        else:
-            print(" - (not found)")
-        print("Other scripts:")
-        if others:
-            for now_module in others:
-                status = "[import OK]" if try_import(now_module) else "[import FAILED]"
-                print(f" - {now_module} {status}")
-        else:
-            print(" - (not found)")

riboparser-0.2.1/utils/make_ensb_ref.py DELETED Viewed

@@ -1,308 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Project : riboParser
-# @Script  : make_ensb_ref.py
-import sys
-import numpy as np
-from Bio import SeqIO
-from utils.ribo import ArgsParser
-from utils.ribo.Ensembl_Ref import *
-def readline(record):
-    chrom, source, feature, start, end, score, strand, phase, attr = record.split('\t')
-    attr_dict = OrderedDict()
-    for num, mess in enumerate(attr.strip(';').split(';')):
-        idx, values = mess.strip().split(' "')
-        attr_dict[idx] = values.strip('"')
-    section = {"chrom": chrom, "source": source, "feature": feature,
-               "start": int(start), "end": int(end), "strand": strand, "attr": attr,
-               "attr_dict": attr_dict, "mess": record}
-    return section
-def detect_cds_periodicity(gtf_filename):
-    # CDS frame in gtf file could be fit the 3nt periodicity, commonly, the frame of CDS are Closed-interval
-    # but some times the frame is Right-open-interval
-    transcripts_dict = OrderedDict()
-    now_mrna = 0
-    with open(gtf_filename, 'r') as gtf_in:
-        for line in gtf_in:
-            record = line.strip()
-            if not record or line.startswith('#'):
-                continue
-            section = readline(record)
-            if section["attr_dict"]["gene_biotype"] == "protein_coding":
-                if section["feature"] == "transcript":
-                    transcripts_dict[section["attr_dict"]["transcript_id"]] = np.array([0, 0, 0])
-                    now_mrna += 1
-                    if now_mrna >= 2000:
-                        sys.stdout.writelines("The first 1000 genes was used to detect the format of CDS position.\n")
-                        break
-                elif section["feature"] == "CDS":
-                    cds_frame0 = section["end"] - section["start"]
-                    cds_frame1 = section["end"] - section["start"] + 1
-                    cds_frame2 = section["end"] - section["start"] - 1
-                    transcripts_dict[section["attr_dict"]["transcript_id"]] += [cds_frame0, cds_frame1, cds_frame2]
-    cds_shift = 1
-    cds_type = ["Right-open-interval", "Closed-interval", "Open-interval"]
-    for frame in [0, 1, -1]:
-        mrna_length_array = np.array(list(map(lambda length: length[frame], transcripts_dict.values())))
-        cds_frame = mrna_length_array % 3
-        out_frame_num = np.count_nonzero(cds_frame)
-        if out_frame_num == 0:
-            # print(transcripts_dict)
-            sys.stdout.writelines("The frame type of CDS is {now_type}.\n".format(now_type=cds_type[frame]))
-            cds_shift = frame
-            break
-        elif frame == -1:
-            sys.stdout.writelines("Some CDS in GTF file does not fit to 3 nt periodicity.\n")
-            cds_shift = 1
-        else:
-            continue
-    return cds_shift
-def read_gtf(gtf_filename):
-    title_list = []
-    genes_dict = OrderedDict()
-    transcripts_dict = OrderedDict()
-    now_row = 0
-    cds_shift = detect_cds_periodicity(gtf_filename)
-    with open(gtf_filename, 'r') as gtf_in:
-        for line in gtf_in:
-            now_row += 1
-            if now_row % 10000 == 0:
-                sys.stdout.writelines("Rows:  {number}\n".format(number=now_row))
-            record = line.strip()
-            # skip the '#' lines and blank lines
-            if not record:
-                continue
-            if line.startswith('#'):
-                title_list.append(record)
-                continue
-            section = readline(record)
-            # merge the gene lines
-            if section["feature"] == "gene":
-                genes_dict[section["attr_dict"]["gene_id"]] = Gene(section)
-            # merge the mRNA
-            elif section["feature"] == "transcript":
-                now_rna = Transcripts(section)
-                transcripts_dict[now_rna.transcript_id] = now_rna
-            # merge the exon, cds, start_codon, stop_codon
-            elif section["feature"] in ["exon", "CDS", "start_codon", "stop_codon"]:
-                trans_id = section["attr_dict"]["transcript_id"]
-                transcripts_dict[trans_id].add_feature(section, cds_shift)
-            # skip the other genes
-            else:
-                # sys.stdout.write("Skip: {row} ".format(row=record))
-                continue
-    sys.stdout.writelines("Rows: {number}\n".format(number=now_row))
-    return title_list, genes_dict, transcripts_dict
-def gene_tree(utr_len, chroms_dict, transcripts_dict, genes_dict):
-    for trans_id, trans_info in transcripts_dict.items():
-        if trans_info.gene_id in genes_dict:
-            if trans_info.transcript_biotype == "protein_coding":
-                try:
-                    trans_info.add_utr(utr_len, chroms_dict)
-                except IndexError:
-                    sys.stdout.write("IndexError: {gene}\n".format(gene=trans_info.gene_id))
-                    continue
-            genes_dict[trans_info.gene_id].add_transcript(trans_info)
-        else:
-            raise KeyError("Error: {trans} not found in {gene}!".format(trans=trans_id, gene=trans_info.gene_id))
-    return genes_dict
-def format_results(gene_mess):
-    mrna_gtf = []
-    gene_gtf = []
-    mrna_txt = []
-    mrna_region = []
-    gene_gtf.append('\t'.join([gene_mess.chrom, gene_mess.source, gene_mess.feature, str(gene_mess.start),
-                               str(gene_mess.end), '.', gene_mess.strand, '.', gene_mess.attr]))
-    if gene_mess.gene_type == "protein_coding":
-        mrna_gtf.append('\t'.join([gene_mess.chrom, gene_mess.source, gene_mess.feature, str(gene_mess.start),
-                                   str(gene_mess.end), '.', gene_mess.strand, '.', gene_mess.attr]))
-    for trans_ids, trans_info in gene_mess.transcript.items():
-        if trans_info.transcript_biotype == "protein_coding":
-            # Determine whether the CDS length is an integer multiple of 3.
-            if trans_info.cds_length % 3 != 0:
-                sys.stdout.write("Warning! {gene} CDS length doesn't fit the 3nt periodicity. \n".format(gene=trans_ids))
-                continue
-            if gene_mess.rep_transcript == trans_info.transcript_id:
-                rep_transcript = True
-            else:
-                rep_transcript = False
-            mrna_txt.append('\t'.join([trans_info.chrom, gene_mess.gene_id, trans_info.gene_name, trans_info.transcript_id,
-                                       str(trans_info.start), str(trans_info.end), str(trans_info.utr5), str(trans_info.cds_length),
-                                       str(trans_info.utr3), trans_info.strand, str(rep_transcript), str(trans_info.modified)]))
-            mrna_region.append([trans_ids, trans_info.chrom, trans_info.exons, trans_info.strand])
-            if not trans_info.modified:
-                mrna_gtf.extend(trans_info.mess)
-                gene_gtf.extend(trans_info.mess)
-            else:
-                mrna_gtf.append('\t'.join([trans_info.chrom, trans_info.source, trans_info.feature, str(trans_info.start),
-                                           str(trans_info.end), '.', trans_info.strand, '.', trans_info.attr]))
-                gene_gtf.append('\t'.join([trans_info.chrom, trans_info.source, trans_info.feature, str(trans_info.start),
-                                           str(trans_info.end), '.', trans_info.strand, '.', trans_info.attr]))
-                for exon in trans_info.exon_feature:
-                    mrna_gtf.append('\t'.join([str(i) for i in exon]))
-                    gene_gtf.append('\t'.join([str(i) for i in exon]))
-                for cds in trans_info.cds_feature:
-                    mrna_gtf.append('\t'.join([str(i) for i in cds]))
-                    gene_gtf.append('\t'.join([str(i) for i in cds]))
-        else:
-            gene_gtf.extend(trans_info.mess)
-    return mrna_gtf, gene_gtf, mrna_txt, mrna_region
-def filter_genes(genes_dict, coding):
-    filtered_gtf = []
-    filtered_txt = []
-    filtered_region = []
-    if coding:
-        for gene_name, gene_mess in genes_dict.items():
-            mrna_gtf, gene_gtf, mrna_txt, mrna_region = format_results(gene_mess)
-            filtered_gtf.extend(mrna_gtf)
-            filtered_txt.extend(mrna_txt)
-            filtered_region.extend(mrna_region)
-    elif not coding:
-        for gene_name, gene_mess in genes_dict.items():
-            if gene_mess.gene_type == 'protein_coding':
-                mrna_gtf, gene_gtf, mrna_txt, mrna_region = format_results(gene_mess)
-                filtered_gtf.extend(gene_gtf)
-                filtered_txt.extend(mrna_txt)
-                filtered_region.extend(mrna_region)
-            else:
-                mrna_gtf, gene_gtf, mrna_txt, mrna_region = format_results(gene_mess)
-                filtered_gtf.extend(gene_gtf)
-    return filtered_gtf, filtered_txt, filtered_region
-def read_genome(genome):
-    chroms_dict = OrderedDict()
-    record = SeqIO.parse(genome, "fasta")
-    for line in record:
-        sys.stdout.writelines("import chromosome: {chrom}\n".format(chrom=line.id))
-        chroms_dict[line.id] = Chrom(line)
-    return chroms_dict
-def get_seq(chroms_dict, mrna_region):
-    mrna_seq = OrderedDict()
-    for transcript in mrna_region:
-        transcript_seq = ''
-        if transcript[-1] == "-":
-            for exon in reversed(transcript[2]):
-                exon_start, exon_end = exon[0], exon[1]
-                transcript_seq += chroms_dict[transcript[1]].seq[exon_start - 1: exon_end]
-            transcript_seq = transcript_seq.reverse_complement()
-        else:
-            for exon in transcript[2]:
-                exon_start, exon_end = exon[0], exon[1]
-                transcript_seq += chroms_dict[transcript[1]].seq[exon_start - 1: exon_end]
-        mrna_seq[transcript[0]] = transcript_seq
-    return mrna_seq
-def output_results(output_prefix, title_list, filtered_gtf, mrna_txt, mrna_seq):
-    gtf_out_file = output_prefix + '.norm.gtf'
-    with open(gtf_out_file, 'w') as gtf_out:
-        for line in title_list:
-            gtf_out.writelines(''.join(line) + '\n')
-        for line in filtered_gtf:
-            gtf_out.writelines(''.join(line) + '\n')
-    txt_out_file = output_prefix + '.norm.txt'
-    with open(txt_out_file, 'w') as txt_out:
-        txt_out.writelines('\t'.join(["chromosome", "gene_id", "gene_name", "transcript_id", "start", "end", "utr5_length",
-                                      "cds_length", "utr3_length", "strand", "rep_transcript", "modified"]) + '\n')
-        for line in mrna_txt:
-            txt_out.writelines(''.join(line) + '\n')
-    seq_out_file = output_prefix + '.norm.fa'
-    with open(seq_out_file, 'w') as seq_out:
-        for mrna, sequence in mrna_seq.items():
-            seq_out.writelines('\n'.join([">" + mrna, str(sequence)]) + '\n')
-def main():
-    ArgsParser.now_time()
-    sys.stdout.writelines('\nMake the gene annotation files.\n')
-    sys.stdout.writelines('Step1: Checking the input Arguments.\n')
-    args = ArgsParser.gtf_args_parser()
-    sys.stdout.writelines('\nStep2: Import the gtf file.\n')
-    title_list, genes_dict, transcripts_dict = read_gtf(args.transcript)
-    sys.stdout.writelines('\nStep3: Import the genome file.\n')
-    chroms_dict = read_genome(args.sequence)
-    sys.stdout.writelines('\nStep4: Make the gene tree.\n')
-    genes_dict = gene_tree(args.utr, chroms_dict, transcripts_dict, genes_dict)
-    sys.stdout.writelines('\nStep5: Screening genes.\n')
-    filtered_gtf, mrna_txt, mrna_region = filter_genes(genes_dict, args.coding)
-    sys.stdout.writelines('\nStep6: Retrieve the mRNA sequence from genome.\n')
-    mrna_seq = get_seq(chroms_dict, mrna_region)
-    sys.stdout.writelines('\nStep7: Output the results.\n')
-    output_results(args.output, title_list, filtered_gtf, mrna_txt, mrna_seq)
-    sys.stdout.writelines('\nALL DONE!\n\n')
-    ArgsParser.now_time()
-if __name__ == "__main__":
-    main()

RiboParser 0.2.1__tar.gz → 0.2.2__tar.gz

RiboParser 0.2.1__tar.gz → 0.2.2__tar.gz