darkprofiler 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ MIT License
2
+ Copyright (c) 2025 Hanjun Lee, Stephen J. Elledge
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+ The above copyright notice and this permission notice shall be included in all
10
+ copies or substantial portions of the Software.
11
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17
+ SOFTWARE.
@@ -0,0 +1,124 @@
1
+ Metadata-Version: 2.1
2
+ Name: darkprofiler
3
+ Version: 0.1.0
4
+ Summary: DarkProfiler: Alignment and Classification of Peptides from Reference-Independent De Novo Peptide Sequencing Experiments.
5
+ Author-email: Hanjun Lee <hanjun@alum.mit.edu>
6
+ License: MIT
7
+ Keywords: proteomics,immunopeptidomics,neoantigen,bioinformatics
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
13
+ Requires-Python: >=3.7
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE.txt
16
+ Requires-Dist: biopython>=1.78
17
+ Requires-Dist: matplotlib>=3.3
18
+
19
+ # DarkProfiler
20
+
21
+ **DarkProfiler: Alignment and Classification of Peptides from Reference-Independent De Novo Peptide Sequencing Experiments**
22
+
23
+ DarkProfiler takes peptide sequences (e.g. from de novo sequencing) and classifies them into:
24
+
25
+ - **Canonical proteome**
26
+ - **Alternative splicing**
27
+ - **Neoantigens (SNV-derived mutanome)**
28
+ - **Alternative reading frame peptides**
29
+ - **Amino acid misincorporations**
30
+ - **Unknown / unaligned**
31
+
32
+ It supports human and mouse references: `hg19`, `hg38`, `mm10`, `mm39`.
33
+
34
+ ---
35
+
36
+ ## Installation
37
+
38
+ ### Install with pip (PyPI)
39
+
40
+ ```bash
41
+ pip install darkprofiler
42
+ ```
43
+
44
+ ### Install with conda (bioconda)
45
+
46
+ ```bash
47
+ conda install bioconda::darkprofiler
48
+ ```
49
+
50
+ ---
51
+
52
+ ## Reference genome
53
+
54
+ DarkProfiler supports human and mouse reference genomes.
55
+
56
+ Supported genome assemblies are:
57
+
58
+ ```
59
+ hg19 (GENCODE release 19)
60
+ hg38 (GENCODE release 37)
61
+ mm10 (GENCODE release M19)
62
+ mm39 (GENCODE release M37)
63
+ ```
64
+
65
+ ---
66
+
67
+ ## Command-line usage
68
+
69
+ ### Download reference data
70
+
71
+ ```bash
72
+ darkprofiler download hg38
73
+ ```
74
+
75
+ ### Run classification
76
+
77
+ ```bash
78
+ darkprofiler run hg38 peptides.fa output_dir
79
+ ```
80
+
81
+ Optional flags:
82
+
83
+ ```
84
+ --vcf-path FILE
85
+ --database-path DIR
86
+ --num-threads N
87
+ ```
88
+
89
+ ---
90
+
91
+ ## Python API
92
+
93
+ ```python
94
+ from darkprofiler.run import classify_peptides
95
+
96
+ classify_peptides(
97
+ reference="hg38",
98
+ peptide_fasta="peptides.fa",
99
+ output_dir="output",
100
+ vcf_path=None,
101
+ database_path=None,
102
+ num_threads=4
103
+ )
104
+ ```
105
+
106
+ ---
107
+
108
+ ## Outputs
109
+
110
+ - canonicalProteome.fa
111
+ - alternativeSplicing.fa
112
+ - neoantigen.fa
113
+ - alternativeReadingFrame.fa
114
+ - aminoAcidMisincorporation.fa
115
+ - unknown.fa
116
+ - pieChart.tsv
117
+ - pieChart.pdf
118
+
119
+ ---
120
+
121
+ ## License
122
+
123
+ MIT License
124
+ Copyright (c) 2025 Hanjun Lee, Stephen J. Elledge
@@ -0,0 +1,106 @@
1
+ # DarkProfiler
2
+
3
+ **DarkProfiler: Alignment and Classification of Peptides from Reference-Independent De Novo Peptide Sequencing Experiments**
4
+
5
+ DarkProfiler takes peptide sequences (e.g. from de novo sequencing) and classifies them into:
6
+
7
+ - **Canonical proteome**
8
+ - **Alternative splicing**
9
+ - **Neoantigens (SNV-derived mutanome)**
10
+ - **Alternative reading frame peptides**
11
+ - **Amino acid misincorporations**
12
+ - **Unknown / unaligned**
13
+
14
+ It supports human and mouse references: `hg19`, `hg38`, `mm10`, `mm39`.
15
+
16
+ ---
17
+
18
+ ## Installation
19
+
20
+ ### Install with pip (PyPI)
21
+
22
+ ```bash
23
+ pip install darkprofiler
24
+ ```
25
+
26
+ ### Install with conda (bioconda)
27
+
28
+ ```bash
29
+ conda install bioconda::darkprofiler
30
+ ```
31
+
32
+ ---
33
+
34
+ ## Reference genome
35
+
36
+ DarkProfiler supports human and mouse reference genomes.
37
+
38
+ Supported genome assemblies are:
39
+
40
+ ```
41
+ hg19 (GENCODE release 19)
42
+ hg38 (GENCODE release 37)
43
+ mm10 (GENCODE release M19)
44
+ mm39 (GENCODE release M37)
45
+ ```
46
+
47
+ ---
48
+
49
+ ## Command-line usage
50
+
51
+ ### Download reference data
52
+
53
+ ```bash
54
+ darkprofiler download hg38
55
+ ```
56
+
57
+ ### Run classification
58
+
59
+ ```bash
60
+ darkprofiler run hg38 peptides.fa output_dir
61
+ ```
62
+
63
+ Optional flags:
64
+
65
+ ```
66
+ --vcf-path FILE
67
+ --database-path DIR
68
+ --num-threads N
69
+ ```
70
+
71
+ ---
72
+
73
+ ## Python API
74
+
75
+ ```python
76
+ from darkprofiler.run import classify_peptides
77
+
78
+ classify_peptides(
79
+ reference="hg38",
80
+ peptide_fasta="peptides.fa",
81
+ output_dir="output",
82
+ vcf_path=None,
83
+ database_path=None,
84
+ num_threads=4
85
+ )
86
+ ```
87
+
88
+ ---
89
+
90
+ ## Outputs
91
+
92
+ - canonicalProteome.fa
93
+ - alternativeSplicing.fa
94
+ - neoantigen.fa
95
+ - alternativeReadingFrame.fa
96
+ - aminoAcidMisincorporation.fa
97
+ - unknown.fa
98
+ - pieChart.tsv
99
+ - pieChart.pdf
100
+
101
+ ---
102
+
103
+ ## License
104
+
105
+ MIT License
106
+ Copyright (c) 2025 Hanjun Lee, Stephen J. Elledge
@@ -0,0 +1,37 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "darkprofiler"
7
+ version = "0.1.0"
8
+ description = "DarkProfiler: Alignment and Classification of Peptides from Reference-Independent De Novo Peptide Sequencing Experiments."
9
+ readme = "README.md"
10
+ requires-python = ">=3.7"
11
+ license = { text = "MIT" }
12
+ authors = [
13
+ { name = "Hanjun Lee", email = "hanjun@alum.mit.edu" }
14
+ ]
15
+ keywords = ["proteomics", "immunopeptidomics", "neoantigen", "bioinformatics"]
16
+ classifiers = [
17
+ "Programming Language :: Python :: 3",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Operating System :: OS Independent",
20
+ "Intended Audience :: Science/Research",
21
+ "Topic :: Scientific/Engineering :: Bio-Informatics"
22
+ ]
23
+
24
+ dependencies = [
25
+ "biopython>=1.78",
26
+ "matplotlib>=3.3"
27
+ ]
28
+
29
+ [project.scripts]
30
+ darkprofiler = "darkprofiler.cli:main"
31
+
32
+ [tool.setuptools.packages.find]
33
+ where = ["src"]
34
+
35
+ [tool.setuptools.package-data]
36
+ darkprofiler = ["genome/*/*"]
37
+
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,6 @@
1
+ from .run import classify_peptides
2
+
3
+ __all__ = ["classify_peptides"]
4
+
5
+ __version__ = "0.1.0"
6
+
@@ -0,0 +1,142 @@
1
+ import argparse
2
+ import os
3
+ import sys
4
+ import zipfile
5
+ from pathlib import Path
6
+ from urllib.request import urlopen
7
+
8
+ from .run import classify_peptides
9
+
10
# Genome assemblies accepted by the `download` and `run` sub-commands.
SUPPORTED_REFERENCES = (
    "hg19",
    "hg38",
    "mm10",
    "mm39",
)

# Reference Zip Files from Elledge Lab: base URL under which the
# per-assembly archives (`darkprofiler_<assembly>.zip`) are published.
URL_PREFIX = "https://elledge.hms.harvard.edu/wp-content/uploads/2025/12/"
14
+
15
+
16
def _get_package_root() -> Path:
    """Return the absolute directory that contains this module.

    In the source tree this is ``src/darkprofiler``.
    """
    module_file = Path(__file__).resolve()
    return module_file.parent
19
+
20
def _download_reference(reference: str, timeout: float = 60.0) -> None:
    """Download and unpack the reference bundle for one genome assembly.

    The archive ``darkprofiler_<reference>.zip`` is fetched from
    ``URL_PREFIX``, written to ``genome/<reference>.zip`` inside the package
    directory, and then extracted into the package directory. The archive
    itself is left on disk after extraction.

    Args:
        reference: Assembly name; compared case-insensitively against
            ``SUPPORTED_REFERENCES``.
        timeout: Seconds to wait on a single blocking network operation
            before the download is abandoned.

    Raises:
        SystemExit: If the assembly is unsupported, the ``genome`` directory
            cannot be created, the download fails, or the archive cannot be
            extracted. The underlying exception is attached as the cause.
    """
    # Local import: shutil is not among this module's top-level imports.
    import shutil

    reference = reference.lower()
    if reference not in SUPPORTED_REFERENCES:
        raise SystemExit(
            f"Unsupported reference '{reference}'. Must be one of: "
            f"{', '.join(SUPPORTED_REFERENCES)}"
        )

    pkg_root = _get_package_root()
    genome_dir = pkg_root / "genome"
    try:
        genome_dir.mkdir(exist_ok=True)
    except OSError as e:
        # e.g. a read-only install location; report it like the other
        # failure modes instead of surfacing a raw traceback.
        raise SystemExit(f"Failed to create {genome_dir}: {e}") from e

    # URL like: {URL_PREFIX}/darkprofiler_hg38.zip
    url = f"{URL_PREFIX.rstrip('/')}/darkprofiler_{reference}.zip"
    zip_path = genome_dir / f"{reference}.zip"

    print(f"[darkprofiler] Downloading {url} ...", file=sys.stderr)
    complete = False
    try:
        # Without a timeout a stalled connection would block forever.
        with urlopen(url, timeout=timeout) as resp, open(zip_path, "wb") as out_fh:
            # Stream copy to disk
            shutil.copyfileobj(resp, out_fh)
        complete = True
    except Exception as e:
        raise SystemExit(f"Failed to download {url}: {e}") from e
    finally:
        # Also runs on KeyboardInterrupt, so an interrupted download never
        # leaves a truncated archive behind.
        if not complete and zip_path.exists():
            zip_path.unlink()

    print(f"[darkprofiler] Extracting to {pkg_root} ...", file=sys.stderr)
    try:
        with zipfile.ZipFile(zip_path, "r") as zf:
            zf.extractall(path=pkg_root)
    except Exception as e:
        raise SystemExit(f"Failed to extract {zip_path}: {e}") from e

    print(
        f"[darkprofiler] Finished. Reference '{reference}' is now available.",
        file=sys.stderr,
    )
62
+
63
def cmd_download(args: argparse.Namespace) -> None:
    """Handle the ``download`` sub-command.

    Fetches the reference bundle named by ``args.reference``.
    """
    requested = args.reference
    _download_reference(requested)
65
+
66
+
67
def cmd_run(args: argparse.Namespace) -> None:
    """Handle the ``run`` sub-command.

    Forwards the parsed command-line options to ``classify_peptides`` as
    keyword arguments.
    """
    options = {
        "reference": args.reference,
        "peptide_fasta": args.peptide_fasta,
        "output_dir": args.output_dir,
        "vcf_path": args.vcf_path,
        "database_path": args.database_path,
        "num_threads": args.num_threads,
    }
    classify_peptides(**options)
76
+
77
+
78
def build_parser() -> argparse.ArgumentParser:
    """Build the ``darkprofiler`` command-line parser.

    Two sub-commands are registered, and one of them is required:

    * ``download REFERENCE`` - dispatches to ``cmd_download``.
    * ``run REFERENCE PEPTIDE_FASTA OUTPUT_DIR [--vcf-path FILE]
      [--database-path DIR] [--num-threads N]`` - dispatches to ``cmd_run``.

    The chosen handler is stored on the parsed namespace as ``func``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        prog="darkprofiler",
        description=(
            "DarkProfiler: classify peptides into canonical, alternative, "
            "mutant, and dark proteome categories."
        ),
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    # ---------------- download ----------------
    p_download = subparsers.add_parser(
        "download",
        help="Download a reference genome bundle (hg19/hg38/mm10/mm39).",
    )
    p_download.add_argument(
        "reference",
        # argparse applies `type` before checking `choices`, so "HG38" is
        # accepted and normalized, matching the case-insensitive handling
        # in the download code.
        type=str.lower,
        choices=SUPPORTED_REFERENCES,
        help="Reference assembly version to download.",
    )
    p_download.set_defaults(func=cmd_download)

    # ---------------- run ----------------
    p_run = subparsers.add_parser(
        "run",
        help="Run DarkProfiler classification pipeline.",
    )
    p_run.add_argument(
        "reference",
        # Same case normalization as for `download`.
        type=str.lower,
        choices=SUPPORTED_REFERENCES,
        help="Reference assembly version to use (must be downloaded first).",
    )
    p_run.add_argument("peptide_fasta", help="Path to peptide FASTA file.")
    p_run.add_argument("output_dir", help="Output directory.")
    p_run.add_argument(
        "--vcf-path",
        default=None,
        help="Optional path to VCF or VCF.GZ file with SNVs.",
    )
    p_run.add_argument(
        "--database-path",
        default=None,
        help=(
            "Optional path to existing database directory containing "
            "canonicalProteome.fa, alternativeSplicing.fa, mutanome.fa, "
            "mutatedCanonicalTranscriptome.fa, mutatedAlternativeTranslatome.fa, "
            "mutatedAlternativeORFeome.fa."
        ),
    )
    p_run.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Threads for amino acid misincorporation search.",
    )
    p_run.set_defaults(func=cmd_run)

    return parser
136
+
137
+
138
def main(argv=None) -> None:
    """Run the ``darkprofiler`` command line.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.
    """
    namespace = build_parser().parse_args(argv)
    # Each sub-command registered its handler as the `func` default.
    handler = namespace.func
    handler(namespace)
142
+