abcupload 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Khadim Gueye — African Bioinformatics Center (ABCOMICS)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,169 @@
1
+ Metadata-Version: 2.4
2
+ Name: abcupload
3
+ Version: 1.0.0
4
+ Summary: ABCOMICS Genomic Data Upload Tool — upload sequencing files to ABCOMICS
5
+ Author-email: Khadim Gueye <bioinformatics.acegid@run.edu.ng>
6
+ License: MIT
7
+ Project-URL: Homepage, https://abcomics.org
8
+ Project-URL: Repository, https://github.com/african-bioinformatics-center/ABCOMICS_site
9
+ Project-URL: Issues, https://github.com/african-bioinformatics-center/ABCOMICS_site/issues
10
+ Keywords: bioinformatics,genomics,upload,ABCOMICS,fastq,bam,vcf
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
23
+ Classifier: Topic :: Utilities
24
+ Requires-Python: >=3.8
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: requests>=2.28
28
+ Requires-Dist: rich>=13.0
29
+ Dynamic: license-file
30
+
31
+ # abcupload
32
+
33
+ **ABCOMICS Genomic Data Upload Tool**
34
+
35
+ A command-line tool to upload sequencing and genomic data files to the
36
+ [ABCOMICS](https://abcomics.org) platform. It handles the full two-step
37
+ upload process automatically: requesting a signed URL from the API, then
38
+ streaming the file directly to secure cloud storage with a live progress bar.
39
+
40
+ ---
41
+
42
+ ## Installation
43
+
44
+ ```bash
45
+ pip install abcupload
46
+ ```
47
+
48
+ Requires Python 3.8 or later. Dependencies (`requests`, `rich`) are installed
49
+ automatically.
50
+
51
+ ---
52
+
53
+ ## Quick start
54
+
55
+ ```bash
56
+ # Set your API key once (contact contact@abcomics.org to receive it)
57
+ export ABCOMICS_API_KEY=your_key_here
58
+
59
+ # Upload a single file
60
+ abcupload -u abc-000001 -p PRJAB00001 -d sample_R1.fastq.gz
61
+
62
+ # Upload all FASTQ files in the current directory
63
+ abcupload -u abc-000001 -p PRJAB00001 -d '*.fastq.gz'
64
+
65
+ # Upload an entire directory (top level only, not recursive; scans for all accepted formats)
66
+ abcupload -u abc-000001 -p PRJAB00001 -d /data/project/
67
+
68
+ # Upload multiple specific files
69
+ abcupload -u abc-000001 -p PRJAB00001 -d file1.bam,file2.bam
70
+
71
+ # Test mode — files are deleted after 24 hours
72
+ abcupload -u abc-000001 -p PRJAB00001 -d '*.fastq.gz' -t
73
+ ```
74
+
75
+ ---
76
+
77
+ ## Usage
78
+
79
+ ```
80
+ abcupload -u USERNAME -p PROJECT_ID -d DATA [OPTIONS]
81
+
82
+ required:
83
+ -u USERNAME ABCOMICS username (e.g. abc-000001)
84
+ -p PROJECT_ID Project ID (e.g. PRJAB00001)
85
+ -d DATA What to upload:
86
+ single file : -d sample.fastq.gz
87
+ comma list : -d file1.fastq.gz,file2.bam
88
+ glob pattern : -d '*.fastq.gz'
89
+ directory : -d /path/to/data/
90
+
91
+ options:
92
+ -k API_KEY API key (alternative to ABCOMICS_API_KEY env var)
93
+ -t Test mode — files deleted after 24 h
94
+ -o Overwrite existing files without asking
95
+ -h, --help Show this help and exit
96
+ -V, --version Show version and exit
97
+ ```
98
+
99
+ ---
100
+
101
+ ## Environment variable
102
+
103
+ Set your API key as an environment variable so you do not have to pass `-k`
104
+ on every command:
105
+
106
+ ```bash
107
+ # Add to ~/.bashrc or ~/.zshrc for persistence
108
+ export ABCOMICS_API_KEY=your_key_here
109
+ ```
110
+
111
+ ---
112
+
113
+ ## Accepted file formats
114
+
115
+ | Format | Extensions |
116
+ |---------|------------|
117
+ | FASTQ | `.fastq`, `.fq`, `.fastq.gz`, `.fq.gz`, `.fastq.bz2`, `.fq.bz2` |
118
+ | FASTA | `.fasta`, `.fa`, `.fna`, `.faa`, `.ffn`, `.frn`, `.fasta.gz`, `.fa.gz`, `.fna.gz`, `.faa.gz` |
119
+ | BAM / SAM / CRAM | `.bam`, `.sam`, `.cram` |
120
+ | Index files | `.bai`, `.crai`, `.csi`, `.tbi` |
121
+ | VCF / BCF | `.vcf`, `.vcf.gz`, `.bcf`, `.bcf.gz` |
122
+ | Annotation | `.gff`, `.gff3`, `.gtf`, `.bed` (and `.gz` variants) |
123
+ | Tabular | `.csv`, `.tsv` |
124
+
125
+ Maximum file size: **500 GB**.
126
+
127
+ ---
128
+
129
+ ## What happens under the hood
130
+
131
+ 1. For each file, `abcupload` sends a POST request to the ABCOMICS API with
132
+ your username, project ID, and filename.
133
+ 2. The API validates the request and returns a time-limited signed URL pointing
134
+ directly to the ABCOMICS cloud storage bucket.
135
+ 3. `abcupload` streams the file to that URL with a live progress bar showing
136
+ speed, percentage, and estimated time remaining.
137
+ 4. If a file with the same name already exists, you are asked whether to
138
+ overwrite it or skip it (or use `-o` to always overwrite).
139
+
140
+ ---
141
+
142
+ ## Development install
143
+
144
+ ```bash
145
+ git clone https://github.com/african-bioinformatics-center/ABCOMICS_site.git
146
+ cd ABCOMICS_site/abcupload
147
+ pip install -e .
148
+ ```
149
+
150
+ Build a distribution:
151
+
152
+ ```bash
153
+ pip install build
154
+ python -m build
155
+ # output: dist/abcupload-1.0.0.tar.gz and dist/abcupload-1.0.0-py3-none-any.whl
156
+ ```
157
+
158
+ ---
159
+
160
+ ## Author
161
+
162
+ **Khadim Gueye** — African Bioinformatics Center (ABCOMICS)
163
+ Contact: [contact@abcomics.org](mailto:contact@abcomics.org)
164
+
165
+ ---
166
+
167
+ ## License
168
+
169
+ [MIT](LICENSE)
@@ -0,0 +1,139 @@
1
+ # abcupload
2
+
3
+ **ABCOMICS Genomic Data Upload Tool**
4
+
5
+ A command-line tool to upload sequencing and genomic data files to the
6
+ [ABCOMICS](https://abcomics.org) platform. It handles the full two-step
7
+ upload process automatically: requesting a signed URL from the API, then
8
+ streaming the file directly to secure cloud storage with a live progress bar.
9
+
10
+ ---
11
+
12
+ ## Installation
13
+
14
+ ```bash
15
+ pip install abcupload
16
+ ```
17
+
18
+ Requires Python 3.8 or later. Dependencies (`requests`, `rich`) are installed
19
+ automatically.
20
+
21
+ ---
22
+
23
+ ## Quick start
24
+
25
+ ```bash
26
+ # Set your API key once (contact contact@abcomics.org to receive it)
27
+ export ABCOMICS_API_KEY=your_key_here
28
+
29
+ # Upload a single file
30
+ abcupload -u abc-000001 -p PRJAB00001 -d sample_R1.fastq.gz
31
+
32
+ # Upload all FASTQ files in the current directory
33
+ abcupload -u abc-000001 -p PRJAB00001 -d '*.fastq.gz'
34
+
35
+ # Upload an entire directory (top level only, not recursive; scans for all accepted formats)
36
+ abcupload -u abc-000001 -p PRJAB00001 -d /data/project/
37
+
38
+ # Upload multiple specific files
39
+ abcupload -u abc-000001 -p PRJAB00001 -d file1.bam,file2.bam
40
+
41
+ # Test mode — files are deleted after 24 hours
42
+ abcupload -u abc-000001 -p PRJAB00001 -d '*.fastq.gz' -t
43
+ ```
44
+
45
+ ---
46
+
47
+ ## Usage
48
+
49
+ ```
50
+ abcupload -u USERNAME -p PROJECT_ID -d DATA [OPTIONS]
51
+
52
+ required:
53
+ -u USERNAME ABCOMICS username (e.g. abc-000001)
54
+ -p PROJECT_ID Project ID (e.g. PRJAB00001)
55
+ -d DATA What to upload:
56
+ single file : -d sample.fastq.gz
57
+ comma list : -d file1.fastq.gz,file2.bam
58
+ glob pattern : -d '*.fastq.gz'
59
+ directory : -d /path/to/data/
60
+
61
+ options:
62
+ -k API_KEY API key (alternative to ABCOMICS_API_KEY env var)
63
+ -t Test mode — files deleted after 24 h
64
+ -o Overwrite existing files without asking
65
+ -h, --help Show this help and exit
66
+ -V, --version Show version and exit
67
+ ```
68
+
69
+ ---
70
+
71
+ ## Environment variable
72
+
73
+ Set your API key as an environment variable so you do not have to pass `-k`
74
+ on every command:
75
+
76
+ ```bash
77
+ # Add to ~/.bashrc or ~/.zshrc for persistence
78
+ export ABCOMICS_API_KEY=your_key_here
79
+ ```
80
+
81
+ ---
82
+
83
+ ## Accepted file formats
84
+
85
+ | Format | Extensions |
86
+ |---------|------------|
87
+ | FASTQ | `.fastq`, `.fq`, `.fastq.gz`, `.fq.gz`, `.fastq.bz2`, `.fq.bz2` |
88
+ | FASTA | `.fasta`, `.fa`, `.fna`, `.faa`, `.ffn`, `.frn`, `.fasta.gz`, `.fa.gz`, `.fna.gz`, `.faa.gz` |
89
+ | BAM / SAM / CRAM | `.bam`, `.sam`, `.cram` |
90
+ | Index files | `.bai`, `.crai`, `.csi`, `.tbi` |
91
+ | VCF / BCF | `.vcf`, `.vcf.gz`, `.bcf`, `.bcf.gz` |
92
+ | Annotation | `.gff`, `.gff3`, `.gtf`, `.bed` (and `.gz` variants) |
93
+ | Tabular | `.csv`, `.tsv` |
94
+
95
+ Maximum file size: **500 GB**.
96
+
97
+ ---
98
+
99
+ ## What happens under the hood
100
+
101
+ 1. For each file, `abcupload` sends a POST request to the ABCOMICS API with
102
+ your username, project ID, and filename.
103
+ 2. The API validates the request and returns a time-limited signed URL pointing
104
+ directly to the ABCOMICS cloud storage bucket.
105
+ 3. `abcupload` streams the file to that URL with a live progress bar showing
106
+ speed, percentage, and estimated time remaining.
107
+ 4. If a file with the same name already exists, you are asked whether to
108
+ overwrite it or skip it (or use `-o` to always overwrite).
109
+
110
+ ---
111
+
112
+ ## Development install
113
+
114
+ ```bash
115
+ git clone https://github.com/african-bioinformatics-center/ABCOMICS_site.git
116
+ cd ABCOMICS_site/abcupload
117
+ pip install -e .
118
+ ```
119
+
120
+ Build a distribution:
121
+
122
+ ```bash
123
+ pip install build
124
+ python -m build
125
+ # output: dist/abcupload-1.0.0.tar.gz and dist/abcupload-1.0.0-py3-none-any.whl
126
+ ```
127
+
128
+ ---
129
+
130
+ ## Author
131
+
132
+ **Khadim Gueye** — African Bioinformatics Center (ABCOMICS)
133
+ Contact: [contact@abcomics.org](mailto:contact@abcomics.org)
134
+
135
+ ---
136
+
137
+ ## License
138
+
139
+ [MIT](LICENSE)
@@ -0,0 +1,4 @@
1
+ """ABCOMICS Genomic Data Upload Tool."""
2
+
3
+ __version__ = "1.0.0"
4
+ __author__ = "Khadim Gueye"
@@ -0,0 +1,3 @@
1
+ from abcupload.cli import main
2
+
3
+ main()
@@ -0,0 +1,387 @@
1
+ # Author: Khadim Gueye
2
+ # ABCOMICS Genomic Data Upload Tool
3
+ # Handles: URL signing (POST) + file upload (PUT), progress, duplicate detection, test mode.
4
+
5
+ from __future__ import annotations
6
+
7
+ import argparse
8
+ import glob
9
+ import os
10
+ import re
11
+ import sys
12
+ from pathlib import Path
13
+ from typing import Generator, Optional
14
+
15
+ import requests
16
+ from rich import box
17
+ from rich.console import Console
18
+ from rich.panel import Panel
19
+ from rich.progress import (
20
+ BarColumn,
21
+ DownloadColumn,
22
+ Progress,
23
+ TextColumn,
24
+ TimeRemainingColumn,
25
+ TransferSpeedColumn,
26
+ )
27
+ from rich.table import Table
28
+ from rich.text import Text
29
+
30
+ from abcupload import __version__
31
+
32
+ PROD_API = "https://abcomics-upload-492456311172.europe-west1.run.app"
33
+ TEST_API = "https://abcomics-test-upload-492456311172.europe-west1.run.app"
34
+
35
+ ALLOWED_SUFFIXES = {
36
+ ".fastq", ".fq", ".fastq.gz", ".fq.gz", ".fastq.bz2", ".fq.bz2",
37
+ ".fasta", ".fa", ".fna", ".faa", ".ffn", ".frn",
38
+ ".fasta.gz", ".fa.gz", ".fna.gz", ".faa.gz",
39
+ ".bam", ".sam", ".cram",
40
+ ".bai", ".crai", ".csi", ".tbi",
41
+ ".vcf", ".vcf.gz", ".bcf", ".bcf.gz",
42
+ ".gff", ".gff3", ".gtf", ".bed",
43
+ ".gff.gz", ".gff3.gz", ".gtf.gz", ".bed.gz",
44
+ ".csv", ".tsv",
45
+ }
46
+
47
+ console = Console()
48
+ err_console = Console(stderr=True)
49
+
50
+
51
+ # ── Helpers ───────────────────────────────────────────────────────────────────
52
+
53
def _has_bio_ext(name: str) -> bool:
    """Tell whether ``name`` ends with one of the accepted file suffixes.

    The check is case-insensitive: ``name`` is lower-cased before it is
    compared against the entries of ``ALLOWED_SUFFIXES``.

    Args:
        name: A file name (not necessarily a full path).

    Returns:
        True when at least one allowed suffix terminates ``name``.
    """
    lowered = name.lower()
    # Only a yes/no answer is needed, so any matching suffix settles it.
    return any(lowered.endswith(suffix) for suffix in ALLOWED_SUFFIXES)
59
+
60
+
61
+ def _human_size(n: int) -> str:
62
+ for unit in ("B", "KB", "MB", "GB", "TB"):
63
+ if n < 1024 or unit == "TB":
64
+ return f"{n:.1f} {unit}" if unit != "B" else f"{n} B"
65
+ n /= 1024 # type: ignore[assignment]
66
+ return str(n)
67
+
68
+
69
+ # ── File resolution ───────────────────────────────────────────────────────────
70
+
71
def resolve_files(data_arg: str) -> list[Path]:
    """Expand the ``-d`` argument into the list of files to upload.

    Two modes are supported:

    * ``data_arg`` names a directory: its direct children (no recursion) that
      are regular files with an accepted suffix are returned, sorted.
    * otherwise ``data_arg`` is split on commas and every part is treated as
      a glob pattern, falling back to a literal path when the pattern matches
      nothing (this covers file names that contain glob metacharacters).
      Files named explicitly this way are not filtered by suffix.

    A warning is printed for a directory with no accepted files and for every
    part that yields no regular file. A file reached through several parts is
    returned once, at its first position.

    Args:
        data_arg: Directory, single path, glob pattern, or comma-separated
            mix of paths and patterns.

    Returns:
        The resolved files; empty when nothing matched.
    """
    # Local import keeps this block self-contained; rich is already a
    # dependency of this module.
    from rich.markup import escape

    directory = Path(data_arg)
    if directory.is_dir():
        files = sorted(
            entry
            for entry in directory.iterdir()
            if entry.is_file() and _has_bio_ext(entry.name)
        )
        if not files:
            # escape(): user text must not be parsed as Rich markup.
            console.print(
                f" [yellow]![/] No bioinformatics files found in: [bold]{escape(data_arg)}[/]"
            )
        return files

    results: list[Path] = []
    seen: set[Path] = set()
    for part in data_arg.split(","):
        part = part.strip()
        if not part:
            continue

        candidates = [Path(match) for match in sorted(glob.glob(part))]
        if not candidates and Path(part).is_file():
            # The pattern matched nothing but is itself an existing file name.
            candidates = [Path(part)]

        matched_files = [candidate for candidate in candidates if candidate.is_file()]
        if not matched_files:
            # Also reached when a pattern matched only directories.
            # escape(): glob character classes such as "[ab]" would otherwise
            # be consumed as markup tags and vanish from the message.
            console.print(f" [yellow]![/] No match: [bold]{escape(part)}[/]")
            continue

        for found in matched_files:
            identity = found.resolve()
            if identity in seen:
                # Same file reached through another part; upload it once.
                continue
            seen.add(identity)
            results.append(found)

    return results
95
+
96
+
97
+ # ── API call — get signed URL ─────────────────────────────────────────────────
98
+
99
def get_upload_url(
    api_url: str,
    api_key: str,
    username: str,
    project_id: str,
    filename: str,
    overwrite: bool = False,
) -> tuple[str, dict]:
    """POST to ``{api_url}/generate-upload-url`` to obtain a signed upload URL.

    Args:
        api_url: Base URL of the upload API.
        api_key: Value sent in the ``X-API-Key`` header.
        username: Sent as ``username`` in the JSON payload.
        project_id: Sent as ``project_id`` in the JSON payload.
        filename: Sent as ``filename`` in the JSON payload.
        overwrite: When true, ``"overwrite": True`` is added to the payload.

    Returns:
        ``(status, data)`` where ``status`` is ``'ok'`` (HTTP 200),
        ``'exists'`` (HTTP 409) or ``'error'`` (anything else, including
        transport failures). ``data`` is always a dict; for ``'error'`` it
        always carries an ``"error"`` message.
    """
    payload: dict = {"username": username, "project_id": project_id, "filename": filename}
    if overwrite:
        payload["overwrite"] = True

    try:
        resp = requests.post(
            f"{api_url}/generate-upload-url",
            json=payload,
            headers={"X-API-Key": api_key},
            timeout=30,
        )
    # Timeout must be tested before ConnectionError: ConnectTimeout derives
    # from both, and would otherwise be reported as a connection failure.
    except requests.exceptions.Timeout:
        return "error", {"error": "Request timed out (30 s)."}
    except requests.exceptions.ConnectionError:
        return "error", {"error": "Connection failed. Check your internet connection."}
    except requests.exceptions.RequestException as exc:
        return "error", {"error": str(exc)}

    try:
        data = resp.json()
    except ValueError:
        data = {"error": f"Non-JSON response (HTTP {resp.status_code})"}

    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, string, null, ...): callers
        # rely on dict methods, so normalise it into an error payload.
        data = {"error": f"Unexpected response (HTTP {resp.status_code})"}

    if resp.status_code == 409:
        return "exists", data
    if resp.status_code != 200:
        # Guarantee a message even when the server body carries none.
        data.setdefault("error", f"HTTP {resp.status_code}")
        return "error", data
    return "ok", data
139
+
140
+
141
+ # ── File upload — PUT to GCS ──────────────────────────────────────────────────
142
+
143
def upload_to_gcs(file_path: Path, upload_url: str) -> tuple[bool, str]:
    """Stream a file to ``upload_url`` with HTTP PUT while showing progress.

    The file is read in 64 KiB chunks; each chunk advances a transient Rich
    progress bar before it is handed to ``requests``.

    Args:
        file_path: Local file to send.
        upload_url: Signed URL that accepts the PUT.

    Returns:
        ``(True, "")`` when the server answers HTTP 200, otherwise
        ``(False, message)`` where ``message`` is the exception text or
        ``"HTTP <status>"``. Local read errors are reported the same way
        instead of being raised.
    """
    try:
        file_size = file_path.stat().st_size
        # Opened here rather than inside the generator so the handle is closed
        # deterministically even when the PUT aborts mid-stream.
        handle = open(file_path, "rb")
    except OSError as exc:
        return False, f"Cannot read file: {exc}"

    with handle, Progress(
        TextColumn(" "),
        BarColumn(bar_width=34),
        "[progress.percentage]{task.percentage:>3.0f}%",
        DownloadColumn(),
        TransferSpeedColumn(),
        TimeRemainingColumn(),
        console=console,
        transient=True,
    ) as progress:
        task = progress.add_task("upload", total=file_size)

        def _reader() -> Generator[bytes, None, None]:
            # Yield the file chunk by chunk, counting bytes as they are read.
            while True:
                chunk = handle.read(65536)
                if not chunk:
                    break
                progress.advance(task, len(chunk))
                yield chunk

        try:
            # NOTE(review): the body is a generator and Content-Length is set
            # by hand; requests may also add "Transfer-Encoding: chunked" for
            # generator bodies — confirm the request framing against the
            # installed requests/urllib3 versions.
            resp = requests.put(
                upload_url,
                data=_reader(),
                headers={
                    "Content-Type": "application/octet-stream",
                    "Content-Length": str(file_size),
                },
                # No timeout: the transfer duration depends on the file size.
                timeout=None,
            )
        except (requests.exceptions.RequestException, OSError) as exc:
            return False, str(exc)

    if resp.status_code == 200:
        return True, ""
    return False, f"HTTP {resp.status_code}"
184
+
185
+
186
+ # ── Per-file orchestration ────────────────────────────────────────────────────
187
+
188
def upload_one(
    file_path: Path,
    username: str,
    project_id: str,
    api_key: str,
    test_mode: bool,
    overwrite: bool,
    index: int,
    total: int,
) -> str:
    """Upload a single file and report the outcome on the console.

    Steps: request a signed URL, ask the user what to do when the API reports
    that the file already exists, then stream the file to the signed URL.

    Args:
        file_path: Local file to upload; only its base name is sent to the API.
        username: Account name forwarded to the API.
        project_id: Project identifier forwarded to the API.
        api_key: API key forwarded to the API.
        test_mode: Selects ``TEST_API`` instead of ``PROD_API``.
        overwrite: Forwarded to the first URL request.
        index: 1-based position of this file, shown as ``[index/total]``.
        total: Number of files in the batch.

    Returns:
        ``'ok'``, ``'fail'`` or ``'skip'``.
    """
    # Local import keeps this block self-contained; rich is already a
    # dependency of this module. Every piece of external text (file names,
    # server messages) is escaped so it cannot be parsed as Rich markup.
    from rich.markup import escape

    api_url = TEST_API if test_mode else PROD_API
    filename = file_path.name

    try:
        size_str = _human_size(file_path.stat().st_size)
    except OSError as exc:
        # The file vanished or became unreadable after it was resolved:
        # report it as a failed file rather than aborting the batch.
        console.print(f" [{index}/{total}] [bold]{escape(filename)}[/]")
        console.print(f" [red]✗[/] {escape(str(exc))}")
        return "fail"

    console.print(f" [{index}/{total}] [bold]{escape(filename)}[/] [dim]{size_str}[/]")

    status, data = get_upload_url(api_url, api_key, username, project_id, filename, overwrite)

    if status == "exists":
        console.print(" [yellow]![/] File already exists in your project directory.")
        try:
            answer = input(" Overwrite? [y/N] ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            # No usable answer (closed stdin or Ctrl-C): treat as "no".
            answer = "n"
        if answer == "y":
            status, data = get_upload_url(
                api_url, api_key, username, project_id, filename, overwrite=True
            )
        else:
            console.print(" [dim]−[/] Skipped.")
            return "skip"

    if not isinstance(data, dict):
        # Defensive: the lookups below need a mapping.
        data = {}

    if status != "ok":
        err_msg = str(data.get("error", "Unknown error"))
        console.print(f" [red]✗[/] {escape(err_msg)}")
        return "fail"

    upload_url = data.get("upload_url", "")
    if not upload_url:
        console.print(" [red]✗[/] Could not retrieve upload URL.")
        return "fail"

    ok, err_msg = upload_to_gcs(file_path, upload_url)

    if ok:
        console.print(" [green]✓[/] Uploaded successfully")
        if test_mode and "auto_delete" in data:
            console.print(f" [dim]{escape(str(data['auto_delete']))}[/]")
        return "ok"

    console.print(f" [red]✗[/] Upload failed: {escape(str(err_msg))}")
    return "fail"
241
+
242
+
243
+ # ── Display helpers ───────────────────────────────────────────────────────────
244
+
245
def print_banner(username: str, project_id: str, test_mode: bool) -> None:
    """Print the framed header shown once before the uploads start.

    The panel lists the user, the project and the active mode, followed by
    one empty line.

    Args:
        username: Value shown in the "User" row.
        project_id: Value shown in the "Project" row.
        test_mode: Chooses between the test and production mode labels.
    """
    if test_mode:
        mode_label = Text("TEST · files deleted after 24 h", style="bold yellow")
    else:
        mode_label = Text("Production", style="bold green")

    # Two-column layout: dimmed caption on the left, value on the right.
    layout = Table.grid(padding=(0, 2))
    layout.add_column(style="dim")
    layout.add_column()
    for caption, value in (
        ("User", f"[bold]{username}[/]"),
        ("Project", f"[bold]{project_id}[/]"),
        ("Mode", mode_label),
    ):
        layout.add_row(caption, value)

    header = Panel(
        layout,
        title=f"[bold blue]ABCOMICS[/] Genomic Upload [dim]v{__version__}[/]",
        border_style="blue",
        padding=(0, 1),
    )
    console.print(header)
    console.print()
264
+
265
+
266
def print_summary(results: dict) -> None:
    """Print the end-of-run table and a one-line verdict.

    Args:
        results: Counters keyed by ``"ok"``, ``"fail"`` and ``"skip"``.
            The total shown is the sum of all values in the mapping.
    """
    summary = Table(box=box.SIMPLE, show_header=False, padding=(0, 1))
    summary.add_column(style="dim", width=10)
    summary.add_column()
    summary.add_row("Total", f"{sum(results.values())} file(s)")

    # A row is only shown for outcomes that actually occurred.
    for key, caption, colour in (
        ("ok", "Uploaded", "green"),
        ("fail", "Failed", "red"),
        ("skip", "Skipped", "dim"),
    ):
        count = results[key]
        if count:
            summary.add_row(caption, f"[{colour}]{count}[/]")

    console.print()
    console.rule(style="dim")
    console.print(summary)
    console.rule(style="dim")
    console.print()

    # Failures take precedence over successes in the closing message.
    if results["fail"]:
        verdict = "[red]Some files failed to upload. Check errors above.[/]\n"
    elif results["ok"]:
        verdict = "[green]All files uploaded successfully.[/]\n"
    else:
        verdict = "[yellow]No files were uploaded.[/]\n"
    console.print(verdict)
290
+
291
+
292
+ # ── Argument parser ───────────────────────────────────────────────────────────
293
+
294
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for ``abcupload``.

    Required arguments (``-u``, ``-p``, ``-d``) and optional ones (``-k``,
    ``-t``, ``-o``, ``-h/--help``, ``-V/--version``) are placed in two named
    groups. The automatic help flag is disabled so that ``-h`` can be listed
    in the "options" group. The default for ``-k`` is read from the
    ``ABCOMICS_API_KEY`` environment variable when the parser is built.

    Returns:
        The configured parser.
    """
    # NOTE(review): whitespace inside the epilog is reproduced exactly as it
    # appears in the reviewed text — confirm alignment against upstream.
    epilog_text = "\n".join(
        (
            "examples:",
            "abcupload -u abc-000001 -p PRJAB00001 -d sample_R1.fastq.gz",
            "abcupload -u abc-000001 -p PRJAB00001 -d '*.fastq.gz'",
            "abcupload -u abc-000001 -p PRJAB00001 -d file1.bam,file2.bam -o",
            "abcupload -u abc-000001 -p PRJAB00001 -d /path/to/data/ -t",
            "",
            "environment variable:",
            "ABCOMICS_API_KEY your API key (alternative to -k)",
            "export ABCOMICS_API_KEY=your_key_here",
            "",  # yields the trailing newline
        )
    )

    cli = argparse.ArgumentParser(
        prog="abcupload",
        description="ABCOMICS Genomic Data Upload Tool",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog_text,
        add_help=False,
    )

    required_group = cli.add_argument_group("required")
    for flag, destination, placeholder, description in (
        ("-u", "username", "USERNAME", "ABCOMICS username"),
        ("-p", "project_id", "PROJECT_ID", "project ID (e.g. PRJAB00001)"),
        ("-d", "data", "DATA", "file(s): single file | comma-list | glob pattern | directory"),
    ):
        required_group.add_argument(
            flag, dest=destination, metavar=placeholder, required=True, help=description
        )

    options_group = cli.add_argument_group("options")
    env_key = os.environ.get("ABCOMICS_API_KEY", "")
    options_group.add_argument(
        "-k",
        dest="api_key",
        metavar="API_KEY",
        default=env_key,
        help="API key (or: export ABCOMICS_API_KEY=...)",
    )
    options_group.add_argument(
        "-t", dest="test", action="store_true", help="test mode — files deleted after 24 h"
    )
    options_group.add_argument(
        "-o", dest="overwrite", action="store_true", help="overwrite existing files without asking"
    )
    options_group.add_argument("-h", "--help", action="help", help="show this help and exit")
    options_group.add_argument(
        "-V", "--version", action="version", version=f"%(prog)s {__version__}"
    )

    return cli
339
+
340
+
341
+ # ── Entry point ───────────────────────────────────────────────────────────────
342
+
343
def main() -> None:
    """Command-line entry point: validate the arguments, then upload each file.

    Exits with status 1 when the API key is missing, when the project ID is
    not ``PRJAB`` followed by ASCII digits, when no file could be resolved,
    or when at least one upload failed; exits with status 0 otherwise.
    """
    # Local import keeps this block self-contained; rich is already a
    # dependency of this module.
    from rich.markup import escape

    parser = build_parser()
    args = parser.parse_args()

    if not args.api_key:
        err_console.print(
            "[red]Error:[/] API key required.\n"
            " Use [bold]-k KEY[/] or set [bold]ABCOMICS_API_KEY[/] environment variable.\n"
            " Contact [bold]contact@abcomics.org[/] if you have not received your key."
        )
        sys.exit(1)

    # fullmatch + [0-9]: "$" would tolerate a trailing newline and "\d" would
    # accept non-ASCII digits, neither of which is a valid project ID here.
    if not re.fullmatch(r"PRJAB[0-9]+", args.project_id):
        err_console.print(
            "[red]Error:[/] PROJECT_ID must start with [bold]PRJAB[/] followed by digits "
            "(e.g. [bold]PRJAB00001[/])."
        )
        sys.exit(1)

    files = resolve_files(args.data)
    if not files:
        # escape(): the argument may contain glob brackets, which Rich would
        # otherwise interpret as markup tags.
        err_console.print(
            f"[red]Error:[/] No valid files found in: [bold]{escape(args.data)}[/]"
        )
        sys.exit(1)

    print_banner(args.username, args.project_id, args.test)
    console.print(f" Found [bold]{len(files)}[/] file(s) to upload:\n")

    results = {"ok": 0, "fail": 0, "skip": 0}

    for i, fp in enumerate(files, 1):
        outcome = upload_one(
            file_path=fp,
            username=args.username,
            project_id=args.project_id,
            api_key=args.api_key,
            test_mode=args.test,
            overwrite=args.overwrite,
            index=i,
            total=len(files),
        )
        results[outcome] += 1
        console.print()

    print_summary(results)
    # A non-zero status lets shell scripts detect partial failure.
    sys.exit(1 if results["fail"] > 0 else 0)
@@ -0,0 +1,169 @@
1
+ Metadata-Version: 2.4
2
+ Name: abcupload
3
+ Version: 1.0.0
4
+ Summary: ABCOMICS Genomic Data Upload Tool — upload sequencing files to ABCOMICS
5
+ Author-email: Khadim Gueye <bioinformatics.acegid@run.edu.ng>
6
+ License: MIT
7
+ Project-URL: Homepage, https://abcomics.org
8
+ Project-URL: Repository, https://github.com/african-bioinformatics-center/ABCOMICS_site
9
+ Project-URL: Issues, https://github.com/african-bioinformatics-center/ABCOMICS_site/issues
10
+ Keywords: bioinformatics,genomics,upload,ABCOMICS,fastq,bam,vcf
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
23
+ Classifier: Topic :: Utilities
24
+ Requires-Python: >=3.8
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: requests>=2.28
28
+ Requires-Dist: rich>=13.0
29
+ Dynamic: license-file
30
+
31
+ # abcupload
32
+
33
+ **ABCOMICS Genomic Data Upload Tool**
34
+
35
+ A command-line tool to upload sequencing and genomic data files to the
36
+ [ABCOMICS](https://abcomics.org) platform. It handles the full two-step
37
+ upload process automatically: requesting a signed URL from the API, then
38
+ streaming the file directly to secure cloud storage with a live progress bar.
39
+
40
+ ---
41
+
42
+ ## Installation
43
+
44
+ ```bash
45
+ pip install abcupload
46
+ ```
47
+
48
+ Requires Python 3.8 or later. Dependencies (`requests`, `rich`) are installed
49
+ automatically.
50
+
51
+ ---
52
+
53
+ ## Quick start
54
+
55
+ ```bash
56
+ # Set your API key once (contact contact@abcomics.org to receive it)
57
+ export ABCOMICS_API_KEY=your_key_here
58
+
59
+ # Upload a single file
60
+ abcupload -u abc-000001 -p PRJAB00001 -d sample_R1.fastq.gz
61
+
62
+ # Upload all FASTQ files in the current directory
63
+ abcupload -u abc-000001 -p PRJAB00001 -d '*.fastq.gz'
64
+
65
+ # Upload an entire directory (top level only, not recursive; scans for all accepted formats)
66
+ abcupload -u abc-000001 -p PRJAB00001 -d /data/project/
67
+
68
+ # Upload multiple specific files
69
+ abcupload -u abc-000001 -p PRJAB00001 -d file1.bam,file2.bam
70
+
71
+ # Test mode — files are deleted after 24 hours
72
+ abcupload -u abc-000001 -p PRJAB00001 -d '*.fastq.gz' -t
73
+ ```
74
+
75
+ ---
76
+
77
+ ## Usage
78
+
79
+ ```
80
+ abcupload -u USERNAME -p PROJECT_ID -d DATA [OPTIONS]
81
+
82
+ required:
83
+ -u USERNAME ABCOMICS username (e.g. abc-000001)
84
+ -p PROJECT_ID Project ID (e.g. PRJAB00001)
85
+ -d DATA What to upload:
86
+ single file : -d sample.fastq.gz
87
+ comma list : -d file1.fastq.gz,file2.bam
88
+ glob pattern : -d '*.fastq.gz'
89
+ directory : -d /path/to/data/
90
+
91
+ options:
92
+ -k API_KEY API key (alternative to ABCOMICS_API_KEY env var)
93
+ -t Test mode — files deleted after 24 h
94
+ -o Overwrite existing files without asking
95
+ -h, --help Show this help and exit
96
+ -V, --version Show version and exit
97
+ ```
98
+
99
+ ---
100
+
101
+ ## Environment variable
102
+
103
+ Set your API key as an environment variable so you do not have to pass `-k`
104
+ on every command:
105
+
106
+ ```bash
107
+ # Add to ~/.bashrc or ~/.zshrc for persistence
108
+ export ABCOMICS_API_KEY=your_key_here
109
+ ```
110
+
111
+ ---
112
+
113
+ ## Accepted file formats
114
+
115
+ | Format | Extensions |
116
+ |---------|------------|
117
+ | FASTQ | `.fastq`, `.fq`, `.fastq.gz`, `.fq.gz`, `.fastq.bz2`, `.fq.bz2` |
118
+ | FASTA | `.fasta`, `.fa`, `.fna`, `.faa`, `.ffn`, `.frn`, `.fasta.gz`, `.fa.gz`, `.fna.gz`, `.faa.gz` |
119
+ | BAM / SAM / CRAM | `.bam`, `.sam`, `.cram` |
120
+ | Index files | `.bai`, `.crai`, `.csi`, `.tbi` |
121
+ | VCF / BCF | `.vcf`, `.vcf.gz`, `.bcf`, `.bcf.gz` |
122
+ | Annotation | `.gff`, `.gff3`, `.gtf`, `.bed` (and `.gz` variants) |
123
+ | Tabular | `.csv`, `.tsv` |
124
+
125
+ Maximum file size: **500 GB**.
126
+
127
+ ---
128
+
129
+ ## What happens under the hood
130
+
131
+ 1. For each file, `abcupload` sends a POST request to the ABCOMICS API with
132
+ your username, project ID, and filename.
133
+ 2. The API validates the request and returns a time-limited signed URL pointing
134
+ directly to the ABCOMICS cloud storage bucket.
135
+ 3. `abcupload` streams the file to that URL with a live progress bar showing
136
+ speed, percentage, and estimated time remaining.
137
+ 4. If a file with the same name already exists, you are asked whether to
138
+ overwrite it or skip it (or use `-o` to always overwrite).
139
+
140
+ ---
141
+
142
+ ## Development install
143
+
144
+ ```bash
145
+ git clone https://github.com/african-bioinformatics-center/ABCOMICS_site.git
146
+ cd ABCOMICS_site/abcupload
147
+ pip install -e .
148
+ ```
149
+
150
+ Build a distribution:
151
+
152
+ ```bash
153
+ pip install build
154
+ python -m build
155
+ # output: dist/abcupload-1.0.0.tar.gz and dist/abcupload-1.0.0-py3-none-any.whl
156
+ ```
157
+
158
+ ---
159
+
160
+ ## Author
161
+
162
+ **Khadim Gueye** — African Bioinformatics Center (ABCOMICS)
163
+ Contact: [contact@abcomics.org](mailto:contact@abcomics.org)
164
+
165
+ ---
166
+
167
+ ## License
168
+
169
+ [MIT](LICENSE)
@@ -0,0 +1,12 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ abcupload/__init__.py
5
+ abcupload/__main__.py
6
+ abcupload/cli.py
7
+ abcupload.egg-info/PKG-INFO
8
+ abcupload.egg-info/SOURCES.txt
9
+ abcupload.egg-info/dependency_links.txt
10
+ abcupload.egg-info/entry_points.txt
11
+ abcupload.egg-info/requires.txt
12
+ abcupload.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ abcupload = abcupload.cli:main
@@ -0,0 +1,2 @@
1
+ requests>=2.28
2
+ rich>=13.0
@@ -0,0 +1 @@
1
+ abcupload
@@ -0,0 +1,44 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "abcupload"
7
+ version = "1.0.0"
8
+ description = "ABCOMICS Genomic Data Upload Tool — upload sequencing files to ABCOMICS"
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ authors = [{ name = "Khadim Gueye", email = "bioinformatics.acegid@run.edu.ng" }]
12
+ keywords = ["bioinformatics", "genomics", "upload", "ABCOMICS", "fastq", "bam", "vcf"]
13
+ classifiers = [
14
+ "Development Status :: 4 - Beta",
15
+ "Environment :: Console",
16
+ "Intended Audience :: Science/Research",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.8",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
26
+ "Topic :: Utilities",
27
+ ]
28
+ requires-python = ">=3.8"
29
+ dependencies = [
30
+ "requests>=2.28",
31
+ "rich>=13.0",
32
+ ]
33
+
34
+ [project.scripts]
35
+ abcupload = "abcupload.cli:main"
36
+
37
+ [project.urls]
38
+ Homepage = "https://abcomics.org"
39
+ Repository = "https://github.com/african-bioinformatics-center/ABCOMICS_site"
40
+ Issues = "https://github.com/african-bioinformatics-center/ABCOMICS_site/issues"
41
+
42
+ [tool.setuptools.packages.find]
43
+ where = ["."]
44
+ include = ["abcupload*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+