abcupload 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abcupload/__init__.py +4 -0
- abcupload/__main__.py +3 -0
- abcupload/cli.py +387 -0
- abcupload-1.0.0.dist-info/METADATA +169 -0
- abcupload-1.0.0.dist-info/RECORD +9 -0
- abcupload-1.0.0.dist-info/WHEEL +5 -0
- abcupload-1.0.0.dist-info/entry_points.txt +2 -0
- abcupload-1.0.0.dist-info/licenses/LICENSE +21 -0
- abcupload-1.0.0.dist-info/top_level.txt +1 -0
abcupload/__init__.py
ADDED
abcupload/__main__.py
ADDED
abcupload/cli.py
ADDED
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
# Author: Khadim Gueye
|
|
2
|
+
# ABCOMICS Genomic Data Upload Tool
|
|
3
|
+
# Handles: URL signing (POST) + file upload (PUT), progress, duplicate detection, test mode.
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import argparse
|
|
8
|
+
import glob
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Generator, Optional
|
|
14
|
+
|
|
15
|
+
import requests
|
|
16
|
+
from rich import box
|
|
17
|
+
from rich.console import Console
|
|
18
|
+
from rich.panel import Panel
|
|
19
|
+
from rich.progress import (
|
|
20
|
+
BarColumn,
|
|
21
|
+
DownloadColumn,
|
|
22
|
+
Progress,
|
|
23
|
+
TextColumn,
|
|
24
|
+
TimeRemainingColumn,
|
|
25
|
+
TransferSpeedColumn,
|
|
26
|
+
)
|
|
27
|
+
from rich.table import Table
|
|
28
|
+
from rich.text import Text
|
|
29
|
+
|
|
30
|
+
from abcupload import __version__
|
|
31
|
+
|
|
32
|
+
# Base URLs of the upload API; upload_one() picks TEST_API in test mode and
# PROD_API otherwise.
PROD_API = "https://abcomics-upload-492456311172.europe-west1.run.app"
TEST_API = "https://abcomics-test-upload-492456311172.europe-west1.run.app"

# Lower-case file-name suffixes accepted when scanning a directory.
# Written as whitespace-separated groups, one group per format family.
ALLOWED_SUFFIXES = set(
    (
        ".fastq .fq .fastq.gz .fq.gz .fastq.bz2 .fq.bz2 "  # FASTQ
        ".fasta .fa .fna .faa .ffn .frn "                  # FASTA
        ".fasta.gz .fa.gz .fna.gz .faa.gz "                # FASTA (gzip)
        ".bam .sam .cram "                                 # alignments
        ".bai .crai .csi .tbi "                            # index files
        ".vcf .vcf.gz .bcf .bcf.gz "                       # variants
        ".gff .gff3 .gtf .bed "                            # annotation
        ".gff.gz .gff3.gz .gtf.gz .bed.gz "                # annotation (gzip)
        ".csv .tsv"                                        # tabular
    ).split()
)
|
|
46
|
+
|
|
47
|
+
# Shared rich console used for regular output (progress, status lines, summary).
console = Console()
# Separate rich console that writes to stderr; used for fatal "Error:" messages.
err_console = Console(stderr=True)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# ── Helpers ───────────────────────────────────────────────────────────────────
|
|
52
|
+
|
|
53
|
+
def _has_bio_ext(name: str) -> bool:
    """Return True if *name* ends with one of ``ALLOWED_SUFFIXES``.

    The comparison is case-insensitive on *name*: it is lower-cased before
    being matched against the (lower-case) suffix set.

    Args:
        name: File name (not a full path is required; only the ending matters).

    Returns:
        True when the lower-cased name ends with any allowed suffix.
    """
    # str.endswith accepts a tuple of candidates, so there is no need to sort
    # the suffixes by length on every call: only a yes/no answer is returned.
    return name.lower().endswith(tuple(ALLOWED_SUFFIXES))
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _human_size(n: int) -> str:
    """Format a byte count as a short human-readable string.

    Units are 1024-based and labelled B, KB, MB, GB, TB. Plain bytes are
    printed as an integer (``"512 B"``); larger units use one decimal place
    (``"1.5 KB"``). Values beyond the TB range are still expressed in TB.

    Args:
        n: Size in bytes.

    Returns:
        The formatted size, e.g. ``"0 B"``, ``"1.0 MB"``, ``"1024.0 TB"``.
    """
    if n < 1024:
        return f"{n} B"

    size = n / 1024
    for unit in ("KB", "MB", "GB"):
        # Compare the value as it will be *displayed*: 1023.99 KB would print
        # as "1024.0 KB", so carry it over to the next unit instead.
        if round(size, 1) < 1024:
            return f"{size:.1f} {unit}"
        size /= 1024
    # TB is the largest unit; anything bigger is still reported in TB.
    return f"{size:.1f} TB"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ── File resolution ───────────────────────────────────────────────────────────
|
|
70
|
+
|
|
71
|
+
def resolve_files(data_arg: str) -> list[Path]:
    """Return the files selected by *data_arg*.

    *data_arg* is interpreted as follows:

    * an existing directory: every regular file directly inside it whose name
      passes ``_has_bio_ext`` (sorted, non-recursive);
    * otherwise a comma-separated list whose parts are each expanded with
      ``glob.glob``; a part that matches nothing but names an existing file
      literally (e.g. a name containing ``[``) is accepted as-is.

    Only regular files are returned. A file selected more than once (for
    example ``a.bam,*.bam``) is returned a single time, at its first position,
    so it is not uploaded twice. A warning is printed for an empty directory
    scan and for every part that yields no regular file.

    Args:
        data_arg: Directory path, glob pattern, file path, or comma-separated
            combination of patterns/paths.

    Returns:
        The selected paths, possibly empty.
    """

    def _warn(label: str, value: str) -> None:
        # Imported lazily so the happy path needs nothing beyond the stdlib.
        # escape() keeps brackets in user input (e.g. the glob "[ab]*.bam")
        # from being interpreted as rich markup tags.
        from rich.markup import escape

        console.print(f" [yellow]![/] {label}: [bold]{escape(value)}[/]")

    path = Path(data_arg)

    if path.is_dir():
        files = sorted(f for f in path.iterdir() if f.is_file() and _has_bio_ext(f.name))
        if not files:
            _warn("No bioinformatics files found in", data_arg)
        return files

    results: list[Path] = []
    seen: set[str] = set()  # absolute paths already collected
    for part in data_arg.split(","):
        part = part.strip()
        if not part:
            continue

        candidates = [Path(m) for m in sorted(glob.glob(part))]
        if not candidates and Path(part).is_file():
            # Literal file name that glob could not match (contains wildcards).
            candidates = [Path(part)]

        matched = [p for p in candidates if p.is_file()]
        if not matched:
            # Covers both "nothing matched" and "matched only directories".
            _warn("No match", part)
            continue

        for p in matched:
            key = os.path.abspath(p)
            if key not in seen:
                seen.add(key)
                results.append(p)

    return results
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# ── API call — get signed URL ─────────────────────────────────────────────────
|
|
98
|
+
|
|
99
|
+
def get_upload_url(
    api_url: str,
    api_key: str,
    username: str,
    project_id: str,
    filename: str,
    overwrite: bool = False,
) -> tuple[str, dict]:
    """Ask the API for a signed upload URL for *filename*.

    Sends ``POST {api_url}/generate-upload-url`` with a JSON payload holding
    ``username``, ``project_id`` and ``filename`` (plus ``overwrite: true``
    when requested), authenticated through the ``X-API-Key`` header, with a
    30-second timeout.

    Args:
        api_url: Base URL of the upload API (no trailing slash expected).
        api_key: Value sent in the ``X-API-Key`` header.
        username: Account name sent in the payload.
        project_id: Project identifier sent in the payload.
        filename: Name of the file to be uploaded.
        overwrite: When True, ``"overwrite": True`` is added to the payload.

    Returns:
        ``(status, data)`` where *status* is:

        * ``"ok"``     - HTTP 200; *data* is the decoded JSON body;
        * ``"exists"`` - HTTP 409; *data* is the decoded JSON body;
        * ``"error"``  - any other status or a transport failure; *data*
          always contains an ``"error"`` message.

        *data* is always a dict, even when the server answers with a body
        that is not a JSON object. No exception from ``requests`` escapes.
    """
    payload: dict = {"username": username, "project_id": project_id, "filename": filename}
    if overwrite:
        payload["overwrite"] = True

    try:
        resp = requests.post(
            f"{api_url}/generate-upload-url",
            json=payload,
            headers={"X-API-Key": api_key},
            timeout=30,
        )
    except requests.exceptions.ConnectionError:
        return "error", {"error": "Connection failed. Check your internet connection."}
    except requests.exceptions.Timeout:
        return "error", {"error": "Request timed out (30 s)."}
    except requests.exceptions.RequestException as exc:
        return "error", {"error": str(exc)}

    try:
        data = resp.json()
    except ValueError:
        data = {"error": f"Non-JSON response (HTTP {resp.status_code})"}

    # Valid JSON is not necessarily an object (it may be a list, string, ...).
    # Callers use data.get(...), so normalise anything else to an error dict.
    if not isinstance(data, dict):
        data = {"error": f"Unexpected response (HTTP {resp.status_code})"}

    if resp.status_code == 409:
        return "exists", data
    if resp.status_code != 200:
        # Make sure the caller has something better than "Unknown error" to
        # show when the server's error body carries no "error" field.
        data.setdefault("error", f"HTTP {resp.status_code}")
        return "error", data
    return "ok", data
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# ── File upload — PUT to GCS ──────────────────────────────────────────────────
|
|
142
|
+
|
|
143
|
+
def upload_to_gcs(file_path: Path, upload_url: str) -> tuple[bool, str]:
    """Stream *file_path* to *upload_url* with an HTTP PUT, showing progress.

    The file is read in 64 KiB chunks; the progress bar advances as each
    chunk is handed to ``requests``. The bar is transient and disappears
    when the transfer ends.

    Returns:
        ``(True, "")`` when the server answers HTTP 200, otherwise
        ``(False, message)`` where *message* is either the text of the
        ``requests`` exception or ``"HTTP <status>"``.
    """
    total_bytes = file_path.stat().st_size

    with Progress(
        TextColumn(" "),
        BarColumn(bar_width=34),
        "[progress.percentage]{task.percentage:>3.0f}%",
        DownloadColumn(),
        TransferSpeedColumn(),
        TimeRemainingColumn(),
        console=console,
        transient=True,
    ) as bar:
        task_id = bar.add_task("upload", total=total_bytes)

        def _chunks() -> Generator[bytes, None, None]:
            # iter(callable, sentinel) stops at the first empty read (EOF).
            with open(file_path, "rb") as stream:
                for block in iter(lambda: stream.read(65536), b""):
                    bar.advance(task_id, len(block))
                    yield block

        request_headers = {
            "Content-Type": "application/octet-stream",
            "Content-Length": str(total_bytes),
        }
        try:
            # No timeout is set on the transfer (timeout=None).
            response = requests.put(
                upload_url,
                data=_chunks(),
                headers=request_headers,
                timeout=None,
            )
        except requests.exceptions.RequestException as exc:
            return False, str(exc)

    if response.status_code != 200:
        return False, f"HTTP {response.status_code}"
    return True, ""
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
# ── Per-file orchestration ────────────────────────────────────────────────────
|
|
187
|
+
|
|
188
|
+
def upload_one(
    file_path: Path,
    username: str,
    project_id: str,
    api_key: str,
    test_mode: bool,
    overwrite: bool,
    index: int,
    total: int,
) -> str:
    """Upload a single file and report the outcome on the console.

    Steps: print a ``[index/total]`` header line, request a signed URL via
    ``get_upload_url`` (against ``TEST_API`` in test mode, ``PROD_API``
    otherwise), ask the user whether to overwrite when the API reports that
    the file already exists, then stream the file with ``upload_to_gcs``.

    Args:
        file_path: Local file to upload; its base name is the remote name.
        username: Account name forwarded to the API.
        project_id: Project identifier forwarded to the API.
        api_key: API key forwarded to the API.
        test_mode: Use the test API and show the ``auto_delete`` note, if any.
        overwrite: Request overwrite up front instead of prompting.
        index: 1-based position of this file in the batch (display only).
        total: Number of files in the batch (display only).

    Returns:
        ``"ok"`` on success, ``"skip"`` when the user declines to overwrite,
        ``"fail"`` for any error (including an unreadable local file).
    """
    # rich is already a dependency of this module. escape() keeps brackets in
    # file names and server/exception text from being parsed as markup tags.
    from rich.markup import escape

    api_url = TEST_API if test_mode else PROD_API
    filename = file_path.name
    shown_name = escape(filename)

    try:
        size_str = _human_size(file_path.stat().st_size)
    except OSError as exc:
        # The file vanished or became unreadable after it was resolved:
        # fail this file only instead of aborting the whole batch.
        console.print(f" [{index}/{total}] [bold]{shown_name}[/]")
        console.print(f" [red]✗[/] Cannot read file: {escape(str(exc))}")
        return "fail"

    console.print(f" [{index}/{total}] [bold]{shown_name}[/] [dim]{size_str}[/]")

    status, data = get_upload_url(api_url, api_key, username, project_id, filename, overwrite)

    if status == "exists":
        console.print(" [yellow]![/] File already exists in your project directory.")
        try:
            answer = input(" Overwrite? [y/N] ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            # No usable answer (closed stdin or Ctrl-C at the prompt) is
            # treated as "no".
            answer = "n"
        if answer == "y":
            status, data = get_upload_url(
                api_url, api_key, username, project_id, filename, overwrite=True
            )
        else:
            console.print(" [dim]−[/] Skipped.")
            return "skip"

    if status != "ok":
        err_msg = data.get("error", "Unknown error")
        console.print(f" [red]✗[/] {escape(str(err_msg))}")
        return "fail"

    upload_url = data.get("upload_url", "")
    if not upload_url:
        console.print(" [red]✗[/] Could not retrieve upload URL.")
        return "fail"

    ok, err_msg = upload_to_gcs(file_path, upload_url)

    if ok:
        console.print(" [green]✓[/] Uploaded successfully")
        if test_mode and "auto_delete" in data:
            console.print(f" [dim]{escape(str(data['auto_delete']))}[/]")
        return "ok"

    console.print(f" [red]✗[/] Upload failed: {escape(err_msg)}")
    return "fail"
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
# ── Display helpers ───────────────────────────────────────────────────────────
|
|
244
|
+
|
|
245
|
+
def print_banner(username: str, project_id: str, test_mode: bool) -> None:
    """Print the start-up panel showing user, project and upload mode.

    The panel is titled with the tool name and ``__version__`` and is
    followed by one blank line.
    """
    if test_mode:
        mode = Text("TEST · files deleted after 24 h", style="bold yellow")
    else:
        mode = Text("Production", style="bold green")

    # Two-column borderless grid: dim label on the left, value on the right.
    details = Table.grid(padding=(0, 2))
    details.add_column(style="dim")
    details.add_column()
    rows = (
        ("User", f"[bold]{username}[/]"),
        ("Project", f"[bold]{project_id}[/]"),
        ("Mode", mode),
    )
    for label, value in rows:
        details.add_row(label, value)

    banner = Panel(
        details,
        title=f"[bold blue]ABCOMICS[/] Genomic Upload [dim]v{__version__}[/]",
        border_style="blue",
        padding=(0, 1),
    )
    console.print(banner)
    console.print()
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def print_summary(results: dict) -> None:
    """Print the end-of-run table and a one-line verdict.

    *results* maps the outcome names ``"ok"``, ``"fail"`` and ``"skip"`` to
    counts. The table always shows the total; the per-outcome rows appear
    only for non-zero counts. The verdict reports failure if anything
    failed, success if anything was uploaded, and "no files" otherwise.
    """
    total = sum(results.values())
    uploaded, failed, skipped = results["ok"], results["fail"], results["skip"]

    summary = Table(box=box.SIMPLE, show_header=False, padding=(0, 1))
    summary.add_column(style="dim", width=10)
    summary.add_column()
    summary.add_row("Total", f"{total} file(s)")
    optional_rows = (
        ("Uploaded", "green", uploaded),
        ("Failed", "red", failed),
        ("Skipped", "dim", skipped),
    )
    for label, style, count in optional_rows:
        if count:
            summary.add_row(label, f"[{style}]{count}[/]")

    # Table framed by two dim rules, with a blank line on each side.
    console.print()
    console.rule(style="dim")
    console.print(summary)
    console.rule(style="dim")
    console.print()

    if failed:
        verdict = "[red]Some files failed to upload. Check errors above.[/]\n"
    elif uploaded:
        verdict = "[green]All files uploaded successfully.[/]\n"
    else:
        verdict = "[yellow]No files were uploaded.[/]\n"
    console.print(verdict)
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
# ── Argument parser ───────────────────────────────────────────────────────────
|
|
293
|
+
|
|
294
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for ``abcupload``.

    Arguments are split into a "required" group (-u, -p, -d) and an
    "options" group (-k, -t, -o, -h/--help, -V/--version). The default
    help action is disabled so that -h can be listed under "options".
    The -k default is read from the ABCOMICS_API_KEY environment variable
    at the time the parser is built.
    """
    usage_notes = """\
examples:
abcupload -u abc-000001 -p PRJAB00001 -d sample_R1.fastq.gz
abcupload -u abc-000001 -p PRJAB00001 -d '*.fastq.gz'
abcupload -u abc-000001 -p PRJAB00001 -d file1.bam,file2.bam -o
abcupload -u abc-000001 -p PRJAB00001 -d /path/to/data/ -t

environment variable:
ABCOMICS_API_KEY your API key (alternative to -k)
export ABCOMICS_API_KEY=your_key_here
"""
    parser = argparse.ArgumentParser(
        prog="abcupload",
        description="ABCOMICS Genomic Data Upload Tool",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_notes,
        add_help=False,
    )

    # (flag, dest, metavar, help) for every mandatory argument.
    required_specs = (
        ("-u", "username", "USERNAME", "ABCOMICS username"),
        ("-p", "project_id", "PROJECT_ID", "project ID (e.g. PRJAB00001)"),
        ("-d", "data", "DATA", "file(s): single file | comma-list | glob pattern | directory"),
    )
    required = parser.add_argument_group("required")
    for flag, dest, metavar, help_text in required_specs:
        required.add_argument(flag, dest=dest, metavar=metavar, required=True, help=help_text)

    options = parser.add_argument_group("options")
    options.add_argument(
        "-k",
        dest="api_key",
        metavar="API_KEY",
        default=os.environ.get("ABCOMICS_API_KEY", ""),
        help="API key (or: export ABCOMICS_API_KEY=...)",
    )
    # (flag, dest, help) for the boolean switches.
    switch_specs = (
        ("-t", "test", "test mode — files deleted after 24 h"),
        ("-o", "overwrite", "overwrite existing files without asking"),
    )
    for flag, dest, help_text in switch_specs:
        options.add_argument(flag, dest=dest, action="store_true", help=help_text)
    options.add_argument("-h", "--help", action="help", help="show this help and exit")
    options.add_argument("-V", "--version", action="version", version=f"%(prog)s {__version__}")

    return parser
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
# ── Entry point ───────────────────────────────────────────────────────────────
|
|
342
|
+
|
|
343
|
+
def main(argv: Optional[list[str]] = None) -> None:
    """Command-line entry point: validate arguments, upload, summarise, exit.

    Args:
        argv: Argument list to parse. ``None`` (the default, and what the
            console-script entry point uses) makes argparse read
            ``sys.argv[1:]``.

    Always terminates through ``sys.exit``:

    * ``1``   - missing API key, malformed project ID, no files found, or at
      least one file failed;
    * ``130`` - the run was interrupted with Ctrl-C (a summary of what was
      done so far is still printed);
    * ``0``   - otherwise.
    """
    # rich is already a dependency of this module. escape() keeps brackets in
    # the user's -d value (e.g. a glob character class) from being parsed as
    # markup tags.
    from rich.markup import escape

    parser = build_parser()
    args = parser.parse_args(argv)

    if not args.api_key:
        err_console.print(
            "[red]Error:[/] API key required.\n"
            " Use [bold]-k KEY[/] or set [bold]ABCOMICS_API_KEY[/] environment variable.\n"
            " Contact [bold]contact@abcomics.org[/] if you have not received your key."
        )
        sys.exit(1)

    # fullmatch + [0-9]: unlike match(r"^...$") with \d, this rejects a
    # trailing newline and non-ASCII digit characters.
    if not re.fullmatch(r"PRJAB[0-9]+", args.project_id):
        err_console.print(
            "[red]Error:[/] PROJECT_ID must start with [bold]PRJAB[/] followed by digits "
            "(e.g. [bold]PRJAB00001[/])."
        )
        sys.exit(1)

    files = resolve_files(args.data)
    if not files:
        err_console.print(f"[red]Error:[/] No valid files found in: [bold]{escape(args.data)}[/]")
        sys.exit(1)

    print_banner(args.username, args.project_id, args.test)
    console.print(f" Found [bold]{len(files)}[/] file(s) to upload:\n")

    results = {"ok": 0, "fail": 0, "skip": 0}

    try:
        for i, fp in enumerate(files, 1):
            outcome = upload_one(
                file_path=fp,
                username=args.username,
                project_id=args.project_id,
                api_key=args.api_key,
                test_mode=args.test,
                overwrite=args.overwrite,
                index=i,
                total=len(files),
            )
            results[outcome] += 1
            console.print()
    except KeyboardInterrupt:
        # Ctrl-C during a transfer: count the in-flight file as failed so the
        # summary does not claim success, report what was done, and exit with
        # the conventional 128 + SIGINT status instead of a traceback.
        results["fail"] += 1
        console.print()
        console.print(" [yellow]![/] Interrupted. Remaining files were not uploaded.")
        print_summary(results)
        sys.exit(130)

    print_summary(results)
    sys.exit(1 if results["fail"] > 0 else 0)
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: abcupload
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: ABCOMICS Genomic Data Upload Tool — upload sequencing files to ABCOMICS
|
|
5
|
+
Author-email: Khadim Gueye <bioinformatics.acegid@run.edu.ng>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://abcomics.org
|
|
8
|
+
Project-URL: Repository, https://github.com/african-bioinformatics-center/ABCOMICS_site
|
|
9
|
+
Project-URL: Issues, https://github.com/african-bioinformatics-center/ABCOMICS_site/issues
|
|
10
|
+
Keywords: bioinformatics,genomics,upload,ABCOMICS,fastq,bam,vcf
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
23
|
+
Classifier: Topic :: Utilities
|
|
24
|
+
Requires-Python: >=3.8
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: requests>=2.28
|
|
28
|
+
Requires-Dist: rich>=13.0
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
|
|
31
|
+
# abcupload
|
|
32
|
+
|
|
33
|
+
**ABCOMICS Genomic Data Upload Tool**
|
|
34
|
+
|
|
35
|
+
A command-line tool to upload sequencing and genomic data files to the
|
|
36
|
+
[ABCOMICS](https://abcomics.org) platform. It handles the full two-step
|
|
37
|
+
upload process automatically: requesting a signed URL from the API, then
|
|
38
|
+
streaming the file directly to secure cloud storage with a live progress bar.
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install abcupload
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Requires Python 3.8 or later. Dependencies (`requests`, `rich`) are installed
|
|
49
|
+
automatically.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Quick start
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
# Set your API key once (contact contact@abcomics.org to receive it)
|
|
57
|
+
export ABCOMICS_API_KEY=your_key_here
|
|
58
|
+
|
|
59
|
+
# Upload a single file
|
|
60
|
+
abcupload -u abc-000001 -p PRJAB00001 -d sample_R1.fastq.gz
|
|
61
|
+
|
|
62
|
+
# Upload all FASTQ files in the current directory
|
|
63
|
+
abcupload -u abc-000001 -p PRJAB00001 -d '*.fastq.gz'
|
|
64
|
+
|
|
65
|
+
# Upload an entire directory (scans for all accepted formats)
|
|
66
|
+
abcupload -u abc-000001 -p PRJAB00001 -d /data/project/
|
|
67
|
+
|
|
68
|
+
# Upload multiple specific files
|
|
69
|
+
abcupload -u abc-000001 -p PRJAB00001 -d file1.bam,file2.bam
|
|
70
|
+
|
|
71
|
+
# Test mode — files are deleted after 24 hours
|
|
72
|
+
abcupload -u abc-000001 -p PRJAB00001 -d '*.fastq.gz' -t
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## Usage
|
|
78
|
+
|
|
79
|
+
```
|
|
80
|
+
abcupload -u USERNAME -p PROJECT_ID -d DATA [OPTIONS]
|
|
81
|
+
|
|
82
|
+
required:
|
|
83
|
+
-u USERNAME ABCOMICS username (e.g. abc-000001)
|
|
84
|
+
-p PROJECT_ID Project ID (e.g. PRJAB00001)
|
|
85
|
+
-d DATA What to upload:
|
|
86
|
+
single file : -d sample.fastq.gz
|
|
87
|
+
comma list : -d file1.fastq.gz,file2.bam
|
|
88
|
+
glob pattern : -d '*.fastq.gz'
|
|
89
|
+
directory : -d /path/to/data/
|
|
90
|
+
|
|
91
|
+
options:
|
|
92
|
+
-k API_KEY API key (alternative to ABCOMICS_API_KEY env var)
|
|
93
|
+
-t Test mode — files deleted after 24 h
|
|
94
|
+
-o Overwrite existing files without asking
|
|
95
|
+
-h, --help Show this help and exit
|
|
96
|
+
-V, --version Show version and exit
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Environment variable
|
|
102
|
+
|
|
103
|
+
Set your API key as an environment variable so you do not have to pass `-k`
|
|
104
|
+
on every command:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
# Add to ~/.bashrc or ~/.zshrc for persistence
|
|
108
|
+
export ABCOMICS_API_KEY=your_key_here
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## Accepted file formats
|
|
114
|
+
|
|
115
|
+
| Format | Extensions |
|
|
116
|
+
|---------|------------|
|
|
117
|
+
| FASTQ | `.fastq`, `.fq`, `.fastq.gz`, `.fq.gz`, `.fastq.bz2`, `.fq.bz2` |
|
|
118
|
+
| FASTA | `.fasta`, `.fa`, `.fna`, `.faa`, `.ffn`, `.frn`, `.fasta.gz`, `.fa.gz`, `.fna.gz`, `.faa.gz` |
|
|
119
|
+
| BAM / SAM / CRAM | `.bam`, `.sam`, `.cram` |
|
|
120
|
+
| Index files | `.bai`, `.crai`, `.csi`, `.tbi` |
|
|
121
|
+
| VCF / BCF | `.vcf`, `.vcf.gz`, `.bcf`, `.bcf.gz` |
|
|
122
|
+
| Annotation | `.gff`, `.gff3`, `.gtf`, `.bed` (and `.gz` variants) |
|
|
123
|
+
| Tabular | `.csv`, `.tsv` |
|
|
124
|
+
|
|
125
|
+
Maximum file size: **500 GB**.
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## What happens under the hood
|
|
130
|
+
|
|
131
|
+
1. For each file, `abcupload` sends a POST request to the ABCOMICS API with
|
|
132
|
+
your username, project ID, and filename.
|
|
133
|
+
2. The API validates the request and returns a time-limited signed URL pointing
|
|
134
|
+
directly to the ABCOMICS cloud storage bucket.
|
|
135
|
+
3. `abcupload` streams the file to that URL with a live progress bar showing
|
|
136
|
+
speed, percentage, and estimated time remaining.
|
|
137
|
+
4. If a file with the same name already exists, you are asked whether to
|
|
138
|
+
overwrite it or skip it (or use `-o` to always overwrite).
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Development install
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
git clone https://github.com/african-bioinformatics-center/ABCOMICS_site.git
|
|
146
|
+
cd ABCOMICS_site/abcupload
|
|
147
|
+
pip install -e .
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
Build a distribution:
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
pip install build
|
|
154
|
+
python -m build
|
|
155
|
+
# output: dist/abcupload-1.0.0.tar.gz and dist/abcupload-1.0.0-py3-none-any.whl
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## Author
|
|
161
|
+
|
|
162
|
+
**Khadim Gueye** — African Bioinformatics Center (ABCOMICS)
|
|
163
|
+
Contact: [contact@abcomics.org](mailto:contact@abcomics.org)
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## License
|
|
168
|
+
|
|
169
|
+
[MIT](LICENSE)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
abcupload/__init__.py,sha256=LODdOrmnvJLNCAHeXrR7Fd-a2TBaX3RJwqJeUU-nl8Y,92
|
|
2
|
+
abcupload/__main__.py,sha256=-kKzqu70ZvQr-NSQIQ4wWCALPyrBw7tCevUcb-MT2OE,38
|
|
3
|
+
abcupload/cli.py,sha256=BCY1NECN9Ejby8gVbAhvAY4fpxKED0nJwRG650bhnWs,13463
|
|
4
|
+
abcupload-1.0.0.dist-info/licenses/LICENSE,sha256=FRoGDhpexnax_On5SpHuBMT077owLqriXqLDG16EA2g,1113
|
|
5
|
+
abcupload-1.0.0.dist-info/METADATA,sha256=lAiTUksV5Z240NILYmZoQZy2NXrabPcHLevcPuiERto,4994
|
|
6
|
+
abcupload-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
7
|
+
abcupload-1.0.0.dist-info/entry_points.txt,sha256=lsNOS67j1vY7UL_VkH0KFwwIVPgiA5fCrLxu0Uc2yDk,49
|
|
8
|
+
abcupload-1.0.0.dist-info/top_level.txt,sha256=H5cLjT6Wy7RUtBQrNIjzRW_ghg4WSw28suQH3dbDYT4,10
|
|
9
|
+
abcupload-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Khadim Gueye — African Bioinformatics Center (ABCOMICS)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
abcupload
|