seashell-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,70 @@
1
+ Metadata-Version: 2.4
2
+ Name: seashell-cli
3
+ Version: 0.1.0
4
+ Summary: Seashell — Genomic data, compressed and queryable
5
+ License: Proprietary
6
+ Project-URL: Homepage, https://seashell.bio
7
+ Project-URL: Documentation, https://seashell.bio/docs
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
12
+ Requires-Python: >=3.8
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: requests>=2.28
15
+
16
+ # Seashell CLI
17
+
18
+ Command-line tool for querying and managing genomic data on Seashell.
19
+
20
+ ## Install
21
+
22
+ ```bash
23
+ pip install seashell-cli
24
+ ```
25
+
26
+ ## Quick Start
27
+
28
+ ```bash
29
+ seashell
30
+ ```
31
+
32
+ You'll be prompted for your API key (from your institution admin), username, and password. After login, you're in an interactive shell:
33
+
34
+ ```
35
+ seashell> LIST PATIENTS
36
+ seashell> FIND VARIANTS WHERE patient=NA12878 AND gene=BRCA1
37
+ seashell> EXPORT PATIENT NA12878 FORMAT CRAM
38
+ ```
39
+
40
+ ## Single Query Mode
41
+
42
+ ```bash
43
+ seashell "FIND PATIENTS WHERE gene=BRCA1 AND significance=pathogenic"
44
+ seashell "COUNT VARIANTS WHERE patient=NA12878"
45
+ seashell --format json "LIST PATIENTS"
46
+ ```
47
+
48
+ ## Commands
49
+
50
+ | Command | Description |
51
+ |---|---|
52
+ | `FIND VARIANTS WHERE ...` | Search variants by gene, significance, patient |
53
+ | `FIND PATIENTS WHERE ...` | Find patients matching criteria |
54
+ | `COUNT VARIANTS/PATIENTS WHERE ...` | Count matches |
55
+ | `LIST PATIENTS` | List all patients |
56
+ | `COMPARE p1 VS p2` | Compare two patients |
57
+ | `UPLOAD PATIENT id CRAM s3://...` | Upload from CRAM/BAM |
58
+ | `UPLOAD PATIENT id FASTQ s3://R1 s3://R2` | Upload from raw FASTQ |
59
+ | `EXPORT PATIENT id FORMAT CRAM` | Export as CRAM/BAM |
60
+ | `DELETE PATIENT id` | Remove a patient |
61
+ | `help` | Show all commands |
62
+
63
+ ## Requirements
64
+
65
+ - Python 3.8+
66
+ - A Seashell API key (contact your institution admin)
67
+
68
+ ## Documentation
69
+
70
+ https://seashell.bio/docs
@@ -0,0 +1,55 @@
1
+ # Seashell CLI
2
+
3
+ Command-line tool for querying and managing genomic data on Seashell.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install seashell-cli
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```bash
14
+ seashell
15
+ ```
16
+
17
+ You'll be prompted for your API key (from your institution admin), username, and password. After login, you're in an interactive shell:
18
+
19
+ ```
20
+ seashell> LIST PATIENTS
21
+ seashell> FIND VARIANTS WHERE patient=NA12878 AND gene=BRCA1
22
+ seashell> EXPORT PATIENT NA12878 FORMAT CRAM
23
+ ```
24
+
25
+ ## Single Query Mode
26
+
27
+ ```bash
28
+ seashell "FIND PATIENTS WHERE gene=BRCA1 AND significance=pathogenic"
29
+ seashell "COUNT VARIANTS WHERE patient=NA12878"
30
+ seashell --format json "LIST PATIENTS"
31
+ ```
32
+
33
+ ## Commands
34
+
35
+ | Command | Description |
36
+ |---|---|
37
+ | `FIND VARIANTS WHERE ...` | Search variants by gene, significance, patient |
38
+ | `FIND PATIENTS WHERE ...` | Find patients matching criteria |
39
+ | `COUNT VARIANTS/PATIENTS WHERE ...` | Count matches |
40
+ | `LIST PATIENTS` | List all patients |
41
+ | `COMPARE p1 VS p2` | Compare two patients |
42
+ | `UPLOAD PATIENT id CRAM s3://...` | Upload from CRAM/BAM |
43
+ | `UPLOAD PATIENT id FASTQ s3://R1 s3://R2` | Upload from raw FASTQ |
44
+ | `EXPORT PATIENT id FORMAT CRAM` | Export as CRAM/BAM |
45
+ | `DELETE PATIENT id` | Remove a patient (admin only) |
46
+ | `help` | Show all commands |
47
+
48
+ ## Requirements
49
+
50
+ - Python 3.8+
51
+ - A Seashell API key (contact your institution admin)
52
+
53
+ ## Documentation
54
+
55
+ https://seashell.bio/docs
@@ -0,0 +1,25 @@
1
+ [build-system]
2
+ requires = ["setuptools>=42", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "seashell-cli"
7
+ version = "0.1.0"
8
+ description = "Seashell — Genomic data, compressed and queryable"
9
+ readme = "README.md"
10
+ license = {text = "Proprietary"}
11
+ requires-python = ">=3.8"
12
+ dependencies = ["requests>=2.28"]
13
+ classifiers = [
14
+ "Programming Language :: Python :: 3",
15
+ "Operating System :: OS Independent",
16
+ "Intended Audience :: Science/Research",
17
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
18
+ ]
19
+
20
+ [project.urls]
21
+ Homepage = "https://seashell.bio"
22
+ Documentation = "https://seashell.bio/docs"
23
+
24
+ [project.scripts]
25
+ seashell = "seashell.cli:main"
@@ -0,0 +1 @@
1
# Package version string.
__version__ = "0.1.0"
@@ -0,0 +1,2 @@
1
# Package entry module: hands control straight to the CLI entry point.
from seashell.cli import main

main()
@@ -0,0 +1,234 @@
1
+ """Seashell CLI — interactive genomic query tool."""
2
+
3
+ import getpass
4
+ import json
5
+ import sys
6
+ import threading
7
+
8
+ from seashell.client import SeashellClient
9
+ from seashell.config import load_config, save_config, clear_config, DEFAULT_SERVER
10
+ from seashell.display import display_result, print_job_progress
11
+ from seashell.help import HELP_TEXT, welcome_banner, CYAN, BOLD, DIM, RESET, WHITE, BLUE
12
+
13
+
14
def _prompt_login():
    """Run the interactive login flow and return ``(client, config)``.

    If the saved config holds both an API key and an email, the cached
    credentials are reused as long as the server answers a health check.
    Otherwise the user is prompted for API key, username and password, the
    credentials are verified against ``/login``, and the resulting config is
    saved.

    Exits the process with status 1 when no API key is entered, when the
    server rejects the login, or when the login request cannot be completed.
    """
    print(welcome_banner())

    config = load_config()
    server = config.get("server", DEFAULT_SERVER)

    # Check for cached credentials
    if config.get("api_key") and config.get("email"):
        print(DIM + " Reconnecting as %s (%s)..." % (config["email"], config.get("institution", "?")) + RESET)
        client = SeashellClient(server, config["api_key"])
        client.wake()  # pre-warm while we verify
        health = client.health()
        if health:
            count = health.get("patient_count", "?")
            print(BOLD + " Ready." + RESET + " %s patients loaded.\n" % count)
            return client, config
        print(" Server unreachable. Re-enter credentials.\n")

    # Fresh login
    api_key = input(CYAN + " API Key: " + RESET).strip()
    if not api_key:
        print(" No API key provided. Exiting.")
        sys.exit(1)

    # Immediately start warming up the instance (API key identifies institution)
    client = SeashellClient(server, api_key)
    client.wake()

    # Verify the key and get institution name
    health = client.health()
    if health:
        inst = health.get("institution", "your institution")
        print(DIM + " Institution: %s " % inst + CYAN + "(warming up...)" + RESET)
    else:
        inst = "unknown"
        print(" Could not reach server at %s" % server)
        print(" Will retry on first query.\n")

    email = input(CYAN + " Username: " + RESET).strip()
    password = getpass.getpass(CYAN + " Password: " + RESET)

    # Verify credentials. Use client.server (trailing slash already stripped by
    # SeashellClient) so a server URL such as "https://host/" does not produce
    # "https://host//login".
    try:
        resp = client.session.post(client.server + "/login", json={
            "email": email,
            "password": password,
        }, timeout=15)
    except Exception as e:
        print("\n Connection error: %s" % str(e))
        sys.exit(1)

    if resp.status_code != 200:
        detail = ""
        try:
            detail = resp.json().get("detail", "")
        except Exception:
            pass  # error body is not a JSON object: use the generic message
        print("\n Login failed: %s" % (detail or "invalid credentials"))
        sys.exit(1)

    # A 200 response that is not a JSON object is a server problem, not a
    # connection problem, so report it as such.
    try:
        login_data = resp.json()
    except ValueError:
        login_data = None
    if not isinstance(login_data, dict):
        print("\n Login failed: unexpected response from server")
        sys.exit(1)

    # Save credentials
    config = {
        "server": server,
        "api_key": api_key,
        "institution": inst if inst != "unknown" else login_data.get("institution", ""),
        "email": email,
        "session_token": login_data.get("token", ""),
    }
    save_config(config)

    # Check patient count
    health = client.health()
    count = health.get("patient_count", "?") if health else "?"
    print("\n " + BOLD + "Authenticated." + RESET + " %s patients loaded. Ready.\n" % count)
    return client, config
91
+
92
+
93
def _run_query(client, query_text, output_format="table"):
    """Execute a single query and display the result.

    UPLOAD/INGEST and EXPORT queries that come back with a ``job_id`` are
    polled until they finish, with progress output; every other result is
    passed to ``display_result``. Failures are written to stderr and the
    function returns normally, so the caller keeps control.

    Args:
        client: object providing ``query(text)`` and
            ``poll_job(endpoint, job_id, callback=...)``.
        query_text: the query exactly as typed by the user.
        output_format: forwarded to ``display_result`` for non-job results.
    """
    query_upper = query_text.strip().upper()

    # Detect async operations (upload/export) that need job polling
    is_upload = query_upper.startswith(("UPLOAD ", "INGEST "))
    is_export = query_upper.startswith("EXPORT ")

    try:
        result = client.query(query_text)
    except PermissionError as e:
        sys.stderr.write(" Permission denied: %s\n" % str(e))
        return
    except RuntimeError as e:
        sys.stderr.write(" %s\n" % str(e))
        return
    except Exception as e:
        sys.stderr.write(" Error: %s\n" % str(e))
        return

    # Handle async jobs
    job_id = result.get("job_id")
    if job_id and (is_upload or is_export):
        endpoint = "ingest_gql" if is_upload else "export"
        print(" Job started: %s" % job_id)
        print(" Patients: %s" % result.get("patients", result.get("num_patients", "?")))
        print()
        final = client.poll_job(endpoint, job_id, callback=print_job_progress)
        print()

        # A job that ended in "error"/"failed" may carry no per-patient
        # "errors" list (poll_job returns only status + detail on a non-200
        # poll), so report it explicitly instead of finishing silently.
        status = final.get("status")
        if status in ("error", "failed"):
            detail = final.get("detail") or final.get("error") or "no details provided"
            sys.stderr.write(" Job %s: %s\n" % (status, detail))

        for err in final.get("errors") or []:
            sys.stderr.write(" Error: %s - %s\n" % (
                err.get("patient_id", "?"), err.get("error", "?")))

        elapsed = final.get("completed_at", 0) - final.get("started_at", 0)
        if elapsed > 0:
            print(" Completed in %.1fs" % elapsed)
        return

    display_result(result, output_format)
132
+
133
+
134
def _repl(client, config):
    """Run the interactive read-eval-print loop until the user leaves.

    Built-in commands (``exit``/``quit``/``q``, ``help``, ``status``,
    ``logout``) are handled locally; anything else is sent to the server via
    ``_run_query``. Ctrl-C or EOF at the prompt ends the session.

    Args:
        client: connected client used for queries and health checks.
        config: config dict; ``server``, ``institution`` and ``email`` are
            shown by the ``status`` command.
    """
    while True:
        try:
            query = input(CYAN + "seashell> " + RESET).strip()
        except (EOFError, KeyboardInterrupt):
            print("\n Goodbye.")
            break

        if not query:
            continue

        lower = query.lower()

        if lower in ("exit", "quit", "q"):
            print(" Goodbye.")
            break
        elif lower == "help":
            print(HELP_TEXT)
        elif lower == "status":
            print(" Server: %s" % config.get("server", "?"))
            print(" Institution: %s" % config.get("institution", "?"))
            print(" User: %s" % config.get("email", "?"))
            health = client.health()
            if health:
                print(" Patients: %s" % health.get("patient_count", "?"))
                print(" Engine: %s" % health.get("status", "?"))
            else:
                print(" Server: unreachable")
        elif lower == "logout":
            clear_config()
            print(" Credentials cleared. Run 'seashell' to log in again.")
            break
        else:
            try:
                _run_query(client, query)
            except KeyboardInterrupt:
                # _run_query only catches Exception, so Ctrl-C during a long
                # query or job poll would otherwise kill the whole shell with
                # a traceback. Stop waiting and return to the prompt instead.
                print("\n Interrupted.")
169
+
170
+
171
def _parse_cli_args(args):
    """Split CLI arguments into ``(server_override, output_format, query_parts)``.

    Raises:
        ValueError: if ``--server`` or ``--format`` is the last argument and
            therefore has no value.
    """
    server_override = None
    output_format = "table"
    query_parts = []

    remaining = iter(args)
    for arg in remaining:
        if arg in ("--server", "--format"):
            value = next(remaining, None)
            if value is None:
                raise ValueError("%s requires a value" % arg)
            if arg == "--server":
                server_override = value
            else:
                output_format = value
        elif arg == "--json":
            output_format = "json"
        elif arg == "--tsv":
            output_format = "tsv"
        else:
            query_parts.append(arg)

    return server_override, output_format, query_parts


def main():
    """Entry point for the seashell CLI.

    With query text on the command line, runs that single query (prompting
    for login first when no API key is saved). Without query text, logs in
    and starts the interactive shell. ``--server URL`` is saved to the config
    before either mode runs.

    Exits with status 2 when ``--server`` or ``--format`` is given without a
    value; previously the bare flag was sent to the server as query text.
    """
    args = sys.argv[1:]

    # --help flag
    if args and args[0] in ("--help", "-h"):
        print("Usage:")
        print(" seashell Interactive mode")
        print(" seashell \"LIST PATIENTS\" Single query")
        print(" seashell --server URL Set server URL")
        print(" seashell --format json \"LIST PATIENTS\" Output as JSON")
        print()
        print(HELP_TEXT)
        return

    # Parse flags
    try:
        server_override, output_format, query_parts = _parse_cli_args(args)
    except ValueError as e:
        sys.stderr.write("seashell: %s (see 'seashell --help')\n" % e)
        sys.exit(2)

    # Apply server override
    if server_override:
        config = load_config()
        config["server"] = server_override
        save_config(config)

    if query_parts:
        # Single query mode
        query_text = " ".join(query_parts)
        config = load_config()
        if not config.get("api_key"):
            # Not logged in — prompt first
            client, config = _prompt_login()
        else:
            client = SeashellClient(
                config.get("server", DEFAULT_SERVER),
                config["api_key"])
        _run_query(client, query_text, output_format)
    else:
        # Interactive mode
        client, config = _prompt_login()
        _repl(client, config)
231
+
232
+
233
# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,81 @@
1
+ """HTTP client for the Seashell API."""
2
+
3
+ import json
4
+ import sys
5
+ import threading
6
+ import time
7
+
8
+ import requests
9
+
10
+
11
class SeashellClient(object):
    """HTTP client for one Seashell server, authenticated by API key.

    All requests go through a single ``requests.Session`` that sends the API
    key in the ``X-API-Key`` header.
    """

    def __init__(self, server_url, api_key):
        """Create a client for *server_url* (trailing slashes are stripped)."""
        self.server = server_url.rstrip("/")
        self.api_key = api_key
        self.session = requests.Session()
        self.session.headers["X-API-Key"] = api_key

    @staticmethod
    def _error_detail(resp, default):
        """Return the ``detail`` field of a JSON error body, else *default*.

        Error responses are not guaranteed to be JSON objects (an HTML error
        page, a bare string, ...), so every failure falls back to *default*.
        """
        try:
            body = resp.json()
        except ValueError:
            return default
        if isinstance(body, dict):
            return body.get("detail", default)
        return default

    def query(self, query_text):
        """Send a GQL query and return the decoded JSON result.

        Raises:
            PermissionError: the server answered 403.
            RuntimeError: the server answered 429 (rate limited) or any other
                non-200 status.
        """
        resp = self.session.post(
            self.server + "/query",
            json={"query": query_text},
            timeout=600,
        )
        if resp.status_code == 403:
            raise PermissionError(self._error_detail(resp, "Access denied"))
        if resp.status_code == 429:
            retry = resp.headers.get("Retry-After", "60")
            raise RuntimeError("Rate limited. Try again in %s seconds." % retry)
        if resp.status_code != 200:
            detail = self._error_detail(resp, resp.text)
            raise RuntimeError("Query failed (%d): %s" % (resp.status_code, detail))
        return resp.json()

    def health(self):
        """Check server health.

        Returns the decoded ``/health`` JSON on a 200 response, or ``None``
        when the request fails or returns any other status.
        """
        try:
            resp = self.session.get(self.server + "/health", timeout=10)
            if resp.status_code == 200:
                return resp.json()
        except Exception:
            pass  # deliberate best-effort probe: any failure means "not healthy"
        return None

    def wake(self):
        """Pre-warm the institution's EC2 instance. Fire-and-forget.

        The request runs on a daemon thread and its outcome is ignored, so
        this call returns immediately and never raises for network errors.
        """
        def _wake():
            try:
                self.session.post(self.server + "/wake", timeout=5)
            except Exception:
                pass  # best-effort: a failed wake-up must not disturb the caller
        t = threading.Thread(target=_wake, daemon=True)
        t.start()

    def poll_job(self, endpoint, job_id, callback=None, interval=3):
        """Poll an async job (export/upload) until completion.

        Args:
            endpoint: "export" or "ingest_gql"
            job_id: the job ID returned by the initial request
            callback: optional function called with job dict on each poll
            interval: seconds to wait between polls (default 3)

        Returns:
            The final job dict once its status is ``complete``,
            ``complete_with_errors``, ``error`` or ``failed``; or
            ``{"status": "error", "detail": <response text>}`` as soon as a
            poll returns a non-200 status.
        """
        url = "%s/%s/%s" % (self.server, endpoint, job_id)
        while True:
            try:
                resp = self.session.get(url, timeout=30)
                if resp.status_code != 200:
                    return {"status": "error", "detail": resp.text}
                job = resp.json()
                if callback:
                    callback(job)
                status = job.get("status", "")
                if status in ("complete", "complete_with_errors", "error", "failed"):
                    return job
            except Exception as e:
                # Transient failures are reported and the poll is retried.
                sys.stderr.write("Poll error: %s\n" % str(e))
            time.sleep(interval)
@@ -0,0 +1,39 @@
1
+ """Configuration management — stores credentials in ~/.seashell/config.json."""
2
+
3
+ import json
4
+ import os
5
+ import stat
6
+
7
# Per-user directory that holds the CLI's saved state.
CONFIG_DIR = os.path.expanduser("~/.seashell")
# JSON file written by save_config() and read by load_config().
CONFIG_FILE = os.path.join(CONFIG_DIR, "config.json")

# Default server URL.
DEFAULT_SERVER = "https://seashell.bio"
11
+
12
+
13
def load_config(path=None):
    """Load the saved config as a dict.

    Args:
        path: config file to read; defaults to ``CONFIG_FILE``.

    Returns:
        The stored config, or an empty dict when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object
        (callers rely on ``dict.get``).
    """
    target = CONFIG_FILE if path is None else path
    try:
        with open(target, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: malformed JSON or
        # undecodable bytes (JSONDecodeError and UnicodeDecodeError both
        # derive from it).
        return {}
    return data if isinstance(data, dict) else {}
22
+
23
+
24
def save_config(config, path=None):
    """Save config to disk with owner-only permissions (contains API key).

    The file is created with mode 600 from the start, so the API key is never
    on disk with the process's default (umask) permissions. An already
    existing file is tightened to 600 before the new content is written.

    Args:
        config: JSON-serialisable dict to store.
        path: destination file; defaults to ``CONFIG_FILE``.
    """
    target = CONFIG_FILE if path is None else path
    directory = os.path.dirname(target)
    if directory:
        os.makedirs(directory, mode=0o700, exist_ok=True)

    owner_rw = stat.S_IRUSR | stat.S_IWUSR
    # The mode passed to os.open only applies when the file is created.
    fd = os.open(target, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, owner_rw)
    with os.fdopen(fd, "w") as f:
        # Set file permissions to 600 (owner read/write only) for files that
        # already existed with looser permissions.
        try:
            os.chmod(target, owner_rw)
        except OSError:
            pass  # Windows doesn't support chmod
        json.dump(config, f, indent=2)
34
+
35
+
36
def clear_config(path=None):
    """Remove saved credentials.

    Args:
        path: config file to delete; defaults to ``CONFIG_FILE``.

    A missing file is not an error.
    """
    target = CONFIG_FILE if path is None else path
    try:
        os.remove(target)
    except FileNotFoundError:
        pass  # already gone, including when removed between check and delete
@@ -0,0 +1,220 @@
1
+ """Output formatting for query results."""
2
+
3
+ import json
4
+ import sys
5
+
6
+
7
def display_result(result, output_format="table"):
    """Render a query result in the requested output format.

    A result carrying an ``error`` key is reported on stderr regardless of
    format. ``json`` dumps the whole result, ``tsv`` prints the rows as
    tab-separated text, and any other format picks a table printer from the
    result's ``action``; unrecognised actions fall back to a JSON dump.
    """
    if "error" in result:
        sys.stderr.write("Error: %s\n" % result["error"])
        return

    if output_format == "json":
        print(json.dumps(result, indent=2))
        return

    if output_format == "tsv":
        _print_tsv(result)
        return

    kind = result.get("action", "")
    elapsed_ms = result.get("latency_ms", "?")

    if kind == "compare_patients":
        _print_compare(result, elapsed_ms)
        return
    if kind == "find_similar":
        _print_similar(result, elapsed_ms)
        return
    if kind in ("list_patients", "find_patients", "count_patients"):
        _print_patients(result, kind, elapsed_ms)
        return
    if kind in ("find_variants", "count_variants"):
        _print_variants(result, kind, elapsed_ms)
        return
    if kind == "list_genes":
        print("\n %s genes loaded" % result.get("count", "?"))
        print(" %s ms" % elapsed_ms)
        return
    if kind == "diff_patients":
        _print_diff(result, elapsed_ms)
        return
    if kind == "pca":
        _print_pca(result, elapsed_ms)
        return
    if kind in ("coverage", "qc", "pileup"):
        _print_analytics(result, kind, elapsed_ms)
        return
    if kind == "delete_patient":
        print("\n Patient %s deleted." % result.get("patient_id", "?"))
        print(" %s ms" % elapsed_ms)
        return

    # Unknown action: show the raw payload.
    print(json.dumps(result, indent=2))
46
+
47
+
48
def _print_compare(result, latency):
    """Print the summary of a two-patient comparison.

    The ``*_estimate`` fields are preferred; the plain fields are used when
    the estimates are absent, and "?" when neither is present.
    """
    def pick(estimate_key, plain_key):
        return result.get(estimate_key, result.get(plain_key, "?"))

    left = result.get("patient_a", "?")
    right = result.get("patient_b", "?")
    print(f"\n {left} vs {right}")
    print(f" Jaccard similarity: {result.get('jaccard_similarity', '?')}")
    print(f" Shared variants (est): {pick('shared_variants_estimate', 'shared_variants')}")

    only_a = pick("unique_to_a_estimate", "unique_to_a")
    only_b = pick("unique_to_b_estimate", "unique_to_b")
    print(f" Unique to A (est): {only_a}")
    print(f" Unique to B (est): {only_b}")

    note = result.get("note")
    if note:
        print(f" Note: {note}")
    print(f"\n {latency} ms")
60
+
61
+
62
def _print_similar(result, latency):
    """Print the patients most similar to the queried one as a two-column table."""
    print(f"\n Most similar to {result.get('patient', '?')}:\n")
    print(f" {'PATIENT':<15} {'JACCARD':>10}")
    print(" " + "-" * 15 + " " + "-" * 10)
    for match in result.get("results", []):
        patient_id = match["patient_id"]
        score = match["jaccard_similarity"]
        print(f" {patient_id!s:<15} {score!s:>10}")
    print(f"\n {result.get('count', 0)} results | {latency} ms")
69
+
70
+
71
def _print_patients(result, action, latency):
    """Print a patient listing, or just the counts for ``count_patients``.

    The table layout is chosen from the keys of the first row: patient +
    variants + parent, patient + matching variants, or patient only.
    """
    rows = result.get("results", [])
    count = result.get("count", 0)
    total = result.get("total", count)

    if action == "count_patients":
        print("\n %s of %s patients" % (count, total))
        print(" %s ms" % latency)
        return

    if not rows:
        print("\n 0 patients found | %s ms" % latency)
        return

    first = rows[0]
    with_variants = "variants" in first
    with_matching = "matching_variants" in first
    with_parent = "parent" in first

    print()
    # Cells are produced lazily so rows print one by one, in order.
    if with_variants and with_parent:
        template = " %-15s %10s %-15s"
        titles = ("PATIENT", "VARIANTS", "PARENT")
        widths = (15, 10, 15)
        cells = ((r["patient_id"], r["variants"], r.get("parent") or "GRCh38") for r in rows)
    elif with_matching:
        template = " %-15s %10s"
        titles = ("PATIENT", "MATCHING")
        widths = (15, 10)
        cells = ((r["patient_id"], r["matching_variants"]) for r in rows)
    else:
        template = " %-15s"
        titles = ("PATIENT",)
        widths = (15,)
        cells = ((r["patient_id"],) for r in rows)

    print(template % titles)
    print(" " + " ".join("-" * width for width in widths))
    for row in cells:
        print(template % row)

    print("\n %s patients | %s ms" % (count, latency))
108
+
109
+
110
def _print_variants(result, action, latency):
    """Print a variant table, or just the count for ``count_variants``.

    CHROM/POS/REF/ALT are always shown. GENE, SIGNIFICANCE and RS_ID columns
    are added when at least one row carries the corresponding key.
    """
    rows = result.get("results", [])
    count = result.get("count", 0)

    if action == "count_variants":
        print("\n %s variants" % count)
        print(" %s ms" % latency)
        return

    if not rows:
        print("\n 0 variants found | %s ms" % latency)
        return

    base_template = " %-8s %12s %-6s %-6s"
    # (row key, column title, column width) for the optional columns, in
    # display order; only those present in some row are kept.
    extras = [
        (key, title, width)
        for key, title, width in (
            ("gene", "GENE", 12),
            ("significance", "SIGNIFICANCE", 20),
            ("rs_id", "RS_ID", 15),
        )
        if any(key in row for row in rows)
    ]

    print()
    header = base_template % ("CHROM", "POS", "REF", "ALT")
    header += "".join(" " + title.ljust(width) for _, title, width in extras)
    print(header)
    print(" %s" % ("-" * (len(header) - 2)))

    for row in rows:
        line = base_template % (
            row.get("chrom", ""), row.get("pos", 0), row.get("ref", ""), row.get("alt", ""))
        line += "".join(" " + str(row.get(key, "")).ljust(width) for key, _, width in extras)
        print(line)

    print("\n %s variants | %s ms" % (count, latency))
150
+
151
+
152
def _print_diff(result, latency):
    """Print the variant counts that differ and overlap between two patients."""
    first = result.get("patient_a", "?")
    second = result.get("patient_b", "?")
    report = [
        f"\n {first} vs {second}",
        f" Only in A: {result.get('only_in_a', '?')} variants",
        f" Only in B: {result.get('only_in_b', '?')} variants",
        f" Shared: {result.get('shared', '?')} variants",
        f"\n {latency} ms",
    ]
    for line in report:
        print(line)
158
+
159
+
160
def _print_pca(result, latency):
    """Print the first three principal-component coordinates per patient.

    Coordinates missing from a row are left blank; a row without a
    ``coordinates`` entry is shown as zeros.
    """
    row_template = " %-15s %10s %10s %10s"

    print("\n PCA (%s components, %s patients)\n" % (
        result.get("n_components", "?"), result.get("n_patients", "?")))
    print(row_template % ("PATIENT", "PC1", "PC2", "PC3"))
    print(" " + " ".join("-" * width for width in (15, 10, 10, 10)))

    for entry in result.get("components", []):
        coords = entry.get("coordinates", [0, 0, 0])
        cells = ["%.4f" % coords[axis] if len(coords) > axis else "" for axis in range(3)]
        print(row_template % (entry.get("patient_id", ""), cells[0], cells[1], cells[2]))

    print("\n %s ms" % latency)
173
+
174
+
175
def _print_analytics(result, action, latency):
    """Print the result of a ``coverage``, ``qc`` or ``pileup`` query.

    Only the latency line is printed for any other action.
    """
    qc_fields = ("total_reads", "mapped_reads", "duplicate_reads", "mean_mapq",
                 "mean_insert_size")

    if action == "coverage":
        print("\n Coverage: %s" % result.get("mean_coverage", "?"))
        print(" Region: %s" % result.get("region", "?"))
    elif action == "qc":
        # Only the metrics present in the result are listed.
        for name in (field for field in qc_fields if field in result):
            print(" %s: %s" % (name, result[name]))
    elif action == "pileup":
        positions = len(result.get("depths", []))
        print("\n Pileup: %s positions" % positions)

    print("\n %s ms" % latency)
188
+
189
+
190
def _print_tsv(result):
    """Print ``result["results"]`` as tab-separated text with a header row.

    Columns are the union of the keys of all rows, in first-seen order, so a
    field that only appears in later rows is not dropped. This matches
    ``_print_variants``, which also scans every row for optional fields. Rows
    lacking a column get an empty cell. Nothing is printed when there are no
    rows.
    """
    rows = result.get("results", [])
    if not rows:
        return

    # dict.fromkeys de-duplicates while preserving insertion order.
    columns = list(dict.fromkeys(key for row in rows for key in row))
    print("\t".join(columns))
    for row in rows:
        print("\t".join(str(row.get(column, "")) for column in columns))
198
+
199
+
200
def print_job_progress(job):
    """Print progress for async jobs (upload/export).

    Rewrites a single progress line in place (carriage return, no newline)
    on every call. When the job status is ``complete`` or
    ``complete_with_errors`` the line is finished with " - Done" and a
    newline, and each reported error is written to stderr.
    """
    status = job.get("status", "?")
    done = job.get("patients_completed", 0)
    total = job.get("patients_total", job.get("num_patients", "?"))
    error_count = len(job.get("errors", []))
    finished = status in ("complete", "complete_with_errors")

    pieces = ["\r Progress: %s/%s" % (done, total)]
    if error_count:
        pieces.append(" (%s errors)" % error_count)
    if finished:
        pieces.append(" - Done")
    sys.stdout.write("".join(pieces))
    sys.stdout.flush()

    if not finished:
        return

    print()  # newline after progress
    for failure in job.get("errors") or ():
        sys.stderr.write(" Error: %s — %s\n" % (
            failure.get("patient_id", "?"), failure.get("error", "?")))
@@ -0,0 +1,65 @@
1
+ """Help text for the Seashell CLI."""
2
+
3
# Command reference, grouped by category; surrounding blank lines are stripped.
HELP_TEXT = """
QUERY COMMANDS
FIND VARIANTS WHERE ... Search variants by gene, significance, patient
FIND PATIENTS WHERE ... Find patients matching criteria
COUNT VARIANTS WHERE ... Count matching variants
COUNT PATIENTS WHERE ... Count matching patients
LIST PATIENTS List all patients in your institution
COMPARE patient1 VS patient2 Compare variants between two patients
DIFF patient1 VS patient2 Exact variant-level differences
FIND SIMILAR patient TOP n Find genetically similar patients
PCA Principal component analysis

UPLOAD
UPLOAD PATIENT id CRAM s3://path VCF s3://path Upload from aligned CRAM/BAM
UPLOAD PATIENT id FASTQ s3://R1 s3://R2 Upload from raw FASTQ
UPLOAD BATCH s3://manifest.json Batch upload from manifest

EXPORT
EXPORT PATIENT id FORMAT CRAM Export as CRAM
EXPORT PATIENT id FORMAT BAM Export as BAM
EXPORT PATIENT id FORMAT BAM REGION chr17:41M-42M Export a region
EXPORT PATIENTS WHERE gene=BRCA1 FORMAT CRAM Export by criteria

ANALYTICS
COVERAGE patient chr:start-end Read depth across a region
QC patient Quality control metrics
PILEUP patient chr:pos-pos Base-level pileup

MANAGEMENT
DELETE PATIENT id Remove a patient (admin only)

FILTERS
gene=BRCA1 Gene name
significance=pathogenic ClinVar significance
chromosome=chr17 Chromosome
patient=NA12878 Patient ID
age>50 Phenotype filters (age, sex, ancestry)

CLI COMMANDS
help Show this message
status Connection info and patient count
logout Clear saved credentials and log out
exit Quit Seashell
""".strip()
47
+
48
+
49
# ANSI color codes (work on macOS, Linux, and most Windows terminals)
# Each constant is an SGR escape sequence; RESET returns to default styling.
CYAN = "\033[36m"
BOLD = "\033[1m"
DIM = "\033[2m"
RESET = "\033[0m"
WHITE = "\033[97m"
BLUE = "\033[34m"
56
+
57
+
58
def welcome_banner():
    """Build the colored welcome banner: title and tagline between blank lines."""
    title = BOLD + WHITE + " Welcome to Seashell" + RESET
    tagline = DIM + " Genomic data, compressed and queryable." + RESET
    return "\n".join(["", title, tagline, ""])
@@ -0,0 +1,70 @@
1
+ Metadata-Version: 2.4
2
+ Name: seashell-cli
3
+ Version: 0.1.0
4
+ Summary: Seashell — Genomic data, compressed and queryable
5
+ License: Proprietary
6
+ Project-URL: Homepage, https://seashell.bio
7
+ Project-URL: Documentation, https://seashell.bio/docs
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
12
+ Requires-Python: >=3.8
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: requests>=2.28
15
+
16
+ # Seashell CLI
17
+
18
+ Command-line tool for querying and managing genomic data on Seashell.
19
+
20
+ ## Install
21
+
22
+ ```bash
23
+ pip install seashell-cli
24
+ ```
25
+
26
+ ## Quick Start
27
+
28
+ ```bash
29
+ seashell
30
+ ```
31
+
32
+ You'll be prompted for your API key (from your institution admin), username, and password. After login, you're in an interactive shell:
33
+
34
+ ```
35
+ seashell> LIST PATIENTS
36
+ seashell> FIND VARIANTS WHERE patient=NA12878 AND gene=BRCA1
37
+ seashell> EXPORT PATIENT NA12878 FORMAT CRAM
38
+ ```
39
+
40
+ ## Single Query Mode
41
+
42
+ ```bash
43
+ seashell "FIND PATIENTS WHERE gene=BRCA1 AND significance=pathogenic"
44
+ seashell "COUNT VARIANTS WHERE patient=NA12878"
45
+ seashell --format json "LIST PATIENTS"
46
+ ```
47
+
48
+ ## Commands
49
+
50
+ | Command | Description |
51
+ |---|---|
52
+ | `FIND VARIANTS WHERE ...` | Search variants by gene, significance, patient |
53
+ | `FIND PATIENTS WHERE ...` | Find patients matching criteria |
54
+ | `COUNT VARIANTS/PATIENTS WHERE ...` | Count matches |
55
+ | `LIST PATIENTS` | List all patients |
56
+ | `COMPARE p1 VS p2` | Compare two patients |
57
+ | `UPLOAD PATIENT id CRAM s3://...` | Upload from CRAM/BAM |
58
+ | `UPLOAD PATIENT id FASTQ s3://R1 s3://R2` | Upload from raw FASTQ |
59
+ | `EXPORT PATIENT id FORMAT CRAM` | Export as CRAM/BAM |
60
+ | `DELETE PATIENT id` | Remove a patient |
61
+ | `help` | Show all commands |
62
+
63
+ ## Requirements
64
+
65
+ - Python 3.8+
66
+ - A Seashell API key (contact your institution admin)
67
+
68
+ ## Documentation
69
+
70
+ https://seashell.bio/docs
@@ -0,0 +1,16 @@
1
+ README.md
2
+ pyproject.toml
3
+ setup.cfg
4
+ seashell/__init__.py
5
+ seashell/__main__.py
6
+ seashell/cli.py
7
+ seashell/client.py
8
+ seashell/config.py
9
+ seashell/display.py
10
+ seashell/help.py
11
+ seashell_cli.egg-info/PKG-INFO
12
+ seashell_cli.egg-info/SOURCES.txt
13
+ seashell_cli.egg-info/dependency_links.txt
14
+ seashell_cli.egg-info/entry_points.txt
15
+ seashell_cli.egg-info/requires.txt
16
+ seashell_cli.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ seashell = seashell.cli:main
@@ -0,0 +1 @@
1
+ requests>=2.28
@@ -0,0 +1 @@
1
+ seashell
@@ -0,0 +1,27 @@
1
+ [metadata]
2
+ name = seashell-cli
3
+ version = 0.1.0
4
+ description = Seashell — Genomic data, compressed and queryable
5
+ long_description = file: README.md
6
+ long_description_content_type = text/markdown
7
+ license = Proprietary
8
+ classifiers =
9
+ Programming Language :: Python :: 3
10
+ Operating System :: OS Independent
11
+ Intended Audience :: Science/Research
12
+ Topic :: Scientific/Engineering :: Bio-Informatics
13
+
14
+ [options]
15
+ packages = seashell
16
+ python_requires = >=3.8
17
+ install_requires =
18
+ requests>=2.28
19
+
20
+ [options.entry_points]
21
+ console_scripts =
22
+ seashell = seashell.cli:main
23
+
24
+ [egg_info]
25
+ tag_build =
26
+ tag_date = 0
27
+