bdsc_cli-0.2.1-py3-none-any.whl

This diff shows the content of a package version publicly released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in the public registry.
bdsc_cli/__init__.py ADDED
@@ -0,0 +1,3 @@
+ __all__ = ["__version__"]
+
+ __version__ = "0.2.1"
bdsc_cli/cli.py ADDED
@@ -0,0 +1,612 @@
+ from __future__ import annotations
+
+ import argparse
+ import csv
+ import json
+ import sys
+ from pathlib import Path
+
+ from . import __version__
+ from .core import (
+     build_index,
+     EXPORT_DATASETS,
+     format_component_results,
+     format_dataset_results,
+     format_gene_results,
+     format_lookup_result,
+     format_search_results,
+     format_stock,
+     format_sync_results,
+     format_term_results,
+     get_status,
+     get_stock,
+     get_stock_by_rrid,
+     iter_export_rows,
+     iter_report_rows,
+     list_terms,
+     live_search,
+     LOOKUP_KINDS,
+     lookup_query,
+     QueryCriterion,
+     REPORT_NAMES,
+     REPORT_SPECS,
+     resolve_state_dir,
+     search_component,
+     search_driver_family,
+     search_fbid,
+     search_gene,
+     search_local,
+     search_property,
+     search_property_exact,
+     search_relationship,
+     sync_datasets,
+     TERM_SCOPES,
+ )
+
+
+ FILTER_ARGUMENTS = (
+     ("stock", "match stock number"),
+     ("rrid", "match RRID:BDSC_*"),
+     ("gene", "match gene symbol or FBgn"),
+     ("component", "match component symbol"),
+     ("fbid", "match FlyBase component id"),
+     ("property", "match component property synonym/description"),
+     ("property-exact", "match exact component property synonym/description"),
+     ("driver-family", "match true driver family signals like GAL4/LexA/QF/FLP/split"),
+     ("relationship", "match component-gene relationship"),
+     ("search", "substring search across stock text"),
+ )
+
+ LEGACY_HELP = argparse.SUPPRESS
+ LEGACY_COMMANDS = {
+     "filter",
+     "search",
+     "gene",
+     "component",
+     "fbid",
+     "rrid",
+     "property",
+     "property-exact",
+     "driver-family",
+     "relationship",
+     "lookup",
+     "live-search",
+ }
+ PUBLIC_COMMAND_METAVAR = "{sync,build-index,export,report,terms,status,find,stock}"
+
+
+ def _filter_dest(kind: str) -> str:
+     return f"{kind.replace('-', '_')}_filters"
+
+
+ def add_json_flags(parser: argparse.ArgumentParser, *, jsonl: bool = True) -> None:
+     parser.add_argument("--json", action="store_true")
+     if jsonl:
+         parser.add_argument("--jsonl", action="store_true")
+
+
+ def add_query_parser(
+     subparsers,
+     name: str,
+     help_text: str,
+     *,
+     jsonl: bool = True,
+     hidden: bool = False,
+ ):
+     parser = subparsers.add_parser(name, help=LEGACY_HELP if hidden else help_text)
+     parser.add_argument("query")
+     parser.add_argument("--state-dir", help="cache/index directory")
+     parser.add_argument("--limit", type=int, default=20)
+     add_json_flags(parser, jsonl=jsonl)
+     return parser
+
+
+ def hide_legacy_commands(subparsers_action) -> None:
+     subparsers_action._choices_actions = [
+         action
+         for action in subparsers_action._choices_actions
+         if action.dest not in LEGACY_COMMANDS
+     ]
+
+
+ def build_parser() -> argparse.ArgumentParser:
+     parser = argparse.ArgumentParser(prog="bdsc", description="Sync and query BDSC data")
+     parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
+     subparsers = parser.add_subparsers(
+         dest="command",
+         required=True,
+         metavar=PUBLIC_COMMAND_METAVAR,
+     )
+
+     sync_parser = subparsers.add_parser("sync", help="download public BDSC CSV datasets")
+     sync_parser.add_argument("--state-dir", help="cache/index directory")
+     sync_parser.add_argument("--force", action="store_true", help="skip conditional HTTP headers")
+     sync_parser.add_argument(
+         "--skip-index",
+         action="store_true",
+         help="download only; do not rebuild the local SQLite index",
+     )
+
+     build_parser_cmd = subparsers.add_parser(
+         "build-index", help="rebuild the local SQLite index from downloaded CSVs"
+     )
+     build_parser_cmd.add_argument("--state-dir", help="cache/index directory")
+
+     export_parser = subparsers.add_parser(
+         "export", help="stream normalized rows for stocks/components/genes/properties"
+     )
+     export_parser.add_argument("dataset", choices=EXPORT_DATASETS)
+     export_parser.add_argument("--state-dir", help="cache/index directory")
+     export_parser.add_argument("--limit", type=int, help="max rows to emit")
+     export_parser.add_argument("--query", help="filter exported rows by a query value")
+     export_parser.add_argument(
+         "--kind",
+         choices=LOOKUP_KINDS,
+         default="auto",
+         help="interpret --query as this lookup kind",
+     )
+     add_filter_arguments(export_parser)
+     export_parser.add_argument(
+         "--format",
+         choices=("jsonl", "csv", "tsv"),
+         default="jsonl",
+         help="output format",
+     )
+     export_parser.add_argument(
+         "--output",
+         help="output path; defaults to stdout",
+     )
+
+     report_parser = subparsers.add_parser(
+         "report",
+         help="canned reports for common BDSC retrieval tasks",
+     )
+     report_parser.add_argument("name", choices=REPORT_NAMES)
+     report_parser.add_argument(
+         "--dataset",
+         choices=EXPORT_DATASETS,
+         help="override the report's default dataset",
+     )
+     report_parser.add_argument("--state-dir", help="cache/index directory")
+     report_parser.add_argument("--limit", type=int, default=20)
+     report_parser.add_argument("--json", action="store_true")
+     report_parser.add_argument("--jsonl", action="store_true")
+
+     filter_parser = subparsers.add_parser(
+         "filter",
+         help=LEGACY_HELP,
+     )
+     filter_parser.add_argument(
+         "--dataset",
+         choices=EXPORT_DATASETS,
+         default="components",
+         help="row shape to return",
+     )
+     filter_parser.add_argument("--state-dir", help="cache/index directory")
+     filter_parser.add_argument("--limit", type=int, default=20)
+     add_filter_arguments(filter_parser)
+     filter_parser.add_argument("--json", action="store_true")
+     filter_parser.add_argument("--jsonl", action="store_true")
+
+     terms_parser = subparsers.add_parser(
+         "terms",
+         help="list available property/relationship vocab with counts",
+     )
+     terms_parser.add_argument("scope", choices=TERM_SCOPES)
+     terms_parser.add_argument("--state-dir", help="cache/index directory")
+     terms_parser.add_argument("--query", help="prefix/substring filter for the term list")
+     terms_parser.add_argument("--limit", type=int, default=50)
+     terms_parser.add_argument("--json", action="store_true")
+     terms_parser.add_argument("--jsonl", action="store_true")
+
+     status_parser = subparsers.add_parser(
+         "status", help="show local dataset/index status for the current state dir"
+     )
+     status_parser.add_argument("--state-dir", help="cache/index directory")
+     add_json_flags(status_parser, jsonl=False)
+
+     find_parser = subparsers.add_parser(
+         "find",
+         help="primary query command; free-text lookup or structured compound filters",
+     )
+     find_parser.add_argument("query", nargs="?")
+     find_parser.add_argument("--state-dir", help="cache/index directory")
+     find_parser.add_argument("--limit", type=int, default=20)
+     find_parser.add_argument(
+         "--kind",
+         choices=LOOKUP_KINDS,
+         default="auto",
+         help="interpret the positional query as this lookup kind",
+     )
+     find_parser.add_argument(
+         "--dataset",
+         choices=EXPORT_DATASETS,
+         help="return normalized rows for this dataset instead of auto-shaped lookup output",
+     )
+     add_filter_arguments(find_parser)
+     add_json_flags(find_parser)
+
+     add_query_parser(
+         subparsers,
+         "search",
+         "query the local SQLite index",
+         hidden=True,
+     ).set_defaults(limit=10)
+     add_query_parser(
+         subparsers,
+         "gene",
+         "query stocks by gene symbol or FBgn identifier",
+         hidden=True,
+     )
+     add_query_parser(
+         subparsers,
+         "component",
+         "query stocks by component symbol",
+         hidden=True,
+     )
+     add_query_parser(
+         subparsers,
+         "fbid",
+         "query stocks by FlyBase component identifier",
+         hidden=True,
+     )
+
+     stock_parser = subparsers.add_parser("stock", help="show local details for one stock")
+     stock_parser.add_argument("stknum", type=int)
+     stock_parser.add_argument("--state-dir", help="cache/index directory")
+     add_json_flags(stock_parser, jsonl=False)
+
+     rrid_parser = subparsers.add_parser("rrid", help=LEGACY_HELP)
+     rrid_parser.add_argument("query")
+     rrid_parser.add_argument("--state-dir", help="cache/index directory")
+     add_json_flags(rrid_parser, jsonl=False)
+
+     add_query_parser(
+         subparsers,
+         "property",
+         "query stocks by component property synonym or description",
+         hidden=True,
+     )
+     add_query_parser(
+         subparsers,
+         "property-exact",
+         "query stocks by exact component property synonym or description",
+         hidden=True,
+     )
+     add_query_parser(
+         subparsers,
+         "driver-family",
+         "query true driver family lines like GAL4, LexA, QF, FLP, or split drivers",
+         hidden=True,
+     )
+     add_query_parser(
+         subparsers,
+         "relationship",
+         "query stocks by component-gene relationship label",
+         hidden=True,
+     )
+
+     lookup_parser = subparsers.add_parser(
+         "lookup",
+         help=LEGACY_HELP,
+     )
+     lookup_parser.add_argument("queries", nargs="*")
+     lookup_parser.add_argument("--state-dir", help="cache/index directory")
+     lookup_parser.add_argument("--kind", choices=LOOKUP_KINDS, default="auto")
+     lookup_parser.add_argument("--limit", type=int, default=20)
+     lookup_parser.add_argument(
+         "--input",
+         help="read newline-delimited queries from a file path or '-' for stdin",
+     )
+     add_json_flags(lookup_parser)
+
+     live_parser = subparsers.add_parser(
+         "live-search", help=LEGACY_HELP
+     )
+     live_parser.add_argument("query")
+     live_parser.add_argument("--limit", type=int, default=10)
+     add_json_flags(live_parser)
+
+     hide_legacy_commands(subparsers)
+     return parser
+
+
+ def print_jsonl(rows: list[dict]) -> None:
+     for row in rows:
+         print(json.dumps(row, ensure_ascii=False))
+
+
+ def add_filter_arguments(parser: argparse.ArgumentParser) -> None:
+     for kind, help_text in FILTER_ARGUMENTS:
+         parser.add_argument(
+             f"--{kind}",
+             dest=_filter_dest(kind),
+             action="append",
+             default=[],
+             help=help_text,
+         )
+
+
+ def build_filter_criteria(args: argparse.Namespace) -> list[QueryCriterion]:
+     criteria: list[QueryCriterion] = []
+     for kind, _ in FILTER_ARGUMENTS:
+         for value in getattr(args, _filter_dest(kind), []):
+             if value.strip():
+                 criteria.append(QueryCriterion(kind=kind, query=value))
+     return criteria
+
+
+ def emit_output(
+     payload: object,
+     *,
+     as_json: bool,
+     as_jsonl: bool,
+     formatter,
+ ) -> None:
+     if as_json:
+         print(json.dumps(payload, indent=2, ensure_ascii=False))
+         return
+     if as_jsonl:
+         if not isinstance(payload, list):
+             raise ValueError("jsonl output requires a list payload")
+         print_jsonl(payload)
+         return
+     print(formatter(payload))
+
+
+ def emit_query_results(
+     args: argparse.Namespace,
+     results,
+     *,
+     formatter,
+ ) -> int:
+     emit_output(
+         results,
+         as_json=args.json,
+         as_jsonl=args.jsonl,
+         formatter=formatter,
+     )
+     return 0
+
+
+ def emit_stock_result(args: argparse.Namespace, stock: object) -> int:
+     emit_output(stock, as_json=args.json, as_jsonl=False, formatter=format_stock)
+     return 0 if stock else 1
+
+
+ def emit_lookup_payload(args: argparse.Namespace, results: list[dict]) -> int:
+     if args.json:
+         print(
+             json.dumps(
+                 results[0] if len(results) == 1 else results,
+                 indent=2,
+                 ensure_ascii=False,
+             )
+         )
+     elif args.jsonl:
+         print_jsonl(results)
+     else:
+         print("\n\n".join(format_lookup_result(result) for result in results))
+     return 0 if all(result["results"] for result in results) else 1
+
+
+ def load_queries(positional_queries: list[str], input_path: str | None) -> list[str]:
+     queries = [query for query in positional_queries if query.strip()]
+     if input_path:
+         if input_path == "-":
+             source = sys.stdin.read()
+         else:
+             source = Path(input_path).read_text(encoding="utf-8")
+         queries.extend(line.strip() for line in source.splitlines() if line.strip())
+     return queries
+
+
+ def emit_export_rows(
+     rows,
+     *,
+     output_format: str,
+     output_path: str | None,
+ ) -> None:
+     if output_path:
+         handle = Path(output_path).open("w", encoding="utf-8", newline="")
+     else:
+         handle = sys.stdout
+
+     try:
+         if output_format == "jsonl":
+             for row in rows:
+                 handle.write(json.dumps(row, ensure_ascii=False) + "\n")
+             return
+
+         writer = None
+         delimiter = "," if output_format == "csv" else "\t"
+         for row in rows:
+             if writer is None:
+                 writer = csv.DictWriter(handle, fieldnames=list(row.keys()), delimiter=delimiter)
+                 writer.writeheader()
+             writer.writerow(row)
+     finally:
+         if output_path:
+             handle.close()
+
+
+ QUERY_COMMAND_SPECS = {
+     "search": (search_local, format_search_results),
+     "gene": (search_gene, format_gene_results),
+     "component": (search_component, format_component_results),
+     "fbid": (search_fbid, format_component_results),
+     "property": (search_property, format_component_results),
+     "property-exact": (search_property_exact, format_component_results),
+     "driver-family": (search_driver_family, format_component_results),
+     "relationship": (search_relationship, format_component_results),
+ }
+
+
+ def run_find(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int:
+     query = (args.query or "").strip()
+     criteria = build_filter_criteria(args)
+     if not query and not criteria:
+         parser.error("find requires a query or at least one filter flag")
+
+     state_dir = resolve_state_dir(args.state_dir)
+     if query and not criteria and not args.dataset:
+         return emit_lookup_payload(
+             args,
+             [lookup_query(state_dir, query, kind=args.kind, limit=args.limit)],
+         )
+
+     rows = list(
+         iter_export_rows(
+             state_dir,
+             args.dataset or "components",
+             limit=args.limit,
+             criteria=criteria,
+             query=query or None,
+             kind=args.kind,
+         )
+     )
+     emit_output(
+         rows,
+         as_json=args.json,
+         as_jsonl=args.jsonl,
+         formatter=lambda payload: format_dataset_results(args.dataset or "components", payload),
+     )
+     return 0 if rows else 1
+
+
+ def run_legacy_lookup(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int:
+     queries = load_queries(args.queries, args.input)
+     if not queries:
+         parser.error("lookup requires at least one query or --input")
+     state_dir = resolve_state_dir(args.state_dir)
+     lookup_results = [
+         lookup_query(state_dir, query, kind=args.kind, limit=args.limit)
+         for query in queries
+     ]
+     return emit_lookup_payload(args, lookup_results)
+
+
+ def main(argv: list[str] | None = None) -> int:
+     parser = build_parser()
+     args = parser.parse_args(argv)
+
+     try:
+         state_dir = resolve_state_dir(getattr(args, "state_dir", None))
+
+         if args.command == "sync":
+             results = sync_datasets(state_dir, force=args.force)
+             print(format_sync_results(results))
+             if not args.skip_index:
+                 counts = build_index(state_dir)
+                 print(json.dumps({"indexed": counts, "state_dir": str(state_dir)}, indent=2))
+             return 0
+
+         if args.command == "build-index":
+             counts = build_index(state_dir)
+             print(json.dumps({"indexed": counts, "state_dir": str(state_dir)}, indent=2))
+             return 0
+
+         if args.command == "export":
+             emit_export_rows(
+                 iter_export_rows(
+                     state_dir,
+                     args.dataset,
+                     limit=args.limit,
+                     criteria=build_filter_criteria(args),
+                     query=args.query,
+                     kind=args.kind,
+                 ),
+                 output_format=args.format,
+                 output_path=args.output,
+             )
+             return 0
+
+         if args.command == "report":
+             rows = list(
+                 iter_report_rows(
+                     state_dir,
+                     args.name,
+                     dataset=args.dataset,
+                     limit=args.limit,
+                 )
+             )
+             report_dataset = args.dataset or REPORT_SPECS[args.name].default_dataset
+             emit_output(
+                 rows,
+                 as_json=args.json,
+                 as_jsonl=args.jsonl,
+                 formatter=lambda payload: format_dataset_results(report_dataset, payload),
+             )
+             return 0 if rows else 1
+
+         if args.command == "filter":
+             criteria = build_filter_criteria(args)
+             if not criteria:
+                 parser.error("filter requires at least one filter flag")
+             rows = list(
+                 iter_export_rows(
+                     state_dir,
+                     args.dataset,
+                     limit=args.limit,
+                     criteria=criteria,
+                 )
+             )
+             emit_output(
+                 rows,
+                 as_json=args.json,
+                 as_jsonl=args.jsonl,
+                 formatter=lambda payload: format_dataset_results(args.dataset, payload),
+             )
+             return 0 if rows else 1
+
+         if args.command == "find":
+             return run_find(parser, args)
+
+         if args.command == "terms":
+             results = list_terms(
+                 state_dir,
+                 args.scope,
+                 query=args.query,
+                 limit=args.limit,
+             )
+             emit_output(
+                 results,
+                 as_json=args.json,
+                 as_jsonl=args.jsonl,
+                 formatter=format_term_results,
+             )
+             return 0
+
+         if args.command == "status":
+             print(json.dumps(get_status(state_dir), indent=2, ensure_ascii=False))
+             return 0
+
+         if args.command in QUERY_COMMAND_SPECS:
+             query_fn, formatter = QUERY_COMMAND_SPECS[args.command]
+             results = query_fn(state_dir, args.query, limit=args.limit)
+             return emit_query_results(args, results, formatter=formatter)
+
+         if args.command == "stock":
+             stock = get_stock(state_dir, args.stknum)
+             return emit_stock_result(args, stock)
+
+         if args.command == "rrid":
+             stock = get_stock_by_rrid(state_dir, args.query)
+             return emit_stock_result(args, stock)
+
+         if args.command == "lookup":
+             return run_legacy_lookup(parser, args)
+
+         if args.command == "live-search":
+             results = live_search(args.query, limit=args.limit)
+             return emit_query_results(args, results, formatter=format_search_results)
+
+         parser.error(f"unknown command: {args.command}")
+         return 2
+     except Exception as exc:
+         print(f"error: {exc}", file=sys.stderr)
+         return 1
+
+
+ if __name__ == "__main__":
+     raise SystemExit(main())
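
Because `main()` accepts an explicit `argv` list and returns an exit code, the CLI defined above can be exercised in-process, without spawning a subprocess. A minimal sketch, assuming this wheel is installed: the command names and flags come from `build_parser()` above, while the state directory and query values are hypothetical placeholders.

    # Drive the argparse entry point directly; each call mirrors one
    # `bdsc <command> ...` invocation and returns its exit code.
    from bdsc_cli.cli import main

    # Download the public BDSC CSV datasets and rebuild the SQLite index
    # (state directory is a hypothetical path).
    exit_code = main(["sync", "--state-dir", "/tmp/bdsc-state"])

    # `find` is the primary query command: a free-text positional query or
    # compound filter flags such as --gene/--component (the value below is
    # a made-up example, not taken from this diff).
    exit_code = main([
        "find",
        "--gene", "dpp",
        "--limit", "5",
        "--jsonl",
        "--state-dir", "/tmp/bdsc-state",
    ])

Per `run_find()` and the `report`/`filter` branches in `main()`, an empty result set maps to exit code 1 (as does a caught exception, which additionally prints to stderr), while `--jsonl` keeps the output machine-readable at one JSON object per line.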