bdsc-cli 0.2.1__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bdsc_cli-0.2.1/src/bdsc_cli.egg-info → bdsc_cli-0.2.3}/PKG-INFO +12 -19
- {bdsc_cli-0.2.1 → bdsc_cli-0.2.3}/README.md +11 -18
- {bdsc_cli-0.2.1 → bdsc_cli-0.2.3}/src/bdsc_cli/__init__.py +1 -1
- {bdsc_cli-0.2.1 → bdsc_cli-0.2.3}/src/bdsc_cli/cli.py +59 -25
- {bdsc_cli-0.2.1 → bdsc_cli-0.2.3}/src/bdsc_cli/core.py +76 -56
- {bdsc_cli-0.2.1 → bdsc_cli-0.2.3/src/bdsc_cli.egg-info}/PKG-INFO +12 -19
- {bdsc_cli-0.2.1 → bdsc_cli-0.2.3}/src/bdsc_cli.egg-info/SOURCES.txt +1 -2
- {bdsc_cli-0.2.1 → bdsc_cli-0.2.3}/tests/test_core.py +108 -0
- bdsc_cli-0.2.1/tests/test_release_tools.py +0 -35
- {bdsc_cli-0.2.1 → bdsc_cli-0.2.3}/LICENSE +0 -0
- {bdsc_cli-0.2.1 → bdsc_cli-0.2.3}/pyproject.toml +0 -0
- {bdsc_cli-0.2.1 → bdsc_cli-0.2.3}/setup.cfg +0 -0
- {bdsc_cli-0.2.1 → bdsc_cli-0.2.3}/src/bdsc_cli.egg-info/dependency_links.txt +0 -0
- {bdsc_cli-0.2.1 → bdsc_cli-0.2.3}/src/bdsc_cli.egg-info/entry_points.txt +0 -0
- {bdsc_cli-0.2.1 → bdsc_cli-0.2.3}/src/bdsc_cli.egg-info/requires.txt +0 -0
- {bdsc_cli-0.2.1 → bdsc_cli-0.2.3}/src/bdsc_cli.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bdsc-cli
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Sync and query BDSC datasets locally
|
|
5
5
|
Author: Gustavo Madeira Santana
|
|
6
6
|
License-Expression: MIT
|
|
@@ -21,6 +21,9 @@ Dynamic: license-file
|
|
|
21
21
|
|
|
22
22
|
# bdsc-cli
|
|
23
23
|
|
|
24
|
+
> This package has been renamed. Install `bdsc` now:
|
|
25
|
+
> `pipx install bdsc`
|
|
26
|
+
|
|
24
27
|
Small CLI for syncing public Bloomington Drosophila Stock Center datasets and
|
|
25
28
|
querying them locally.
|
|
26
29
|
|
|
@@ -50,16 +53,16 @@ brew tap gumadeiras/tap
|
|
|
50
53
|
brew install bdsc-cli
|
|
51
54
|
```
|
|
52
55
|
|
|
53
|
-
|
|
56
|
+
PyPI with `pipx`:
|
|
54
57
|
|
|
55
58
|
```bash
|
|
56
|
-
pipx install
|
|
59
|
+
pipx install bdsc-cli
|
|
57
60
|
```
|
|
58
61
|
|
|
59
|
-
|
|
62
|
+
PyPI with plain `pip`:
|
|
60
63
|
|
|
61
64
|
```bash
|
|
62
|
-
python3 -m pip install
|
|
65
|
+
python3 -m pip install bdsc-cli
|
|
63
66
|
```
|
|
64
67
|
|
|
65
68
|
Source install:
|
|
@@ -91,19 +94,8 @@ Build release artifacts locally:
|
|
|
91
94
|
python -m pip install -e .[release]
|
|
92
95
|
python -m build
|
|
93
96
|
python -m twine check dist/*
|
|
94
|
-
python scripts/render_homebrew_formula.py dist/bdsc_cli-$(python - <<'PY'
|
|
95
|
-
from bdsc_cli import __version__
|
|
96
|
-
print(__version__)
|
|
97
|
-
PY
|
|
98
|
-
).tar.gz
|
|
99
97
|
```
|
|
100
98
|
|
|
101
|
-
PyPI note:
|
|
102
|
-
|
|
103
|
-
- the GitHub release is live
|
|
104
|
-
- PyPI trusted publishing is not configured yet for `bdsc-cli`
|
|
105
|
-
- `pip install bdsc-cli` will work after that publisher is added
|
|
106
|
-
|
|
107
99
|
## Quickstart
|
|
108
100
|
|
|
109
101
|
Create a local cache and index:
|
|
@@ -190,6 +182,9 @@ bdsc export components --limit 5 --format jsonl
|
|
|
190
182
|
bdsc stock 77118 --json
|
|
191
183
|
```
|
|
192
184
|
|
|
185
|
+
Query commands return all matches by default. Use `--limit N` when you want a
|
|
186
|
+
bounded result set.
|
|
187
|
+
|
|
193
188
|
## Commands
|
|
194
189
|
|
|
195
190
|
- `bdsc sync`: download the BDSC CSV datasets; builds the index by default
|
|
@@ -354,9 +349,7 @@ bdsc terms property-descriptions --query optogenetic --jsonl
|
|
|
354
349
|
- use `property-exact` or `driver-family` when `property` is too broad for a
|
|
355
350
|
reliable LexA/QF/GAL4-style answer.
|
|
356
351
|
- tag pushes like `vX.Y.Z` run the release workflow: build artifacts, create a
|
|
357
|
-
GitHub release,
|
|
358
|
-
- `scripts/render_homebrew_formula.py` renders a Homebrew formula from a built
|
|
359
|
-
sdist; use it when updating a tap after a release.
|
|
352
|
+
GitHub release, publish to PyPI, and update `gumadeiras/homebrew-tap`.
|
|
360
353
|
- The live endpoint is undocumented and may change without notice.
|
|
361
354
|
- BDSC data is large enough that the first full sync/index can take a few
|
|
362
355
|
minutes depending on network and disk speed.
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# bdsc-cli
|
|
2
2
|
|
|
3
|
+
> This package has been renamed. Install `bdsc` now:
|
|
4
|
+
> `pipx install bdsc`
|
|
5
|
+
|
|
3
6
|
Small CLI for syncing public Bloomington Drosophila Stock Center datasets and
|
|
4
7
|
querying them locally.
|
|
5
8
|
|
|
@@ -29,16 +32,16 @@ brew tap gumadeiras/tap
|
|
|
29
32
|
brew install bdsc-cli
|
|
30
33
|
```
|
|
31
34
|
|
|
32
|
-
|
|
35
|
+
PyPI with `pipx`:
|
|
33
36
|
|
|
34
37
|
```bash
|
|
35
|
-
pipx install
|
|
38
|
+
pipx install bdsc-cli
|
|
36
39
|
```
|
|
37
40
|
|
|
38
|
-
|
|
41
|
+
PyPI with plain `pip`:
|
|
39
42
|
|
|
40
43
|
```bash
|
|
41
|
-
python3 -m pip install
|
|
44
|
+
python3 -m pip install bdsc-cli
|
|
42
45
|
```
|
|
43
46
|
|
|
44
47
|
Source install:
|
|
@@ -70,19 +73,8 @@ Build release artifacts locally:
|
|
|
70
73
|
python -m pip install -e .[release]
|
|
71
74
|
python -m build
|
|
72
75
|
python -m twine check dist/*
|
|
73
|
-
python scripts/render_homebrew_formula.py dist/bdsc_cli-$(python - <<'PY'
|
|
74
|
-
from bdsc_cli import __version__
|
|
75
|
-
print(__version__)
|
|
76
|
-
PY
|
|
77
|
-
).tar.gz
|
|
78
76
|
```
|
|
79
77
|
|
|
80
|
-
PyPI note:
|
|
81
|
-
|
|
82
|
-
- the GitHub release is live
|
|
83
|
-
- PyPI trusted publishing is not configured yet for `bdsc-cli`
|
|
84
|
-
- `pip install bdsc-cli` will work after that publisher is added
|
|
85
|
-
|
|
86
78
|
## Quickstart
|
|
87
79
|
|
|
88
80
|
Create a local cache and index:
|
|
@@ -169,6 +161,9 @@ bdsc export components --limit 5 --format jsonl
|
|
|
169
161
|
bdsc stock 77118 --json
|
|
170
162
|
```
|
|
171
163
|
|
|
164
|
+
Query commands return all matches by default. Use `--limit N` when you want a
|
|
165
|
+
bounded result set.
|
|
166
|
+
|
|
172
167
|
## Commands
|
|
173
168
|
|
|
174
169
|
- `bdsc sync`: download the BDSC CSV datasets; builds the index by default
|
|
@@ -333,9 +328,7 @@ bdsc terms property-descriptions --query optogenetic --jsonl
|
|
|
333
328
|
- use `property-exact` or `driver-family` when `property` is too broad for a
|
|
334
329
|
reliable LexA/QF/GAL4-style answer.
|
|
335
330
|
- tag pushes like `vX.Y.Z` run the release workflow: build artifacts, create a
|
|
336
|
-
GitHub release,
|
|
337
|
-
- `scripts/render_homebrew_formula.py` renders a Homebrew formula from a built
|
|
338
|
-
sdist; use it when updating a tap after a release.
|
|
331
|
+
GitHub release, publish to PyPI, and update `gumadeiras/homebrew-tap`.
|
|
339
332
|
- The live endpoint is undocumented and may change without notice.
|
|
340
333
|
- BDSC data is large enough that the first full sync/index can take a few
|
|
341
334
|
minutes depending on network and disk speed.
|
|
@@ -75,6 +75,12 @@ LEGACY_COMMANDS = {
|
|
|
75
75
|
PUBLIC_COMMAND_METAVAR = "{sync,build-index,export,report,terms,status,find,stock}"
|
|
76
76
|
|
|
77
77
|
|
|
78
|
+
class HelpOnErrorArgumentParser(argparse.ArgumentParser):
|
|
79
|
+
def error(self, message: str) -> None:
|
|
80
|
+
self.print_help(sys.stderr)
|
|
81
|
+
self.exit(2, f"\nerror: {message}\n")
|
|
82
|
+
|
|
83
|
+
|
|
78
84
|
def _filter_dest(kind: str) -> str:
|
|
79
85
|
return f"{kind.replace('-', '_')}_filters"
|
|
80
86
|
|
|
@@ -85,6 +91,24 @@ def add_json_flags(parser: argparse.ArgumentParser, *, jsonl: bool = True) -> No
|
|
|
85
91
|
parser.add_argument("--jsonl", action="store_true")
|
|
86
92
|
|
|
87
93
|
|
|
94
|
+
def add_limit_argument(parser: argparse.ArgumentParser) -> None:
|
|
95
|
+
parser.add_argument("--limit", type=int, help="max rows to emit")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def add_subcommand(subparsers, name: str, **kwargs):
|
|
99
|
+
parser = subparsers.add_parser(name, **kwargs)
|
|
100
|
+
parser.set_defaults(_command_parser=parser)
|
|
101
|
+
return parser
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def command_error(
|
|
105
|
+
root_parser: argparse.ArgumentParser,
|
|
106
|
+
args: argparse.Namespace,
|
|
107
|
+
message: str,
|
|
108
|
+
) -> None:
|
|
109
|
+
getattr(args, "_command_parser", root_parser).error(message)
|
|
110
|
+
|
|
111
|
+
|
|
88
112
|
def add_query_parser(
|
|
89
113
|
subparsers,
|
|
90
114
|
name: str,
|
|
@@ -93,10 +117,10 @@ def add_query_parser(
|
|
|
93
117
|
jsonl: bool = True,
|
|
94
118
|
hidden: bool = False,
|
|
95
119
|
):
|
|
96
|
-
parser = subparsers
|
|
120
|
+
parser = add_subcommand(subparsers, name, help=LEGACY_HELP if hidden else help_text)
|
|
97
121
|
parser.add_argument("query")
|
|
98
122
|
parser.add_argument("--state-dir", help="cache/index directory")
|
|
99
|
-
parser
|
|
123
|
+
add_limit_argument(parser)
|
|
100
124
|
add_json_flags(parser, jsonl=jsonl)
|
|
101
125
|
return parser
|
|
102
126
|
|
|
@@ -110,15 +134,16 @@ def hide_legacy_commands(subparsers_action) -> None:
|
|
|
110
134
|
|
|
111
135
|
|
|
112
136
|
def build_parser() -> argparse.ArgumentParser:
|
|
113
|
-
parser =
|
|
137
|
+
parser = HelpOnErrorArgumentParser(prog="bdsc", description="Sync and query BDSC data")
|
|
114
138
|
parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
|
|
115
139
|
subparsers = parser.add_subparsers(
|
|
116
140
|
dest="command",
|
|
117
141
|
required=True,
|
|
118
142
|
metavar=PUBLIC_COMMAND_METAVAR,
|
|
143
|
+
parser_class=HelpOnErrorArgumentParser,
|
|
119
144
|
)
|
|
120
145
|
|
|
121
|
-
sync_parser = subparsers
|
|
146
|
+
sync_parser = add_subcommand(subparsers, "sync", help="download public BDSC CSV datasets")
|
|
122
147
|
sync_parser.add_argument("--state-dir", help="cache/index directory")
|
|
123
148
|
sync_parser.add_argument("--force", action="store_true", help="skip conditional HTTP headers")
|
|
124
149
|
sync_parser.add_argument(
|
|
@@ -127,17 +152,19 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
127
152
|
help="download only; do not rebuild the local SQLite index",
|
|
128
153
|
)
|
|
129
154
|
|
|
130
|
-
build_parser_cmd =
|
|
155
|
+
build_parser_cmd = add_subcommand(
|
|
156
|
+
subparsers,
|
|
131
157
|
"build-index", help="rebuild the local SQLite index from downloaded CSVs"
|
|
132
158
|
)
|
|
133
159
|
build_parser_cmd.add_argument("--state-dir", help="cache/index directory")
|
|
134
160
|
|
|
135
|
-
export_parser =
|
|
161
|
+
export_parser = add_subcommand(
|
|
162
|
+
subparsers,
|
|
136
163
|
"export", help="stream normalized rows for stocks/components/genes/properties"
|
|
137
164
|
)
|
|
138
165
|
export_parser.add_argument("dataset", choices=EXPORT_DATASETS)
|
|
139
166
|
export_parser.add_argument("--state-dir", help="cache/index directory")
|
|
140
|
-
export_parser
|
|
167
|
+
add_limit_argument(export_parser)
|
|
141
168
|
export_parser.add_argument("--query", help="filter exported rows by a query value")
|
|
142
169
|
export_parser.add_argument(
|
|
143
170
|
"--kind",
|
|
@@ -157,7 +184,8 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
157
184
|
help="output path; defaults to stdout",
|
|
158
185
|
)
|
|
159
186
|
|
|
160
|
-
report_parser =
|
|
187
|
+
report_parser = add_subcommand(
|
|
188
|
+
subparsers,
|
|
161
189
|
"report",
|
|
162
190
|
help="canned reports for common BDSC retrieval tasks",
|
|
163
191
|
)
|
|
@@ -168,11 +196,12 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
168
196
|
help="override the report's default dataset",
|
|
169
197
|
)
|
|
170
198
|
report_parser.add_argument("--state-dir", help="cache/index directory")
|
|
171
|
-
report_parser
|
|
199
|
+
add_limit_argument(report_parser)
|
|
172
200
|
report_parser.add_argument("--json", action="store_true")
|
|
173
201
|
report_parser.add_argument("--jsonl", action="store_true")
|
|
174
202
|
|
|
175
|
-
filter_parser =
|
|
203
|
+
filter_parser = add_subcommand(
|
|
204
|
+
subparsers,
|
|
176
205
|
"filter",
|
|
177
206
|
help=LEGACY_HELP,
|
|
178
207
|
)
|
|
@@ -183,12 +212,13 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
183
212
|
help="row shape to return",
|
|
184
213
|
)
|
|
185
214
|
filter_parser.add_argument("--state-dir", help="cache/index directory")
|
|
186
|
-
filter_parser
|
|
215
|
+
add_limit_argument(filter_parser)
|
|
187
216
|
add_filter_arguments(filter_parser)
|
|
188
217
|
filter_parser.add_argument("--json", action="store_true")
|
|
189
218
|
filter_parser.add_argument("--jsonl", action="store_true")
|
|
190
219
|
|
|
191
|
-
terms_parser =
|
|
220
|
+
terms_parser = add_subcommand(
|
|
221
|
+
subparsers,
|
|
192
222
|
"terms",
|
|
193
223
|
help="list available property/relationship vocab with counts",
|
|
194
224
|
)
|
|
@@ -199,19 +229,21 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
199
229
|
terms_parser.add_argument("--json", action="store_true")
|
|
200
230
|
terms_parser.add_argument("--jsonl", action="store_true")
|
|
201
231
|
|
|
202
|
-
status_parser =
|
|
232
|
+
status_parser = add_subcommand(
|
|
233
|
+
subparsers,
|
|
203
234
|
"status", help="show local dataset/index status for the current state dir"
|
|
204
235
|
)
|
|
205
236
|
status_parser.add_argument("--state-dir", help="cache/index directory")
|
|
206
237
|
add_json_flags(status_parser, jsonl=False)
|
|
207
238
|
|
|
208
|
-
find_parser =
|
|
239
|
+
find_parser = add_subcommand(
|
|
240
|
+
subparsers,
|
|
209
241
|
"find",
|
|
210
242
|
help="primary query command; free-text lookup or structured compound filters",
|
|
211
243
|
)
|
|
212
244
|
find_parser.add_argument("query", nargs="?")
|
|
213
245
|
find_parser.add_argument("--state-dir", help="cache/index directory")
|
|
214
|
-
find_parser
|
|
246
|
+
add_limit_argument(find_parser)
|
|
215
247
|
find_parser.add_argument(
|
|
216
248
|
"--kind",
|
|
217
249
|
choices=LOOKUP_KINDS,
|
|
@@ -231,7 +263,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
231
263
|
"search",
|
|
232
264
|
"query the local SQLite index",
|
|
233
265
|
hidden=True,
|
|
234
|
-
)
|
|
266
|
+
)
|
|
235
267
|
add_query_parser(
|
|
236
268
|
subparsers,
|
|
237
269
|
"gene",
|
|
@@ -251,12 +283,12 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
251
283
|
hidden=True,
|
|
252
284
|
)
|
|
253
285
|
|
|
254
|
-
stock_parser = subparsers
|
|
286
|
+
stock_parser = add_subcommand(subparsers, "stock", help="show local details for one stock")
|
|
255
287
|
stock_parser.add_argument("stknum", type=int)
|
|
256
288
|
stock_parser.add_argument("--state-dir", help="cache/index directory")
|
|
257
289
|
add_json_flags(stock_parser, jsonl=False)
|
|
258
290
|
|
|
259
|
-
rrid_parser = subparsers
|
|
291
|
+
rrid_parser = add_subcommand(subparsers, "rrid", help=LEGACY_HELP)
|
|
260
292
|
rrid_parser.add_argument("query")
|
|
261
293
|
rrid_parser.add_argument("--state-dir", help="cache/index directory")
|
|
262
294
|
add_json_flags(rrid_parser, jsonl=False)
|
|
@@ -286,25 +318,27 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
286
318
|
hidden=True,
|
|
287
319
|
)
|
|
288
320
|
|
|
289
|
-
lookup_parser =
|
|
321
|
+
lookup_parser = add_subcommand(
|
|
322
|
+
subparsers,
|
|
290
323
|
"lookup",
|
|
291
324
|
help=LEGACY_HELP,
|
|
292
325
|
)
|
|
293
326
|
lookup_parser.add_argument("queries", nargs="*")
|
|
294
327
|
lookup_parser.add_argument("--state-dir", help="cache/index directory")
|
|
295
328
|
lookup_parser.add_argument("--kind", choices=LOOKUP_KINDS, default="auto")
|
|
296
|
-
lookup_parser
|
|
329
|
+
add_limit_argument(lookup_parser)
|
|
297
330
|
lookup_parser.add_argument(
|
|
298
331
|
"--input",
|
|
299
332
|
help="read newline-delimited queries from a file path or '-' for stdin",
|
|
300
333
|
)
|
|
301
334
|
add_json_flags(lookup_parser)
|
|
302
335
|
|
|
303
|
-
live_parser =
|
|
336
|
+
live_parser = add_subcommand(
|
|
337
|
+
subparsers,
|
|
304
338
|
"live-search", help=LEGACY_HELP
|
|
305
339
|
)
|
|
306
340
|
live_parser.add_argument("query")
|
|
307
|
-
live_parser
|
|
341
|
+
add_limit_argument(live_parser)
|
|
308
342
|
add_json_flags(live_parser)
|
|
309
343
|
|
|
310
344
|
hide_legacy_commands(subparsers)
|
|
@@ -446,7 +480,7 @@ def run_find(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int:
|
|
|
446
480
|
query = (args.query or "").strip()
|
|
447
481
|
criteria = build_filter_criteria(args)
|
|
448
482
|
if not query and not criteria:
|
|
449
|
-
parser
|
|
483
|
+
command_error(parser, args, "find requires a query or at least one filter flag")
|
|
450
484
|
|
|
451
485
|
state_dir = resolve_state_dir(args.state_dir)
|
|
452
486
|
if query and not criteria and not args.dataset:
|
|
@@ -477,7 +511,7 @@ def run_find(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int:
|
|
|
477
511
|
def run_legacy_lookup(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int:
|
|
478
512
|
queries = load_queries(args.queries, args.input)
|
|
479
513
|
if not queries:
|
|
480
|
-
parser
|
|
514
|
+
command_error(parser, args, "lookup requires at least one query or --input")
|
|
481
515
|
state_dir = resolve_state_dir(args.state_dir)
|
|
482
516
|
lookup_results = [
|
|
483
517
|
lookup_query(state_dir, query, kind=args.kind, limit=args.limit)
|
|
@@ -542,7 +576,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
542
576
|
if args.command == "filter":
|
|
543
577
|
criteria = build_filter_criteria(args)
|
|
544
578
|
if not criteria:
|
|
545
|
-
parser
|
|
579
|
+
command_error(parser, args, "filter requires at least one filter flag")
|
|
546
580
|
rows = list(
|
|
547
581
|
iter_export_rows(
|
|
548
582
|
state_dir,
|
|
@@ -857,11 +857,30 @@ def _merge_ranked_matches(
|
|
|
857
857
|
)
|
|
858
858
|
|
|
859
859
|
|
|
860
|
+
def _limit_sql(limit: int | None) -> tuple[str, list[int]]:
|
|
861
|
+
if limit is None:
|
|
862
|
+
return "", []
|
|
863
|
+
return "LIMIT ?", [limit]
|
|
864
|
+
|
|
865
|
+
|
|
866
|
+
def _scaled_limit(limit: int | None, multiplier: int, floor: int) -> int | None:
|
|
867
|
+
if limit is None:
|
|
868
|
+
return None
|
|
869
|
+
return max(limit * multiplier, floor)
|
|
870
|
+
|
|
871
|
+
|
|
872
|
+
def _limit_rows(rows: list[Any], limit: int | None) -> list[Any]:
|
|
873
|
+
if limit is None:
|
|
874
|
+
return rows
|
|
875
|
+
return rows[:limit]
|
|
876
|
+
|
|
877
|
+
|
|
860
878
|
def _search_candidates_from_prefix_fts(
|
|
861
879
|
conn: sqlite3.Connection,
|
|
862
880
|
query: str,
|
|
863
|
-
limit: int,
|
|
881
|
+
limit: int | None,
|
|
864
882
|
) -> list[dict[str, Any]]:
|
|
883
|
+
limit_clause, limit_params = _limit_sql(limit)
|
|
865
884
|
has_fts = bool(
|
|
866
885
|
conn.execute(
|
|
867
886
|
"SELECT 1 FROM sqlite_master WHERE type='table' AND name='stock_fts'"
|
|
@@ -869,7 +888,7 @@ def _search_candidates_from_prefix_fts(
|
|
|
869
888
|
)
|
|
870
889
|
if not has_fts:
|
|
871
890
|
rows = conn.execute(
|
|
872
|
-
"""
|
|
891
|
+
f"""
|
|
873
892
|
SELECT
|
|
874
893
|
s.stknum,
|
|
875
894
|
s.genotype,
|
|
@@ -881,14 +900,14 @@ def _search_candidates_from_prefix_fts(
|
|
|
881
900
|
JOIN stocks s ON s.stknum = sd.stknum
|
|
882
901
|
WHERE sd.search_text LIKE ?
|
|
883
902
|
ORDER BY s.stknum
|
|
884
|
-
|
|
903
|
+
{limit_clause}
|
|
885
904
|
""",
|
|
886
|
-
(f"%{query}%",
|
|
905
|
+
(f"%{query}%", *limit_params),
|
|
887
906
|
).fetchall()
|
|
888
907
|
return [{"row": row, "score": _score_search_document(query, row) + 20.0} for row in rows]
|
|
889
908
|
|
|
890
909
|
rows = conn.execute(
|
|
891
|
-
"""
|
|
910
|
+
f"""
|
|
892
911
|
SELECT
|
|
893
912
|
s.stknum,
|
|
894
913
|
s.genotype,
|
|
@@ -902,9 +921,9 @@ def _search_candidates_from_prefix_fts(
|
|
|
902
921
|
JOIN search_documents sd ON sd.stknum = s.stknum
|
|
903
922
|
WHERE stock_fts MATCH ?
|
|
904
923
|
ORDER BY bm25(stock_fts), s.stknum
|
|
905
|
-
|
|
924
|
+
{limit_clause}
|
|
906
925
|
""",
|
|
907
|
-
(build_fts_query(query),
|
|
926
|
+
(build_fts_query(query), *limit_params),
|
|
908
927
|
).fetchall()
|
|
909
928
|
return [
|
|
910
929
|
{
|
|
@@ -918,11 +937,12 @@ def _search_candidates_from_prefix_fts(
|
|
|
918
937
|
def _search_candidates_from_trigram_fts(
|
|
919
938
|
conn: sqlite3.Connection,
|
|
920
939
|
query: str,
|
|
921
|
-
limit: int,
|
|
940
|
+
limit: int | None,
|
|
922
941
|
) -> list[dict[str, Any]]:
|
|
923
942
|
trigram_query = build_trigram_query(query)
|
|
924
943
|
if not trigram_query:
|
|
925
944
|
return []
|
|
945
|
+
limit_clause, limit_params = _limit_sql(limit)
|
|
926
946
|
|
|
927
947
|
has_trigram = bool(
|
|
928
948
|
conn.execute(
|
|
@@ -933,7 +953,7 @@ def _search_candidates_from_trigram_fts(
|
|
|
933
953
|
return []
|
|
934
954
|
|
|
935
955
|
rows = conn.execute(
|
|
936
|
-
"""
|
|
956
|
+
f"""
|
|
937
957
|
SELECT
|
|
938
958
|
s.stknum,
|
|
939
959
|
s.genotype,
|
|
@@ -947,9 +967,9 @@ def _search_candidates_from_trigram_fts(
|
|
|
947
967
|
JOIN search_documents sd ON sd.stknum = s.stknum
|
|
948
968
|
WHERE stock_trigram MATCH ?
|
|
949
969
|
ORDER BY bm25(stock_trigram), s.stknum
|
|
950
|
-
|
|
970
|
+
{limit_clause}
|
|
951
971
|
""",
|
|
952
|
-
(trigram_query,
|
|
972
|
+
(trigram_query, *limit_params),
|
|
953
973
|
).fetchall()
|
|
954
974
|
|
|
955
975
|
matches: list[dict[str, Any]] = []
|
|
@@ -963,21 +983,21 @@ def _search_candidates_from_trigram_fts(
|
|
|
963
983
|
def _candidate_stock_ids_for_query(
|
|
964
984
|
conn: sqlite3.Connection,
|
|
965
985
|
query: str,
|
|
966
|
-
limit: int,
|
|
986
|
+
limit: int | None,
|
|
967
987
|
) -> list[int]:
|
|
968
988
|
candidates: dict[int, float] = {}
|
|
969
|
-
for match in _search_candidates_from_prefix_fts(conn, query,
|
|
989
|
+
for match in _search_candidates_from_prefix_fts(conn, query, _scaled_limit(limit, 2, 20)):
|
|
970
990
|
candidates[match["row"]["stknum"]] = max(
|
|
971
991
|
match["score"],
|
|
972
992
|
candidates.get(match["row"]["stknum"], float("-inf")),
|
|
973
993
|
)
|
|
974
|
-
for match in _search_candidates_from_trigram_fts(conn, query,
|
|
994
|
+
for match in _search_candidates_from_trigram_fts(conn, query, _scaled_limit(limit, 6, 60)):
|
|
975
995
|
candidates[match["row"]["stknum"]] = max(
|
|
976
996
|
match["score"],
|
|
977
997
|
candidates.get(match["row"]["stknum"], float("-inf")),
|
|
978
998
|
)
|
|
979
999
|
ranked = sorted(candidates.items(), key=lambda item: (-item[1], item[0]))
|
|
980
|
-
return [stknum for stknum, _ in ranked
|
|
1000
|
+
return _limit_rows([stknum for stknum, _ in ranked], limit)
|
|
981
1001
|
|
|
982
1002
|
|
|
983
1003
|
def _score_field_match(query: str, text: str) -> float:
|
|
@@ -1031,7 +1051,7 @@ def _rank_direct_rows(
|
|
|
1031
1051
|
rows: list[sqlite3.Row],
|
|
1032
1052
|
*,
|
|
1033
1053
|
field_names: list[str],
|
|
1034
|
-
limit: int,
|
|
1054
|
+
limit: int | None,
|
|
1035
1055
|
min_score: float = 5.0,
|
|
1036
1056
|
key_fn=None,
|
|
1037
1057
|
) -> list[dict[str, Any]]:
|
|
@@ -1044,10 +1064,10 @@ def _rank_direct_rows(
|
|
|
1044
1064
|
if key_fn is None:
|
|
1045
1065
|
key_fn = _default_row_key
|
|
1046
1066
|
ranked = _merge_ranked_matches(scored, key_fn)
|
|
1047
|
-
return [dict(item["row"]) for item in ranked
|
|
1067
|
+
return [dict(item["row"]) for item in _limit_rows(ranked, limit)]
|
|
1048
1068
|
|
|
1049
1069
|
|
|
1050
|
-
def search_local(state_dir: Path, query: str, limit: int = 10) -> list[dict[str, Any]]:
|
|
1070
|
+
def search_local(state_dir: Path, query: str, limit: int | None = None) -> list[dict[str, Any]]:
|
|
1051
1071
|
query = query.strip()
|
|
1052
1072
|
if not query:
|
|
1053
1073
|
return []
|
|
@@ -1059,14 +1079,14 @@ def search_local(state_dir: Path, query: str, limit: int = 10) -> list[dict[str,
|
|
|
1059
1079
|
return [stock] if stock else []
|
|
1060
1080
|
|
|
1061
1081
|
candidates: dict[int, dict[str, Any]] = {}
|
|
1062
|
-
for match in _search_candidates_from_prefix_fts(conn, query,
|
|
1082
|
+
for match in _search_candidates_from_prefix_fts(conn, query, _scaled_limit(limit, 3, 20)):
|
|
1063
1083
|
stknum = match["row"]["stknum"]
|
|
1064
1084
|
existing = candidates.get(stknum)
|
|
1065
1085
|
if existing is None or match["score"] > existing["score"]:
|
|
1066
1086
|
candidates[stknum] = match
|
|
1067
1087
|
|
|
1068
1088
|
if not candidates:
|
|
1069
|
-
for match in _search_candidates_from_trigram_fts(conn, query,
|
|
1089
|
+
for match in _search_candidates_from_trigram_fts(conn, query, _scaled_limit(limit, 12, 60)):
|
|
1070
1090
|
stknum = match["row"]["stknum"]
|
|
1071
1091
|
existing = candidates.get(stknum)
|
|
1072
1092
|
if existing is None or match["score"] > existing["score"]:
|
|
@@ -1076,21 +1096,22 @@ def search_local(state_dir: Path, query: str, limit: int = 10) -> list[dict[str,
|
|
|
1076
1096
|
candidates.values(),
|
|
1077
1097
|
key=lambda item: (-item["score"], item["row"]["stknum"]),
|
|
1078
1098
|
)
|
|
1079
|
-
return [_search_result_payload(item["row"]) for item in ranked
|
|
1099
|
+
return [_search_result_payload(item["row"]) for item in _limit_rows(ranked, limit)]
|
|
1080
1100
|
finally:
|
|
1081
1101
|
conn.close()
|
|
1082
1102
|
|
|
1083
1103
|
|
|
1084
|
-
def search_gene(state_dir: Path, query: str, limit: int = 20) -> list[dict[str, Any]]:
|
|
1104
|
+
def search_gene(state_dir: Path, query: str, limit: int | None = None) -> list[dict[str, Any]]:
|
|
1085
1105
|
query = query.strip()
|
|
1086
1106
|
if not query:
|
|
1087
1107
|
return []
|
|
1088
1108
|
|
|
1089
1109
|
conn = _connect(state_dir)
|
|
1090
1110
|
try:
|
|
1111
|
+
limit_clause, limit_params = _limit_sql(limit)
|
|
1091
1112
|
if query.upper().startswith("FBGN"):
|
|
1092
1113
|
rows = conn.execute(
|
|
1093
|
-
"""
|
|
1114
|
+
f"""
|
|
1094
1115
|
SELECT DISTINCT
|
|
1095
1116
|
sg.stknum,
|
|
1096
1117
|
sg.genotype,
|
|
@@ -1100,13 +1121,13 @@ def search_gene(state_dir: Path, query: str, limit: int = 20) -> list[dict[str,
|
|
|
1100
1121
|
FROM stockgenes sg
|
|
1101
1122
|
WHERE UPPER(sg.fbgn) = UPPER(?)
|
|
1102
1123
|
ORDER BY sg.stknum, sg.component_symbol, sg.gene_symbol
|
|
1103
|
-
|
|
1124
|
+
{limit_clause}
|
|
1104
1125
|
""",
|
|
1105
|
-
(query,
|
|
1126
|
+
(query, *limit_params),
|
|
1106
1127
|
).fetchall()
|
|
1107
1128
|
else:
|
|
1108
1129
|
rows = conn.execute(
|
|
1109
|
-
"""
|
|
1130
|
+
f"""
|
|
1110
1131
|
SELECT DISTINCT
|
|
1111
1132
|
sg.stknum,
|
|
1112
1133
|
sg.genotype,
|
|
@@ -1121,14 +1142,14 @@ def search_gene(state_dir: Path, query: str, limit: int = 20) -> list[dict[str,
|
|
|
1121
1142
|
sg.stknum,
|
|
1122
1143
|
sg.component_symbol,
|
|
1123
1144
|
sg.gene_symbol
|
|
1124
|
-
|
|
1145
|
+
{limit_clause}
|
|
1125
1146
|
""",
|
|
1126
|
-
(query, f"{query}%", query,
|
|
1147
|
+
(query, f"{query}%", query, *limit_params),
|
|
1127
1148
|
).fetchall()
|
|
1128
1149
|
if rows:
|
|
1129
1150
|
return _rows_to_dicts(rows)
|
|
1130
1151
|
|
|
1131
|
-
stock_ids = _candidate_stock_ids_for_query(conn, query,
|
|
1152
|
+
stock_ids = _candidate_stock_ids_for_query(conn, query, _scaled_limit(limit, 4, 40))
|
|
1132
1153
|
if not stock_ids:
|
|
1133
1154
|
return []
|
|
1134
1155
|
placeholders = ", ".join("?" for _ in stock_ids)
|
|
@@ -1223,7 +1244,7 @@ def _search_component_table(
|
|
|
1223
1244
|
conn: sqlite3.Connection | None = None,
|
|
1224
1245
|
column: str,
|
|
1225
1246
|
query: str,
|
|
1226
|
-
limit: int,
|
|
1247
|
+
limit: int | None,
|
|
1227
1248
|
) -> list[dict[str, Any]]:
|
|
1228
1249
|
query = query.strip()
|
|
1229
1250
|
if not query:
|
|
@@ -1235,6 +1256,7 @@ def _search_component_table(
|
|
|
1235
1256
|
close_conn = conn is None
|
|
1236
1257
|
conn = conn or _connect(state_dir)
|
|
1237
1258
|
try:
|
|
1259
|
+
limit_clause, limit_params = _limit_sql(limit)
|
|
1238
1260
|
rows = conn.execute(
|
|
1239
1261
|
f"""
|
|
1240
1262
|
SELECT
|
|
@@ -1255,14 +1277,14 @@ def _search_component_table(
|
|
|
1255
1277
|
CASE WHEN LOWER(cc.{column}) = LOWER(?) THEN 0 ELSE 1 END,
|
|
1256
1278
|
cc.stknum,
|
|
1257
1279
|
cc.component_symbol
|
|
1258
|
-
|
|
1280
|
+
{limit_clause}
|
|
1259
1281
|
""",
|
|
1260
|
-
(query, f"{query}%", query,
|
|
1282
|
+
(query, f"{query}%", query, *limit_params),
|
|
1261
1283
|
).fetchall()
|
|
1262
1284
|
if rows:
|
|
1263
1285
|
return _rows_to_dicts(rows)
|
|
1264
1286
|
|
|
1265
|
-
stock_ids = _candidate_stock_ids_for_query(conn, query,
|
|
1287
|
+
stock_ids = _candidate_stock_ids_for_query(conn, query, _scaled_limit(limit, 4, 40))
|
|
1266
1288
|
if not stock_ids:
|
|
1267
1289
|
return []
|
|
1268
1290
|
placeholders = ", ".join("?" for _ in stock_ids)
|
|
@@ -1302,11 +1324,12 @@ def _search_component_table(
|
|
|
1302
1324
|
def _fetch_component_domain_rows(
|
|
1303
1325
|
conn: sqlite3.Connection,
|
|
1304
1326
|
query: str,
|
|
1305
|
-
limit: int,
|
|
1327
|
+
limit: int | None,
|
|
1306
1328
|
*,
|
|
1307
1329
|
cte_sql: str,
|
|
1308
1330
|
cte_params: list[Any],
|
|
1309
1331
|
) -> list[sqlite3.Row]:
|
|
1332
|
+
limit_clause, limit_params = _limit_sql(limit)
|
|
1310
1333
|
rows = conn.execute(
|
|
1311
1334
|
f"""
|
|
1312
1335
|
{cte_sql}
|
|
@@ -1331,14 +1354,14 @@ def _fetch_component_domain_rows(
|
|
|
1331
1354
|
cc.mapstatement,
|
|
1332
1355
|
sg0.bdsc_symbol_id
|
|
1333
1356
|
ORDER BY cc.stknum, cc.component_symbol
|
|
1334
|
-
|
|
1357
|
+
{limit_clause}
|
|
1335
1358
|
""",
|
|
1336
|
-
(*cte_params,
|
|
1359
|
+
(*cte_params, *limit_params),
|
|
1337
1360
|
).fetchall()
|
|
1338
1361
|
if rows:
|
|
1339
1362
|
return rows
|
|
1340
1363
|
|
|
1341
|
-
stock_ids = _candidate_stock_ids_for_query(conn, query,
|
|
1364
|
+
stock_ids = _candidate_stock_ids_for_query(conn, query, _scaled_limit(limit, 4, 40))
|
|
1342
1365
|
if not stock_ids:
|
|
1343
1366
|
return []
|
|
1344
1367
|
placeholders = ", ".join("?" for _ in stock_ids)
|
|
@@ -1370,7 +1393,7 @@ def _fetch_component_domain_rows(
|
|
|
1370
1393
|
def _search_component_domain(
|
|
1371
1394
|
state_dir: Path,
|
|
1372
1395
|
query: str,
|
|
1373
|
-
limit: int,
|
|
1396
|
+
limit: int | None,
|
|
1374
1397
|
*,
|
|
1375
1398
|
cte_sql: str,
|
|
1376
1399
|
cte_params: list[Any],
|
|
@@ -1400,7 +1423,7 @@ def _search_component_domain(
|
|
|
1400
1423
|
conn.close()
|
|
1401
1424
|
|
|
1402
1425
|
|
|
1403
|
-
def search_property(state_dir: Path, query: str, limit: int = 20) -> list[dict[str, Any]]:
|
|
1426
|
+
def search_property(state_dir: Path, query: str, limit: int | None = None) -> list[dict[str, Any]]:
|
|
1404
1427
|
query = query.strip()
|
|
1405
1428
|
return _search_component_domain(
|
|
1406
1429
|
state_dir,
|
|
@@ -1420,7 +1443,7 @@ def search_property(state_dir: Path, query: str, limit: int = 20) -> list[dict[s
|
|
|
1420
1443
|
)
|
|
1421
1444
|
|
|
1422
1445
|
|
|
1423
|
-
def search_property_exact(state_dir: Path, query: str, limit: int = 20) -> list[dict[str, Any]]:
|
|
1446
|
+
def search_property_exact(state_dir: Path, query: str, limit: int | None = None) -> list[dict[str, Any]]:
|
|
1424
1447
|
query = query.strip()
|
|
1425
1448
|
return _search_component_domain(
|
|
1426
1449
|
state_dir,
|
|
@@ -1439,7 +1462,7 @@ def search_property_exact(state_dir: Path, query: str, limit: int = 20) -> list[
|
|
|
1439
1462
|
)
|
|
1440
1463
|
|
|
1441
1464
|
|
|
1442
|
-
def search_driver_family(state_dir: Path, query: str, limit: int = 20) -> list[dict[str, Any]]:
|
|
1465
|
+
def search_driver_family(state_dir: Path, query: str, limit: int | None = None) -> list[dict[str, Any]]:
|
|
1443
1466
|
query = query.strip()
|
|
1444
1467
|
_, tokens = normalize_driver_family(query)
|
|
1445
1468
|
clause, params = _driver_family_clause(
|
|
@@ -1469,7 +1492,7 @@ def search_driver_family(state_dir: Path, query: str, limit: int = 20) -> list[d
|
|
|
1469
1492
|
)
|
|
1470
1493
|
|
|
1471
1494
|
|
|
1472
|
-
def search_relationship(state_dir: Path, query: str, limit: int =
|
|
1495
|
+
def search_relationship(state_dir: Path, query: str, limit: int | None = None) -> list[dict[str, Any]]:
|
|
1473
1496
|
query = query.strip()
|
|
1474
1497
|
return _search_component_domain(
|
|
1475
1498
|
state_dir,
|
|
@@ -1504,11 +1527,11 @@ def get_stock_by_rrid(state_dir: Path, query: str) -> dict[str, Any] | None:
|
|
|
1504
1527
|
return get_stock(state_dir, stknum)
|
|
1505
1528
|
|
|
1506
1529
|
|
|
1507
|
-
def search_fbid(state_dir: Path, query: str, limit: int =
|
|
1530
|
+
def search_fbid(state_dir: Path, query: str, limit: int | None = None) -> list[dict[str, Any]]:
|
|
1508
1531
|
return _search_component_table(state_dir, column="fbid", query=query, limit=limit)
|
|
1509
1532
|
|
|
1510
1533
|
|
|
1511
|
-
def search_component(state_dir: Path, query: str, limit: int =
|
|
1534
|
+
def search_component(state_dir: Path, query: str, limit: int | None = None) -> list[dict[str, Any]]:
|
|
1512
1535
|
return _search_component_table(
|
|
1513
1536
|
state_dir,
|
|
1514
1537
|
column="component_symbol",
|
|
@@ -1963,7 +1986,7 @@ def lookup_query(
|
|
|
1963
1986
|
query: str,
|
|
1964
1987
|
*,
|
|
1965
1988
|
kind: str = "auto",
|
|
1966
|
-
limit: int =
|
|
1989
|
+
limit: int | None = None,
|
|
1967
1990
|
) -> dict[str, Any]:
|
|
1968
1991
|
requested_kind = kind
|
|
1969
1992
|
resolved_kind = detect_query_kind(query) if kind == "auto" else kind
|
|
@@ -2073,7 +2096,7 @@ def get_stock(state_dir: Path, stknum: int) -> dict[str, Any] | None:
|
|
|
2073
2096
|
conn.close()
|
|
2074
2097
|
|
|
2075
2098
|
|
|
2076
|
-
def live_search(query: str, limit: int = 10) -> list[dict[str, Any]]:
|
|
2099
|
+
def live_search(query: str, limit: int | None = None) -> list[dict[str, Any]]:
|
|
2077
2100
|
simple_payload = parse.urlencode({"presearch": query, "type": "contains"}).encode("utf-8")
|
|
2078
2101
|
req = request.Request(
|
|
2079
2102
|
"https://bdsc.indiana.edu/Home/GetSearchResults",
|
|
@@ -2089,7 +2112,7 @@ def live_search(query: str, limit: int = 10) -> list[dict[str, Any]]:
|
|
|
2089
2112
|
data = json.loads(response.read().decode("utf-8"))
|
|
2090
2113
|
rows = data.get("Data") or []
|
|
2091
2114
|
if rows:
|
|
2092
|
-
return rows
|
|
2115
|
+
return _limit_rows(rows, limit)
|
|
2093
2116
|
|
|
2094
2117
|
advanced_payload = parse.urlencode(
|
|
2095
2118
|
{
|
|
@@ -2119,7 +2142,7 @@ def live_search(query: str, limit: int = 10) -> list[dict[str, Any]]:
|
|
|
2119
2142
|
)
|
|
2120
2143
|
with request.urlopen(advanced_req) as response:
|
|
2121
2144
|
advanced_data = json.loads(response.read().decode("utf-8"))
|
|
2122
|
-
return (advanced_data.get("Data") or [])
|
|
2145
|
+
return _limit_rows(advanced_data.get("Data") or [], limit)
|
|
2123
2146
|
|
|
2124
2147
|
|
|
2125
2148
|
def get_status(state_dir: Path) -> dict[str, Any]:
|
|
@@ -2282,11 +2305,10 @@ def iter_dataset_rows(
|
|
|
2282
2305
|
sql += f"\n{where_clause}"
|
|
2283
2306
|
sql += f"\n{_dataset_sort_clause(dataset)}"
|
|
2284
2307
|
|
|
2285
|
-
|
|
2286
|
-
|
|
2287
|
-
|
|
2288
|
-
|
|
2289
|
-
cursor = conn.execute(sql, params)
|
|
2308
|
+
limit_clause, limit_params = _limit_sql(limit)
|
|
2309
|
+
if limit_clause:
|
|
2310
|
+
sql += f"\n{limit_clause}"
|
|
2311
|
+
cursor = conn.execute(sql, (*params, *limit_params))
|
|
2290
2312
|
|
|
2291
2313
|
columns = [description[0] for description in cursor.description]
|
|
2292
2314
|
try:
|
|
@@ -2400,9 +2422,7 @@ def iter_report_rows(
|
|
|
2400
2422
|
break
|
|
2401
2423
|
|
|
2402
2424
|
deduped = _merge_report_rows(resolved_dataset, merged_rows)
|
|
2403
|
-
|
|
2404
|
-
deduped = deduped[:limit]
|
|
2405
|
-
for row in deduped:
|
|
2425
|
+
for row in _limit_rows(deduped, limit):
|
|
2406
2426
|
yield row
|
|
2407
2427
|
|
|
2408
2428
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bdsc-cli
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Sync and query BDSC datasets locally
|
|
5
5
|
Author: Gustavo Madeira Santana
|
|
6
6
|
License-Expression: MIT
|
|
@@ -21,6 +21,9 @@ Dynamic: license-file
|
|
|
21
21
|
|
|
22
22
|
# bdsc-cli
|
|
23
23
|
|
|
24
|
+
> This package has been renamed. Install `bdsc` now:
|
|
25
|
+
> `pipx install bdsc`
|
|
26
|
+
|
|
24
27
|
Small CLI for syncing public Bloomington Drosophila Stock Center datasets and
|
|
25
28
|
querying them locally.
|
|
26
29
|
|
|
@@ -50,16 +53,16 @@ brew tap gumadeiras/tap
|
|
|
50
53
|
brew install bdsc-cli
|
|
51
54
|
```
|
|
52
55
|
|
|
53
|
-
|
|
56
|
+
PyPI with `pipx`:
|
|
54
57
|
|
|
55
58
|
```bash
|
|
56
|
-
pipx install
|
|
59
|
+
pipx install bdsc-cli
|
|
57
60
|
```
|
|
58
61
|
|
|
59
|
-
|
|
62
|
+
PyPI with plain `pip`:
|
|
60
63
|
|
|
61
64
|
```bash
|
|
62
|
-
python3 -m pip install
|
|
65
|
+
python3 -m pip install bdsc-cli
|
|
63
66
|
```
|
|
64
67
|
|
|
65
68
|
Source install:
|
|
@@ -91,19 +94,8 @@ Build release artifacts locally:
|
|
|
91
94
|
python -m pip install -e .[release]
|
|
92
95
|
python -m build
|
|
93
96
|
python -m twine check dist/*
|
|
94
|
-
python scripts/render_homebrew_formula.py dist/bdsc_cli-$(python - <<'PY'
|
|
95
|
-
from bdsc_cli import __version__
|
|
96
|
-
print(__version__)
|
|
97
|
-
PY
|
|
98
|
-
).tar.gz
|
|
99
97
|
```
|
|
100
98
|
|
|
101
|
-
PyPI note:
|
|
102
|
-
|
|
103
|
-
- the GitHub release is live
|
|
104
|
-
- PyPI trusted publishing is not configured yet for `bdsc-cli`
|
|
105
|
-
- `pip install bdsc-cli` will work after that publisher is added
|
|
106
|
-
|
|
107
99
|
## Quickstart
|
|
108
100
|
|
|
109
101
|
Create a local cache and index:
|
|
@@ -190,6 +182,9 @@ bdsc export components --limit 5 --format jsonl
|
|
|
190
182
|
bdsc stock 77118 --json
|
|
191
183
|
```
|
|
192
184
|
|
|
185
|
+
Query commands return all matches by default. Use `--limit N` when you want a
|
|
186
|
+
bounded result set.
|
|
187
|
+
|
|
193
188
|
## Commands
|
|
194
189
|
|
|
195
190
|
- `bdsc sync`: download the BDSC CSV datasets; builds the index by default
|
|
@@ -354,9 +349,7 @@ bdsc terms property-descriptions --query optogenetic --jsonl
|
|
|
354
349
|
- use `property-exact` or `driver-family` when `property` is too broad for a
|
|
355
350
|
reliable LexA/QF/GAL4-style answer.
|
|
356
351
|
- tag pushes like `vX.Y.Z` run the release workflow: build artifacts, create a
|
|
357
|
-
GitHub release,
|
|
358
|
-
- `scripts/render_homebrew_formula.py` renders a Homebrew formula from a built
|
|
359
|
-
sdist; use it when updating a tap after a release.
|
|
352
|
+
GitHub release, publish to PyPI, and update `gumadeiras/homebrew-tap`.
|
|
360
353
|
- The live endpoint is undocumented and may change without notice.
|
|
361
354
|
- BDSC data is large enough that the first full sync/index can take a few
|
|
362
355
|
minutes depending on network and disk speed.
|
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import contextlib
|
|
4
4
|
import io
|
|
5
|
+
import json
|
|
5
6
|
import tempfile
|
|
6
7
|
import unittest
|
|
7
8
|
from pathlib import Path
|
|
@@ -123,6 +124,44 @@ class CoreTests(unittest.TestCase):
|
|
|
123
124
|
def tearDown(self) -> None:
|
|
124
125
|
self.temp_dir.cleanup()
|
|
125
126
|
|
|
127
|
+
def add_many_cschrimson_rows(self, count: int = 60) -> None:
|
|
128
|
+
raw_dir = self.state_dir / "raw"
|
|
129
|
+
bloomington_rows = []
|
|
130
|
+
component_rows = []
|
|
131
|
+
stockgene_rows = []
|
|
132
|
+
for index in range(count):
|
|
133
|
+
stknum = 78000 + index
|
|
134
|
+
component = f"P{{20XUAS-CsChrimson.synthetic{index}}}attP2"
|
|
135
|
+
genotype = f"w[1118]; {component}"
|
|
136
|
+
bloomington_rows.append(
|
|
137
|
+
f'{stknum},"{genotype}","2","","5/21/2019","Donor: Janelia","synthetic CsChrimson row"'
|
|
138
|
+
)
|
|
139
|
+
component_rows.append(
|
|
140
|
+
f'{stknum},"{genotype}","{component}","FBti78{index:04d}","","CsChrimson synthetic construct","",""'
|
|
141
|
+
)
|
|
142
|
+
stockgene_rows.append(
|
|
143
|
+
f'{stknum},"{genotype}","{component}","CsChrimson","FBto0000558",{78000 + index},20'
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
for filename, rows in (
|
|
147
|
+
("bloomington.csv", bloomington_rows),
|
|
148
|
+
("stockcomps_map_comments.csv", component_rows),
|
|
149
|
+
("stockgenes.csv", stockgene_rows),
|
|
150
|
+
):
|
|
151
|
+
path = raw_dir / filename
|
|
152
|
+
path.write_text(path.read_text(encoding="utf-8") + "\n".join(rows) + "\n", encoding="utf-8")
|
|
153
|
+
|
|
154
|
+
def assert_cli_error_shows_help(self, argv: list[str], *expected: str) -> str:
|
|
155
|
+
stderr = io.StringIO()
|
|
156
|
+
with contextlib.redirect_stderr(stderr):
|
|
157
|
+
with self.assertRaises(SystemExit) as raised:
|
|
158
|
+
main(argv)
|
|
159
|
+
self.assertEqual(raised.exception.code, 2)
|
|
160
|
+
output = stderr.getvalue()
|
|
161
|
+
for text in expected:
|
|
162
|
+
self.assertIn(text, output)
|
|
163
|
+
return output
|
|
164
|
+
|
|
126
165
|
def test_build_fts_query_tokenizes_text(self) -> None:
|
|
127
166
|
self.assertEqual(build_fts_query("10XUAS-Chronos"), "10xuas* chronos*")
|
|
128
167
|
|
|
@@ -368,6 +407,75 @@ class CoreTests(unittest.TestCase):
|
|
|
368
407
|
self.assertIn('"kind": "gene"', payload)
|
|
369
408
|
self.assertIn('"Chronos"', payload)
|
|
370
409
|
|
|
410
|
+
def test_find_and_search_have_no_default_limit(self) -> None:
|
|
411
|
+
self.add_many_cschrimson_rows()
|
|
412
|
+
build_index(self.state_dir)
|
|
413
|
+
|
|
414
|
+
stdout = io.StringIO()
|
|
415
|
+
with contextlib.redirect_stdout(stdout):
|
|
416
|
+
exit_code = main(
|
|
417
|
+
[
|
|
418
|
+
"find",
|
|
419
|
+
"CsChrimson",
|
|
420
|
+
"--state-dir",
|
|
421
|
+
str(self.state_dir),
|
|
422
|
+
"--json",
|
|
423
|
+
]
|
|
424
|
+
)
|
|
425
|
+
self.assertEqual(exit_code, 0)
|
|
426
|
+
find_payload = json.loads(stdout.getvalue())
|
|
427
|
+
self.assertEqual(find_payload["result_count"], 63)
|
|
428
|
+
self.assertIn(82182, {row["stknum"] for row in find_payload["results"]})
|
|
429
|
+
|
|
430
|
+
stdout = io.StringIO()
|
|
431
|
+
with contextlib.redirect_stdout(stdout):
|
|
432
|
+
exit_code = main(
|
|
433
|
+
[
|
|
434
|
+
"search",
|
|
435
|
+
"CsChrimson",
|
|
436
|
+
"--state-dir",
|
|
437
|
+
str(self.state_dir),
|
|
438
|
+
"--json",
|
|
439
|
+
]
|
|
440
|
+
)
|
|
441
|
+
self.assertEqual(exit_code, 0)
|
|
442
|
+
search_payload = json.loads(stdout.getvalue())
|
|
443
|
+
self.assertEqual(len(search_payload), 63)
|
|
444
|
+
self.assertIn(82182, {row["stknum"] for row in search_payload})
|
|
445
|
+
|
|
446
|
+
def test_missing_arguments_show_relevant_help(self) -> None:
|
|
447
|
+
self.assert_cli_error_shows_help([], "usage: bdsc", "find", "stock")
|
|
448
|
+
self.assert_cli_error_shows_help(
|
|
449
|
+
["export"],
|
|
450
|
+
"usage: bdsc export",
|
|
451
|
+
"stocks",
|
|
452
|
+
"error: the following arguments are required: dataset",
|
|
453
|
+
)
|
|
454
|
+
self.assert_cli_error_shows_help(
|
|
455
|
+
["stock"],
|
|
456
|
+
"usage: bdsc stock",
|
|
457
|
+
"stknum",
|
|
458
|
+
"error: the following arguments are required: stknum",
|
|
459
|
+
)
|
|
460
|
+
self.assert_cli_error_shows_help(
|
|
461
|
+
["find"],
|
|
462
|
+
"usage: bdsc find",
|
|
463
|
+
"--gene",
|
|
464
|
+
"error: find requires a query or at least one filter flag",
|
|
465
|
+
)
|
|
466
|
+
self.assert_cli_error_shows_help(
|
|
467
|
+
["filter"],
|
|
468
|
+
"usage: bdsc filter",
|
|
469
|
+
"--gene",
|
|
470
|
+
"error: filter requires at least one filter flag",
|
|
471
|
+
)
|
|
472
|
+
self.assert_cli_error_shows_help(
|
|
473
|
+
["lookup"],
|
|
474
|
+
"usage: bdsc lookup",
|
|
475
|
+
"--input",
|
|
476
|
+
"error: lookup requires at least one query or --input",
|
|
477
|
+
)
|
|
478
|
+
|
|
371
479
|
def test_find_command_filter_mode(self) -> None:
|
|
372
480
|
build_index(self.state_dir)
|
|
373
481
|
stdout = io.StringIO()
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import hashlib
|
|
4
|
-
import importlib.util
|
|
5
|
-
import tempfile
|
|
6
|
-
import unittest
|
|
7
|
-
from pathlib import Path
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
SPEC = importlib.util.spec_from_file_location(
|
|
11
|
-
"render_homebrew_formula",
|
|
12
|
-
Path(__file__).resolve().parents[1] / "scripts" / "render_homebrew_formula.py",
|
|
13
|
-
)
|
|
14
|
-
assert SPEC is not None and SPEC.loader is not None
|
|
15
|
-
MODULE = importlib.util.module_from_spec(SPEC)
|
|
16
|
-
SPEC.loader.exec_module(MODULE)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class ReleaseToolTests(unittest.TestCase):
|
|
20
|
-
def test_render_formula_uses_sdist_version_and_sha(self) -> None:
|
|
21
|
-
with tempfile.TemporaryDirectory() as temp_dir:
|
|
22
|
-
sdist = Path(temp_dir) / "bdsc_cli-1.2.3.tar.gz"
|
|
23
|
-
sdist.write_bytes(b"bdsc-cli-test")
|
|
24
|
-
formula = MODULE.render_formula(sdist)
|
|
25
|
-
|
|
26
|
-
self.assertIn('url "https://github.com/gumadeiras/bdsc-cli/releases/download/v1.2.3/bdsc_cli-1.2.3.tar.gz"', formula)
|
|
27
|
-
self.assertIn(hashlib.sha256(b"bdsc-cli-test").hexdigest(), formula)
|
|
28
|
-
|
|
29
|
-
def test_version_from_sdist_rejects_unexpected_name(self) -> None:
|
|
30
|
-
with self.assertRaises(ValueError):
|
|
31
|
-
MODULE.version_from_sdist(Path("not-bdsc.tar.gz"))
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
if __name__ == "__main__":
|
|
35
|
-
unittest.main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|