flybase-cli 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Gustavo Madeira Santana
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,244 @@
1
+ Metadata-Version: 2.4
2
+ Name: flybase-cli
3
+ Version: 0.1.2
4
+ Summary: FlyBase sync/query helper for agents.
5
+ License-Expression: MIT
6
+ Requires-Python: >=3.11
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Dynamic: license-file
10
+
11
+ # FlyBase local sync/query
12
+
13
+ Use FlyBase bulk files for agent workloads; treat the live API as a helper only.
14
+
15
+ ## Why
16
+
17
+ - `https://api.flybase.org/api/v1.0/` exists.
18
+ - some endpoints return useful JSON now, eg `domain/FBgn0001250`, `sequence/id/FBgn0001250`.
19
+ - some plausible endpoints return an empty body today.
20
+ - bulk bucket + release files: better for repeatable agent queries.
21
+
22
+ ## Current surfaces checked
23
+
24
+ - release bucket: `https://s3ftp.flybase.org/releases/current/`
25
+ - precomputed files: `https://s3ftp.flybase.org/releases/current/precomputed_files/`
26
+ - Postgres dump: `https://s3ftp.flybase.org/releases/current/psql/FB2026_01.sql.gz`
27
+ - API root: `https://api.flybase.org/api/v1.0/`
28
+ - batch download: `https://flybase.org/batchdownload`
29
+
30
+ ## Layout
31
+
32
+ - `src/flybase_cli/`: package code
33
+ - `tests/`: stdlib `unittest`
34
+ - `flybase_cli.py`: thin repo-root shim
35
+ - `pyproject.toml`: package metadata / console entrypoint
36
+
37
+ ## CLI
38
+
39
+ ```bash
40
+ python3 flybase_cli.py presets
41
+
42
+ python3 flybase_cli.py sync gene-core
43
+
44
+ python3 flybase_cli.py sync gene-core --release FB2026_01
45
+
46
+ python3 flybase_cli.py sync gene-knowledge --release FB2026_01
47
+
48
+ python3 flybase_cli.py full-sync --release FB2026_01
49
+
50
+ python3 flybase_cli.py full-sync \
51
+ --release FB2026_01 \
52
+ --include 'best_gene_summary|entity_publication'
53
+
54
+ python3 flybase_cli.py sync-incremental \
55
+ gene-knowledge \
56
+ --from-release FB2025_06 \
57
+ --release FB2026_01
58
+
59
+ python3 flybase_cli.py release-diff \
60
+ --preset gene-knowledge \
61
+ --from-release FB2025_06 \
62
+ --to-release FB2026_01
63
+
64
+ python3 flybase_cli.py genomes --release FB2026_01
65
+
66
+ python3 flybase_cli.py sync-genome \
67
+ --release FB2026_01 \
68
+ --genome dmel_r6.67 \
69
+ --section fasta \
70
+ --asset mirna
71
+
72
+ python3 flybase_cli.py genome-presets
73
+
74
+ python3 flybase_cli.py sync-genome \
75
+ --release FB2026_01 \
76
+ --genome dmel_r6.67 \
77
+ --preset mirna-fasta
78
+
79
+ PYTHONPATH=src python3 -m flybase_cli sync gene-expression
80
+
81
+ python3 flybase_cli.py manifest \
82
+ --url https://s3ftp.flybase.org/genomes/Drosophila_melanogaster/dmel_r6.67_FB2026_01/fasta/ \
83
+ --include 'miRNA'
84
+
85
+ python3 flybase_cli.py sync-url \
86
+ --url https://s3ftp.flybase.org/genomes/Drosophila_melanogaster/dmel_r6.67_FB2026_01/fasta/ \
87
+ --include 'miRNA'
88
+
89
+ python3 flybase_cli.py ingest \
90
+ data/flybase/precomputed_files/genes/best_gene_summary_fb_2026_01.tsv.gz \
91
+ data/flybase/precomputed_files/genes/fbgn_fbtr_fbpp_fb_2026_01.tsv.gz \
92
+ data/flybase/precomputed_files/genes/fbgn_annotation_ID_fb_2026_01.tsv.gz
93
+
94
+ python3 flybase_cli.py tables --columns
95
+
96
+ python3 flybase_cli.py describe --sample-values 2
97
+ python3 flybase_cli.py schema-export --sample-values 1
98
+ python3 flybase_cli.py query-plan --sample-values 1 --limit 5
99
+ python3 flybase_cli.py query-run --template-name gene-summary-by-fbgn --param fbgn_id=FBgn0002121
100
+
101
+ python3 flybase_cli.py fts-build
102
+
103
+ python3 flybase_cli.py search 'memory formation'
104
+
105
+ python3 flybase_cli.py pg-load --release FB2026_01
106
+
107
+ python3 flybase_cli.py sql \
108
+ "select * from fb_best_gene_summary_fb_2026_01 limit 5"
109
+
110
+ python3 flybase_cli.py sql \
111
+ "select s.fbgn_id, s.gene_symbol, a.annotation_id, p.flybase_fbtr, p.flybase_fbpp \
112
+ from fb_best_gene_summary_fb_2026_01 s \
113
+ join fb_fbgn_annotation_id_fb_2026_01 a on a.primary_fbgn = s.fbgn_id \
114
+ left join fb_fbgn_fbtr_fbpp_fb_2026_01 p on p.flybase_fbgn = s.fbgn_id \
115
+ limit 5"
116
+
117
+ python3 flybase_cli.py api domain/FBgn0001250
118
+ ```
119
+
120
+ ## Sync presets
121
+
122
+ - `gene-core`: summaries + FBgn/FBtr/FBpp + annotation IDs + SO annotations
123
+ - `gene-expression`: curated/high-throughput/scRNA expression slices
124
+ - `references`: publication/link tables
125
+ - `gene-knowledge`: core gene facts + representative publications + orthology tables
126
+ - `orthology`: ortholog, paralog, and disease-association tables
127
+ - `interactions`: gene- and allele-level interaction tables
128
+
129
+ ## Full sync
130
+
131
+ - `full-sync` crawls an entire release prefix, default `precomputed_files/`
132
+ - default behavior: download only files the current loaders can ingest into SQLite
133
+ - use `--all-files` if you want non-ingestable release artifacts too
134
+ - use `--include` / `--exclude` to stage a narrower smoke test or partial warehouse
135
+ - default manifest path: `data/flybase/manifests/<release>/full-sync.json`
136
+
137
+ ## Discovery
138
+
139
+ - `genomes --release FB2026_01` lists genome builds linked from that FlyBase release
140
+ - `sync-url` turns a crawlable FlyBase directory URL into a one-step local sync
141
+ - `sync-genome` resolves a release/build pair into the right genome-section URL automatically
142
+ - `genome-presets` lists reusable genome asset sync recipes
143
+
144
+ ## Genome sync
145
+
146
+ - sections: `fasta`, `gff`, `gtf`, `dna`, `chado-xml`
147
+ - asset shortcuts include `mirna`, `transcript`, `translation`, `gene`, `chromosome`, `cds`, `ncrna`, `gff`, `gtf`
148
+ - presets include `mirna-fasta`, `transcript-fasta`, `translation-fasta`, `gene-fasta`, `chromosome-fasta`, `ncrna-fasta`, `gff-all`, `gtf-all`
149
+ - use `--include`/`--exclude` for narrower file selection on top of the asset preset
150
+
151
+ ## Ingest formats
152
+
153
+ - delimited: `tsv`, `csv`, gzipped variants
154
+ - sequence: `fasta`, `fa`, `fna`, `faa`, gzipped variants
155
+ - annotation: `gff`, `gff3`, `gtf`, gzipped variants
156
+ - JSON: `json`, `json.gz`
157
+
158
+ ## JSON ingest
159
+
160
+ - top-level scalar JSON fields become queryable SQLite columns
161
+ - one nested dict level is flattened, eg `gene.symbol` -> `gene_symbol`
162
+ - repeated top-level lists become child tables, eg `symbolSynonyms` -> `<table>_symbolsynonyms`
163
+ - repeated lists nested inside child dict rows become descendant tables, eg `genomeLocations[].exons[]` -> `<table>_genomelocations_exons`
164
+ - full source record remains in `payload_json`
165
+
166
+ Example:
167
+
168
+ ```bash
169
+ python3 flybase_cli.py sql \
170
+ "select record_id, symbol, gene_geneId from fb_ncrna_genes_fb_2026_01 limit 5"
171
+
172
+ python3 flybase_cli.py sql \
173
+ "select parent_record_id, ordinal, value \
174
+ from fb_ncrna_genes_fb_2026_01_symbolsynonyms \
175
+ limit 5"
176
+
177
+ python3 flybase_cli.py sql \
178
+ "select parent_record_id, parent_ordinal, ordinal, startPosition, endPosition \
179
+ from fb_ncrna_genes_fb_2026_01_genomelocations_exons \
180
+ limit 5"
181
+ ```
182
+
183
+ ## Search
184
+
185
+ - `fts-build` creates a local SQLite FTS5 index from ingested tables
186
+ - `search` queries that index without calling the live FlyBase API
187
+ - record ids prefer stable FlyBase-like columns such as `fbgn_id`, `primary_fbgn`, `flybase_fbtr`
188
+
189
+ ## Metadata
190
+
191
+ - `describe` summarizes ingested tables with row counts, source paths, semantic tags, columns, and representative non-empty values
192
+ - `schema-export` writes the same metadata to a deterministic JSON artifact beside the SQLite DB, eg `FB2026_01.schema.json`
193
+ - `schema-export` also includes inferred `relationships` for nested child tables and common FlyBase ID joins
194
+ - `schema-export` also emits `semantic_summary` for table/entity tag coverage
195
+ - `schema-export` also emits ready-to-run `query_templates`
196
+ - `query-plan` prints starter SQL without the larger schema payload
197
+ - `query-plan` now includes named biological templates such as `gene-summary-by-fbgn`, `transcript-protein-links`, `publications-for-gene`, and coordinate lookups when matching tables exist
198
+ - `query-run` selects one template and executes it with parameter values
199
+ - the metadata commands above are a useful first step before writing ad hoc SQL or building agent query plans
200
+
201
+ Example:
202
+
203
+ ```bash
204
+ python3 flybase_cli.py schema-export \
205
+ --db data/flybase/FB2026_01.sqlite \
206
+ --sample-values 1
207
+
208
+ python3 flybase_cli.py query-plan \
209
+ --db data/flybase/FB2026_01.sqlite \
210
+ --sample-values 1 \
211
+ --limit 5
212
+
213
+ python3 flybase_cli.py query-run \
214
+ --db data/flybase/FB2026_01.sqlite \
215
+ --template-name gene-summary-by-fbgn \
216
+ --param fbgn_id=FBgn0002121
217
+ ```
218
+
219
+ ## Notes
220
+
221
+ - nested JSON child tables keep lineage columns like `parent_record_id`, `parent_ordinal`, `ordinal`.
222
+ - many FlyBase files start with `##` metadata lines; loader skips those.
223
+ - `sync` writes a preset manifest under `data/flybase/manifests/<release>/`.
224
+ - `full-sync` is the broadest offline path for release bulk data without going through the full Postgres dump.
225
+ - `sync --release FB2026_01` defaults to `data/flybase/FB2026_01.sqlite` to avoid cross-release mixing.
226
+ - `sync-incremental` uses stable manifest keys so release-renamed files still land in `updated` instead of noisy add/remove pairs.
227
+ - `release-diff` compares releases either by raw prefix or by curated multi-prefix preset.
228
+ - `manifest --url` lets you crawl non-`releases/` FlyBase directories such as genome FASTA/GFF trees.
229
+ - `sync-url` is the shortest path for genome assets once you know the directory URL.
230
+ - `sync-genome` is the shortest path when you know the FlyBase release + genome build label.
231
+ - `sync-genome --preset ...` is the preferred path for common genome asset pulls.
232
+ - some FlyBase `.gff.gz` assets are tar-wrapped gzip archives; loader handles that transparently.
233
+ - `sql` and `query-run` shape results as record-oriented JSON with summary metadata for agent chaining.
234
+ - `pg-load` stages the full Postgres import script for `releases/<release>/psql/<release>.sql.gz`.
235
+ - `pg-load --execute` runs the staged script when `createdb` and `psql` are installed locally.
236
+ - SQLite keeps setup minimal; switch to DuckDB/Postgres if you want bigger joins/faster scans.
237
+ - if you only need a few IDs, FlyBase Batch Download may be simpler than syncing files.
238
+ - use `--no-header` for files whose first non-comment row is data, not column names.
239
+
240
+ ## Tests
241
+
242
+ ```bash
243
+ python3 -m unittest discover -s tests
244
+ ```
@@ -0,0 +1,234 @@
1
+ # FlyBase local sync/query
2
+
3
+ Use FlyBase bulk files for agent workloads; treat the live API as a helper only.
4
+
5
+ ## Why
6
+
7
+ - `https://api.flybase.org/api/v1.0/` exists.
8
+ - some endpoints return useful JSON now, eg `domain/FBgn0001250`, `sequence/id/FBgn0001250`.
9
+ - some plausible endpoints return an empty body today.
10
+ - bulk bucket + release files: better for repeatable agent queries.
11
+
12
+ ## Current surfaces checked
13
+
14
+ - release bucket: `https://s3ftp.flybase.org/releases/current/`
15
+ - precomputed files: `https://s3ftp.flybase.org/releases/current/precomputed_files/`
16
+ - Postgres dump: `https://s3ftp.flybase.org/releases/current/psql/FB2026_01.sql.gz`
17
+ - API root: `https://api.flybase.org/api/v1.0/`
18
+ - batch download: `https://flybase.org/batchdownload`
19
+
20
+ ## Layout
21
+
22
+ - `src/flybase_cli/`: package code
23
+ - `tests/`: stdlib `unittest`
24
+ - `flybase_cli.py`: thin repo-root shim
25
+ - `pyproject.toml`: package metadata / console entrypoint
26
+
27
+ ## CLI
28
+
29
+ ```bash
30
+ python3 flybase_cli.py presets
31
+
32
+ python3 flybase_cli.py sync gene-core
33
+
34
+ python3 flybase_cli.py sync gene-core --release FB2026_01
35
+
36
+ python3 flybase_cli.py sync gene-knowledge --release FB2026_01
37
+
38
+ python3 flybase_cli.py full-sync --release FB2026_01
39
+
40
+ python3 flybase_cli.py full-sync \
41
+ --release FB2026_01 \
42
+ --include 'best_gene_summary|entity_publication'
43
+
44
+ python3 flybase_cli.py sync-incremental \
45
+ gene-knowledge \
46
+ --from-release FB2025_06 \
47
+ --release FB2026_01
48
+
49
+ python3 flybase_cli.py release-diff \
50
+ --preset gene-knowledge \
51
+ --from-release FB2025_06 \
52
+ --to-release FB2026_01
53
+
54
+ python3 flybase_cli.py genomes --release FB2026_01
55
+
56
+ python3 flybase_cli.py sync-genome \
57
+ --release FB2026_01 \
58
+ --genome dmel_r6.67 \
59
+ --section fasta \
60
+ --asset mirna
61
+
62
+ python3 flybase_cli.py genome-presets
63
+
64
+ python3 flybase_cli.py sync-genome \
65
+ --release FB2026_01 \
66
+ --genome dmel_r6.67 \
67
+ --preset mirna-fasta
68
+
69
+ PYTHONPATH=src python3 -m flybase_cli sync gene-expression
70
+
71
+ python3 flybase_cli.py manifest \
72
+ --url https://s3ftp.flybase.org/genomes/Drosophila_melanogaster/dmel_r6.67_FB2026_01/fasta/ \
73
+ --include 'miRNA'
74
+
75
+ python3 flybase_cli.py sync-url \
76
+ --url https://s3ftp.flybase.org/genomes/Drosophila_melanogaster/dmel_r6.67_FB2026_01/fasta/ \
77
+ --include 'miRNA'
78
+
79
+ python3 flybase_cli.py ingest \
80
+ data/flybase/precomputed_files/genes/best_gene_summary_fb_2026_01.tsv.gz \
81
+ data/flybase/precomputed_files/genes/fbgn_fbtr_fbpp_fb_2026_01.tsv.gz \
82
+ data/flybase/precomputed_files/genes/fbgn_annotation_ID_fb_2026_01.tsv.gz
83
+
84
+ python3 flybase_cli.py tables --columns
85
+
86
+ python3 flybase_cli.py describe --sample-values 2
87
+ python3 flybase_cli.py schema-export --sample-values 1
88
+ python3 flybase_cli.py query-plan --sample-values 1 --limit 5
89
+ python3 flybase_cli.py query-run --template-name gene-summary-by-fbgn --param fbgn_id=FBgn0002121
90
+
91
+ python3 flybase_cli.py fts-build
92
+
93
+ python3 flybase_cli.py search 'memory formation'
94
+
95
+ python3 flybase_cli.py pg-load --release FB2026_01
96
+
97
+ python3 flybase_cli.py sql \
98
+ "select * from fb_best_gene_summary_fb_2026_01 limit 5"
99
+
100
+ python3 flybase_cli.py sql \
101
+ "select s.fbgn_id, s.gene_symbol, a.annotation_id, p.flybase_fbtr, p.flybase_fbpp \
102
+ from fb_best_gene_summary_fb_2026_01 s \
103
+ join fb_fbgn_annotation_id_fb_2026_01 a on a.primary_fbgn = s.fbgn_id \
104
+ left join fb_fbgn_fbtr_fbpp_fb_2026_01 p on p.flybase_fbgn = s.fbgn_id \
105
+ limit 5"
106
+
107
+ python3 flybase_cli.py api domain/FBgn0001250
108
+ ```
109
+
110
+ ## Sync presets
111
+
112
+ - `gene-core`: summaries + FBgn/FBtr/FBpp + annotation IDs + SO annotations
113
+ - `gene-expression`: curated/high-throughput/scRNA expression slices
114
+ - `references`: publication/link tables
115
+ - `gene-knowledge`: core gene facts + representative publications + orthology tables
116
+ - `orthology`: ortholog, paralog, and disease-association tables
117
+ - `interactions`: gene- and allele-level interaction tables
118
+
119
+ ## Full sync
120
+
121
+ - `full-sync` crawls an entire release prefix, default `precomputed_files/`
122
+ - default behavior: download only files the current loaders can ingest into SQLite
123
+ - use `--all-files` if you want non-ingestable release artifacts too
124
+ - use `--include` / `--exclude` to stage a narrower smoke test or partial warehouse
125
+ - default manifest path: `data/flybase/manifests/<release>/full-sync.json`
126
+
127
+ ## Discovery
128
+
129
+ - `genomes --release FB2026_01` lists genome builds linked from that FlyBase release
130
+ - `sync-url` turns a crawlable FlyBase directory URL into a one-step local sync
131
+ - `sync-genome` resolves a release/build pair into the right genome-section URL automatically
132
+ - `genome-presets` lists reusable genome asset sync recipes
133
+
134
+ ## Genome sync
135
+
136
+ - sections: `fasta`, `gff`, `gtf`, `dna`, `chado-xml`
137
+ - asset shortcuts include `mirna`, `transcript`, `translation`, `gene`, `chromosome`, `cds`, `ncrna`, `gff`, `gtf`
138
+ - presets include `mirna-fasta`, `transcript-fasta`, `translation-fasta`, `gene-fasta`, `chromosome-fasta`, `ncrna-fasta`, `gff-all`, `gtf-all`
139
+ - use `--include`/`--exclude` for narrower file selection on top of the asset preset
140
+
141
+ ## Ingest formats
142
+
143
+ - delimited: `tsv`, `csv`, gzipped variants
144
+ - sequence: `fasta`, `fa`, `fna`, `faa`, gzipped variants
145
+ - annotation: `gff`, `gff3`, `gtf`, gzipped variants
146
+ - JSON: `json`, `json.gz`
147
+
148
+ ## JSON ingest
149
+
150
+ - top-level scalar JSON fields become queryable SQLite columns
151
+ - one nested dict level is flattened, eg `gene.symbol` -> `gene_symbol`
152
+ - repeated top-level lists become child tables, eg `symbolSynonyms` -> `<table>_symbolsynonyms`
153
+ - repeated lists nested inside child dict rows become descendant tables, eg `genomeLocations[].exons[]` -> `<table>_genomelocations_exons`
154
+ - full source record remains in `payload_json`
155
+
156
+ Example:
157
+
158
+ ```bash
159
+ python3 flybase_cli.py sql \
160
+ "select record_id, symbol, gene_geneId from fb_ncrna_genes_fb_2026_01 limit 5"
161
+
162
+ python3 flybase_cli.py sql \
163
+ "select parent_record_id, ordinal, value \
164
+ from fb_ncrna_genes_fb_2026_01_symbolsynonyms \
165
+ limit 5"
166
+
167
+ python3 flybase_cli.py sql \
168
+ "select parent_record_id, parent_ordinal, ordinal, startPosition, endPosition \
169
+ from fb_ncrna_genes_fb_2026_01_genomelocations_exons \
170
+ limit 5"
171
+ ```
172
+
173
+ ## Search
174
+
175
+ - `fts-build` creates a local SQLite FTS5 index from ingested tables
176
+ - `search` queries that index without calling the live FlyBase API
177
+ - record ids prefer stable FlyBase-like columns such as `fbgn_id`, `primary_fbgn`, `flybase_fbtr`
178
+
179
+ ## Metadata
180
+
181
+ - `describe` summarizes ingested tables with row counts, source paths, semantic tags, columns, and representative non-empty values
182
+ - `schema-export` writes the same metadata to a deterministic JSON artifact beside the SQLite DB, eg `FB2026_01.schema.json`
183
+ - `schema-export` also includes inferred `relationships` for nested child tables and common FlyBase ID joins
184
+ - `schema-export` also emits `semantic_summary` for table/entity tag coverage
185
+ - `schema-export` also emits ready-to-run `query_templates`
186
+ - `query-plan` prints starter SQL without the larger schema payload
187
+ - `query-plan` now includes named biological templates such as `gene-summary-by-fbgn`, `transcript-protein-links`, `publications-for-gene`, and coordinate lookups when matching tables exist
188
+ - `query-run` selects one template and executes it with parameter values
189
+ - the metadata commands above are a useful first step before writing ad hoc SQL or building agent query plans
190
+
191
+ Example:
192
+
193
+ ```bash
194
+ python3 flybase_cli.py schema-export \
195
+ --db data/flybase/FB2026_01.sqlite \
196
+ --sample-values 1
197
+
198
+ python3 flybase_cli.py query-plan \
199
+ --db data/flybase/FB2026_01.sqlite \
200
+ --sample-values 1 \
201
+ --limit 5
202
+
203
+ python3 flybase_cli.py query-run \
204
+ --db data/flybase/FB2026_01.sqlite \
205
+ --template-name gene-summary-by-fbgn \
206
+ --param fbgn_id=FBgn0002121
207
+ ```
208
+
209
+ ## Notes
210
+
211
+ - nested JSON child tables keep lineage columns like `parent_record_id`, `parent_ordinal`, `ordinal`.
212
+ - many FlyBase files start with `##` metadata lines; loader skips those.
213
+ - `sync` writes a preset manifest under `data/flybase/manifests/<release>/`.
214
+ - `full-sync` is the broadest offline path for release bulk data without going through the full Postgres dump.
215
+ - `sync --release FB2026_01` defaults to `data/flybase/FB2026_01.sqlite` to avoid cross-release mixing.
216
+ - `sync-incremental` uses stable manifest keys so release-renamed files still land in `updated` instead of noisy add/remove pairs.
217
+ - `release-diff` compares releases either by raw prefix or by curated multi-prefix preset.
218
+ - `manifest --url` lets you crawl non-`releases/` FlyBase directories such as genome FASTA/GFF trees.
219
+ - `sync-url` is the shortest path for genome assets once you know the directory URL.
220
+ - `sync-genome` is the shortest path when you know the FlyBase release + genome build label.
221
+ - `sync-genome --preset ...` is the preferred path for common genome asset pulls.
222
+ - some FlyBase `.gff.gz` assets are tar-wrapped gzip archives; loader handles that transparently.
223
+ - `sql` and `query-run` shape results as record-oriented JSON with summary metadata for agent chaining.
224
+ - `pg-load` stages the full Postgres import script for `releases/<release>/psql/<release>.sql.gz`.
225
+ - `pg-load --execute` runs the staged script when `createdb` and `psql` are installed locally.
226
+ - SQLite keeps setup minimal; switch to DuckDB/Postgres if you want bigger joins/faster scans.
227
+ - if you only need a few IDs, FlyBase Batch Download may be simpler than syncing files.
228
+ - use `--no-header` for files whose first non-comment row is data, not column names.
229
+
230
+ ## Tests
231
+
232
+ ```bash
233
+ python3 -m unittest discover -s tests
234
+ ```
@@ -0,0 +1,20 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "flybase-cli"
7
+ version = "0.1.2"
8
+ description = "FlyBase sync/query helper for agents."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.11"
12
+
13
+ [project.scripts]
14
+ flybase-cli = "flybase_cli.cli:main"
15
+
16
+ [tool.setuptools]
17
+ package-dir = {"" = "src"}
18
+
19
+ [tool.setuptools.packages.find]
20
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,4 @@
1
+ from .cli import main
2
+ from .version import __version__
3
+
4
+ __all__ = ["main", "__version__"]
@@ -0,0 +1,5 @@
1
+ from .cli import main
2
+
3
+
4
+ if __name__ == "__main__":
5
+ raise SystemExit(main())