flybase-cli 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,114 @@
1
+ from __future__ import annotations
2
+
3
+
4
# Exact-match descriptions for well-known column names. Keys are compared
# verbatim (case-sensitively) by describe_column_name() before it falls back
# to suffix/substring heuristics.
COLUMN_DESCRIPTION_HINTS: dict[str, str] = {
    # Row identity and lineage columns.
    "record_id": "Primary row identifier for this table.",
    "parent_record_id": "Parent record identifier from a nested JSON source row.",
    "parent_ordinal": "Ordinal position of the direct parent row within a repeated list.",
    "ordinal": "Ordinal position within a repeated list.",
    "value": "Scalar value from a repeated JSON list.",
    "payload_json": "Full raw JSON payload for the source record.",
    # FlyBase gene / transcript / protein identifiers and their aliases.
    "fbgn_id": "FlyBase gene identifier.",
    "primary_fbgn": "FlyBase gene identifier alias.",
    "flybase_fbgn": "FlyBase gene identifier alias.",
    "fbtr_id": "FlyBase transcript identifier.",
    "primary_fbtr": "FlyBase transcript identifier alias.",
    "flybase_fbtr": "FlyBase transcript identifier alias.",
    "fbpp_id": "FlyBase protein identifier.",
    "primary_fbpp": "FlyBase protein identifier alias.",
    "flybase_fbpp": "FlyBase protein identifier alias.",
    # Annotation and feature naming columns.
    "annotation_id": "Genome annotation identifier.",
    "feature_id": "Feature identifier from GFF or GTF annotations.",
    "feature_name": "Feature symbol or display name.",
    "gene_symbol": "Gene symbol.",
    "symbol": "Feature symbol.",
    # Genomic coordinate columns.
    "seqid": "Sequence or chromosome identifier.",
    "start": "Genomic start coordinate.",
    "end": "Genomic end coordinate.",
    "strand": "Genomic strand.",
    # Free text.
    "summary": "Free-text summary field.",
}
31
+
32
# Groups of column names that refer to the same kind of identifier.
# Each group carries:
#   "aliases": tuple of lower-case column names belonging to the group
#              (infer_table_tags() tests lower-cased columns against these),
#   "kind":    either "id-alias" or "shared-id"
#              NOTE(review): the consumer of "kind" is not visible in this
#              module - confirm the intended distinction against its caller,
#   "label":   short display label for the group.
RELATIONSHIP_GROUPS: dict[str, dict[str, object]] = {
    "fbgn": {
        "aliases": ("fbgn_id", "primary_fbgn", "flybase_fbgn"),
        "kind": "id-alias",
        "label": "FlyBase fbgn",
    },
    "fbtr": {
        "aliases": ("fbtr_id", "primary_fbtr", "flybase_fbtr", "transcript_id"),
        "kind": "id-alias",
        "label": "FlyBase fbtr/transcript",
    },
    "fbpp": {
        "aliases": ("fbpp_id", "primary_fbpp", "flybase_fbpp"),
        "kind": "id-alias",
        "label": "FlyBase fbpp",
    },
    "publication": {
        "aliases": ("fbrf", "fbrf_id", "reference_id", "publication_id", "pmid"),
        "kind": "shared-id",
        "label": "publication",
    },
    "annotation": {
        "aliases": ("annotation_id", "feature_id"),
        "kind": "shared-id",
        "label": "annotation",
    },
}
59
+
60
+
61
def describe_column_name(column_name: str) -> str:
    """Return a short human-readable description for a column name.

    An exact, case-sensitive match in ``COLUMN_DESCRIPTION_HINTS`` takes
    priority. Otherwise the lower-cased name is tested against a fixed
    sequence of suffix/substring heuristics and the first hit wins.

    Args:
        column_name: Column name as it appears in the table.

    Returns:
        The description text, or ``""`` when no hint or heuristic applies.
    """
    exact = COLUMN_DESCRIPTION_HINTS.get(column_name)
    if exact is not None:
        return exact

    name = column_name.lower()
    if name.endswith("_json"):
        return "Raw JSON payload for this nested structure."
    # A bare "id" suffix already covers the "_id" spelling as well as
    # camelCase names such as "geneId" once lower-cased.
    # NOTE(review): it also matches ordinary words that end in "id".
    if name.endswith("id"):
        return "Identifier column."
    if any(marker in name for marker in ("summary", "description")):
        return "Free-text description or summary field."
    # endswith("symbol") is also true for the exact name "symbol".
    if name.endswith("symbol"):
        return "Symbol or short label."
    if name in ("startposition", "endposition"):
        return "Genomic coordinate from a nested JSON location record."
    return ""
77
+
78
+
79
def infer_table_tags(table_name: str, source_path: str, columns: list[str]) -> list[str]:
    """Infer semantic tags for a table from its name, source path and columns.

    Column names are compared case-insensitively. Three rule families are
    applied: alias groups from ``RELATIONSHIP_GROUPS``, fixed marker columns,
    and substrings of the lower-cased ``"<table_name> <source_path>"`` text.

    Args:
        table_name: Name of the table.
        source_path: Path of the file the table was loaded from.
        columns: Column names of the table.

    Returns:
        The distinct tags that applied, sorted alphabetically.
    """
    haystack = " ".join((table_name, source_path)).lower()
    present = {name.lower() for name in columns}

    # Relationship group -> tag added when any alias of that group is a column.
    alias_tags = {
        "fbgn": "gene",
        "fbtr": "transcript",
        "fbpp": "protein",
        "publication": "publication",
        "annotation": "annotation",
    }
    found: set[str] = {
        tag
        for group, tag in alias_tags.items()
        if not present.isdisjoint(RELATIONSHIP_GROUPS[group]["aliases"])
    }

    # Tag added when at least one of the marker columns is present.
    column_markers = (
        ("gene", {"gene_symbol", "symbol"}),
        ("coordinates", {"seqid", "start", "end", "startposition", "endposition"}),
        ("summary", {"summary"}),
        ("json", {"payload_json"}),
        ("nested", {"parent_record_id"}),
    )
    found.update(tag for tag, markers in column_markers if markers & present)

    # Tag added when any needle occurs in the table name or source path.
    name_markers = (
        ("summary", ("summary",)),
        ("expression", ("expression", "rpkm", "scrna")),
        ("orthology", ("ortholog", "paralog")),
        ("interaction", ("interaction",)),
        ("reference", ("publication", "reference")),
    )
    found.update(
        tag
        for tag, needles in name_markers
        if any(needle in haystack for needle in needles)
    )

    return sorted(found)
flybase_cli/syncing.py ADDED
@@ -0,0 +1,254 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from pathlib import Path
5
+
6
+ from .config import SyncPreset
7
+ from .core import build_manifest, filter_manifest, sync_manifest, write_json
8
+ from .loaders import is_ingestable
9
+
10
+
11
# (compiled pattern, replacement) pairs that swap a release-specific token in a
# path for a fixed placeholder. Both patterns match case-insensitively.
RELEASE_TOKEN_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
    (re.compile(r"FB\d{4}_\d{2}", flags=re.IGNORECASE), "FBRELEASE"),
    (re.compile(r"fb_\d{4}_\d{2}", flags=re.IGNORECASE), "fb_release"),
)


def stable_manifest_key(path: str) -> str:
    """Return a release-independent, lower-cased key for a manifest path.

    Every match of each pattern in ``RELEASE_TOKEN_PATTERNS`` is replaced by
    its placeholder, in declaration order, and the result is lower-cased, so
    paths that differ only in their release token share one key.

    Args:
        path: Manifest entry path.

    Returns:
        The normalised key.
    """
    key = path
    for release_token, placeholder in RELEASE_TOKEN_PATTERNS:
        key = release_token.sub(placeholder, key)
    return key.lower()
22
+
23
+
24
def prefer_manifest_entry(entries: list[dict[str, str]]) -> dict[str, str]:
    """Return the entry with the smallest ``"path"`` value.

    When several entries share the smallest path, the first of them in
    ``entries`` is returned.

    Args:
        entries: Non-empty list of manifest entries, each with a ``"path"`` key.

    Returns:
        The selected entry (the same object that was passed in, not a copy).

    Raises:
        IndexError: If ``entries`` is empty.
        KeyError: If an entry has no ``"path"`` key.
    """
    if not entries:
        # Keep the exception type an empty list has always produced here,
        # but say what went wrong.
        raise IndexError("prefer_manifest_entry() requires at least one manifest entry")
    # min() finds the first minimal element in one pass, without building
    # and sorting a copy of the whole list.
    return min(entries, key=lambda item: item["path"])
26
+
27
+
28
def group_manifest_by_stable_key(
    manifest: list[dict[str, str]],
) -> dict[str, list[dict[str, str]]]:
    """Bucket manifest entries by the stable key of their ``"path"``.

    Args:
        manifest: Manifest entries, each with a ``"path"`` key.

    Returns:
        A plain dict mapping ``stable_manifest_key(path)`` to the entries
        that produced that key, in their original order.
    """
    buckets: dict[str, list[dict[str, str]]] = {}
    for entry in manifest:
        key = stable_manifest_key(entry["path"])
        if key not in buckets:
            buckets[key] = []
        buckets[key].append(entry)
    return buckets
35
+
36
+
37
def merge_manifests(manifests: list[list[dict[str, str]]]) -> list[dict[str, str]]:
    """Merge several manifests into one list with unique paths.

    When the same ``"path"`` occurs more than once, the entry seen last
    (later manifest, later position) replaces the earlier ones.

    Args:
        manifests: Manifests to merge; every entry needs a ``"path"`` key.

    Returns:
        The surviving entries ordered by ``"path"``.
    """
    by_path = {
        entry["path"]: entry
        for manifest in manifests
        for entry in manifest
    }
    # Paths are the unique dict keys, so ordering the keys orders the entries.
    return [by_path[path] for path in sorted(by_path)]
43
+
44
+
45
def filter_ingestable_manifest(manifest: list[dict[str, str]]) -> list[dict[str, str]]:
    """Keep only the entries whose path ``is_ingestable`` accepts.

    Args:
        manifest: Manifest entries, each with a ``"path"`` key.

    Returns:
        A new list with the accepted entries in their original order.
    """
    kept: list[dict[str, str]] = []
    for entry in manifest:
        if is_ingestable(Path(entry["path"])):
            kept.append(entry)
    return kept
47
+
48
+
49
def preset_manifest(preset: SyncPreset, release: str) -> list[dict[str, str]]:
    """Build the merged manifest for every selection of a preset.

    For each selection a manifest is built from its prefix for ``release``
    and filtered with the selection's includes/excludes; the per-selection
    results are then combined with ``merge_manifests``.

    Args:
        preset: Preset whose ``selections`` are expanded.
        release: Release passed to ``build_manifest``.

    Returns:
        The merged manifest.
    """
    per_selection = [
        filter_manifest(
            build_manifest(selection.prefix, release=release),
            selection.includes,
            selection.excludes,
        )
        for selection in preset.selections
    ]
    return merge_manifests(per_selection)
60
+
61
+
62
def full_manifest(
    *,
    release: str,
    prefix: str = "precomputed_files/",
    include: list[str] | tuple[str, ...] = (),
    exclude: list[str] | tuple[str, ...] = (),
    ingestable_only: bool = True,
) -> list[dict[str, str]]:
    """Build the manifest for a whole release prefix.

    Args:
        release: Release passed to ``build_manifest``.
        prefix: Prefix to build the manifest from.
        include: Include filters forwarded to ``filter_manifest``.
        exclude: Exclude filters forwarded to ``filter_manifest``.
        ingestable_only: When true, additionally drop entries rejected by
            ``filter_ingestable_manifest``.

    Returns:
        The filtered manifest.
    """
    crawled = build_manifest(prefix, release=release)
    selected = filter_manifest(crawled, include, exclude)
    if not ingestable_only:
        return selected
    return filter_ingestable_manifest(selected)
78
+
79
+
80
def diff_manifests(
    previous_manifest: list[dict[str, str]],
    current_manifest: list[dict[str, str]],
) -> dict[str, object]:
    """Classify manifest entries as added, removed, updated or unchanged.

    Both manifests are grouped by stable key and the keys are visited in
    sorted order:

    * key only in the current manifest: every current entry is ``added``;
    * key only in the previous manifest: every previous entry is ``removed``;
    * key on both sides: only the preferred entry of each side (see
      ``prefer_manifest_entry``) is compared. Equal ``"path"`` and ``"url"``
      means ``unchanged``, anything else ``updated``.

    Args:
        previous_manifest: Entries of the older manifest.
        current_manifest: Entries of the newer manifest.

    Returns:
        A dict with the four lists ``added``/``removed``/``updated``/
        ``unchanged`` followed by their ``*_count`` lengths.
    """
    before = group_manifest_by_stable_key(previous_manifest)
    after = group_manifest_by_stable_key(current_manifest)

    added: list[dict[str, object]] = []
    removed: list[dict[str, object]] = []
    updated: list[dict[str, object]] = []
    unchanged: list[dict[str, object]] = []

    for stable_key in sorted(before.keys() | after.keys()):
        old_entries = before.get(stable_key, [])
        new_entries = after.get(stable_key, [])

        if not old_entries:
            added += [{"stable_key": stable_key, "to": entry} for entry in new_entries]
        elif not new_entries:
            removed += [{"stable_key": stable_key, "from": entry} for entry in old_entries]
        else:
            old = prefer_manifest_entry(old_entries)
            new = prefer_manifest_entry(new_entries)
            if old["path"] == new["path"] and old["url"] == new["url"]:
                unchanged.append({"stable_key": stable_key, "item": new})
            else:
                updated.append({"stable_key": stable_key, "from": old, "to": new})

    return {
        "added": added,
        "removed": removed,
        "updated": updated,
        "unchanged": unchanged,
        "added_count": len(added),
        "removed_count": len(removed),
        "updated_count": len(updated),
        "unchanged_count": len(unchanged),
    }
126
+
127
+
128
def build_release_diff(
    *,
    prefix: str,
    from_release: str,
    to_release: str,
    include: list[str] | tuple[str, ...] = (),
    exclude: list[str] | tuple[str, ...] = (),
) -> dict[str, object]:
    """Diff the manifests of two releases under a single prefix.

    Args:
        prefix: Prefix to build both manifests from.
        from_release: Older release.
        to_release: Newer release.
        include: Include filters forwarded to ``filter_manifest``.
        exclude: Exclude filters forwarded to ``filter_manifest``.

    Returns:
        A dict with the request parameters, both manifest sizes, and every
        key produced by ``diff_manifests``.
    """

    def manifest_for(release: str) -> list[dict[str, str]]:
        # Same prefix and filters for both sides; only the release differs.
        return filter_manifest(build_manifest(prefix, release=release), include, exclude)

    previous_manifest = manifest_for(from_release)
    current_manifest = manifest_for(to_release)

    report: dict[str, object] = {
        "prefix": prefix,
        "from_release": from_release,
        "to_release": to_release,
        "previous_manifest_count": len(previous_manifest),
        "current_manifest_count": len(current_manifest),
    }
    report.update(diff_manifests(previous_manifest, current_manifest))
    return report
154
+
155
+
156
def build_preset_release_diff(
    *,
    preset: SyncPreset,
    from_release: str,
    to_release: str,
) -> dict[str, object]:
    """Diff the preset manifests of two releases.

    Args:
        preset: Preset expanded with ``preset_manifest`` for both releases.
        from_release: Older release.
        to_release: Newer release.

    Returns:
        A dict with the preset's name, description and prefixes, both
        releases, both manifest sizes, and every key produced by
        ``diff_manifests``.
    """
    previous_manifest = preset_manifest(preset, release=from_release)
    current_manifest = preset_manifest(preset, release=to_release)

    report: dict[str, object] = {
        "preset": preset.name,
        "description": preset.description,
        "prefixes": list(preset.prefixes),
        "from_release": from_release,
        "to_release": to_release,
        "previous_manifest_count": len(previous_manifest),
        "current_manifest_count": len(current_manifest),
    }
    report.update(diff_manifests(previous_manifest, current_manifest))
    return report
174
+
175
+
176
def incremental_manifest(diff: dict[str, object]) -> list[dict[str, str]]:
    """Collect the entries an incremental sync has to fetch.

    Takes the ``"to"`` entry of every item in ``diff["added"]`` and
    ``diff["updated"]``.

    Args:
        diff: Mapping with ``"added"`` and ``"updated"`` lists whose items
            each carry a ``"to"`` manifest entry.

    Returns:
        The collected entries ordered by ``"path"``.
    """
    targets = [
        change["to"]
        for bucket in ("added", "updated")
        for change in diff[bucket]
    ]
    targets.sort(key=lambda entry: entry["path"])
    return targets
180
+
181
+
182
def sync_incremental_preset(
    *,
    preset: SyncPreset,
    root: Path,
    db_path: Path,
    manifest_path: Path,
    diff_path: Path | None,
    from_release: str,
    to_release: str,
    force: bool = False,
    no_header: bool = False,
) -> dict[str, object]:
    """Sync only the preset files that were added or updated between releases.

    The preset manifests of both releases are diffed, the diff is optionally
    written to ``diff_path``, and the added/updated entries are handed to
    ``sync_manifest``.

    Args:
        preset: Preset to expand for both releases.
        root: Forwarded to ``sync_manifest``.
        db_path: Forwarded to ``sync_manifest``.
        manifest_path: Forwarded to ``sync_manifest``.
        diff_path: Where to write the diff as JSON; ``None`` skips writing.
        from_release: Older release.
        to_release: Newer release.
        force: Forwarded to ``sync_manifest``.
        no_header: Forwarded to ``sync_manifest``.

    Returns:
        A dict with the preset/release fields, the number of selected files
        and the diff path, then the diff keys, then the ``sync_manifest``
        summary keys. On a key clash the later source wins.
    """
    diff = diff_manifests(
        preset_manifest(preset, release=from_release),
        preset_manifest(preset, release=to_release),
    )
    selected_manifest = incremental_manifest(diff)

    if diff_path is not None:
        write_json(diff_path, diff)

    summary = sync_manifest(
        selected_manifest,
        root=root,
        db_path=db_path,
        manifest_path=manifest_path,
        force=force,
        no_header=no_header,
    )

    result: dict[str, object] = {
        "preset": preset.name,
        "description": preset.description,
        "from_release": from_release,
        "to_release": to_release,
        "incremental_file_count": len(selected_manifest),
        "diff_path": None if diff_path is None else str(diff_path),
    }
    result.update(diff)
    result.update(summary)
    return result
218
+
219
+
220
def sync_full_release(
    *,
    root: Path,
    db_path: Path,
    manifest_path: Path,
    release: str,
    prefix: str = "precomputed_files/",
    include: list[str] | tuple[str, ...] = (),
    exclude: list[str] | tuple[str, ...] = (),
    ingestable_only: bool = True,
    force: bool = False,
    no_header: bool = False,
) -> dict[str, object]:
    """Sync every selected file of one release prefix.

    The manifest comes from ``full_manifest`` and is handed to
    ``sync_manifest``.

    Args:
        root: Forwarded to ``sync_manifest``.
        db_path: Forwarded to ``sync_manifest``.
        manifest_path: Forwarded to ``sync_manifest``.
        release: Release to sync.
        prefix: Prefix forwarded to ``full_manifest``.
        include: Include filters forwarded to ``full_manifest``.
        exclude: Exclude filters forwarded to ``full_manifest``.
        ingestable_only: Forwarded to ``full_manifest``.
        force: Forwarded to ``sync_manifest``.
        no_header: Forwarded to ``sync_manifest``.

    Returns:
        A dict with ``mode`` (``"full-sync"``), ``release``, ``prefix`` and
        ``ingestable_only``, followed by the ``sync_manifest`` summary keys,
        which win on a key clash.
    """
    selected = full_manifest(
        release=release,
        prefix=prefix,
        include=include,
        exclude=exclude,
        ingestable_only=ingestable_only,
    )
    summary = sync_manifest(
        selected,
        root=root,
        db_path=db_path,
        manifest_path=manifest_path,
        force=force,
        no_header=no_header,
    )

    result: dict[str, object] = {
        "mode": "full-sync",
        "release": release,
        "prefix": prefix,
        "ingestable_only": ingestable_only,
    }
    result.update(summary)
    return result
flybase_cli/version.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.1.2"
@@ -0,0 +1,244 @@
1
+ Metadata-Version: 2.4
2
+ Name: flybase-cli
3
+ Version: 0.1.2
4
+ Summary: FlyBase sync/query helper for agents.
5
+ License-Expression: MIT
6
+ Requires-Python: >=3.11
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Dynamic: license-file
10
+
11
+ # FlyBase local sync/query
12
+
13
+ Use FlyBase bulk files for agent workloads; the live API serves only as a supplementary helper.
14
+
15
+ ## Why
16
+
17
+ - `https://api.flybase.org/api/v1.0/` exists.
18
+ - some endpoints return useful JSON now, eg `domain/FBgn0001250`, `sequence/id/FBgn0001250`.
19
+ - some plausible-looking endpoints currently return an empty body.
20
+ - bulk bucket + release files: better for repeatable agent queries.
21
+
22
+ ## Current surfaces checked
23
+
24
+ - release bucket: `https://s3ftp.flybase.org/releases/current/`
25
+ - precomputed files: `https://s3ftp.flybase.org/releases/current/precomputed_files/`
26
+ - Postgres dump: `https://s3ftp.flybase.org/releases/current/psql/FB2026_01.sql.gz`
27
+ - API root: `https://api.flybase.org/api/v1.0/`
28
+ - batch download: `https://flybase.org/batchdownload`
29
+
30
+ ## Layout
31
+
32
+ - `src/flybase_cli/`: package code
33
+ - `tests/`: stdlib `unittest`
34
+ - `flybase_cli.py`: thin repo-root shim
35
+ - `pyproject.toml`: package metadata / console entrypoint
36
+
37
+ ## CLI
38
+
39
+ ```bash
40
+ python3 flybase_cli.py presets
41
+
42
+ python3 flybase_cli.py sync gene-core
43
+
44
+ python3 flybase_cli.py sync gene-core --release FB2026_01
45
+
46
+ python3 flybase_cli.py sync gene-knowledge --release FB2026_01
47
+
48
+ python3 flybase_cli.py full-sync --release FB2026_01
49
+
50
+ python3 flybase_cli.py full-sync \
51
+ --release FB2026_01 \
52
+ --include 'best_gene_summary|entity_publication'
53
+
54
+ python3 flybase_cli.py sync-incremental \
55
+ gene-knowledge \
56
+ --from-release FB2025_06 \
57
+ --release FB2026_01
58
+
59
+ python3 flybase_cli.py release-diff \
60
+ --preset gene-knowledge \
61
+ --from-release FB2025_06 \
62
+ --to-release FB2026_01
63
+
64
+ python3 flybase_cli.py genomes --release FB2026_01
65
+
66
+ python3 flybase_cli.py sync-genome \
67
+ --release FB2026_01 \
68
+ --genome dmel_r6.67 \
69
+ --section fasta \
70
+ --asset mirna
71
+
72
+ python3 flybase_cli.py genome-presets
73
+
74
+ python3 flybase_cli.py sync-genome \
75
+ --release FB2026_01 \
76
+ --genome dmel_r6.67 \
77
+ --preset mirna-fasta
78
+
79
+ PYTHONPATH=src python3 -m flybase_cli sync gene-expression
80
+
81
+ python3 flybase_cli.py manifest \
82
+ --url https://s3ftp.flybase.org/genomes/Drosophila_melanogaster/dmel_r6.67_FB2026_01/fasta/ \
83
+ --include 'miRNA'
84
+
85
+ python3 flybase_cli.py sync-url \
86
+ --url https://s3ftp.flybase.org/genomes/Drosophila_melanogaster/dmel_r6.67_FB2026_01/fasta/ \
87
+ --include 'miRNA'
88
+
89
+ python3 flybase_cli.py ingest \
90
+ data/flybase/precomputed_files/genes/best_gene_summary_fb_2026_01.tsv.gz \
91
+ data/flybase/precomputed_files/genes/fbgn_fbtr_fbpp_fb_2026_01.tsv.gz \
92
+ data/flybase/precomputed_files/genes/fbgn_annotation_ID_fb_2026_01.tsv.gz
93
+
94
+ python3 flybase_cli.py tables --columns
95
+
96
+ python3 flybase_cli.py describe --sample-values 2
97
+ python3 flybase_cli.py schema-export --sample-values 1
98
+ python3 flybase_cli.py query-plan --sample-values 1 --limit 5
99
+ python3 flybase_cli.py query-run --template-name gene-summary-by-fbgn --param fbgn_id=FBgn0002121
100
+
101
+ python3 flybase_cli.py fts-build
102
+
103
+ python3 flybase_cli.py search 'memory formation'
104
+
105
+ python3 flybase_cli.py pg-load --release FB2026_01
106
+
107
+ python3 flybase_cli.py sql \
108
+ "select * from fb_best_gene_summary_fb_2026_01 limit 5"
109
+
110
+ python3 flybase_cli.py sql \
111
+ "select s.fbgn_id, s.gene_symbol, a.annotation_id, p.flybase_fbtr, p.flybase_fbpp \
112
+ from fb_best_gene_summary_fb_2026_01 s \
113
+ join fb_fbgn_annotation_id_fb_2026_01 a on a.primary_fbgn = s.fbgn_id \
114
+ left join fb_fbgn_fbtr_fbpp_fb_2026_01 p on p.flybase_fbgn = s.fbgn_id \
115
+ limit 5"
116
+
117
+ python3 flybase_cli.py api domain/FBgn0001250
118
+ ```
119
+
120
+ ## Sync presets
121
+
122
+ - `gene-core`: summaries + FBgn/FBtr/FBpp + annotation IDs + SO annotations
123
+ - `gene-expression`: curated/high-throughput/scRNA expression slices
124
+ - `references`: publication/link tables
125
+ - `gene-knowledge`: core gene facts + representative publications + orthology tables
126
+ - `orthology`: ortholog, paralog, and disease-association tables
127
+ - `interactions`: gene- and allele-level interaction tables
128
+
129
+ ## Full sync
130
+
131
+ - `full-sync` crawls an entire release prefix, default `precomputed_files/`
132
+ - default behavior: download only files the current loaders can ingest into SQLite
133
+ - use `--all-files` if you want non-ingestable release artifacts too
134
+ - use `--include` / `--exclude` to stage a narrower smoke or partial warehouse
135
+ - default manifest path: `data/flybase/manifests/<release>/full-sync.json`
136
+
137
+ ## Discovery
138
+
139
+ - `genomes --release FB2026_01` lists genome builds linked from that FlyBase release
140
+ - `sync-url` turns a crawlable FlyBase directory URL into a one-step local sync
141
+ - `sync-genome` resolves a release/build pair into the right genome-section URL automatically
142
+ - `genome-presets` lists reusable genome asset sync recipes
143
+
144
+ ## Genome sync
145
+
146
+ - sections: `fasta`, `gff`, `gtf`, `dna`, `chado-xml`
147
+ - asset shortcuts include `mirna`, `transcript`, `translation`, `gene`, `chromosome`, `cds`, `ncrna`, `gff`, `gtf`
148
+ - presets include `mirna-fasta`, `transcript-fasta`, `translation-fasta`, `gene-fasta`, `chromosome-fasta`, `ncrna-fasta`, `gff-all`, `gtf-all`
149
+ - use `--include`/`--exclude` for narrower file selection on top of the asset preset
150
+
151
+ ## Ingest formats
152
+
153
+ - delimited: `tsv`, `csv`, gzipped variants
154
+ - sequence: `fasta`, `fa`, `fna`, `faa`, gzipped variants
155
+ - annotation: `gff`, `gff3`, `gtf`, gzipped variants
156
+ - JSON: `json`, `json.gz`
157
+
158
+ ## JSON ingest
159
+
160
+ - top-level scalar JSON fields become queryable SQLite columns
161
+ - one nested dict level is flattened, eg `gene.symbol` -> `gene_symbol`
162
+ - repeated top-level lists become child tables, eg `symbolSynonyms` -> `<table>_symbolsynonyms`
163
+ - repeated lists nested inside child dict rows become descendant tables, eg `genomeLocations[].exons[]` -> `<table>_genomelocations_exons`
164
+ - full source record remains in `payload_json`
165
+
166
+ Example:
167
+
168
+ ```bash
169
+ python3 flybase_cli.py sql \
170
+ "select record_id, symbol, gene_geneId from fb_ncrna_genes_fb_2026_01 limit 5"
171
+
172
+ python3 flybase_cli.py sql \
173
+ "select parent_record_id, ordinal, value \
174
+ from fb_ncrna_genes_fb_2026_01_symbolsynonyms \
175
+ limit 5"
176
+
177
+ python3 flybase_cli.py sql \
178
+ "select parent_record_id, parent_ordinal, ordinal, startPosition, endPosition \
179
+ from fb_ncrna_genes_fb_2026_01_genomelocations_exons \
180
+ limit 5"
181
+ ```
182
+
183
+ ## Search
184
+
185
+ - `fts-build` creates a local SQLite FTS5 index from ingested tables
186
+ - `search` queries that index without calling the live FlyBase API
187
+ - record ids prefer stable FlyBase-like columns such as `fbgn_id`, `primary_fbgn`, `flybase_fbtr`
188
+
189
+ ## Metadata
190
+
191
+ - `describe` summarizes ingested tables with row counts, source paths, semantic tags, columns, and representative non-empty values
192
+ - `schema-export` writes the same metadata to a deterministic JSON artifact beside the SQLite DB, eg `FB2026_01.schema.json`
193
+ - `schema-export` also includes inferred `relationships` for nested child tables and common FlyBase ID joins
194
+ - `schema-export` also emits `semantic_summary` for table/entity tag coverage
195
+ - `schema-export` also emits ready-to-run `query_templates`
196
+ - `query-plan` prints starter SQL without the larger schema payload
197
+ - `query-plan` now includes named biological templates such as `gene-summary-by-fbgn`, `transcript-protein-links`, `publications-for-gene`, and coordinate lookups when matching tables exist
198
+ - `query-run` selects one template and executes it with parameter values
199
+ - useful first step before writing ad hoc SQL or building agent query plans
200
+
201
+ Example:
202
+
203
+ ```bash
204
+ python3 flybase_cli.py schema-export \
205
+ --db data/flybase/FB2026_01.sqlite \
206
+ --sample-values 1
207
+
208
+ python3 flybase_cli.py query-plan \
209
+ --db data/flybase/FB2026_01.sqlite \
210
+ --sample-values 1 \
211
+ --limit 5
212
+
213
+ python3 flybase_cli.py query-run \
214
+ --db data/flybase/FB2026_01.sqlite \
215
+ --template-name gene-summary-by-fbgn \
216
+ --param fbgn_id=FBgn0002121
217
+ ```
218
+
219
+ ## Notes
220
+
221
+ - nested JSON child tables keep lineage columns like `parent_record_id`, `parent_ordinal`, `ordinal`.
222
+ - many FlyBase files start with `##` metadata lines; loader skips those.
223
+ - `sync` writes a preset manifest under `data/flybase/manifests/<release>/`.
224
+ - `full-sync` is the broadest offline path for release bulk data without going through the full Postgres dump.
225
+ - `sync --release FB2026_01` defaults to `data/flybase/FB2026_01.sqlite` to avoid cross-release mixing.
226
+ - `sync-incremental` uses stable manifest keys so release-renamed files still land in `updated` instead of noisy add/remove pairs.
227
+ - `release-diff` compares releases either by raw prefix or by curated multi-prefix preset.
228
+ - `manifest --url` lets you crawl non-`releases/` FlyBase directories such as genome FASTA/GFF trees.
229
+ - `sync-url` is the shortest path for genome assets once you know the directory URL.
230
+ - `sync-genome` is the shortest path when you know the FlyBase release + genome build label.
231
+ - `sync-genome --preset ...` is the preferred path for common genome asset pulls.
232
+ - some FlyBase `.gff.gz` assets are tar-wrapped gzip archives; loader handles that transparently.
233
+ - `sql` and `query-run` shape results as record-oriented JSON with summary metadata for agent chaining.
234
+ - `pg-load` stages the full Postgres import script for `releases/<release>/psql/<release>.sql.gz`.
235
+ - `pg-load --execute` runs the staged script when `createdb` and `psql` are installed locally.
236
+ - SQLite keeps setup minimal; switch to DuckDB/Postgres if you want bigger joins/faster scans.
237
+ - if you only need a few IDs, FlyBase Batch Download may be simpler than syncing files.
238
+ - use `--no-header` for files whose first non-comment row is data, not column names.
239
+
240
+ ## Tests
241
+
242
+ ```bash
243
+ python3 -m unittest discover -s tests
244
+ ```
@@ -0,0 +1,18 @@
1
+ flybase_cli/__init__.py,sha256=lEw53vLD-DrNuj5MFPyP9jpSSwFtyNOEhQ0hM5TPJjc,90
2
+ flybase_cli/__main__.py,sha256=PSQ4rpL0dG6f-qH4N7H-gD9igQkdHzH4yVZDcW8lfZo,80
3
+ flybase_cli/cli.py,sha256=05xJ2O9WQD3fNAlZDzdNxAeAH1ZXdbPJexbo3ArvJHE,25338
4
+ flybase_cli/config.py,sha256=9arWRL_7Hw3mWjKjn_nR7j7j9EGnJKzytyyy0wAtFRI,7620
5
+ flybase_cli/core.py,sha256=ID2p3ZHfEXh2twuEiOFOT7QBuZzJv4ln-w9oTL2DNdo,21681
6
+ flybase_cli/loaders.py,sha256=8ll0lJ3dHMRhRX3oCjE8_XRSP1-wcxzdf_4ckhElVvc,18833
7
+ flybase_cli/postgres.py,sha256=5TVLQj3kZwdCboG_oq1tbKdSAKhhSm4726YJLiDpxlI,2731
8
+ flybase_cli/querying.py,sha256=x9R-R6XitCYkVJpPHtwhdrOYXLP2uTUbkS3iKUcTRK4,4816
9
+ flybase_cli/schema.py,sha256=1-40-ep1cEKoDWw7-9nA_-euIWRR_9c4E9hHcNl9Yf8,24055
10
+ flybase_cli/semantics.py,sha256=SpixB9nPNYBSWRgm6WMcXBkrYSbKxFo-rX4hw2NRBiQ,4649
11
+ flybase_cli/syncing.py,sha256=Qa7lGtKd-O6jrnNS3OPcbHHYHRr8CvHm7G4P32lVZbQ,7783
12
+ flybase_cli/version.py,sha256=YvuYzWnKtqBb-IqG8HAu-nhIYAsgj9Vmc_b9o7vO-js,22
13
+ flybase_cli-0.1.2.dist-info/licenses/LICENSE,sha256=pOm_AKDa_IHzuMyjTQlSC46C0WZiYTDhQXkmlkSk6cI,1080
14
+ flybase_cli-0.1.2.dist-info/METADATA,sha256=6TnyXJI8WMpCezR1ngYrE0L3AhaOJ1ezlHhTvO6Lso4,9577
15
+ flybase_cli-0.1.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
16
+ flybase_cli-0.1.2.dist-info/entry_points.txt,sha256=UL7hpwro7lPQv5gu3MNehvdDcLfy2zoPqBM81rQK6Tg,53
17
+ flybase_cli-0.1.2.dist-info/top_level.txt,sha256=QbB-Gk-A6obyDd9fkFs2Njv6zB9k7OWTBLtKW-xsP7Q,12
18
+ flybase_cli-0.1.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ flybase-cli = flybase_cli.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Gustavo Madeira Santana
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ flybase_cli