flybase-cli 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
flybase_cli/loaders.py ADDED
@@ -0,0 +1,539 @@
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ import gzip
5
+ import json
6
+ import re
7
+ import sqlite3
8
+ import tarfile
9
+ from contextlib import contextmanager
10
+ from io import TextIOWrapper
11
+ from pathlib import Path
12
+ from typing import Iterator
13
+
14
+ from .config import (
15
+ BATCH_SIZE,
16
+ DELIMITED_SUFFIXES,
17
+ FASTA_SUFFIXES,
18
+ GFF_SUFFIXES,
19
+ GTF_SUFFIXES,
20
+ JSON_ID_CANDIDATES,
21
+ JSON_MAX_INFERRED_COLUMNS,
22
+ JSON_SUFFIXES,
23
+ )
24
+
25
+
26
@contextmanager
def open_maybe_gzip(path: Path):
    """Yield a text handle for *path*, transparently handling .gz and .tar.gz.

    Non-".gz" paths are opened directly as UTF-8 text.  For ".gz" paths,
    tarfile is tried first: if the file is a gzipped tarball, the first
    regular member is exposed as a text stream; otherwise we fall back to
    plain gzip decompression.

    Raises ValueError when a tarball holds no regular file or its member
    cannot be extracted.
    """
    if path.suffix != ".gz":
        with path.open("r", encoding="utf-8", newline="") as handle:
            yield handle
        return

    try:
        archive = tarfile.open(path, mode="r:gz")
    except tarfile.ReadError:
        # Not a tarball -- treat it as a plain gzip-compressed text file.
        with gzip.open(path, "rt", encoding="utf-8", newline="") as handle:
            yield handle
        return

    try:
        # Only the first regular file inside the tarball is exposed.
        member = next((item for item in archive if item.isfile()), None)
        if member is None:
            raise ValueError(f"no regular file found in archive: {path}")
        extracted = archive.extractfile(member)
        if extracted is None:
            raise ValueError(f"unable to extract archive member: {path}")
        wrapper = TextIOWrapper(extracted, encoding="utf-8", newline="")
        try:
            yield wrapper
        finally:
            wrapper.close()
    finally:
        archive.close()
54
+
55
+
56
def sanitize_columns(columns: list[str]) -> list[str]:
    """Normalize raw header names into unique SQLite-friendly column names.

    Each name is lower-cased with non-alphanumeric runs collapsed to "_";
    empty results become "col_<position>".  Duplicates get a numeric suffix.

    Bug fix: the previous occurrence-count suffixing could still collide
    (e.g. ["a", "a_2", "a"] produced "a_2" twice); we now probe suffixes
    until the candidate is genuinely unused.
    """
    used: set[str] = set()
    output: list[str] = []
    for index, column in enumerate(columns, start=1):
        base = re.sub(r"[^A-Za-z0-9_]+", "_", column.strip()).strip("_").lower()
        if not base:
            base = f"col_{index}"
        candidate = base
        suffix = 2
        while candidate in used:
            candidate = f"{base}_{suffix}"
            suffix += 1
        used.add(candidate)
        output.append(candidate)
    return output
66
+
67
+
68
def create_table(conn: sqlite3.Connection, table_name: str, columns: list[str]) -> str:
    """Drop and recreate *table_name* with TEXT *columns*; return its INSERT SQL.

    Hardening: identifiers come from user-supplied file headers, so embedded
    double quotes are doubled per SQL rules to keep them from breaking out of
    the quoted identifier.  For quote-free names the emitted SQL is unchanged.
    """
    def quote(identifier: str) -> str:
        # '"' -> '""' inside a quoted identifier is the SQL escape rule.
        return '"' + identifier.replace('"', '""') + '"'

    table = quote(table_name)
    conn.execute(f"DROP TABLE IF EXISTS {table}")
    column_defs = ", ".join(f"{quote(column)} TEXT" for column in columns)
    conn.execute(f"CREATE TABLE {table} ({column_defs})")
    quoted_columns = ", ".join(quote(column) for column in columns)
    placeholders = ", ".join("?" for _ in columns)
    return f"INSERT INTO {table} ({quoted_columns}) VALUES ({placeholders})"
75
+
76
+
77
+ def flush_batch(
78
+ conn: sqlite3.Connection,
79
+ insert_sql: str,
80
+ batch: list[list[str]] | list[tuple[str, ...]],
81
+ row_count: int,
82
+ ) -> int:
83
+ if batch:
84
+ conn.executemany(insert_sql, batch)
85
+ row_count += len(batch)
86
+ return row_count
87
+
88
+
89
def sample_delimiter(path: Path) -> str:
    """Return the field delimiter implied by *path*'s file name.

    Bug fix: the previous check looked for ".csv" anywhere in the name, so a
    file like "csv_export.tsv" was wrongly treated as comma-separated.  We
    now require the ".csv" extension itself (optionally gzip-compressed);
    everything else defaults to tab, the common FlyBase format.
    """
    name = path.name.lower()
    return "," if name.endswith((".csv", ".csv.gz")) else "\t"
91
+
92
+
93
def iter_delimited_rows(source: Path) -> Iterator[tuple[list[str], str]]:
    """Stream (row, delimiter) pairs from a possibly-compressed delimited file.

    The delimiter is inferred once from the file name and repeated with
    every row so consumers can rejoin overflow fields consistently.
    """
    delimiter = sample_delimiter(source)
    with open_maybe_gzip(source) as stream:
        for record in csv.reader(stream, delimiter=delimiter):
            yield record, delimiter
99
+
100
+
101
def read_header_and_rows(
    source: Path,
    no_header: bool,
) -> tuple[list[str], list[str] | None, Iterator[tuple[list[str], str]], str]:
    """Find the header of a delimited file and hand back the remaining rows.

    Returns (header, first_data_row, row_iterator, delimiter).  With
    *no_header* true, synthetic "col_N" names are generated and the first
    usable line is returned as data; otherwise that line becomes the header
    (with a leading "#" stripped) and first_data_row is None.  The returned
    iterator resumes exactly after the consumed line.

    Raises ValueError when the file yields no usable rows.
    """
    row_iter = iter_delimited_rows(source)
    delimiter = "\t"  # placeholder; overwritten by the first yielded pair

    for row, delimiter in row_iter:
        if not row:
            continue
        # Skip "##"-style metadata lines, but only when the whole line is a
        # single field (a quoted data field could also start with "##").
        if row[0].startswith("##") and len(row) == 1:
            continue
        if no_header:
            header = [f"col_{index}" for index in range(1, len(row) + 1)]
            return header, row, row_iter, delimiter
        # Header rows are commonly prefixed with "#"; drop it from field 1.
        row[0] = row[0].lstrip("#")
        return row, None, row_iter, delimiter

    raise ValueError(f"empty file: {source}")
120
+
121
+
122
def normalize_row(row: list[str], width: int, delimiter: str) -> list[str]:
    """Coerce *row* to exactly *width* fields.

    Short rows are padded with empty strings; overflow fields are rejoined
    with *delimiter* into the final column so no data is dropped.
    """
    size = len(row)
    if size == width:
        return row
    if size < width:
        padded = list(row)
        padded.extend("" for _ in range(width - size))
        return padded
    merged = row[: width - 1]
    merged.append(delimiter.join(row[width - 1 :]))
    return merged
128
+
129
+
130
def ingest_delimited(
    conn: sqlite3.Connection,
    source: Path,
    table_name: str,
    no_header: bool = False,
) -> int:
    """Load a delimited (CSV/TSV, optionally gzipped) file into *table_name*.

    Rows are normalized to the header width and inserted in batches of
    BATCH_SIZE.  Returns the total number of rows inserted.
    """
    raw_header, first_data_row, row_iter, delimiter = read_header_and_rows(source, no_header)
    columns = sanitize_columns(raw_header)
    width = len(columns)
    insert_sql = create_table(conn, table_name, columns)
    pending: list[list[str]] = []
    total = 0

    # With no_header=True the "header" was synthetic and the first line is data.
    if first_data_row is not None:
        pending.append(normalize_row(first_data_row, width, delimiter))

    for record, _ in row_iter:
        if not record:
            continue
        pending.append(normalize_row(record, width, delimiter))
        if len(pending) >= BATCH_SIZE:
            total = flush_batch(conn, insert_sql, pending, total)
            pending.clear()

    return flush_batch(conn, insert_sql, pending, total)
154
+
155
+
156
def split_fasta_header(header: str) -> tuple[str, str]:
    """Split a FASTA ">" header line into (record_id, description).

    The id is the first whitespace-delimited token after ">"; the rest of
    the line (if any) is the description.  Empty headers yield ("", "").
    """
    body = header[1:].strip()
    if not body:
        return "", ""
    pieces = body.split(None, 1)
    if len(pieces) == 1:
        return pieces[0], ""
    return pieces[0], pieces[1]
164
+
165
+
166
def ingest_fasta(conn: sqlite3.Connection, source: Path, table_name: str) -> int:
    """Load a (possibly gzipped) FASTA file into *table_name*.

    Emits one row per record: id, the header without its ">", the
    description, the concatenated sequence, and its length (as text, since
    every column is TEXT).  Returns the number of rows inserted.
    """
    columns = ["record_id", "header", "description", "sequence", "sequence_length"]
    insert_sql = create_table(conn, table_name, columns)
    batch: list[tuple[str, ...]] = []
    row_count = 0
    # Accumulator state for the record currently being read.
    current_header = ""
    current_id = ""
    current_description = ""
    sequence_parts: list[str] = []

    def flush_current() -> None:
        # Emit the in-progress record; a no-op before the first ">" line,
        # which also silently drops any sequence text preceding a header.
        nonlocal row_count, batch, sequence_parts
        if not current_header:
            return
        sequence = "".join(sequence_parts)
        batch.append(
            (
                current_id,
                current_header[1:].strip(),
                current_description,
                sequence,
                str(len(sequence)),
            )
        )
        sequence_parts = []

    with open_maybe_gzip(source) as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith(">"):
                # New record: emit the previous one, then reset state.
                flush_current()
                current_header = line
                current_id, current_description = split_fasta_header(line)
                # Batch flushes only happen at record boundaries, so a
                # record's row is never split across two executemany calls.
                if len(batch) >= BATCH_SIZE:
                    row_count = flush_batch(conn, insert_sql, batch, row_count)
                    batch.clear()
                continue
            sequence_parts.append(line)

    flush_current()  # emit the final record
    return flush_batch(conn, insert_sql, batch, row_count)
209
+
210
+
211
def parse_feature_attributes(raw_attributes: str) -> dict[str, str]:
    """Parse a GFF3 ("key=value;...") or GTF ('key "value"; ...') attribute column.

    Entries without a separator become keys with an empty value.  Values are
    stripped of surrounding whitespace and double quotes.
    """
    parsed: dict[str, str] = {}
    for piece in raw_attributes.split(";"):
        entry = piece.strip()
        if not entry:
            continue
        if "=" in entry:
            key, _, value = entry.partition("=")
        elif " " in entry:
            key, _, value = entry.partition(" ")
        else:
            key, value = entry, ""
        parsed[key.strip()] = value.strip().strip('"')
    return parsed
226
+
227
+
228
def pick_attribute(attributes: dict[str, str], *keys: str) -> str:
    """Return the first non-empty value among *keys*, or "" if none match."""
    return next((attributes[key] for key in keys if attributes.get(key)), "")
234
+
235
+
236
def ingest_feature_file(
    conn: sqlite3.Connection,
    source: Path,
    table_name: str,
) -> int:
    """Load a GFF/GTF feature file into *table_name*; return the row count.

    The nine tab-separated columns are stored verbatim; commonly queried
    attributes (ID/Parent/Name/gene_id/transcript_id, with case variants)
    are promoted to dedicated columns alongside the full attribute payload
    both raw and as sorted JSON.  Comment lines and rows without exactly
    nine fields are skipped silently.
    """
    columns = [
        "seqid",
        "source",
        "feature_type",
        "start",
        "end",
        "score",
        "strand",
        "phase",
        "feature_id",
        "parent_id",
        "feature_name",
        "gene_id",
        "transcript_id",
        "attributes_json",
        "attributes_raw",
    ]
    insert_sql = create_table(conn, table_name, columns)
    batch: list[tuple[str, ...]] = []
    row_count = 0

    with open_maybe_gzip(source) as handle:
        for raw_line in handle:
            line = raw_line.rstrip("\n")
            if not line or line.startswith("#"):
                continue
            fields = line.split("\t")
            # GFF/GTF mandates nine columns; anything else is malformed.
            if len(fields) != 9:
                continue
            attributes = parse_feature_attributes(fields[8])
            batch.append(
                (
                    fields[0],
                    fields[1],
                    fields[2],
                    fields[3],
                    fields[4],
                    fields[5],
                    fields[6],
                    fields[7],
                    # Promote the id-like attributes, trying GFF3 names first
                    # and falling back to GTF/lower-case variants.
                    pick_attribute(attributes, "ID", "id"),
                    pick_attribute(attributes, "Parent", "parent"),
                    pick_attribute(attributes, "Name", "gene_name", "transcript_name", "name"),
                    pick_attribute(attributes, "gene_id", "geneID", "gene"),
                    pick_attribute(attributes, "transcript_id", "transcriptID", "transcript"),
                    json.dumps(attributes, sort_keys=True),
                    fields[8],
                )
            )
            if len(batch) >= BATCH_SIZE:
                row_count = flush_batch(conn, insert_sql, batch, row_count)
                batch.clear()

    return flush_batch(conn, insert_sql, batch, row_count)
295
+
296
+
297
def iter_json_rows(payload: object) -> Iterator[tuple[str, str]]:
    """Yield (record_id, json_text) rows from a decoded JSON payload.

    Top-level lists yield one row per item; a dict with exactly one
    list-valued key yields one row per item of that list; any other dict or
    scalar collapses to a single row with id "1".
    """
    if isinstance(payload, list):
        for ordinal, entry in enumerate(payload, start=1):
            yield pick_json_record_id(entry, ordinal), json.dumps(entry, sort_keys=True)
    elif isinstance(payload, dict):
        nested_lists = [value for value in payload.values() if isinstance(value, list)]
        if len(nested_lists) == 1:
            for ordinal, entry in enumerate(nested_lists[0], start=1):
                yield pick_json_record_id(entry, ordinal), json.dumps(entry, sort_keys=True)
        else:
            yield "1", json.dumps(payload, sort_keys=True)
    else:
        # Bare scalar: no keys to sort.
        yield "1", json.dumps(payload)
313
+
314
+
315
def pick_json_record_id(item: object, fallback_index: int) -> str:
    """Pick a stable record id from the configured id-like keys.

    Falls back to the 1-based ordinal when *item* is not a dict or none of
    the candidate keys holds a truthy value.
    """
    if isinstance(item, dict):
        for key in JSON_ID_CANDIDATES:
            candidate = item.get(key)
            if candidate:
                return str(candidate)
    return str(fallback_index)
322
+
323
+
324
+ def json_scalar_to_text(value: object) -> str | None:
325
+ if value is None:
326
+ return ""
327
+ if isinstance(value, (str, int, float, bool)):
328
+ return str(value)
329
+ return None
330
+
331
+
332
def flatten_json_record(record: dict[str, object], prefix: str = "") -> dict[str, str]:
    """Flatten nested dicts into underscore-joined keys mapping to text values.

    Keys are sanitized to alphanumerics/underscores (unsanitizable keys are
    dropped).  Scalars become text (null -> ""), nested dicts recurse with
    the parent key as prefix, and lists are skipped entirely.
    """
    result: dict[str, str] = {}
    for raw_key, value in record.items():
        cleaned = re.sub(r"[^A-Za-z0-9_]+", "_", raw_key).strip("_")
        if not cleaned:
            continue
        full_key = f"{prefix}_{cleaned}" if prefix else cleaned
        if value is None:
            result[full_key] = ""
        elif isinstance(value, (str, int, float, bool)):
            result[full_key] = str(value)
        elif isinstance(value, dict):
            result.update(flatten_json_record(value, full_key))
    return result
347
+
348
+
349
def extract_json_records(payload: object) -> list[dict[str, object]]:
    """Pull the list of record dicts out of a decoded JSON payload.

    Lists keep only their dict items; a dict with exactly one list-valued
    key unwraps that list; any other dict is itself the single record.
    Scalars yield no records.
    """
    if isinstance(payload, list):
        return [entry for entry in payload if isinstance(entry, dict)]
    if not isinstance(payload, dict):
        return []
    nested_lists = [value for value in payload.values() if isinstance(value, list)]
    if len(nested_lists) == 1:
        return [entry for entry in nested_lists[0] if isinstance(entry, dict)]
    return [payload]
358
+
359
+
360
def infer_json_columns(records: list[dict[str, object]]) -> list[str]:
    """Choose up to JSON_MAX_INFERRED_COLUMNS flattened keys for the table.

    Keys seen most often across a 200-record sample come first; ties break
    alphabetically for deterministic schemas.
    """
    tally: dict[str, int] = {}
    for record in records[:200]:  # sampling keeps inference cheap on huge files
        for column in flatten_json_record(record):
            tally[column] = tally.get(column, 0) + 1
    ranked = sorted(tally, key=lambda column: (-tally[column], column))
    return ranked[:JSON_MAX_INFERRED_COLUMNS]
367
+
368
+
369
def sanitize_json_child_name(name: str) -> str:
    """Lower-case *name*, collapsing non-alphanumeric runs to underscores."""
    collapsed = re.sub(r"[^A-Za-z0-9_]+", "_", name)
    return collapsed.strip("_").lower()
371
+
372
+
373
def json_parent_ordinal_columns(depth: int) -> list[str]:
    """Column names recording each ancestor's ordinal for a child at *depth*.

    The immediate parent is always "parent_ordinal"; deeper ancestors get
    numbered "ancestor_ordinal_N" columns.  Depth <= 0 yields no columns.
    """
    if depth <= 0:
        return []
    names = [f"ancestor_ordinal_{index}" for index in range(1, depth)]
    names.append("parent_ordinal")
    return names
379
+
380
+
381
def json_link_columns(parent_ordinals: list[str]) -> list[str]:
    """Linking columns for a child table: parent id, ancestor ordinals, own ordinal."""
    depth = len(parent_ordinals)
    ordinal_columns: list[str] = []
    if depth > 0:
        # One column per ancestor level, ending with the immediate parent.
        ordinal_columns = [f"ancestor_ordinal_{index}" for index in range(1, depth)]
        ordinal_columns.append("parent_ordinal")
    return ["parent_record_id", *ordinal_columns, "ordinal"]
383
+
384
+
385
def discover_json_list_fields(records: list[dict[str, object]]) -> dict[str, dict[str, object]]:
    """Scan a sample of records for list-valued fields worth child tables.

    Classifies each non-empty list field as "scalar" (every item is a JSON
    scalar), "dict" (every item is a dict; columns are inferred from them),
    or "mixed" (inconsistent; dropped from the result).  Only the first 200
    records are sampled to bound the cost.

    Fix: removed the dead local (`first = value[0]`) that was assigned and
    never read.
    """
    discovered: dict[str, dict[str, object]] = {}
    for record in records[:200]:
        for key, value in record.items():
            if not isinstance(value, list) or not value:
                continue
            field = discovered.setdefault(key, {"kind": None, "dict_rows": []})
            if all(isinstance(item, dict) for item in value):
                # Once a field has been seen as "mixed" it stays mixed.
                if field["kind"] in (None, "dict"):
                    field["kind"] = "dict"
                    field["dict_rows"].extend(item for item in value if isinstance(item, dict))
                continue
            if all(json_scalar_to_text(item) is not None for item in value):
                if field["kind"] is None:
                    field["kind"] = "scalar"
                continue
            # Lists mixing scalars and containers cannot map to one schema.
            field["kind"] = "mixed"

    filtered: dict[str, dict[str, object]] = {}
    for key, info in discovered.items():
        kind = info["kind"]
        if kind == "scalar":
            filtered[key] = {"kind": "scalar"}
        elif kind == "dict":
            filtered[key] = {"kind": "dict", "columns": infer_json_columns(info["dict_rows"])}
    return filtered
413
+
414
+
415
def ingest_json_child_tables(
    conn: sqlite3.Connection,
    parent_table_name: str,
    parent_rows: list[tuple[str, list[str], dict[str, object]]],
) -> list[tuple[str, int]]:
    """Create child tables for list-valued fields of JSON parent rows.

    *parent_rows* holds (record_id, ancestor_ordinals, record) triples.
    Scalar lists become a "<parent>_<field>" table with one value per row;
    dict lists get inferred columns plus the raw payload and are recursed
    into for their own nested lists.  Returns (table_name, row_count) pairs
    for every table created, parents before their descendants.
    """
    if not parent_rows:
        return []

    records = [record for _, _, record in parent_rows]
    list_fields = discover_json_list_fields(records)
    ingested: list[tuple[str, int]] = []

    for field_name, field_info in list_fields.items():
        child_table_name = f"{parent_table_name}_{sanitize_json_child_name(field_name)}"
        kind = field_info["kind"]
        # All parent rows share the same ancestry depth, so the first row's
        # ordinals determine the linking columns for the whole table.
        link_columns = json_link_columns(parent_rows[0][1])
        if kind == "scalar":
            insert_sql = create_table(conn, child_table_name, [*link_columns, "value"])
            batch: list[tuple[str, ...]] = []
            for record_id, parent_ordinals, record in parent_rows:
                values = record.get(field_name)
                if not isinstance(values, list):
                    continue
                for ordinal, item in enumerate(values, start=1):
                    scalar = json_scalar_to_text(item)
                    if scalar is None:
                        # Tolerate stray non-scalar items beyond the sample.
                        continue
                    batch.append(tuple([record_id, *parent_ordinals, str(ordinal), scalar]))
            conn.executemany(insert_sql, batch)
            ingested.append((child_table_name, len(batch)))
            continue

        if kind == "dict":
            columns = [*link_columns, *field_info["columns"], "payload_json"]
            insert_sql = create_table(conn, child_table_name, columns)
            batch: list[tuple[str, ...]] = []
            child_rows: list[tuple[str, list[str], dict[str, object]]] = []
            for record_id, parent_ordinals, record in parent_rows:
                values = record.get(field_name)
                if not isinstance(values, list):
                    continue
                for ordinal, item in enumerate(values, start=1):
                    if not isinstance(item, dict):
                        continue
                    flattened = flatten_json_record(item)
                    row = [record_id, *parent_ordinals, str(ordinal)]
                    row.extend(flattened.get(column, "") for column in field_info["columns"])
                    row.append(json.dumps(item, sort_keys=True))
                    batch.append(tuple(row))
                    # Each child keeps the root record_id and extends the
                    # ordinal trail for the next level of recursion.
                    child_rows.append((record_id, [*parent_ordinals, str(ordinal)], item))
            conn.executemany(insert_sql, batch)
            ingested.append((child_table_name, len(batch)))
            ingested.extend(ingest_json_child_tables(conn, child_table_name, child_rows))

    return ingested
470
+
471
+
472
def ingest_json(conn: sqlite3.Connection, source: Path, table_name: str) -> list[tuple[str, int]]:
    """Load a JSON file into *table_name* plus child tables for list fields.

    Record dicts get inferred scalar columns alongside the full payload as
    sorted JSON; payloads with no extractable record dicts fall back to a
    plain (record_id, payload_json) table.  Returns (table_name, row_count)
    pairs for every table written.
    """
    with open_maybe_gzip(source) as handle:
        payload = json.load(handle)
    records = extract_json_records(payload)
    if not records:
        # No record dicts: store the payload as opaque JSON rows.
        columns = ["record_id", "payload_json"]
        insert_sql = create_table(conn, table_name, columns)
        batch = list(iter_json_rows(payload))
        conn.executemany(insert_sql, batch)
        return [(table_name, len(batch))]

    inferred_columns = infer_json_columns(records)
    columns = ["record_id", *inferred_columns, "payload_json"]
    insert_sql = create_table(conn, table_name, columns)
    batch: list[tuple[str, ...]] = []
    for index, record in enumerate(records, start=1):
        flattened = flatten_json_record(record)
        row = [pick_json_record_id(record, index)]
        # Missing inferred columns are stored as empty strings.
        row.extend(flattened.get(column, "") for column in inferred_columns)
        row.append(json.dumps(record, sort_keys=True))
        batch.append(tuple(row))
    conn.executemany(insert_sql, batch)
    ingested = [(table_name, len(batch))]
    # List-valued fields spill into recursively created child tables.
    ingested.extend(
        ingest_json_child_tables(
            conn,
            table_name,
            [(pick_json_record_id(record, index), [], record) for index, record in enumerate(records, start=1)],
        )
    )
    return ingested
503
+
504
+
505
def detect_ingest_format(source: Path) -> str | None:
    """Map *source*'s file name to an ingest format label, or None if unknown."""
    name = source.name.lower()
    format_suffixes = (
        ("delimited", DELIMITED_SUFFIXES),
        ("fasta", FASTA_SUFFIXES),
        ("gff", GFF_SUFFIXES),
        ("gtf", GTF_SUFFIXES),
        ("json", JSON_SUFFIXES),
    )
    for label, suffixes in format_suffixes:
        if name.endswith(tuple(suffixes)):
            return label
    return None
518
+
519
+
520
def ingest_source(
    conn: sqlite3.Connection,
    source: Path,
    table_name: str,
    no_header: bool = False,
) -> list[tuple[str, int]]:
    """Ingest *source* into SQLite, dispatching on its detected format.

    Returns (table_name, row_count) pairs; only JSON can create extra child
    tables.  *no_header* applies to delimited files only.

    Raises ValueError when the file name matches no supported format.
    """
    detected = detect_ingest_format(source)
    if detected == "json":
        return ingest_json(conn, source, table_name)
    if detected == "delimited":
        count = ingest_delimited(conn, source, table_name, no_header=no_header)
    elif detected == "fasta":
        count = ingest_fasta(conn, source, table_name)
    elif detected in ("gff", "gtf"):
        count = ingest_feature_file(conn, source, table_name)
    else:
        raise ValueError(f"unsupported ingest format: {source}")
    return [(table_name, count)]
536
+
537
+
538
def is_ingestable(path: Path) -> bool:
    """True when *path*'s file name matches a supported ingest format."""
    detected = detect_ingest_format(path)
    return detected is not None
@@ -0,0 +1,106 @@
1
+ from __future__ import annotations
2
+
3
+ import shutil
4
+ import subprocess
5
+ from pathlib import Path
6
+
7
+ from .config import DEFAULT_POSTGRES_DIR
8
+ from .core import download_file, release_base_url
9
+
10
+
11
def dump_url_for_release(release: str) -> str:
    """URL of the gzipped Postgres dump published for *release*."""
    base = release_base_url(release)
    return f"{base}psql/{release}.sql.gz"
13
+
14
+
15
def default_dump_path(root: Path, release: str) -> Path:
    """Default on-disk location for the release's SQL dump."""
    return Path(root, f"{release}.sql.gz")
17
+
18
+
19
def default_script_path(root: Path, release: str) -> Path:
    """Default location for the generated shell load script."""
    return Path(root, f"load-{release}.sh")
21
+
22
+
23
def default_db_name(release: str) -> str:
    """Database name derived from the release tag, e.g. "flybase_fb2024_05"."""
    lowered = release.lower()
    return f"flybase_{lowered}"
25
+
26
+
27
+ def available_postgres_tools() -> dict[str, str | None]:
28
+ return {
29
+ "createdb": shutil.which("createdb"),
30
+ "dropdb": shutil.which("dropdb"),
31
+ "psql": shutil.which("psql"),
32
+ }
33
+
34
+
35
def render_pg_load_script(
    *,
    dump_path: Path,
    db_name: str,
    drop_existing: bool,
) -> str:
    """Render a bash script that creates *db_name* and restores the dump.

    With *drop_existing*, a `dropdb --if-exists` precedes the create.  The
    script is strict-mode bash and ends with a trailing newline.
    """
    script = ["#!/usr/bin/env bash", "set -euo pipefail", ""]
    if drop_existing:
        script.append(f"dropdb --if-exists {db_name}")
    script.extend(
        [
            f"createdb {db_name}",
            f"gzip -dc {dump_path} | psql {db_name}",
            "",
        ]
    )
    return "\n".join(script)
48
+
49
+
50
def write_pg_load_script(
    *,
    release: str,
    dump_path: Path,
    db_name: str,
    script_path: Path,
    drop_existing: bool,
) -> Path:
    """Write an executable Postgres load script to *script_path* and return it.

    NOTE(review): *release* is accepted but unused in this body — presumably
    kept for call-site symmetry; confirm before removing.
    """
    script_path.parent.mkdir(parents=True, exist_ok=True)
    contents = render_pg_load_script(
        dump_path=dump_path,
        db_name=db_name,
        drop_existing=drop_existing,
    )
    script_path.write_text(contents, encoding="utf-8")
    script_path.chmod(0o755)  # make the script directly executable
    return script_path
67
+
68
+
69
def ensure_dump_file(
    *,
    release: str,
    dump_path: Path,
    force: bool = False,
) -> Path:
    """Download the release dump to *dump_path* unless it already exists.

    *force* re-downloads even when a local copy is present.  Returns the
    path in either case.
    """
    if force or not dump_path.exists():
        dump_path.parent.mkdir(parents=True, exist_ok=True)
        download_file(dump_url_for_release(release), dump_path)
    return dump_path
80
+
81
+
82
def execute_pg_load_script(script_path: Path) -> None:
    """Run the generated load script; raises CalledProcessError on failure."""
    command = [str(script_path)]
    subprocess.run(command, check=True)
84
+
85
+
86
def build_pg_load_plan(
    *,
    release: str,
    root: Path = DEFAULT_POSTGRES_DIR,
    db_name: str | None = None,
    dump_path: Path | None = None,
    script_path: Path | None = None,
    drop_existing: bool = False,
) -> dict[str, object]:
    """Assemble the parameters describing a Postgres load, without side effects.

    Any of *db_name*, *dump_path* and *script_path* left falsy is derived
    from *release* and *root*.  Paths are returned as strings; "tools" maps
    each required CLI binary to its resolved location (None when missing).
    """
    resolved_db = db_name or default_db_name(release)
    resolved_dump = dump_path or default_dump_path(root, release)
    resolved_script = script_path or default_script_path(root, release)
    return {
        "release": release,
        "db_name": resolved_db,
        "dump_url": dump_url_for_release(release),
        "dump_path": str(resolved_dump),
        "script_path": str(resolved_script),
        "drop_existing": drop_existing,
        "tools": available_postgres_tools(),
    }