PyPI - bqcsv - Versions diffs - 1.0.0__tar.gz - Mend

bqcsv 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

bqcsv-1.0.0/PKG-INFO +137 -0
bqcsv-1.0.0/README.md +128 -0
bqcsv-1.0.0/bqcsv/__init__.py +1 -0
bqcsv-1.0.0/bqcsv/cli.py +201 -0
bqcsv-1.0.0/bqcsv/config.py +54 -0
bqcsv-1.0.0/bqcsv/schema.py +212 -0
bqcsv-1.0.0/bqcsv/table.py +46 -0
bqcsv-1.0.0/bqcsv/uploader.py +193 -0
bqcsv-1.0.0/bqcsv.egg-info/PKG-INFO +137 -0
bqcsv-1.0.0/bqcsv.egg-info/SOURCES.txt +16 -0
bqcsv-1.0.0/bqcsv.egg-info/dependency_links.txt +1 -0
bqcsv-1.0.0/bqcsv.egg-info/entry_points.txt +2 -0
bqcsv-1.0.0/bqcsv.egg-info/requires.txt +2 -0
bqcsv-1.0.0/bqcsv.egg-info/top_level.txt +1 -0
bqcsv-1.0.0/pyproject.toml +21 -0
bqcsv-1.0.0/setup.cfg +4 -0
bqcsv-1.0.0/tests/test_cli.py +140 -0
bqcsv-1.0.0/tests/test_uploader.py +112 -0

bqcsv-1.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,137 @@
+Metadata-Version: 2.4
+Name: bqcsv
+Version: 1.0.0
+Summary: Upload a local CSV file to a BigQuery table via the bq CLI
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: google-cloud-bigquery>=3.0
+Requires-Dist: pandas>=2.0
+# bqcsv
+Upload a local CSV file to BigQuery using the `bq` CLI and your existing `gcloud` authentication.
+## Why a dedicated CLI tool?
+Out of the box, Google's `bq` CLI cannot create a table with column names inferred from a CSV file.
+`bqcsv` fixes that:
+* detects the schema from the CSV file
+* creates a table with proper column names and types
+* loads the CSV file using `bq load`
+## Authentication
+No additional authentication is needed.
+`bqcsv` uses your existing authentication via `gcloud auth login`.
+## Requirements
+- Python 3.10+
+- [Google Cloud SDK](https://cloud.google.com/sdk) with `bq` on your `PATH`
+## How to use `bqcsv`
+### Upload a CSV file to a table
+To upload a CSV file, specify your project ID, dataset ID, and table name:
+```bash
+bqcsv data.csv --project my-gcp-project --dataset staging --table events_raw
+```
+The `--table` argument is optional. By default, `bqcsv` derives the table name from the CSV file:
+```bash
+bqcsv data.csv --project my-gcp-project --dataset staging
+# is identical to
+bqcsv data.csv --project my-gcp-project --dataset staging --table data
+```
+### Saving your configuration
+To avoid passing `--project`, `--dataset`, or `--table` on every run, save them to your local config:
+```bash
+bqcsv config set --project my-gcp-project --dataset analytics --table events
+bqcsv config show
+```
+Defaults are stored in `~/.config/bqcsv/config.toml`.
+After you set your defaults, you can call `bqcsv` with only the path to the CSV file:
+```bash
+bqcsv data.csv
+```
+If you have not set a default `--table` value, the table name is derived from the CSV file.
+## Development
+### Install from your local repo
+```bash
+pip install -e .
+```
+### Testing
+To delete a test table, use `bq`:
+```bash
+bq rm -f -t  PROJECT_ID:DATASET_ID.TABLE_NAME
+```
+You can run the module directly when working on a new feature or fixing a bug:
+```sh
+python -m bqcsv.cli config set --project PROJECT_ID --dataset DATASET_ID --table TEST_TABLE_NAME
+```
+## Releasing to PyPI
+1. **Bump the version** in both places (they must match):
+   - `pyproject.toml` → `[project].version`
+   - `bqcsv/__init__.py` → `__version__`
+2. **Install build tools** (one-time):
+   ```bash
+   pip install build twine
+   ```
+3. **Run tests** and commit the version bump.
+4. **Build the package**:
+   ```bash
+   python -m build
+   ```
+   This creates `dist/bqcsv-<version>.tar.gz` and `dist/bqcsv-<version>-py3-none-any.whl`.
+5. **Upload to PyPI**:
+   ```bash
+   twine upload dist/*
+   ```
+   On first upload, create an account at [pypi.org](https://pypi.org) and use an [API token](https://pypi.org/help/#apitoken) as the password (`__token__` as the username).
+6. **Tag the release** (optional but recommended):
+   ```bash
+   git tag v1.0.0
+   git push origin v1.0.0
+   ```
+After publishing, users can install the new version with:
+```bash
+pip install --upgrade bqcsv
+```

bqcsv-1.0.0/README.md ADDED Viewed

@@ -0,0 +1,128 @@
+# bqcsv
+Upload a local CSV file to BigQuery using the `bq` CLI and your existing `gcloud` authentication.
+## Why a dedicated CLI tool?
+Out of the box, Google's `bq` CLI cannot create a table with column names inferred from a CSV file.
+`bqcsv` fixes that:
+* detects the schema from the CSV file
+* creates a table with proper column names and types
+* loads the CSV file using `bq load`
+## Authentication
+No additional authentication is needed.
+`bqcsv` uses your existing authentication via `gcloud auth login`.
+## Requirements
+- Python 3.10+
+- [Google Cloud SDK](https://cloud.google.com/sdk) with `bq` on your `PATH`
+## How to use `bqcsv`
+### Upload a CSV file to a table
+To upload a CSV file, specify your project ID, dataset ID, and table name:
+```bash
+bqcsv data.csv --project my-gcp-project --dataset staging --table events_raw
+```
+The `--table` argument is optional. By default, `bqcsv` derives the table name from the CSV file:
+```bash
+bqcsv data.csv --project my-gcp-project --dataset staging
+# is identical to
+bqcsv data.csv --project my-gcp-project --dataset staging --table data
+```
+### Saving your configuration
+To avoid passing `--project`, `--dataset`, or `--table` on every run, save them to your local config:
+```bash
+bqcsv config set --project my-gcp-project --dataset analytics --table events
+bqcsv config show
+```
+Defaults are stored in `~/.config/bqcsv/config.toml`.
+After you set your defaults, you can call `bqcsv` with only the path to the CSV file:
+```bash
+bqcsv data.csv
+```
+If you have not set a default `--table` value, the table name is derived from the CSV file.
+## Development
+### Install from your local repo
+```bash
+pip install -e .
+```
+### Testing
+To delete a test table, use `bq`:
+```bash
+bq rm -f -t  PROJECT_ID:DATASET_ID.TABLE_NAME
+```
+You can run the module directly when working on a new feature or fixing a bug:
+```sh
+python -m bqcsv.cli config set --project PROJECT_ID --dataset DATASET_ID --table TEST_TABLE_NAME
+```
+## Releasing to PyPI
+1. **Bump the version** in both places (they must match):
+   - `pyproject.toml` → `[project].version`
+   - `bqcsv/__init__.py` → `__version__`
+2. **Install build tools** (one-time):
+   ```bash
+   pip install build twine
+   ```
+3. **Run tests** and commit the version bump.
+4. **Build the package**:
+   ```bash
+   python -m build
+   ```
+   This creates `dist/bqcsv-<version>.tar.gz` and `dist/bqcsv-<version>-py3-none-any.whl`.
+5. **Upload to PyPI**:
+   ```bash
+   twine upload dist/*
+   ```
+   On first upload, create an account at [pypi.org](https://pypi.org) and use an [API token](https://pypi.org/help/#apitoken) as the password (`__token__` as the username).
+6. **Tag the release** (optional but recommended):
+   ```bash
+   git tag v1.0.0
+   git push origin v1.0.0
+   ```
+After publishing, users can install the new version with:
+```bash
+pip install --upgrade bqcsv
+```

bqcsv-1.0.0/bqcsv/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "1.0.0"

bqcsv-1.0.0/bqcsv/cli.py ADDED Viewed

@@ -0,0 +1,201 @@
+from __future__ import annotations
+import argparse
+import json
+import sys
+from pathlib import Path
+from bqcsv.config import (
+    CONFIG_KEYS,
+    CONFIG_PATH,
+    load_config,
+    resolve_setting,
+    save_config,
+    unset_config,
+)
+from bqcsv.uploader import upload_csv
+def _upload_parser() -> argparse.ArgumentParser:
+    """Build the argument parser for the default (upload) command.
+
+    The parser takes one positional ``csv_path`` and the optional flags
+    ``--project``, ``--dataset``, ``--table``, ``--replace``, ``--no-header``,
+    ``--schema`` and ``--output`` (``text`` by default).
+    """
+    upload = argparse.ArgumentParser(
+        prog="bqcsv",
+        description="Upload a local CSV file to BigQuery using the authenticated `bq` CLI.",
+    )
+    add = upload.add_argument
+
+    # Positional input file.
+    add("csv_path", type=Path, help="Path to the local CSV file to upload")
+
+    # Destination; each flag takes precedence over the saved config.
+    add("--project", help="GCP project ID (overrides config)")
+    add("--dataset", help="BigQuery dataset ID (overrides config)")
+    add(
+        "--table",
+        help="BigQuery table ID (overrides config; defaults to the CSV file name without extension)",
+    )
+
+    # Load behaviour switches.
+    add(
+        "--replace",
+        action="store_true",
+        help="Replace the destination table instead of appending rows",
+    )
+    add(
+        "--no-header",
+        action="store_true",
+        help="Treat the first row as data instead of a header row",
+    )
+    add(
+        "--schema",
+        type=Path,
+        help="Optional JSON schema file for the table (disables autodetect)",
+    )
+
+    # Reporting format.
+    add(
+        "--output",
+        choices=("text", "json"),
+        default="text",
+        help="Output format: text prints progress as it runs; json prints a single JSON object at the end",
+    )
+    return upload
+def _config_parser() -> argparse.ArgumentParser:
+    """Build the parser for ``bqcsv config`` with show/set/unset subcommands.
+
+    Every subcommand records its handler under ``func`` so the caller can
+    dispatch with ``args.func(args)``. A subcommand is mandatory.
+    """
+    root = argparse.ArgumentParser(prog="bqcsv config")
+    commands = root.add_subparsers(dest="config_command", required=True)
+
+    show_cmd = commands.add_parser("show", help="Show saved defaults")
+    show_cmd.set_defaults(func=_run_config_show)
+
+    # `set` takes a value per key.
+    set_cmd = commands.add_parser("set", help="Save default project/dataset/table")
+    for flag, text in (
+        ("--project", "Default GCP project ID"),
+        ("--dataset", "Default BigQuery dataset ID"),
+        ("--table", "Default BigQuery table ID"),
+    ):
+        set_cmd.add_argument(flag, help=text)
+    set_cmd.set_defaults(func=_run_config_set)
+
+    # `unset` only needs to know which keys to drop, so the flags are booleans.
+    unset_cmd = commands.add_parser("unset", help="Remove saved defaults")
+    for name in ("project", "dataset", "table"):
+        unset_cmd.add_argument(
+            f"--{name}", action="store_true", help=f"Remove default {name}"
+        )
+    unset_cmd.set_defaults(func=_run_config_unset)
+
+    return root
+def resolve_table_name(
+    csv_path: Path,
+    cli_table: str | None,
+    config: dict[str, str],
+) -> str:
+    """Pick the destination table name.
+
+    Precedence: the ``--table`` value, then the saved ``table`` default, and
+    finally the CSV file name without its extension.
+    """
+    configured = resolve_setting(cli_table, config, "table")
+    # Fall back to the file stem only when nothing truthy was configured.
+    return configured or csv_path.expanduser().resolve().stem
+def _emit_upload_result(
+    *,
+    output: str,
+    logs: list[str],
+    status: str,
+) -> None:
+    """Print the collected log lines and the final status.
+
+    With ``output == "json"`` a single JSON object (``logs`` joined by
+    newlines, plus ``status``) is written to stdout. Otherwise each log line
+    is printed to stderr when ``status`` is ``"error"`` and to stdout in all
+    other cases, followed by a ``Status: ...`` line on stdout.
+    """
+    if output != "json":
+        stream = sys.stderr if status == "error" else sys.stdout
+        for entry in logs:
+            print(entry, file=stream)
+        print(f"Status: {status}.")
+        return
+
+    payload = {"logs": "\n".join(logs), "status": status}
+    print(json.dumps(payload))
+def _run_upload(argv: list[str]) -> int:
+    """Run the upload command and return a process exit code.
+
+    Settings are resolved from the command line first and the saved config
+    second; the table name additionally falls back to the CSV file stem.
+
+    Returns:
+        0 on success, 2 when ``project`` or ``dataset`` cannot be resolved,
+        and 1 when the config file cannot be read or the upload raises.
+
+    Every outcome is reported through ``_emit_upload_result`` so that
+    ``--output json`` always produces exactly one JSON object.
+    """
+    args = _upload_parser().parse_args(argv)
+    logs: list[str] = []
+
+    try:
+        config = load_config()
+    except (OSError, ValueError) as exc:
+        # tomllib.TOMLDecodeError subclasses ValueError; OSError covers an
+        # unreadable file. Report it like any other failure instead of
+        # letting a traceback escape (which would also break JSON output).
+        logs.append(f"Could not read config file {CONFIG_PATH}: {exc}")
+        _emit_upload_result(output=args.output, logs=logs, status="error")
+        return 1
+
+    csv_path = args.csv_path.expanduser().resolve()
+    project = resolve_setting(args.project, config, "project")
+    dataset = resolve_setting(args.dataset, config, "dataset")
+    table = resolve_table_name(csv_path, args.table, config)
+    json_output = args.output == "json"
+
+    missing = [
+        name
+        for name, value in (("project", project), ("dataset", dataset))
+        if not value
+    ]
+    if missing:
+        names = ", ".join(f"--{name}" for name in missing)
+        logs.append(
+            f"Missing required setting(s): {names}. "
+            f"Set them on the command line or via `bqcsv config set`."
+        )
+        _emit_upload_result(output=args.output, logs=logs, status="error")
+        return 2
+
+    try:
+        upload_csv(
+            csv_path,
+            project=project,
+            dataset=dataset,
+            table=table,
+            replace=args.replace,
+            skip_header=not args.no_header,
+            schema_path=args.schema.expanduser().resolve() if args.schema else None,
+            # Collect progress lines only in JSON mode; in text mode no
+            # callback is passed (presumably upload_csv then prints progress
+            # itself -- confirm in bqcsv.uploader).
+            on_log=logs.append if json_output else None,
+        )
+    except Exception as exc:
+        # Top-level CLI boundary: turn any upload failure into an error report.
+        logs.append(str(exc))
+        _emit_upload_result(output=args.output, logs=logs, status="error")
+        return 1
+
+    # `project` is guaranteed non-empty here by the `missing` check above.
+    destination = f"{project}:{dataset}.{table}"
+    logs.append(f"Uploaded {args.csv_path} to {destination}")
+    _emit_upload_result(output=args.output, logs=logs, status="success")
+    return 0
+def _run_config_show(_: argparse.Namespace) -> int:
+    """Print the saved defaults and the config file location; always returns 0."""
+    saved = load_config()
+    if saved:
+        # Iterate CONFIG_KEYS so the output order is fixed.
+        for name in CONFIG_KEYS:
+            if name in saved:
+                print(f"{name} = {saved[name]}")
+        print(f"\nConfig file: {CONFIG_PATH}")
+    else:
+        print(f"No config saved at {CONFIG_PATH}")
+    return 0
+def _run_config_set(args: argparse.Namespace) -> int:
+    """Save the provided project/dataset/table values as defaults.
+
+    Returns 0 after saving, or 2 (with a message on stderr) when none of the
+    three flags carries a non-empty value.
+    """
+    updates: dict[str, str] = {}
+    for name in ("project", "dataset", "table"):
+        chosen = getattr(args, name)
+        if chosen:
+            updates[name] = chosen
+
+    if updates:
+        save_config(updates)
+        print(f"Saved defaults to {CONFIG_PATH}")
+        return 0
+
+    print("Provide at least one of --project, --dataset, or --table.", file=sys.stderr)
+    return 2
+def _run_config_unset(args: argparse.Namespace) -> int:
+    """Remove the selected keys from the saved defaults.
+
+    Returns 0 after removal, or 2 (with a message on stderr) when no key flag
+    was given.
+    """
+    selected: list[str] = []
+    for name in CONFIG_KEYS:
+        if getattr(args, name):
+            selected.append(name)
+
+    if selected:
+        unset_config(selected)
+        print(f"Removed {', '.join(selected)} from {CONFIG_PATH}")
+        return 0
+
+    print("Provide at least one of --project, --dataset, or --table.", file=sys.stderr)
+    return 2
+def _run_config(argv: list[str]) -> int:
+    """Parse ``bqcsv config ...`` arguments and dispatch to the chosen handler."""
+    namespace = _config_parser().parse_args(argv)
+    # Each subparser stored its handler under `func` via set_defaults().
+    handler = namespace.func
+    return handler(namespace)
+def main(argv: list[str] | None = None) -> int:
+    """CLI entry point; returns the process exit code.
+
+    ``argv`` defaults to ``sys.argv[1:]``. A leading ``config`` token routes
+    to the config subcommands; anything else is treated as an upload.
+    """
+    if argv is None:
+        argv = sys.argv[1:]
+    tokens = list(argv)  # work on a copy so the caller's sequence is untouched
+
+    if tokens[:1] == ["config"]:
+        return _run_config(tokens[1:])
+    return _run_upload(tokens)
+if __name__ == "__main__":
+    raise SystemExit(main())

bqcsv-1.0.0/bqcsv/config.py ADDED Viewed

@@ -0,0 +1,54 @@
+from __future__ import annotations
+import os
+import tomllib
+from pathlib import Path
+from typing import Any
+# Directory holding the bqcsv config. XDG_CONFIG_HOME is honoured, but an
+# unset *or empty* value falls back to ~/.config: the XDG spec treats empty as
+# unset, and an empty string would otherwise yield the relative path "bqcsv".
+CONFIG_DIR = Path(os.environ.get("XDG_CONFIG_HOME") or Path.home() / ".config") / "bqcsv"
+# TOML file that stores the saved defaults.
+CONFIG_PATH = CONFIG_DIR / "config.toml"
+# The only keys read from / written to the config file, in display order.
+CONFIG_KEYS = ("project", "dataset", "table")
+def _ensure_config_dir() -> None:
+    """Create the config directory (and any missing parents) if it is absent."""
+    CONFIG_DIR.mkdir(exist_ok=True, parents=True)
+def load_config() -> dict[str, str]:
+    """Read the saved defaults from the config file.
+
+    Returns an empty dict when the file does not exist. Only the keys listed
+    in ``CONFIG_KEYS`` are kept, entries with falsy values are dropped, and
+    every kept value is converted to ``str``.
+    """
+    if not CONFIG_PATH.is_file():
+        return {}
+
+    with CONFIG_PATH.open("rb") as handle:
+        parsed = tomllib.load(handle)
+
+    saved: dict[str, str] = {}
+    for name in CONFIG_KEYS:
+        raw = parsed.get(name)
+        if raw:
+            saved[name] = str(raw)
+    return saved
+def save_config(values: dict[str, str]) -> None:
+    """Merge ``values`` into the saved defaults and rewrite the config file.
+
+    Existing entries are kept unless ``values`` overrides them. The config
+    directory is created first if needed.
+    """
+    _ensure_config_dir()
+    merged = {**load_config(), **values}
+    rendered = "\n".join(
+        f'{name} = "{_escape_toml(text)}"' for name, text in merged.items()
+    )
+    CONFIG_PATH.write_text(rendered + "\n", encoding="utf-8")
+def unset_config(keys: list[str]) -> None:
+    """Drop ``keys`` from the saved defaults.
+
+    Does nothing when no config file exists. If no entries remain afterwards
+    the file is deleted; otherwise it is rewritten with the remaining ones.
+    """
+    if not CONFIG_PATH.is_file():
+        return
+
+    remaining = {
+        name: text for name, text in load_config().items() if name not in keys
+    }
+    if remaining:
+        rendered = "\n".join(
+            f'{name} = "{_escape_toml(text)}"' for name, text in remaining.items()
+        )
+        CONFIG_PATH.write_text(rendered + "\n", encoding="utf-8")
+    else:
+        # Nothing left to store: remove the file rather than leave it empty.
+        CONFIG_PATH.unlink(missing_ok=True)
+def resolve_setting(cli_value: str | None, config: dict[str, str], key: str) -> str | None:
+    """Return ``cli_value`` when it is non-empty, else ``config[key]`` (or None)."""
+    return cli_value or config.get(key)
+def _escape_toml(value: str) -> str:
+    """Escape ``value`` for use inside a double-quoted TOML basic string.
+
+    Backslashes and double quotes are escaped as before. Control characters
+    are escaped as well, because TOML does not allow them raw in a basic
+    string: a value containing e.g. a newline would otherwise produce a
+    config file that can no longer be parsed.
+    """
+    short_escapes = {
+        "\\": "\\\\",
+        '"': '\\"',
+        "\b": "\\b",
+        "\t": "\\t",
+        "\n": "\\n",
+        "\f": "\\f",
+        "\r": "\\r",
+    }
+    pieces: list[str] = []
+    for char in value:
+        if char in short_escapes:
+            pieces.append(short_escapes[char])
+        elif ord(char) < 0x20 or ord(char) == 0x7F:
+            # Remaining control characters have no short form in TOML.
+            pieces.append(f"\\u{ord(char):04X}")
+        else:
+            pieces.append(char)
+    return "".join(pieces)