bqcsv 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bqcsv-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,137 @@
1
+ Metadata-Version: 2.4
2
+ Name: bqcsv
3
+ Version: 1.0.0
4
+ Summary: Upload a local CSV file to a BigQuery table via the bq CLI
5
+ Requires-Python: >=3.10
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: google-cloud-bigquery>=3.0
8
+ Requires-Dist: pandas>=2.0
9
+
10
+ # bqcsv
11
+
12
+ Upload a local CSV file to BigQuery using the `bq` CLI and your existing `gcloud` authentication.
13
+
14
+ ## Why a dedicated CLI tool?
15
+
16
+ Out of the box, Google's `bq` CLI cannot create a table with column names inferred from a CSV file.
17
+
18
+ `bqcsv` fixes that:
19
+
20
+ * detects the schema from the CSV file
21
+ * creates a table with proper column names and types
22
+ * loads the CSV file using `bq load`
23
+
24
+ ## Authentication
25
+
26
+ No additional authentication is needed.
27
+
28
+ `bqcsv` uses your existing authentication via `gcloud auth login`.
29
+
30
+ ## Requirements
31
+
32
+ - Python 3.10+
33
+ - [Google Cloud SDK](https://cloud.google.com/sdk) with `bq` on your `PATH`
34
+
35
+ ## How to use `bqcsv`
36
+
37
+ ### Upload a CSV file to a table
38
+
39
+ To upload a CSV file, specify your project ID, dataset ID, and table name:
40
+
41
+ ```bash
42
+ bqcsv data.csv --project my-gcp-project --dataset staging --table events_raw
43
+ ```
44
+
45
+ The `--table` argument is optional. By default, `bqcsv` derives the table name from the CSV file:
46
+
47
+ ```bash
48
+ bqcsv data.csv --project my-gcp-project --dataset staging
49
+
50
+ # is identical to
51
+
52
+ bqcsv data.csv --project my-gcp-project --dataset staging --table data
53
+ ```
54
+
55
+ ### Saving your configuration
56
+
57
+ To avoid passing `--project`, `--dataset`, or `--table` on every run, save them to your local config:
58
+
59
+ ```bash
60
+ bqcsv config set --project my-gcp-project --dataset analytics --table events
61
+ bqcsv config show
62
+ ```
63
+
64
+ Defaults are stored in `~/.config/bqcsv/config.toml`.
65
+
66
+ After you set your defaults, you can call `bqcsv` with only the CSV path, omitting `--project`, `--dataset`, and `--table`:
67
+
68
+ ```bash
69
+ bqcsv data.csv
70
+ ```
71
+
72
+ If you have not set a default `--table` value, the table name is derived from the CSV file.
73
+
74
+ ## Development
75
+
76
+ ### Install from your local repo
77
+
78
+ ```bash
79
+ pip install -e .
80
+ ```
81
+
82
+ ### Testing
83
+
84
+ To delete a test table, use `bq`:
85
+
86
+ ```bash
87
+ bq rm -f -t PROJECT_ID:DATASET_ID.TABLE_NAME
88
+ ```
89
+
90
+ You can run the module directly when working on a new feature or fixing a bug:
91
+
92
+ ```sh
93
+ python -m bqcsv.cli config set --project PROJECT_ID --dataset DATASET_ID --table TEST_TABLE_NAME
94
+ ```
95
+
96
+ ## Releasing to PyPI
97
+
98
+ 1. **Bump the version** in both places (they must match):
99
+ - `pyproject.toml` → `[project].version`
100
+ - `bqcsv/__init__.py` → `__version__`
101
+
102
+ 2. **Install build tools** (one-time):
103
+
104
+ ```bash
105
+ pip install build twine
106
+ ```
107
+
108
+ 3. **Run tests** and commit the version bump.
109
+
110
+ 4. **Build the package**:
111
+
112
+ ```bash
113
+ python -m build
114
+ ```
115
+
116
+ This creates `dist/bqcsv-<version>.tar.gz` and `dist/bqcsv-<version>-py3-none-any.whl`.
117
+
118
+ 5. **Upload to PyPI**:
119
+
120
+ ```bash
121
+ twine upload dist/*
122
+ ```
123
+
124
+ On first upload, create an account at [pypi.org](https://pypi.org) and use an [API token](https://pypi.org/help/#apitoken) as the password (`__token__` as the username).
125
+
126
+ 6. **Tag the release** (optional but recommended):
127
+
128
+ ```bash
129
+ git tag vX.Y.Z
130
+ git push origin vX.Y.Z
131
+ ```
132
+
133
+ After publishing, users can install the new version with:
134
+
135
+ ```bash
136
+ pip install --upgrade bqcsv
137
+ ```
bqcsv-1.0.0/README.md ADDED
@@ -0,0 +1,128 @@
1
+ # bqcsv
2
+
3
+ Upload a local CSV file to BigQuery using the `bq` CLI and your existing `gcloud` authentication.
4
+
5
+ ## Why a dedicated CLI tool?
6
+
7
+ Out of the box, Google's `bq` CLI cannot create a table with column names inferred from a CSV file.
8
+
9
+ `bqcsv` fixes that:
10
+
11
+ * detects the schema from the CSV file
12
+ * creates a table with proper column names and types
13
+ * loads the CSV file using `bq load`
14
+
15
+ ## Authentication
16
+
17
+ No additional authentication is needed.
18
+
19
+ `bqcsv` uses your existing authentication via `gcloud auth login`.
20
+
21
+ ## Requirements
22
+
23
+ - Python 3.10+
24
+ - [Google Cloud SDK](https://cloud.google.com/sdk) with `bq` on your `PATH`
25
+
26
+ ## How to use `bqcsv`
27
+
28
+ ### Upload a CSV file to a table
29
+
30
+ To upload a CSV file, specify your project ID, dataset ID, and table name:
31
+
32
+ ```bash
33
+ bqcsv data.csv --project my-gcp-project --dataset staging --table events_raw
34
+ ```
35
+
36
+ The `--table` argument is optional. By default, `bqcsv` derives the table name from the CSV file:
37
+
38
+ ```bash
39
+ bqcsv data.csv --project my-gcp-project --dataset staging
40
+
41
+ # is identical to
42
+
43
+ bqcsv data.csv --project my-gcp-project --dataset staging --table data
44
+ ```
45
+
46
+ ### Saving your configuration
47
+
48
+ To avoid passing `--project`, `--dataset`, or `--table` on every run, save them to your local config:
49
+
50
+ ```bash
51
+ bqcsv config set --project my-gcp-project --dataset analytics --table events
52
+ bqcsv config show
53
+ ```
54
+
55
+ Defaults are stored in `~/.config/bqcsv/config.toml`.
56
+
57
+ After you set your defaults, you can call `bqcsv` with only the CSV path, omitting `--project`, `--dataset`, and `--table`:
58
+
59
+ ```bash
60
+ bqcsv data.csv
61
+ ```
62
+
63
+ If you have not set a default `--table` value, the table name is derived from the CSV file.
64
+
65
+ ## Development
66
+
67
+ ### Install from your local repo
68
+
69
+ ```bash
70
+ pip install -e .
71
+ ```
72
+
73
+ ### Testing
74
+
75
+ To delete a test table, use `bq`:
76
+
77
+ ```bash
78
+ bq rm -f -t PROJECT_ID:DATASET_ID.TABLE_NAME
79
+ ```
80
+
81
+ You can run the module directly when working on a new feature or fixing a bug:
82
+
83
+ ```sh
84
+ python -m bqcsv.cli config set --project PROJECT_ID --dataset DATASET_ID --table TEST_TABLE_NAME
85
+ ```
86
+
87
+ ## Releasing to PyPI
88
+
89
+ 1. **Bump the version** in both places (they must match):
90
+ - `pyproject.toml` → `[project].version`
91
+ - `bqcsv/__init__.py` → `__version__`
92
+
93
+ 2. **Install build tools** (one-time):
94
+
95
+ ```bash
96
+ pip install build twine
97
+ ```
98
+
99
+ 3. **Run tests** and commit the version bump.
100
+
101
+ 4. **Build the package**:
102
+
103
+ ```bash
104
+ python -m build
105
+ ```
106
+
107
+ This creates `dist/bqcsv-<version>.tar.gz` and `dist/bqcsv-<version>-py3-none-any.whl`.
108
+
109
+ 5. **Upload to PyPI**:
110
+
111
+ ```bash
112
+ twine upload dist/*
113
+ ```
114
+
115
+ On first upload, create an account at [pypi.org](https://pypi.org) and use an [API token](https://pypi.org/help/#apitoken) as the password (`__token__` as the username).
116
+
117
+ 6. **Tag the release** (optional but recommended):
118
+
119
+ ```bash
120
+ git tag vX.Y.Z
121
+ git push origin vX.Y.Z
122
+ ```
123
+
124
+ After publishing, users can install the new version with:
125
+
126
+ ```bash
127
+ pip install --upgrade bqcsv
128
+ ```
@@ -0,0 +1 @@
1
+ __version__ = "1.0.0"
@@ -0,0 +1,201 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ from bqcsv.config import (
9
+ CONFIG_KEYS,
10
+ CONFIG_PATH,
11
+ load_config,
12
+ resolve_setting,
13
+ save_config,
14
+ unset_config,
15
+ )
16
+ from bqcsv.uploader import upload_csv
17
+
18
+
19
def _upload_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the default (upload) command.

    Returns:
        A parser accepting the positional ``csv_path`` plus the optional
        ``--project``, ``--dataset``, ``--table``, ``--replace``,
        ``--no-header``, ``--schema`` and ``--output`` options.
    """
    upload = argparse.ArgumentParser(
        prog="bqcsv",
        description="Upload a local CSV file to BigQuery using the authenticated `bq` CLI.",
    )
    add = upload.add_argument

    add("csv_path", type=Path, help="Path to the local CSV file to upload")

    # Destination options: plain string values that take precedence over
    # whatever is stored in the config file.
    destination_options = (
        ("--project", "GCP project ID (overrides config)"),
        ("--dataset", "BigQuery dataset ID (overrides config)"),
        (
            "--table",
            "BigQuery table ID (overrides config; defaults to the CSV file name without extension)",
        ),
    )
    for flag, text in destination_options:
        add(flag, help=text)

    # Boolean switches, both off by default.
    switches = (
        ("--replace", "Replace the destination table instead of appending rows"),
        ("--no-header", "Treat the first row as data instead of a header row"),
    )
    for flag, text in switches:
        add(flag, action="store_true", help=text)

    add(
        "--schema",
        type=Path,
        help="Optional JSON schema file for the table (disables autodetect)",
    )
    add(
        "--output",
        choices=("text", "json"),
        default="text",
        help="Output format: text prints progress as it runs; json prints a single JSON object at the end",
    )
    return upload
53
+
54
+
55
def _config_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``bqcsv config`` sub-commands.

    Each sub-command (``show``, ``set``, ``unset``) stores its handler in the
    ``func`` attribute of the parsed namespace.

    Returns:
        A parser that requires exactly one of the sub-commands.
    """
    root = argparse.ArgumentParser(prog="bqcsv config")
    commands = root.add_subparsers(dest="config_command", required=True)

    # show: takes no options.
    commands.add_parser("show", help="Show saved defaults").set_defaults(
        func=_run_config_show
    )

    # set: each option takes the value to store.
    setter = commands.add_parser("set", help="Save default project/dataset/table")
    for flag, text in (
        ("--project", "Default GCP project ID"),
        ("--dataset", "Default BigQuery dataset ID"),
        ("--table", "Default BigQuery table ID"),
    ):
        setter.add_argument(flag, help=text)
    setter.set_defaults(func=_run_config_set)

    # unset: each option is a flag naming the key to remove.
    remover = commands.add_parser("unset", help="Remove saved defaults")
    for flag, text in (
        ("--project", "Remove default project"),
        ("--dataset", "Remove default dataset"),
        ("--table", "Remove default table"),
    ):
        remover.add_argument(flag, action="store_true", help=text)
    remover.set_defaults(func=_run_config_unset)

    return root
75
+
76
+
77
def resolve_table_name(
    csv_path: Path,
    cli_table: str | None,
    config: dict[str, str],
) -> str:
    """Pick the destination table name for an upload.

    Args:
        csv_path: Path of the CSV file being uploaded.
        cli_table: Value of ``--table`` from the command line, if any.
        config: Saved defaults.

    Returns:
        The table resolved from the command line or config when one is set;
        otherwise the CSV file's name without its final extension.
    """
    configured = resolve_setting(cli_table, config, "table")
    return configured if configured else csv_path.expanduser().resolve().stem
86
+
87
+
88
def _emit_upload_result(
    *,
    output: str,
    logs: list[str],
    status: str,
) -> None:
    """Print the outcome of an upload in the requested format.

    Args:
        output: ``"json"`` prints one JSON object with ``logs`` (newline-joined)
            and ``status`` to stdout; any other value prints plain text.
        logs: Messages collected during the run.
        status: Outcome label. In text mode, ``"error"`` sends the log lines
            to stderr; the final ``Status:`` line always goes to stdout.
    """
    if output != "json":
        stream = sys.stderr if status == "error" else sys.stdout
        for entry in logs:
            print(entry, file=stream)
        print(f"Status: {status}.")
        return

    payload = {"logs": "\n".join(logs), "status": status}
    print(json.dumps(payload))
100
+
101
+
102
def _run_upload(argv: list[str]) -> int:
    """Parse upload arguments, resolve the destination, and run the upload.

    Args:
        argv: Command-line arguments, excluding the program name.

    Returns:
        Exit code: ``0`` on success, ``1`` when the config file cannot be read
        or the upload raises, ``2`` when ``project`` or ``dataset`` is set
        neither on the command line nor in the config.
    """
    args = _upload_parser().parse_args(argv)
    json_output = args.output == "json"
    logs: list[str] = []

    # A malformed or unreadable config file must go through the same reporting
    # path as every other failure; otherwise `--output json` would produce a
    # traceback instead of its JSON object. TOML decode errors are ValueErrors.
    try:
        config = load_config()
    except (OSError, ValueError) as exc:
        logs.append(f"Could not read config file {CONFIG_PATH}: {exc}")
        _emit_upload_result(output=args.output, logs=logs, status="error")
        return 1

    csv_path = args.csv_path.expanduser().resolve()
    project = resolve_setting(args.project, config, "project")
    dataset = resolve_setting(args.dataset, config, "dataset")
    table = resolve_table_name(csv_path, args.table, config)

    missing = [
        name
        for name, value in (("project", project), ("dataset", dataset))
        if not value
    ]
    if missing:
        names = ", ".join(f"--{name}" for name in missing)
        logs.append(
            f"Missing required setting(s): {names}. "
            f"Set them on the command line or via `bqcsv config set`."
        )
        _emit_upload_result(output=args.output, logs=logs, status="error")
        return 2

    try:
        upload_csv(
            csv_path,
            project=project,
            dataset=dataset,
            table=table,
            replace=args.replace,
            skip_header=not args.no_header,
            schema_path=args.schema.expanduser().resolve() if args.schema else None,
            # In JSON mode messages are collected into `logs`; presumably
            # upload_csv reports them itself when on_log is None (confirm
            # against bqcsv.uploader).
            on_log=logs.append if json_output else None,
        )
    except Exception as exc:
        # Top-level boundary: report any upload failure as an error result.
        logs.append(str(exc))
        _emit_upload_result(output=args.output, logs=logs, status="error")
        return 1

    # `project` is guaranteed non-empty here by the `missing` guard above.
    destination = f"{project}:{dataset}.{table}"
    logs.append(f"Uploaded {args.csv_path} to {destination}")
    _emit_upload_result(output=args.output, logs=logs, status="success")
    return 0
146
+
147
+
148
def _run_config_show(_: argparse.Namespace) -> int:
    """Print the saved defaults, or a notice when none are saved.

    Args:
        _: Parsed arguments (unused).

    Returns:
        Always ``0``.
    """
    saved = load_config()
    if saved:
        # Walk CONFIG_KEYS rather than the dict so output order is fixed.
        for key in CONFIG_KEYS:
            if key not in saved:
                continue
            print(f"{key} = {saved[key]}")
        print(f"\nConfig file: {CONFIG_PATH}")
    else:
        print(f"No config saved at {CONFIG_PATH}")
    return 0
158
+
159
+
160
def _run_config_set(args: argparse.Namespace) -> int:
    """Save the defaults given on the command line.

    Args:
        args: Parsed arguments with ``project``, ``dataset`` and ``table``
            attributes; empty or missing values are ignored.

    Returns:
        ``0`` after saving, or ``2`` when no value was supplied.
    """
    supplied = {
        "project": args.project,
        "dataset": args.dataset,
        "table": args.table,
    }
    updates = {key: value for key, value in supplied.items() if value}

    if updates:
        save_config(updates)
        print(f"Saved defaults to {CONFIG_PATH}")
        return 0

    print("Provide at least one of --project, --dataset, or --table.", file=sys.stderr)
    return 2
176
+
177
+
178
def _run_config_unset(args: argparse.Namespace) -> int:
    """Remove the saved defaults selected on the command line.

    Args:
        args: Parsed arguments with one boolean attribute per config key.

    Returns:
        ``0`` after removing, or ``2`` when no key was selected.
    """
    selected: list[str] = []
    for key in CONFIG_KEYS:
        if getattr(args, key):
            selected.append(key)

    if selected:
        unset_config(selected)
        print(f"Removed {', '.join(selected)} from {CONFIG_PATH}")
        return 0

    print("Provide at least one of --project, --dataset, or --table.", file=sys.stderr)
    return 2
186
+
187
+
188
def _run_config(argv: list[str]) -> int:
    """Dispatch a ``bqcsv config`` invocation to its sub-command handler.

    Args:
        argv: Arguments following the ``config`` word.

    Returns:
        The exit code returned by the selected handler.
    """
    parsed = _config_parser().parse_args(argv)
    handler = parsed.func
    return handler(parsed)
191
+
192
+
193
def main(argv: list[str] | None = None) -> int:
    """Entry point: route to the ``config`` sub-commands or to the upload.

    Args:
        argv: Arguments excluding the program name; ``None`` means use
            ``sys.argv[1:]``.

    Returns:
        The exit code of the command that ran.
    """
    if argv is None:
        argv = sys.argv[1:]
    arguments = list(argv)

    # Only a leading "config" selects the config sub-commands; everything
    # else is treated as an upload invocation.
    if arguments[:1] == ["config"]:
        return _run_config(arguments[1:])
    return _run_upload(arguments)
198
+
199
+
200
if __name__ == "__main__":
    # Running the module directly: main()'s return value becomes the exit status.
    sys.exit(main())
@@ -0,0 +1,54 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import tomllib
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
# Directory holding bqcsv's config file. Per the XDG Base Directory spec, an
# unset *or empty* XDG_CONFIG_HOME falls back to ~/.config; using `or` (rather
# than a .get() default) keeps an empty value from resolving to a path relative
# to the current working directory.
CONFIG_DIR = Path(os.environ.get("XDG_CONFIG_HOME") or Path.home() / ".config") / "bqcsv"
CONFIG_PATH = CONFIG_DIR / "config.toml"

# Names of the settings that can be stored in the config file.
CONFIG_KEYS = ("project", "dataset", "table")
12
+
13
+
14
def _ensure_config_dir() -> None:
    """Create the config directory and any missing parents; no-op if it exists."""
    CONFIG_DIR.mkdir(exist_ok=True, parents=True)
16
+
17
+
18
+ def load_config() -> dict[str, str]:
19
+ if not CONFIG_PATH.is_file():
20
+ return {}
21
+ with CONFIG_PATH.open("rb") as f:
22
+ data = tomllib.load(f)
23
+ return {key: str(data[key]) for key in CONFIG_KEYS if key in data and data[key]}
24
+
25
+
26
def save_config(values: dict[str, str]) -> None:
    """Merge *values* into the saved defaults and rewrite the config file.

    Args:
        values: Settings to store; they override any existing entries with
            the same key.
    """
    _ensure_config_dir()
    merged = {**load_config(), **values}
    body = "\n".join(
        f'{key} = "{_escape_toml(value)}"' for key, value in merged.items()
    )
    CONFIG_PATH.write_text(body + "\n", encoding="utf-8")
32
+
33
+
34
def unset_config(keys: list[str]) -> None:
    """Remove *keys* from the saved defaults.

    Does nothing when the config file does not exist. When no settings
    remain, the config file is deleted instead of being rewritten.

    Args:
        keys: Names of the settings to remove; unknown names are ignored.
    """
    if not CONFIG_PATH.is_file():
        return

    remaining = {
        key: value for key, value in load_config().items() if key not in keys
    }
    if remaining:
        body = "\n".join(
            f'{key} = "{_escape_toml(value)}"' for key, value in remaining.items()
        )
        CONFIG_PATH.write_text(body + "\n", encoding="utf-8")
    else:
        CONFIG_PATH.unlink(missing_ok=True)
45
+
46
+
47
+ def resolve_setting(cli_value: str | None, config: dict[str, str], key: str) -> str | None:
48
+ if cli_value:
49
+ return cli_value
50
+ return config.get(key)
51
+
52
+
53
+ def _escape_toml(value: str) -> str:
54
+ return value.replace("\\", "\\\\").replace('"', '\\"')