chub-dev 0.1.0 → 0.1.2-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -0
- package/bin/chub-mcp +2 -0
- package/dist/airtable/docs/database/javascript/DOC.md +1437 -0
- package/dist/airtable/docs/database/python/DOC.md +1735 -0
- package/dist/amplitude/docs/analytics/javascript/DOC.md +1282 -0
- package/dist/amplitude/docs/analytics/python/DOC.md +1199 -0
- package/dist/anthropic/docs/claude-api/javascript/DOC.md +503 -0
- package/dist/anthropic/docs/claude-api/python/DOC.md +389 -0
- package/dist/asana/docs/tasks/DOC.md +1396 -0
- package/dist/assemblyai/docs/transcription/DOC.md +1043 -0
- package/dist/atlassian/docs/confluence/javascript/DOC.md +1347 -0
- package/dist/atlassian/docs/confluence/python/DOC.md +1604 -0
- package/dist/auth0/docs/identity/javascript/DOC.md +968 -0
- package/dist/auth0/docs/identity/python/DOC.md +1199 -0
- package/dist/aws/docs/s3/javascript/DOC.md +1773 -0
- package/dist/aws/docs/s3/python/DOC.md +1807 -0
- package/dist/binance/docs/trading/javascript/DOC.md +1315 -0
- package/dist/binance/docs/trading/python/DOC.md +1454 -0
- package/dist/braintree/docs/gateway/javascript/DOC.md +1278 -0
- package/dist/braintree/docs/gateway/python/DOC.md +1179 -0
- package/dist/chromadb/docs/embeddings-db/javascript/DOC.md +1263 -0
- package/dist/chromadb/docs/embeddings-db/python/DOC.md +1707 -0
- package/dist/clerk/docs/auth/javascript/DOC.md +1220 -0
- package/dist/clerk/docs/auth/python/DOC.md +274 -0
- package/dist/cloudflare/docs/workers/javascript/DOC.md +918 -0
- package/dist/cloudflare/docs/workers/python/DOC.md +994 -0
- package/dist/cockroachdb/docs/distributed-db/DOC.md +1500 -0
- package/dist/cohere/docs/llm/DOC.md +1335 -0
- package/dist/datadog/docs/monitoring/javascript/DOC.md +1740 -0
- package/dist/datadog/docs/monitoring/python/DOC.md +1815 -0
- package/dist/deepgram/docs/speech/javascript/DOC.md +885 -0
- package/dist/deepgram/docs/speech/python/DOC.md +685 -0
- package/dist/deepl/docs/translation/javascript/DOC.md +887 -0
- package/dist/deepl/docs/translation/python/DOC.md +944 -0
- package/dist/deepseek/docs/llm/DOC.md +1220 -0
- package/dist/directus/docs/headless-cms/javascript/DOC.md +1128 -0
- package/dist/directus/docs/headless-cms/python/DOC.md +1276 -0
- package/dist/discord/docs/bot/javascript/DOC.md +1090 -0
- package/dist/discord/docs/bot/python/DOC.md +1130 -0
- package/dist/elasticsearch/docs/search/DOC.md +1634 -0
- package/dist/elevenlabs/docs/text-to-speech/javascript/DOC.md +336 -0
- package/dist/elevenlabs/docs/text-to-speech/python/DOC.md +552 -0
- package/dist/firebase/docs/auth/DOC.md +1015 -0
- package/dist/gemini/docs/genai/javascript/DOC.md +691 -0
- package/dist/gemini/docs/genai/python/DOC.md +555 -0
- package/dist/github/docs/octokit/DOC.md +1560 -0
- package/dist/google/docs/bigquery/javascript/DOC.md +1688 -0
- package/dist/google/docs/bigquery/python/DOC.md +1503 -0
- package/dist/hubspot/docs/crm/javascript/DOC.md +1805 -0
- package/dist/hubspot/docs/crm/python/DOC.md +2033 -0
- package/dist/huggingface/docs/transformers/DOC.md +948 -0
- package/dist/intercom/docs/messaging/javascript/DOC.md +1844 -0
- package/dist/intercom/docs/messaging/python/DOC.md +1797 -0
- package/dist/jira/docs/issues/javascript/DOC.md +1420 -0
- package/dist/jira/docs/issues/python/DOC.md +1492 -0
- package/dist/kafka/docs/streaming/javascript/DOC.md +1671 -0
- package/dist/kafka/docs/streaming/python/DOC.md +1464 -0
- package/dist/landingai-ade/docs/api/DOC.md +620 -0
- package/dist/landingai-ade/docs/sdk/python/DOC.md +489 -0
- package/dist/landingai-ade/docs/sdk/typescript/DOC.md +542 -0
- package/dist/landingai-ade/skills/SKILL.md +489 -0
- package/dist/launchdarkly/docs/feature-flags/javascript/DOC.md +1191 -0
- package/dist/launchdarkly/docs/feature-flags/python/DOC.md +1671 -0
- package/dist/linear/docs/tracker/DOC.md +1554 -0
- package/dist/livekit/docs/realtime/javascript/DOC.md +303 -0
- package/dist/livekit/docs/realtime/python/DOC.md +163 -0
- package/dist/mailchimp/docs/marketing/DOC.md +1420 -0
- package/dist/meilisearch/docs/search/DOC.md +1241 -0
- package/dist/microsoft/docs/onedrive/javascript/DOC.md +1421 -0
- package/dist/microsoft/docs/onedrive/python/DOC.md +1549 -0
- package/dist/mongodb/docs/atlas/DOC.md +2041 -0
- package/dist/notion/docs/workspace-api/javascript/DOC.md +1435 -0
- package/dist/notion/docs/workspace-api/python/DOC.md +1400 -0
- package/dist/okta/docs/identity/javascript/DOC.md +1171 -0
- package/dist/okta/docs/identity/python/DOC.md +1401 -0
- package/dist/openai/docs/chat/javascript/DOC.md +407 -0
- package/dist/openai/docs/chat/python/DOC.md +568 -0
- package/dist/paypal/docs/checkout/DOC.md +278 -0
- package/dist/pinecone/docs/sdk/javascript/DOC.md +984 -0
- package/dist/pinecone/docs/sdk/python/DOC.md +1395 -0
- package/dist/plaid/docs/banking/javascript/DOC.md +1163 -0
- package/dist/plaid/docs/banking/python/DOC.md +1203 -0
- package/dist/playwright-community/skills/login-flows/SKILL.md +108 -0
- package/dist/postmark/docs/transactional-email/DOC.md +1168 -0
- package/dist/prisma/docs/orm/javascript/DOC.md +1419 -0
- package/dist/prisma/docs/orm/python/DOC.md +1317 -0
- package/dist/qdrant/docs/vector-search/javascript/DOC.md +1221 -0
- package/dist/qdrant/docs/vector-search/python/DOC.md +1653 -0
- package/dist/rabbitmq/docs/message-queue/javascript/DOC.md +1193 -0
- package/dist/rabbitmq/docs/message-queue/python/DOC.md +1243 -0
- package/dist/razorpay/docs/payments/javascript/DOC.md +1219 -0
- package/dist/razorpay/docs/payments/python/DOC.md +1330 -0
- package/dist/redis/docs/key-value/javascript/DOC.md +1851 -0
- package/dist/redis/docs/key-value/python/DOC.md +2054 -0
- package/dist/registry.json +2817 -0
- package/dist/replicate/docs/model-hosting/DOC.md +1318 -0
- package/dist/resend/docs/email/DOC.md +1271 -0
- package/dist/salesforce/docs/crm/javascript/DOC.md +1241 -0
- package/dist/salesforce/docs/crm/python/DOC.md +1183 -0
- package/dist/search-index.json +1 -0
- package/dist/sendgrid/docs/email-api/javascript/DOC.md +371 -0
- package/dist/sendgrid/docs/email-api/python/DOC.md +656 -0
- package/dist/sentry/docs/error-tracking/javascript/DOC.md +1073 -0
- package/dist/sentry/docs/error-tracking/python/DOC.md +1309 -0
- package/dist/shopify/docs/storefront/DOC.md +457 -0
- package/dist/slack/docs/workspace/javascript/DOC.md +933 -0
- package/dist/slack/docs/workspace/python/DOC.md +271 -0
- package/dist/square/docs/payments/javascript/DOC.md +1855 -0
- package/dist/square/docs/payments/python/DOC.md +1728 -0
- package/dist/stripe/docs/api/DOC.md +1727 -0
- package/dist/stripe/docs/payments/DOC.md +1726 -0
- package/dist/stytch/docs/auth/javascript/DOC.md +1813 -0
- package/dist/stytch/docs/auth/python/DOC.md +1962 -0
- package/dist/supabase/docs/client/DOC.md +1606 -0
- package/dist/twilio/docs/messaging/python/DOC.md +469 -0
- package/dist/twilio/docs/messaging/typescript/DOC.md +946 -0
- package/dist/vercel/docs/platform/DOC.md +1940 -0
- package/dist/weaviate/docs/vector-db/javascript/DOC.md +1268 -0
- package/dist/weaviate/docs/vector-db/python/DOC.md +1388 -0
- package/dist/zendesk/docs/support/javascript/DOC.md +2150 -0
- package/dist/zendesk/docs/support/python/DOC.md +2297 -0
- package/package.json +22 -6
- package/skills/get-api-docs/SKILL.md +84 -0
- package/src/commands/annotate.js +83 -0
- package/src/commands/build.js +12 -1
- package/src/commands/feedback.js +150 -0
- package/src/commands/get.js +83 -42
- package/src/commands/search.js +7 -0
- package/src/index.js +43 -17
- package/src/lib/analytics.js +90 -0
- package/src/lib/annotations.js +57 -0
- package/src/lib/bm25.js +170 -0
- package/src/lib/cache.js +69 -6
- package/src/lib/config.js +8 -3
- package/src/lib/identity.js +99 -0
- package/src/lib/registry.js +103 -20
- package/src/lib/telemetry.js +86 -0
- package/src/mcp/server.js +177 -0
- package/src/mcp/tools.js +251 -0

+++ package/dist/google/docs/bigquery/python/DOC.md
@@ -0,0 +1,1503 @@
---
name: bigquery
description: "Google Cloud BigQuery Python client library for data warehouse queries and analytics"
metadata:
  languages: "python"
  versions: "3.38.0"
  updated-on: "2026-03-02"
  source: maintainer
tags: "google,bigquery,data-warehouse,sql,analytics"
---

# Google Cloud BigQuery - Python Client Library

## Golden Rule

**ALWAYS use `google-cloud-bigquery` version 3.38.0 or higher.**

This is the official, maintained Google Cloud client library for BigQuery. Do NOT use deprecated packages like `bigquery` (without the google-cloud prefix) or any unofficial libraries.

**Installation:**

```bash
pip install google-cloud-bigquery
```

**IMPORTANT:** BigQuery does NOT support API keys for authentication. You MUST use OAuth 2.0 credentials via service accounts or Application Default Credentials (ADC).
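
If you are unsure which credentials ADC will pick up, a quick sanity check is possible with `google.auth.default()` (from the `google-auth` package the client library depends on). A minimal sketch:

```python
import google.auth

# ADC resolution order: GOOGLE_APPLICATION_CREDENTIALS, the gcloud ADC file,
# then the attached service account on GCP compute environments.
credentials, project = google.auth.default()
print(f"ADC resolved credentials for project: {project}")
```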

## Installation

### Install the Package

```bash
pip install google-cloud-bigquery
```

### Install with Optional Dependencies

```bash
# With pandas support
pip install google-cloud-bigquery[pandas]

# With all optional dependencies
pip install google-cloud-bigquery[all]
```

### Authentication Setup

BigQuery uses Application Default Credentials (ADC). Set the environment variable to point to your service account key:

```bash
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account-key.json"
```

### Environment Variables

Create a `.env` file:

```bash
GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json
PROJECT_ID=your-gcp-project-id
DATASET_ID=your_dataset_id
```

Load environment variables in your application:

```python
import os
from dotenv import load_dotenv

load_dotenv()

project_id = os.getenv('PROJECT_ID')
dataset_id = os.getenv('DATASET_ID')
```

## Initialization

### Basic Client Initialization

```python
from google.cloud import bigquery

# Automatically uses GOOGLE_APPLICATION_CREDENTIALS
client = bigquery.Client()
```

### Explicit Configuration

```python
from google.cloud import bigquery
from google.oauth2 import service_account

# Build credentials explicitly instead of relying on ADC
credentials = service_account.Credentials.from_service_account_file(
    '/path/to/service-account-key.json'
)

client = bigquery.Client(
    project='your-gcp-project-id',
    credentials=credentials
)
```

### Using Service Account Key File

```python
from google.cloud import bigquery

client = bigquery.Client.from_service_account_json(
    '/path/to/service-account-key.json'
)
```

### With Project ID

```python
from google.cloud import bigquery

client = bigquery.Client(project='your-gcp-project-id')
```

### With Location

```python
from google.cloud import bigquery

client = bigquery.Client(
    project='your-gcp-project-id',
    location='US'  # or 'EU', 'asia-northeast1', etc.
)
```

## Querying Data

### Basic Query

```python
from google.cloud import bigquery

client = bigquery.Client()

query = """
SELECT name, state, year, number
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = 'TX'
LIMIT 100
"""

query_job = client.query(query)
results = query_job.result()

for row in results:
    print(f"{row.name}: {row.number}")
```
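
Rows returned by the client support attribute, key, and index access interchangeably. A small self-contained sketch (using `query_and_wait()`, covered next):

```python
from google.cloud import bigquery

client = bigquery.Client()
rows = client.query_and_wait("SELECT 'Alice' AS name, 42 AS number")

for row in rows:
    print(row.name)     # attribute access
    print(row["name"])  # key access
    print(row[0])       # index access, in SELECT-list order
```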

### Query with query_and_wait()

```python
from google.cloud import bigquery

client = bigquery.Client()

query = """
SELECT name, SUM(number) as total
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = 'TX'
GROUP BY name
ORDER BY total DESC
LIMIT 10
"""

rows = client.query_and_wait(query)

for row in rows:
    print(f"{row.name}: {row.total}")
```

### Query to DataFrame

```python
from google.cloud import bigquery

client = bigquery.Client()

query = """
SELECT name, state, year, number
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = 'TX'
LIMIT 1000
"""

df = client.query(query).to_dataframe()
print(df.head())
```

### Parameterized Queries

```python
from google.cloud import bigquery

client = bigquery.Client()

query = """
SELECT name, state, year, number
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state IN UNNEST(@states)
AND year >= @year
ORDER BY number DESC
LIMIT @limit
"""

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]),
        bigquery.ScalarQueryParameter("year", "INT64", 2000),
        bigquery.ScalarQueryParameter("limit", "INT64", 20),
    ]
)

query_job = client.query(query, job_config=job_config)
results = query_job.result()

for row in results:
    print(row)
```

### Query with Struct Parameters

```python
from google.cloud import bigquery

client = bigquery.Client()

query = """
SELECT @struct_value.name as name, @struct_value.age as age
"""

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.StructQueryParameter(
            "struct_value",
            bigquery.ScalarQueryParameter("name", "STRING", "Tom"),
            bigquery.ScalarQueryParameter("age", "INT64", 30),
        )
    ]
)

query_job = client.query(query, job_config=job_config)
results = query_job.result()

for row in results:
    print(f"{row.name}: {row.age}")
```

### Query with Array of Structs

```python
from google.cloud import bigquery

client = bigquery.Client()

query = """
SELECT country.name, country.capital_city
FROM UNNEST(@countries) as country
"""

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ArrayQueryParameter(
            "countries",
            bigquery.StructQueryParameterType(
                bigquery.ScalarQueryParameterType("name", "STRING"),
                bigquery.ScalarQueryParameterType("capital_city", "STRING"),
            ),
            [
                {"name": "France", "capital_city": "Paris"},
                {"name": "Germany", "capital_city": "Berlin"},
            ]
        )
    ]
)

query_job = client.query(query, job_config=job_config)
results = query_job.result()

for row in results:
    print(f"{row.name}: {row.capital_city}")
```

### Query with Timestamp Parameters

```python
from google.cloud import bigquery
from datetime import datetime

client = bigquery.Client()

query = """
SELECT @timestamp_value as timestamp_col
"""

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter(
            "timestamp_value",
            "TIMESTAMP",
            datetime(2024, 1, 1, 0, 0, 0)
        )
    ]
)

query_job = client.query(query, job_config=job_config)
results = query_job.result()

for row in results:
    print(row.timestamp_col)
```

### Dry Run Query (Check Bytes Processed)

```python
from google.cloud import bigquery

client = bigquery.Client()

query = """
SELECT name, state
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = 'TX'
"""

job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
query_job = client.query(query, job_config=job_config)

print(f"This query will process {query_job.total_bytes_processed} bytes.")
```
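
To turn the dry-run byte count into a rough cost figure, convert bytes to TiB and multiply by your on-demand rate. The default rate below is a placeholder assumption; on-demand pricing varies by region, so check current BigQuery pricing. A sketch reusing `query_job` from the dry run above:

```python
def estimate_query_cost(bytes_processed: int, price_per_tib: float = 6.25) -> float:
    """Rough on-demand estimate; price_per_tib is an assumed placeholder rate."""
    return (bytes_processed / 2**40) * price_per_tib

print(f"Estimated cost: ${estimate_query_cost(query_job.total_bytes_processed):.4f}")
```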

### Query Job with Manual Job Control

```python
from google.cloud import bigquery

client = bigquery.Client()

query = """
SELECT name, COUNT(*) as count
FROM `bigquery-public-data.usa_names.usa_1910_2013`
GROUP BY name
ORDER BY count DESC
LIMIT 10
"""

query_job = client.query(query)
print(f"Job {query_job.job_id} started.")

results = query_job.result()

print("Rows:")
for row in results:
    print(row)
```

### Polling Query Job Status

```python
from google.cloud import bigquery
import time

client = bigquery.Client()

query = "SELECT 1 as value"
query_job = client.query(query)

print(f"Job {query_job.job_id} started.")

while query_job.state != 'DONE':
    time.sleep(1)
    query_job.reload()
    print(f"Job state: {query_job.state}")

print(f"Job completed. Processed {query_job.total_bytes_processed} bytes.")
```
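
As an alternative to hand-rolled polling, `result()` accepts a timeout and raises `concurrent.futures.TimeoutError` if the job is not done in time. A minimal sketch:

```python
import concurrent.futures

from google.cloud import bigquery

client = bigquery.Client()
query_job = client.query("SELECT 1 as value")

try:
    rows = query_job.result(timeout=30)  # block for at most 30 seconds
    for row in rows:
        print(row.value)
except concurrent.futures.TimeoutError:
    print(f"Job {query_job.job_id} still running; cancelling.")
    query_job.cancel()
```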

## Datasets

### Create Dataset

```python
from google.cloud import bigquery

client = bigquery.Client()

dataset_id = f"{client.project}.my_new_dataset"
dataset = bigquery.Dataset(dataset_id)
dataset = client.create_dataset(dataset)

print(f"Created dataset {dataset.dataset_id}")
```

### Create Dataset with Options

```python
from google.cloud import bigquery

client = bigquery.Client()

dataset_id = f"{client.project}.my_new_dataset"
dataset = bigquery.Dataset(dataset_id)

dataset.location = "US"
dataset.description = "My dataset description"
dataset.default_table_expiration_ms = 3600000  # 1 hour

dataset = client.create_dataset(dataset)
print(f"Created dataset {dataset.dataset_id}")
```

### Get Dataset

```python
from google.cloud import bigquery

client = bigquery.Client()

dataset_id = f"{client.project}.my_dataset"
dataset = client.get_dataset(dataset_id)

print(f"Dataset {dataset.dataset_id}")
print(f"Description: {dataset.description}")
print(f"Location: {dataset.location}")
```

### List Datasets

```python
from google.cloud import bigquery

client = bigquery.Client()

datasets = list(client.list_datasets())

if datasets:
    print("Datasets:")
    for dataset in datasets:
        print(f"  {dataset.dataset_id}")
else:
    print("No datasets found.")
```

### Update Dataset

```python
from google.cloud import bigquery

client = bigquery.Client()

dataset_id = f"{client.project}.my_dataset"
dataset = client.get_dataset(dataset_id)

dataset.description = "Updated description"
dataset = client.update_dataset(dataset, ["description"])

print(f"Updated dataset {dataset.dataset_id}")
```

### Delete Dataset

```python
from google.cloud import bigquery

client = bigquery.Client()

dataset_id = f"{client.project}.my_dataset"
client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True)

print(f"Deleted dataset {dataset_id}")
```
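
There is no dedicated exists() call in the client; the usual pattern is to attempt a get and catch `NotFound`. A minimal sketch:

```python
from google.cloud import bigquery
from google.api_core.exceptions import NotFound

client = bigquery.Client()

def dataset_exists(client: bigquery.Client, dataset_id: str) -> bool:
    """Return True if the dataset can be fetched, False if it does not exist."""
    try:
        client.get_dataset(dataset_id)
        return True
    except NotFound:
        return False

print(dataset_exists(client, f"{client.project}.my_dataset"))
```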

## Tables

### Create Table

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"

schema = [
    bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
    bigquery.SchemaField("email", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("created_at", "TIMESTAMP", mode="REQUIRED"),
]

table = bigquery.Table(table_id, schema=schema)
table = client.create_table(table)

print(f"Created table {table.table_id}")
```

### Create Table with Nested Schema

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_nested_table"

schema = [
    bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField(
        "address",
        "RECORD",
        mode="NULLABLE",
        fields=[
            bigquery.SchemaField("street", "STRING", mode="NULLABLE"),
            bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
            bigquery.SchemaField("zip", "STRING", mode="NULLABLE"),
        ],
    ),
    bigquery.SchemaField("tags", "STRING", mode="REPEATED"),
]

table = bigquery.Table(table_id, schema=schema)
table = client.create_table(table)

print(f"Created table {table.table_id}")
```

### Get Table Metadata

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
table = client.get_table(table_id)

print(f"Table {table.table_id}")
print(f"Schema: {table.schema}")
print(f"Num rows: {table.num_rows}")
print(f"Num bytes: {table.num_bytes}")
```

### List Tables

```python
from google.cloud import bigquery

client = bigquery.Client()

dataset_id = f"{client.project}.my_dataset"
tables = list(client.list_tables(dataset_id))

if tables:
    print("Tables:")
    for table in tables:
        print(f"  {table.table_id}")
else:
    print("No tables found.")
```

### Delete Table

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
client.delete_table(table_id, not_found_ok=True)

print(f"Deleted table {table_id}")
```

### Update Table Schema

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
table = client.get_table(table_id)

original_schema = table.schema
new_schema = original_schema[:]
new_schema.append(bigquery.SchemaField("new_field", "STRING", mode="NULLABLE"))

table.schema = new_schema
table = client.update_table(table, ["schema"])

print(f"Updated table {table.table_id}")
```

## Inserting Data

### Streaming Insert (Single Row)

```python
from google.cloud import bigquery
from datetime import datetime

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"

rows_to_insert = [
    {"name": "Tom", "age": 30, "email": "tom@example.com", "created_at": datetime.now().isoformat()}
]

errors = client.insert_rows_json(table_id, rows_to_insert)

if errors == []:
    print("New rows have been added.")
else:
    print(f"Encountered errors while inserting rows: {errors}")
```

### Streaming Insert (Multiple Rows)

```python
from google.cloud import bigquery
from datetime import datetime

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"

rows_to_insert = [
    {"name": "Tom", "age": 30, "email": "tom@example.com", "created_at": datetime.now().isoformat()},
    {"name": "Jane", "age": 32, "email": "jane@example.com", "created_at": datetime.now().isoformat()},
    {"name": "Bob", "age": 28, "email": "bob@example.com", "created_at": datetime.now().isoformat()},
]

errors = client.insert_rows_json(table_id, rows_to_insert)

if errors == []:
    print(f"{len(rows_to_insert)} rows have been added.")
else:
    print(f"Encountered errors while inserting rows: {errors}")
```

### Streaming Insert with Row Objects

```python
from google.cloud import bigquery
from datetime import datetime, timezone

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
table = client.get_table(table_id)

# Tuple values are matched to the table schema positionally, so the order
# (and count) must mirror the schema: name, age, email, created_at.
rows_to_insert = [
    ("Tom", 30, "tom@example.com", datetime.now(timezone.utc)),
    ("Jane", 32, "jane@example.com", datetime.now(timezone.utc)),
]

errors = client.insert_rows(table, rows_to_insert)

if errors == []:
    print("New rows have been added.")
else:
    print(f"Encountered errors while inserting rows: {errors}")
```

### Streaming Insert with Insert IDs (Deduplication)

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"

rows_to_insert = [
    {"name": "Tom", "age": 30},
    {"name": "Jane", "age": 32},
]

row_ids = ["unique-id-1", "unique-id-2"]

errors = client.insert_rows_json(table_id, rows_to_insert, row_ids=row_ids)

if errors == []:
    print("New rows have been added with insert IDs.")
else:
    print(f"Encountered errors while inserting rows: {errors}")
```
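
Insert IDs give best-effort deduplication over a short retry window, not an exactness guarantee; a common pattern is to generate one opaque ID per row with `uuid`. A sketch:

```python
import uuid

from google.cloud import bigquery

client = bigquery.Client()
table_id = f"{client.project}.my_dataset.my_table"

rows_to_insert = [{"name": "Tom", "age": 30}, {"name": "Jane", "age": 32}]
# BigQuery drops rows whose insert ID it has seen recently, so a retried
# request does not double-insert.
row_ids = [str(uuid.uuid4()) for _ in rows_to_insert]

errors = client.insert_rows_json(table_id, rows_to_insert, row_ids=row_ids)
print(errors or "Rows inserted.")
```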

### Streaming Insert with Nested Data

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_nested_table"

rows_to_insert = [
    {
        "id": 1,
        "address": {
            "street": "123 Main St",
            "city": "Austin",
            "zip": "78701"
        },
        "tags": ["important", "customer"]
    }
]

errors = client.insert_rows_json(table_id, rows_to_insert)

if errors == []:
    print("New rows have been added.")
else:
    print(f"Encountered errors while inserting rows: {errors}")
```

## Loading Data

### Load from Cloud Storage (CSV)

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
uri = "gs://my-bucket/data.csv"

job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("age", "INTEGER"),
        bigquery.SchemaField("email", "STRING"),
    ],
    skip_leading_rows=1,
    source_format=bigquery.SourceFormat.CSV,
)

load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()

print(f"Loaded {load_job.output_rows} rows.")
```

### Load from Cloud Storage (JSON)

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
uri = "gs://my-bucket/data.json"

job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
    autodetect=True,
)

load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()

print(f"Loaded {load_job.output_rows} rows.")
```

### Load from Local File

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
filename = "./data.csv"

job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("age", "INTEGER"),
    ],
    skip_leading_rows=1,
    source_format=bigquery.SourceFormat.CSV,
)

with open(filename, "rb") as source_file:
    load_job = client.load_table_from_file(source_file, table_id, job_config=job_config)

load_job.result()
print(f"Loaded {load_job.output_rows} rows.")
```

### Load with Write Disposition

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
uri = "gs://my-bucket/data.csv"

job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,
    autodetect=True,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,  # WRITE_APPEND, WRITE_EMPTY
)

load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()

print(f"Loaded {load_job.output_rows} rows.")
```

### Load Parquet from Cloud Storage

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
uri = "gs://my-bucket/data.parquet"

job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.PARQUET,
)

load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()

print(f"Loaded {load_job.output_rows} rows.")
```

### Load from DataFrame

```python
from google.cloud import bigquery
import pandas as pd

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"

df = pd.DataFrame({
    "name": ["Tom", "Jane", "Bob"],
    "age": [30, 32, 28],
    "email": ["tom@example.com", "jane@example.com", "bob@example.com"]
})

job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("age", "INTEGER"),
        bigquery.SchemaField("email", "STRING"),
    ]
)

load_job = client.load_table_from_dataframe(df, table_id, job_config=job_config)
load_job.result()

print(f"Loaded {load_job.output_rows} rows.")
```

### Load Multiple Files from Cloud Storage

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
uris = [
    "gs://my-bucket/data1.csv",
    "gs://my-bucket/data2.csv",
    "gs://my-bucket/data3.csv",
]

job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,
    autodetect=True,
)

load_job = client.load_table_from_uri(uris, table_id, job_config=job_config)
load_job.result()

print(f"Loaded {load_job.output_rows} rows.")
```
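
When the files share a prefix, a single wildcard URI can replace the explicit list (BigQuery allows one `*` per URI). A sketch:

```python
from google.cloud import bigquery

client = bigquery.Client()
table_id = f"{client.project}.my_dataset.my_table"

job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,
    autodetect=True,
)

# Matches data1.csv, data2.csv, data3.csv, ...
uri = "gs://my-bucket/data*.csv"
load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()

print(f"Loaded {load_job.output_rows} rows.")
```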

## Exporting Data

### Export to Cloud Storage (CSV)

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
destination_uri = "gs://my-bucket/export.csv"

extract_job = client.extract_table(table_id, destination_uri)
extract_job.result()

print(f"Exported {table_id} to {destination_uri}")
```

### Export to Cloud Storage with Options

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
destination_uri = "gs://my-bucket/export-*.csv"

job_config = bigquery.ExtractJobConfig(
    compression=bigquery.Compression.GZIP,
    destination_format=bigquery.DestinationFormat.CSV,
    print_header=True,
)

extract_job = client.extract_table(table_id, destination_uri, job_config=job_config)
extract_job.result()

print(f"Exported {table_id} to {destination_uri}")
```

### Export to Cloud Storage (JSON)

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
destination_uri = "gs://my-bucket/export-*.json"

job_config = bigquery.ExtractJobConfig(
    destination_format=bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON
)

extract_job = client.extract_table(table_id, destination_uri, job_config=job_config)
extract_job.result()

print(f"Exported {table_id} to {destination_uri}")
```

### Export to Cloud Storage (Avro)

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
destination_uri = "gs://my-bucket/export-*.avro"

job_config = bigquery.ExtractJobConfig(
    destination_format=bigquery.DestinationFormat.AVRO
)

extract_job = client.extract_table(table_id, destination_uri, job_config=job_config)
extract_job.result()

print(f"Exported {table_id} to {destination_uri}")
```

### Export Query Results to Cloud Storage

```python
from google.cloud import bigquery

client = bigquery.Client()

query = """
SELECT name, state, year
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = 'TX'
LIMIT 1000
"""

destination_table_id = f"{client.project}.my_dataset.temp_table"

job_config = bigquery.QueryJobConfig(
    destination=destination_table_id,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

query_job = client.query(query, job_config=job_config)
query_job.result()

destination_uri = "gs://my-bucket/query-results-*.csv"

extract_job = client.extract_table(destination_table_id, destination_uri)
extract_job.result()

print(f"Exported query results to {destination_uri}")
```

## Advanced Querying

### Query with Destination Table

```python
from google.cloud import bigquery

client = bigquery.Client()

query = """
SELECT name, SUM(number) as total
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = 'TX'
GROUP BY name
"""

table_id = f"{client.project}.my_dataset.results_table"

job_config = bigquery.QueryJobConfig(
    destination=table_id,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

query_job = client.query(query, job_config=job_config)
results = query_job.result()

print(f"Query results saved to {table_id}. {results.total_rows} rows.")
```

### Query with Caching Disabled

```python
from google.cloud import bigquery

client = bigquery.Client()

query = """
SELECT name, state
FROM `bigquery-public-data.usa_names.usa_1910_2013`
LIMIT 10
"""

job_config = bigquery.QueryJobConfig(use_query_cache=False)
query_job = client.query(query, job_config=job_config)
results = query_job.result()

for row in results:
    print(row)
```

### Query with Legacy SQL

```python
from google.cloud import bigquery

client = bigquery.Client()

query = """
SELECT name, state
FROM [bigquery-public-data:usa_names.usa_1910_2013]
WHERE state = 'TX'
LIMIT 10
"""

job_config = bigquery.QueryJobConfig(use_legacy_sql=True)
query_job = client.query(query, job_config=job_config)
results = query_job.result()

for row in results:
    print(row)
```

### Query with Maximum Billing Tier

```python
from google.cloud import bigquery

client = bigquery.Client()

query = """
SELECT name, COUNT(*) as count
FROM `bigquery-public-data.usa_names.usa_1910_2013`
GROUP BY name
"""

job_config = bigquery.QueryJobConfig(maximum_billing_tier=1)
query_job = client.query(query, job_config=job_config)
results = query_job.result()

print(f"Processed {query_job.total_bytes_processed} bytes.")
```

### Query with Maximum Bytes Billed

```python
from google.cloud import bigquery

client = bigquery.Client()

query = """
SELECT name, state
FROM `bigquery-public-data.usa_names.usa_1910_2013`
LIMIT 10
"""

job_config = bigquery.QueryJobConfig(maximum_bytes_billed=1000000)
query_job = client.query(query, job_config=job_config)
results = query_job.result()

for row in results:
    print(row)
```
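
If a query would bill more than the cap, the job fails instead of running. Exactly where the error surfaces can vary, so the sketch below wraps both submission and `result()` and catches the broad `GoogleAPIError` base class (treat the narrower class, typically a 400 with reason `bytesBilledLimitExceeded`, as an assumption):

```python
from google.cloud import bigquery
from google.api_core import exceptions

client = bigquery.Client()

job_config = bigquery.QueryJobConfig(maximum_bytes_billed=1000)  # deliberately tiny cap

try:
    query_job = client.query(
        "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013`",
        job_config=job_config,
    )
    query_job.result()
except exceptions.GoogleAPIError as e:
    print(f"Query rejected by the byte cap: {e}")
```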

### Create Clustered Table

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_clustered_table"

schema = [
    bigquery.SchemaField("name", "STRING"),
    bigquery.SchemaField("state", "STRING"),
    bigquery.SchemaField("year", "INTEGER"),
    bigquery.SchemaField("number", "INTEGER"),
]

table = bigquery.Table(table_id, schema=schema)
table.clustering_fields = ["state", "year"]

table = client.create_table(table)
print(f"Created clustered table {table.table_id}")
```

### Create Partitioned Table

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_partitioned_table"

schema = [
    bigquery.SchemaField("name", "STRING"),
    bigquery.SchemaField("created_date", "DATE"),
    bigquery.SchemaField("value", "INTEGER"),
]

table = bigquery.Table(table_id, schema=schema)
table.time_partitioning = bigquery.TimePartitioning(
    type_=bigquery.TimePartitioningType.DAY,
    field="created_date",
)

table = client.create_table(table)
print(f"Created partitioned table {table.table_id}")
```

### Create Partitioned and Clustered Table

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_optimized_table"

schema = [
    bigquery.SchemaField("transaction_id", "STRING"),
    bigquery.SchemaField("transaction_date", "DATE"),
    bigquery.SchemaField("customer_id", "STRING"),
    bigquery.SchemaField("amount", "FLOAT"),
]

table = bigquery.Table(table_id, schema=schema)
table.time_partitioning = bigquery.TimePartitioning(
    type_=bigquery.TimePartitioningType.DAY,
    field="transaction_date",
)
table.clustering_fields = ["customer_id"]

table = client.create_table(table)
print(f"Created partitioned and clustered table {table.table_id}")
```
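
Two related knobs are worth knowing when creating partitioned tables: `expiration_ms` on the partitioning spec ages out old partitions automatically, and `require_partition_filter` rejects queries that would scan every partition. A sketch:

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_expiring_table"

table = bigquery.Table(table_id, schema=[
    bigquery.SchemaField("event_date", "DATE"),
    bigquery.SchemaField("payload", "STRING"),
])
table.time_partitioning = bigquery.TimePartitioning(
    type_=bigquery.TimePartitioningType.DAY,
    field="event_date",
    expiration_ms=90 * 24 * 60 * 60 * 1000,  # drop partitions older than 90 days
)
table.require_partition_filter = True  # queries must filter on event_date

table = client.create_table(table)
print(f"Created table {table.table_id}")
```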

## Jobs

### List Jobs

```python
from google.cloud import bigquery

client = bigquery.Client()

jobs = list(client.list_jobs(max_results=10))

print("Jobs:")
for job in jobs:
    print(f"{job.job_id} - {job.state}")
```

### List Jobs with Filter

```python
from google.cloud import bigquery

client = bigquery.Client()

# state_filter accepts "done", "pending", or "running" (lowercase)
jobs = list(client.list_jobs(
    max_results=10,
    state_filter="done"
))

print("Completed jobs:")
for job in jobs:
    print(f"{job.job_id} - {job.job_type}")
```

### Get Job Details

```python
from google.cloud import bigquery

client = bigquery.Client()

job = client.get_job("my-job-id")

print(f"Job {job.job_id}")
print(f"State: {job.state}")
print(f"Created: {job.created}")
print(f"Started: {job.started}")
print(f"Ended: {job.ended}")

if hasattr(job, 'total_bytes_processed'):
    print(f"Bytes processed: {job.total_bytes_processed}")
```

### Cancel Job

```python
from google.cloud import bigquery

client = bigquery.Client()

job = client.get_job("my-job-id")
job.cancel()

print(f"Job {job.job_id} cancelled.")
```

## Copying Tables

### Copy Table

```python
from google.cloud import bigquery

client = bigquery.Client()

source_table_id = f"{client.project}.my_dataset.source_table"
dest_table_id = f"{client.project}.my_dataset.dest_table"

job = client.copy_table(source_table_id, dest_table_id)
job.result()

print(f"Table copied to {dest_table_id}")
```

### Copy Table with Write Disposition

```python
from google.cloud import bigquery

client = bigquery.Client()

source_table_id = f"{client.project}.my_dataset.source_table"
dest_table_id = f"{client.project}.my_dataset.dest_table"

job_config = bigquery.CopyJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE
)

job = client.copy_table(source_table_id, dest_table_id, job_config=job_config)
job.result()

print(f"Table copied to {dest_table_id}")
```

### Copy Multiple Tables

```python
from google.cloud import bigquery

client = bigquery.Client()

source_tables = [
    f"{client.project}.my_dataset.table1",
    f"{client.project}.my_dataset.table2",
    f"{client.project}.my_dataset.table3",
]
dest_table_id = f"{client.project}.my_dataset.merged_table"

job = client.copy_table(source_tables, dest_table_id)
job.result()

print(f"Tables merged into {dest_table_id}")
```

## Row-Level Operations

### Get Table Rows

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"

rows = client.list_rows(table_id, max_results=10)

for row in rows:
    print(row)
```

### Get Rows with Selected Fields

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
table = client.get_table(table_id)

selected_fields = [
    table.schema[0],  # name
    table.schema[1],  # age
]

rows = client.list_rows(table_id, selected_fields=selected_fields, max_results=10)

for row in rows:
    print(row)
```

### Get Rows with Pagination

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"

# Use page_size to control how many rows come back per request;
# max_results would cap the total number of rows instead.
page_size = 100
row_iterator = client.list_rows(table_id, page_size=page_size)

for page in row_iterator.pages:
    for row in page:
        print(row)
```

### Convert Rows to DataFrame

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"

rows = client.list_rows(table_id, max_results=1000)
df = rows.to_dataframe()

print(df.head())
```

## IAM and Access Control

### Get Table IAM Policy

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"

# get_iam_policy operates on tables; dataset-level access is managed
# through the dataset's access_entries instead (see below).
policy = client.get_iam_policy(table_id)

print("IAM Policy:")
for binding in policy.bindings:
    print(f"Role: {binding['role']}")
    print(f"Members: {binding['members']}")
```

### Set Table IAM Policy

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"

policy = client.get_iam_policy(table_id)

policy.bindings.append({
    "role": "roles/bigquery.dataViewer",
    "members": {"user:example@example.com"}
})

policy = client.set_iam_policy(table_id, policy)
print("IAM policy updated.")
```
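
Dataset-level access is not exposed through `get_iam_policy`/`set_iam_policy`; it is managed via `access_entries` on the Dataset object. A sketch granting a user read access:

```python
from google.cloud import bigquery

client = bigquery.Client()

dataset = client.get_dataset(f"{client.project}.my_dataset")

entries = list(dataset.access_entries)
entries.append(
    bigquery.AccessEntry(
        role="READER",
        entity_type="userByEmail",
        entity_id="example@example.com",
    )
)
dataset.access_entries = entries

dataset = client.update_dataset(dataset, ["access_entries"])
print("Dataset access updated.")
```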

## Error Handling

### Comprehensive Error Handling

```python
from google.cloud import bigquery
from google.api_core import exceptions

client = bigquery.Client()

query = "SELECT * FROM invalid_table"

try:
    query_job = client.query(query)
    results = query_job.result()
except exceptions.NotFound as e:
    print(f"Table not found: {e}")
except exceptions.BadRequest as e:
    print(f"Invalid query: {e}")
except exceptions.Forbidden as e:
    print(f"Permission denied: {e}")
except exceptions.GoogleAPIError as e:
    print(f"API error: {e}")
```
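
For transient failures (rate limits, 5xx responses) a simple retry with exponential backoff is often enough. This is a hand-rolled sketch rather than the library's built-in retry machinery:

```python
import time

from google.cloud import bigquery
from google.api_core import exceptions

client = bigquery.Client()

def query_with_retry(sql: str, max_attempts: int = 4):
    """Retry transient errors with exponential backoff; re-raise anything else."""
    for attempt in range(max_attempts):
        try:
            return client.query_and_wait(sql)
        except (exceptions.TooManyRequests,
                exceptions.InternalServerError,
                exceptions.ServiceUnavailable):
            if attempt == max_attempts - 1:
                raise
            time.sleep(2 ** attempt)  # 1s, 2s, 4s, ...

rows = query_with_retry("SELECT 1 AS value")
```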

### Handle Streaming Insert Errors

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"

rows_to_insert = [
    {"name": "Tom", "age": 30},
    {"name": "Jane", "age": "invalid"},  # Invalid type
]

errors = client.insert_rows_json(table_id, rows_to_insert)

if errors:
    print("Errors encountered:")
    for error in errors:
        print(f"Row index: {error['index']}")
        print(f"Errors: {error['errors']}")
else:
    print("All rows inserted successfully.")
```

### Handle Load Job Errors

```python
from google.cloud import bigquery

client = bigquery.Client()

table_id = f"{client.project}.my_dataset.my_table"
uri = "gs://my-bucket/data.csv"

job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,
    autodetect=True,
)

load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)

try:
    load_job.result()
    print(f"Loaded {load_job.output_rows} rows.")
except Exception as e:
    print(f"Load job failed: {e}")

    if load_job.errors:
        print("Errors:")
        for error in load_job.errors:
            print(error)
```