chub-dev 0.1.0 → 0.1.2-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. package/README.md +55 -0
  2. package/bin/chub-mcp +2 -0
  3. package/dist/airtable/docs/database/javascript/DOC.md +1437 -0
  4. package/dist/airtable/docs/database/python/DOC.md +1735 -0
  5. package/dist/amplitude/docs/analytics/javascript/DOC.md +1282 -0
  6. package/dist/amplitude/docs/analytics/python/DOC.md +1199 -0
  7. package/dist/anthropic/docs/claude-api/javascript/DOC.md +503 -0
  8. package/dist/anthropic/docs/claude-api/python/DOC.md +389 -0
  9. package/dist/asana/docs/tasks/DOC.md +1396 -0
  10. package/dist/assemblyai/docs/transcription/DOC.md +1043 -0
  11. package/dist/atlassian/docs/confluence/javascript/DOC.md +1347 -0
  12. package/dist/atlassian/docs/confluence/python/DOC.md +1604 -0
  13. package/dist/auth0/docs/identity/javascript/DOC.md +968 -0
  14. package/dist/auth0/docs/identity/python/DOC.md +1199 -0
  15. package/dist/aws/docs/s3/javascript/DOC.md +1773 -0
  16. package/dist/aws/docs/s3/python/DOC.md +1807 -0
  17. package/dist/binance/docs/trading/javascript/DOC.md +1315 -0
  18. package/dist/binance/docs/trading/python/DOC.md +1454 -0
  19. package/dist/braintree/docs/gateway/javascript/DOC.md +1278 -0
  20. package/dist/braintree/docs/gateway/python/DOC.md +1179 -0
  21. package/dist/chromadb/docs/embeddings-db/javascript/DOC.md +1263 -0
  22. package/dist/chromadb/docs/embeddings-db/python/DOC.md +1707 -0
  23. package/dist/clerk/docs/auth/javascript/DOC.md +1220 -0
  24. package/dist/clerk/docs/auth/python/DOC.md +274 -0
  25. package/dist/cloudflare/docs/workers/javascript/DOC.md +918 -0
  26. package/dist/cloudflare/docs/workers/python/DOC.md +994 -0
  27. package/dist/cockroachdb/docs/distributed-db/DOC.md +1500 -0
  28. package/dist/cohere/docs/llm/DOC.md +1335 -0
  29. package/dist/datadog/docs/monitoring/javascript/DOC.md +1740 -0
  30. package/dist/datadog/docs/monitoring/python/DOC.md +1815 -0
  31. package/dist/deepgram/docs/speech/javascript/DOC.md +885 -0
  32. package/dist/deepgram/docs/speech/python/DOC.md +685 -0
  33. package/dist/deepl/docs/translation/javascript/DOC.md +887 -0
  34. package/dist/deepl/docs/translation/python/DOC.md +944 -0
  35. package/dist/deepseek/docs/llm/DOC.md +1220 -0
  36. package/dist/directus/docs/headless-cms/javascript/DOC.md +1128 -0
  37. package/dist/directus/docs/headless-cms/python/DOC.md +1276 -0
  38. package/dist/discord/docs/bot/javascript/DOC.md +1090 -0
  39. package/dist/discord/docs/bot/python/DOC.md +1130 -0
  40. package/dist/elasticsearch/docs/search/DOC.md +1634 -0
  41. package/dist/elevenlabs/docs/text-to-speech/javascript/DOC.md +336 -0
  42. package/dist/elevenlabs/docs/text-to-speech/python/DOC.md +552 -0
  43. package/dist/firebase/docs/auth/DOC.md +1015 -0
  44. package/dist/gemini/docs/genai/javascript/DOC.md +691 -0
  45. package/dist/gemini/docs/genai/python/DOC.md +555 -0
  46. package/dist/github/docs/octokit/DOC.md +1560 -0
  47. package/dist/google/docs/bigquery/javascript/DOC.md +1688 -0
  48. package/dist/google/docs/bigquery/python/DOC.md +1503 -0
  49. package/dist/hubspot/docs/crm/javascript/DOC.md +1805 -0
  50. package/dist/hubspot/docs/crm/python/DOC.md +2033 -0
  51. package/dist/huggingface/docs/transformers/DOC.md +948 -0
  52. package/dist/intercom/docs/messaging/javascript/DOC.md +1844 -0
  53. package/dist/intercom/docs/messaging/python/DOC.md +1797 -0
  54. package/dist/jira/docs/issues/javascript/DOC.md +1420 -0
  55. package/dist/jira/docs/issues/python/DOC.md +1492 -0
  56. package/dist/kafka/docs/streaming/javascript/DOC.md +1671 -0
  57. package/dist/kafka/docs/streaming/python/DOC.md +1464 -0
  58. package/dist/landingai-ade/docs/api/DOC.md +620 -0
  59. package/dist/landingai-ade/docs/sdk/python/DOC.md +489 -0
  60. package/dist/landingai-ade/docs/sdk/typescript/DOC.md +542 -0
  61. package/dist/landingai-ade/skills/SKILL.md +489 -0
  62. package/dist/launchdarkly/docs/feature-flags/javascript/DOC.md +1191 -0
  63. package/dist/launchdarkly/docs/feature-flags/python/DOC.md +1671 -0
  64. package/dist/linear/docs/tracker/DOC.md +1554 -0
  65. package/dist/livekit/docs/realtime/javascript/DOC.md +303 -0
  66. package/dist/livekit/docs/realtime/python/DOC.md +163 -0
  67. package/dist/mailchimp/docs/marketing/DOC.md +1420 -0
  68. package/dist/meilisearch/docs/search/DOC.md +1241 -0
  69. package/dist/microsoft/docs/onedrive/javascript/DOC.md +1421 -0
  70. package/dist/microsoft/docs/onedrive/python/DOC.md +1549 -0
  71. package/dist/mongodb/docs/atlas/DOC.md +2041 -0
  72. package/dist/notion/docs/workspace-api/javascript/DOC.md +1435 -0
  73. package/dist/notion/docs/workspace-api/python/DOC.md +1400 -0
  74. package/dist/okta/docs/identity/javascript/DOC.md +1171 -0
  75. package/dist/okta/docs/identity/python/DOC.md +1401 -0
  76. package/dist/openai/docs/chat/javascript/DOC.md +407 -0
  77. package/dist/openai/docs/chat/python/DOC.md +568 -0
  78. package/dist/paypal/docs/checkout/DOC.md +278 -0
  79. package/dist/pinecone/docs/sdk/javascript/DOC.md +984 -0
  80. package/dist/pinecone/docs/sdk/python/DOC.md +1395 -0
  81. package/dist/plaid/docs/banking/javascript/DOC.md +1163 -0
  82. package/dist/plaid/docs/banking/python/DOC.md +1203 -0
  83. package/dist/playwright-community/skills/login-flows/SKILL.md +108 -0
  84. package/dist/postmark/docs/transactional-email/DOC.md +1168 -0
  85. package/dist/prisma/docs/orm/javascript/DOC.md +1419 -0
  86. package/dist/prisma/docs/orm/python/DOC.md +1317 -0
  87. package/dist/qdrant/docs/vector-search/javascript/DOC.md +1221 -0
  88. package/dist/qdrant/docs/vector-search/python/DOC.md +1653 -0
  89. package/dist/rabbitmq/docs/message-queue/javascript/DOC.md +1193 -0
  90. package/dist/rabbitmq/docs/message-queue/python/DOC.md +1243 -0
  91. package/dist/razorpay/docs/payments/javascript/DOC.md +1219 -0
  92. package/dist/razorpay/docs/payments/python/DOC.md +1330 -0
  93. package/dist/redis/docs/key-value/javascript/DOC.md +1851 -0
  94. package/dist/redis/docs/key-value/python/DOC.md +2054 -0
  95. package/dist/registry.json +2817 -0
  96. package/dist/replicate/docs/model-hosting/DOC.md +1318 -0
  97. package/dist/resend/docs/email/DOC.md +1271 -0
  98. package/dist/salesforce/docs/crm/javascript/DOC.md +1241 -0
  99. package/dist/salesforce/docs/crm/python/DOC.md +1183 -0
  100. package/dist/search-index.json +1 -0
  101. package/dist/sendgrid/docs/email-api/javascript/DOC.md +371 -0
  102. package/dist/sendgrid/docs/email-api/python/DOC.md +656 -0
  103. package/dist/sentry/docs/error-tracking/javascript/DOC.md +1073 -0
  104. package/dist/sentry/docs/error-tracking/python/DOC.md +1309 -0
  105. package/dist/shopify/docs/storefront/DOC.md +457 -0
  106. package/dist/slack/docs/workspace/javascript/DOC.md +933 -0
  107. package/dist/slack/docs/workspace/python/DOC.md +271 -0
  108. package/dist/square/docs/payments/javascript/DOC.md +1855 -0
  109. package/dist/square/docs/payments/python/DOC.md +1728 -0
  110. package/dist/stripe/docs/api/DOC.md +1727 -0
  111. package/dist/stripe/docs/payments/DOC.md +1726 -0
  112. package/dist/stytch/docs/auth/javascript/DOC.md +1813 -0
  113. package/dist/stytch/docs/auth/python/DOC.md +1962 -0
  114. package/dist/supabase/docs/client/DOC.md +1606 -0
  115. package/dist/twilio/docs/messaging/python/DOC.md +469 -0
  116. package/dist/twilio/docs/messaging/typescript/DOC.md +946 -0
  117. package/dist/vercel/docs/platform/DOC.md +1940 -0
  118. package/dist/weaviate/docs/vector-db/javascript/DOC.md +1268 -0
  119. package/dist/weaviate/docs/vector-db/python/DOC.md +1388 -0
  120. package/dist/zendesk/docs/support/javascript/DOC.md +2150 -0
  121. package/dist/zendesk/docs/support/python/DOC.md +2297 -0
  122. package/package.json +22 -6
  123. package/skills/get-api-docs/SKILL.md +84 -0
  124. package/src/commands/annotate.js +83 -0
  125. package/src/commands/build.js +12 -1
  126. package/src/commands/feedback.js +150 -0
  127. package/src/commands/get.js +83 -42
  128. package/src/commands/search.js +7 -0
  129. package/src/index.js +43 -17
  130. package/src/lib/analytics.js +90 -0
  131. package/src/lib/annotations.js +57 -0
  132. package/src/lib/bm25.js +170 -0
  133. package/src/lib/cache.js +69 -6
  134. package/src/lib/config.js +8 -3
  135. package/src/lib/identity.js +99 -0
  136. package/src/lib/registry.js +103 -20
  137. package/src/lib/telemetry.js +86 -0
  138. package/src/mcp/server.js +177 -0
  139. package/src/mcp/tools.js +251 -0
package/dist/google/docs/bigquery/python/DOC.md
@@ -0,0 +1,1503 @@
+ ---
+ name: bigquery
+ description: "Google Cloud BigQuery Python client library for data warehouse queries and analytics"
+ metadata:
+   languages: "python"
+   versions: "3.38.0"
+   updated-on: "2026-03-02"
+   source: maintainer
+   tags: "google,bigquery,data-warehouse,sql,analytics"
+ ---
+
+ # Google Cloud BigQuery - Python Client Library
+
+ ## Golden Rule
+
+ **ALWAYS use `google-cloud-bigquery` version 3.38.0 or higher.**
+
+ This is the official, maintained Google Cloud client library for BigQuery. Do NOT use deprecated packages such as `bigquery` (without the `google-cloud-` prefix) or any unofficial libraries.
+
+ **Installation:**
+ ```bash
+ pip install google-cloud-bigquery
+ ```
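+
+ To have pip enforce the version floor, you can pin it with a standard version specifier:
+
+ ```bash
+ pip install "google-cloud-bigquery>=3.38.0"
+ ```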
+
+ **IMPORTANT:** BigQuery does NOT support API keys for authentication. You MUST use OAuth 2.0 credentials via service accounts or Application Default Credentials (ADC).
+
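+ For local development without a service account key file, ADC can also be set up through the gcloud CLI (assuming it is installed):
+
+ ```bash
+ gcloud auth application-default login
+ ```
+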
+ ## Installation
+
+ ### Install the Package
+
+ ```bash
+ pip install google-cloud-bigquery
+ ```
+
+ ### Install with Optional Dependencies
+
+ ```bash
+ # With pandas support
+ pip install 'google-cloud-bigquery[pandas]'
+
+ # With all optional dependencies
+ pip install 'google-cloud-bigquery[all]'
+ ```
+
+ ### Authentication Setup
+
+ BigQuery uses Application Default Credentials (ADC). Set the environment variable to point to your service account key:
+
+ ```bash
+ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account-key.json"
+ ```
+
+ ### Environment Variables
+
+ Create a `.env` file:
+
+ ```bash
+ GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json
+ PROJECT_ID=your-gcp-project-id
+ DATASET_ID=your_dataset_id
+ ```
+
+ Load environment variables in your application:
+
+ ```python
+ import os
+
+ # Requires the python-dotenv package: pip install python-dotenv
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ project_id = os.getenv('PROJECT_ID')
+ dataset_id = os.getenv('DATASET_ID')
+ ```
+
+ ## Initialization
+
+ ### Basic Client Initialization
+
+ ```python
+ from google.cloud import bigquery
+
+ # Automatically uses GOOGLE_APPLICATION_CREDENTIALS
+ client = bigquery.Client()
+ ```
+
+ ### Explicit Configuration
+
+ ```python
+ from google.cloud import bigquery
+ from google.oauth2 import service_account
+
+ # Build the credentials object explicitly from a service account key file
+ credentials = service_account.Credentials.from_service_account_file(
+     '/path/to/service-account-key.json'
+ )
+
+ client = bigquery.Client(
+     project='your-gcp-project-id',
+     credentials=credentials
+ )
+ ```
+
+ ### Using Service Account Key File
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client.from_service_account_json(
+     '/path/to/service-account-key.json'
+ )
+ ```
+
+ ### With Project ID
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client(project='your-gcp-project-id')
+ ```
+
+ ### With Location
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client(
+     project='your-gcp-project-id',
+     location='US'  # or 'EU', 'asia-northeast1', etc.
+ )
+ ```
+
+ ## Querying Data
+
+ ### Basic Query
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ query = """
+     SELECT name, state, year, number
+     FROM `bigquery-public-data.usa_names.usa_1910_2013`
+     WHERE state = 'TX'
+     LIMIT 100
+ """
+
+ query_job = client.query(query)
+ results = query_job.result()
+
+ for row in results:
+     print(f"{row.name}: {row.number}")
+ ```
+
+ ### Query with query_and_wait()
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ query = """
+     SELECT name, SUM(number) as total
+     FROM `bigquery-public-data.usa_names.usa_1910_2013`
+     WHERE state = 'TX'
+     GROUP BY name
+     ORDER BY total DESC
+     LIMIT 10
+ """
+
+ rows = client.query_and_wait(query)
+
+ for row in rows:
+     print(f"{row.name}: {row.total}")
+ ```
+
+ ### Query to DataFrame
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ query = """
+     SELECT name, state, year, number
+     FROM `bigquery-public-data.usa_names.usa_1910_2013`
+     WHERE state = 'TX'
+     LIMIT 1000
+ """
+
+ # to_dataframe() requires the pandas extra: pip install 'google-cloud-bigquery[pandas]'
+ df = client.query(query).to_dataframe()
+ print(df.head())
+ ```
+
+ ### Parameterized Queries
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ query = """
+     SELECT name, state, year, number
+     FROM `bigquery-public-data.usa_names.usa_1910_2013`
+     WHERE state IN UNNEST(@states)
+     AND year >= @year
+     ORDER BY number DESC
+     LIMIT @limit
+ """
+
+ job_config = bigquery.QueryJobConfig(
+     query_parameters=[
+         bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]),
+         bigquery.ScalarQueryParameter("year", "INT64", 2000),
+         bigquery.ScalarQueryParameter("limit", "INT64", 20),
+     ]
+ )
+
+ query_job = client.query(query, job_config=job_config)
+ results = query_job.result()
+
+ for row in results:
+     print(row)
+ ```
+
+ ### Query with Struct Parameters
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ query = """
+     SELECT @struct_value.name as name, @struct_value.age as age
+ """
+
+ job_config = bigquery.QueryJobConfig(
+     query_parameters=[
+         bigquery.StructQueryParameter(
+             "struct_value",
+             bigquery.ScalarQueryParameter("name", "STRING", "Tom"),
+             bigquery.ScalarQueryParameter("age", "INT64", 30),
+         )
+     ]
+ )
+
+ query_job = client.query(query, job_config=job_config)
+ results = query_job.result()
+
+ for row in results:
+     print(f"{row.name}: {row.age}")
+ ```
+
+ ### Query with Array of Structs
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ query = """
+     SELECT country.name, country.capital_city
+     FROM UNNEST(@countries) as country
+ """
+
+ # Each array element is itself a StructQueryParameter; positional() omits the name.
+ job_config = bigquery.QueryJobConfig(
+     query_parameters=[
+         bigquery.ArrayQueryParameter(
+             "countries",
+             "STRUCT",
+             [
+                 bigquery.StructQueryParameter.positional(
+                     bigquery.ScalarQueryParameter("name", "STRING", "France"),
+                     bigquery.ScalarQueryParameter("capital_city", "STRING", "Paris"),
+                 ),
+                 bigquery.StructQueryParameter.positional(
+                     bigquery.ScalarQueryParameter("name", "STRING", "Germany"),
+                     bigquery.ScalarQueryParameter("capital_city", "STRING", "Berlin"),
+                 ),
+             ],
+         )
+     ]
+ )
+
+ query_job = client.query(query, job_config=job_config)
+ results = query_job.result()
+
+ for row in results:
+     print(f"{row.name}: {row.capital_city}")
+ ```
+
+ ### Query with Timestamp Parameters
+
+ ```python
+ from google.cloud import bigquery
+ from datetime import datetime
+
+ client = bigquery.Client()
+
+ query = """
+     SELECT @timestamp_value as timestamp_col
+ """
+
+ job_config = bigquery.QueryJobConfig(
+     query_parameters=[
+         bigquery.ScalarQueryParameter(
+             "timestamp_value",
+             "TIMESTAMP",
+             datetime(2024, 1, 1, 0, 0, 0)
+         )
+     ]
+ )
+
+ query_job = client.query(query, job_config=job_config)
+ results = query_job.result()
+
+ for row in results:
+     print(row.timestamp_col)
+ ```
+
+ ### Dry Run Query (Check Bytes Processed)
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ query = """
+     SELECT name, state
+     FROM `bigquery-public-data.usa_names.usa_1910_2013`
+     WHERE state = 'TX'
+ """
+
+ job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
+ query_job = client.query(query, job_config=job_config)
+
+ print(f"This query will process {query_job.total_bytes_processed} bytes.")
+ ```
+
+ ### Query Job with Manual Job Control
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ query = """
+     SELECT name, COUNT(*) as count
+     FROM `bigquery-public-data.usa_names.usa_1910_2013`
+     GROUP BY name
+     ORDER BY count DESC
+     LIMIT 10
+ """
+
+ query_job = client.query(query)
+ print(f"Job {query_job.job_id} started.")
+
+ results = query_job.result()
+
+ print("Rows:")
+ for row in results:
+     print(row)
+ ```
+
+ ### Polling Query Job Status
+
+ ```python
+ from google.cloud import bigquery
+ import time
+
+ client = bigquery.Client()
+
+ query = "SELECT 1 as value"
+ query_job = client.query(query)
+
+ print(f"Job {query_job.job_id} started.")
+
+ while query_job.state != 'DONE':
+     time.sleep(1)
+     query_job.reload()
+     print(f"Job state: {query_job.state}")
+
+ print(f"Job completed. Processed {query_job.total_bytes_processed} bytes.")
+ ```
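+
+ Equivalently, the job object's `done()` method refreshes the job state and returns `True` once the job has finished, which avoids comparing state strings by hand:
+
+ ```python
+ from google.cloud import bigquery
+ import time
+
+ client = bigquery.Client()
+
+ query_job = client.query("SELECT 1 as value")
+
+ # done() reloads the job from the API and reports whether it has finished
+ while not query_job.done():
+     time.sleep(1)
+
+ print(f"Job {query_job.job_id} finished in state {query_job.state}.")
+ ```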
+
+ ## Datasets
+
+ ### Create Dataset
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ dataset_id = f"{client.project}.my_new_dataset"
+ dataset = bigquery.Dataset(dataset_id)
+ dataset = client.create_dataset(dataset)
+
+ print(f"Created dataset {dataset.dataset_id}")
+ ```
+
+ ### Create Dataset with Options
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ dataset_id = f"{client.project}.my_new_dataset"
+ dataset = bigquery.Dataset(dataset_id)
+
+ dataset.location = "US"
+ dataset.description = "My dataset description"
+ dataset.default_table_expiration_ms = 3600000  # 1 hour
+
+ dataset = client.create_dataset(dataset)
+ print(f"Created dataset {dataset.dataset_id}")
+ ```
+
+ ### Get Dataset
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ dataset_id = f"{client.project}.my_dataset"
+ dataset = client.get_dataset(dataset_id)
+
+ print(f"Dataset {dataset.dataset_id}")
+ print(f"Description: {dataset.description}")
+ print(f"Location: {dataset.location}")
+ ```
+
+ ### List Datasets
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ datasets = list(client.list_datasets())
+
+ if datasets:
+     print("Datasets:")
+     for dataset in datasets:
+         print(f" {dataset.dataset_id}")
+ else:
+     print("No datasets found.")
+ ```
+
+ ### Update Dataset
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ dataset_id = f"{client.project}.my_dataset"
+ dataset = client.get_dataset(dataset_id)
+
+ dataset.description = "Updated description"
+ dataset = client.update_dataset(dataset, ["description"])
+
+ print(f"Updated dataset {dataset.dataset_id}")
+ ```
+
+ ### Delete Dataset
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ dataset_id = f"{client.project}.my_dataset"
+ client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True)
+
+ print(f"Deleted dataset {dataset_id}")
+ ```
+
+ ## Tables
+
+ ### Create Table
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+
+ schema = [
+     bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
+     bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
+     bigquery.SchemaField("email", "STRING", mode="REQUIRED"),
+     bigquery.SchemaField("created_at", "TIMESTAMP", mode="REQUIRED"),
+ ]
+
+ table = bigquery.Table(table_id, schema=schema)
+ table = client.create_table(table)
+
+ print(f"Created table {table.table_id}")
+ ```
+
+ ### Create Table with Nested Schema
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_nested_table"
+
+ schema = [
+     bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"),
+     bigquery.SchemaField(
+         "address",
+         "RECORD",
+         mode="NULLABLE",
+         fields=[
+             bigquery.SchemaField("street", "STRING", mode="NULLABLE"),
+             bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
+             bigquery.SchemaField("zip", "STRING", mode="NULLABLE"),
+         ],
+     ),
+     bigquery.SchemaField("tags", "STRING", mode="REPEATED"),
+ ]
+
+ table = bigquery.Table(table_id, schema=schema)
+ table = client.create_table(table)
+
+ print(f"Created table {table.table_id}")
+ ```
+
+ ### Get Table Metadata
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ table = client.get_table(table_id)
+
+ print(f"Table {table.table_id}")
+ print(f"Schema: {table.schema}")
+ print(f"Num rows: {table.num_rows}")
+ print(f"Num bytes: {table.num_bytes}")
+ ```
+
+ ### List Tables
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ dataset_id = f"{client.project}.my_dataset"
+ tables = list(client.list_tables(dataset_id))
+
+ if tables:
+     print("Tables:")
+     for table in tables:
+         print(f" {table.table_id}")
+ else:
+     print("No tables found.")
+ ```
+
+ ### Delete Table
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ client.delete_table(table_id, not_found_ok=True)
+
+ print(f"Deleted table {table_id}")
+ ```
+
+ ### Update Table Schema
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ table = client.get_table(table_id)
+
+ original_schema = table.schema
+ new_schema = original_schema[:]
+ new_schema.append(bigquery.SchemaField("new_field", "STRING", mode="NULLABLE"))
+
+ table.schema = new_schema
+ table = client.update_table(table, ["schema"])
+
+ print(f"Updated table {table.table_id}")
+ ```
+
+ ## Inserting Data
+
+ ### Streaming Insert (Single Row)
+
+ ```python
+ from google.cloud import bigquery
+ from datetime import datetime
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+
+ rows_to_insert = [
+     {"name": "Tom", "age": 30, "email": "tom@example.com", "created_at": datetime.now().isoformat()}
+ ]
+
+ errors = client.insert_rows_json(table_id, rows_to_insert)
+
+ if errors == []:
+     print("New rows have been added.")
+ else:
+     print(f"Encountered errors while inserting rows: {errors}")
+ ```
+
+ ### Streaming Insert (Multiple Rows)
+
+ ```python
+ from google.cloud import bigquery
+ from datetime import datetime
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+
+ rows_to_insert = [
+     {"name": "Tom", "age": 30, "email": "tom@example.com", "created_at": datetime.now().isoformat()},
+     {"name": "Jane", "age": 32, "email": "jane@example.com", "created_at": datetime.now().isoformat()},
+     {"name": "Bob", "age": 28, "email": "bob@example.com", "created_at": datetime.now().isoformat()},
+ ]
+
+ errors = client.insert_rows_json(table_id, rows_to_insert)
+
+ if errors == []:
+     print(f"{len(rows_to_insert)} rows have been added.")
+ else:
+     print(f"Encountered errors while inserting rows: {errors}")
+ ```
+
+ ### Streaming Insert with Row Objects
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ table = client.get_table(table_id)
+
+ # Tuple values map positionally onto the table's schema fields
+ rows_to_insert = [
+     ("Tom", 30, "tom@example.com"),
+     ("Jane", 32, "jane@example.com"),
+ ]
+
+ errors = client.insert_rows(table, rows_to_insert)
+
+ if errors == []:
+     print("New rows have been added.")
+ else:
+     print(f"Encountered errors while inserting rows: {errors}")
+ ```
+
+ ### Streaming Insert with Insert IDs (Deduplication)
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+
+ rows_to_insert = [
+     {"name": "Tom", "age": 30},
+     {"name": "Jane", "age": 32},
+ ]
+
+ row_ids = ["unique-id-1", "unique-id-2"]
+
+ errors = client.insert_rows_json(table_id, rows_to_insert, row_ids=row_ids)
+
+ if errors == []:
+     print("New rows have been added with insert IDs.")
+ else:
+     print(f"Encountered errors while inserting rows: {errors}")
+ ```
+
+ ### Streaming Insert with Nested Data
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_nested_table"
+
+ rows_to_insert = [
+     {
+         "id": 1,
+         "address": {
+             "street": "123 Main St",
+             "city": "Austin",
+             "zip": "78701"
+         },
+         "tags": ["important", "customer"]
+     }
+ ]
+
+ errors = client.insert_rows_json(table_id, rows_to_insert)
+
+ if errors == []:
+     print("New rows have been added.")
+ else:
+     print(f"Encountered errors while inserting rows: {errors}")
+ ```
+
+ ## Loading Data
+
+ ### Load from Cloud Storage (CSV)
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ uri = "gs://my-bucket/data.csv"
+
+ job_config = bigquery.LoadJobConfig(
+     schema=[
+         bigquery.SchemaField("name", "STRING"),
+         bigquery.SchemaField("age", "INTEGER"),
+         bigquery.SchemaField("email", "STRING"),
+     ],
+     skip_leading_rows=1,
+     source_format=bigquery.SourceFormat.CSV,
+ )
+
+ load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
+ load_job.result()
+
+ print(f"Loaded {load_job.output_rows} rows.")
+ ```
+
+ ### Load from Cloud Storage (JSON)
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ uri = "gs://my-bucket/data.json"
+
+ job_config = bigquery.LoadJobConfig(
+     source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
+     autodetect=True,
+ )
+
+ load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
+ load_job.result()
+
+ print(f"Loaded {load_job.output_rows} rows.")
+ ```
+
+ ### Load from Local File
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ filename = "./data.csv"
+
+ job_config = bigquery.LoadJobConfig(
+     schema=[
+         bigquery.SchemaField("name", "STRING"),
+         bigquery.SchemaField("age", "INTEGER"),
+     ],
+     skip_leading_rows=1,
+     source_format=bigquery.SourceFormat.CSV,
+ )
+
+ with open(filename, "rb") as source_file:
+     load_job = client.load_table_from_file(source_file, table_id, job_config=job_config)
+
+ load_job.result()
+ print(f"Loaded {load_job.output_rows} rows.")
+ ```
+
+ ### Load with Write Disposition
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ uri = "gs://my-bucket/data.csv"
+
+ job_config = bigquery.LoadJobConfig(
+     source_format=bigquery.SourceFormat.CSV,
+     skip_leading_rows=1,
+     autodetect=True,
+     write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,  # WRITE_APPEND, WRITE_EMPTY
+ )
+
+ load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
+ load_job.result()
+
+ print(f"Loaded {load_job.output_rows} rows.")
+ ```
+
+ ### Load Parquet from Cloud Storage
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ uri = "gs://my-bucket/data.parquet"
+
+ job_config = bigquery.LoadJobConfig(
+     source_format=bigquery.SourceFormat.PARQUET,
+ )
+
+ load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
+ load_job.result()
+
+ print(f"Loaded {load_job.output_rows} rows.")
+ ```
+
+ ### Load from DataFrame
+
+ ```python
+ from google.cloud import bigquery
+ import pandas as pd
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+
+ df = pd.DataFrame({
+     "name": ["Tom", "Jane", "Bob"],
+     "age": [30, 32, 28],
+     "email": ["tom@example.com", "jane@example.com", "bob@example.com"]
+ })
+
+ job_config = bigquery.LoadJobConfig(
+     schema=[
+         bigquery.SchemaField("name", "STRING"),
+         bigquery.SchemaField("age", "INTEGER"),
+         bigquery.SchemaField("email", "STRING"),
+     ]
+ )
+
+ # Requires pyarrow (installed with the pandas extra)
+ load_job = client.load_table_from_dataframe(df, table_id, job_config=job_config)
+ load_job.result()
+
+ print(f"Loaded {load_job.output_rows} rows.")
+ ```
+
+ ### Load Multiple Files from Cloud Storage
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ uris = [
+     "gs://my-bucket/data1.csv",
+     "gs://my-bucket/data2.csv",
+     "gs://my-bucket/data3.csv",
+ ]
+
+ job_config = bigquery.LoadJobConfig(
+     source_format=bigquery.SourceFormat.CSV,
+     skip_leading_rows=1,
+     autodetect=True,
+ )
+
+ load_job = client.load_table_from_uri(uris, table_id, job_config=job_config)
+ load_job.result()
+
+ print(f"Loaded {load_job.output_rows} rows.")
+ ```
+
+ ## Exporting Data
+
+ ### Export to Cloud Storage (CSV)
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ destination_uri = "gs://my-bucket/export.csv"
+
+ extract_job = client.extract_table(table_id, destination_uri)
+ extract_job.result()
+
+ print(f"Exported {table_id} to {destination_uri}")
+ ```
+
+ ### Export to Cloud Storage with Options
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ destination_uri = "gs://my-bucket/export-*.csv"
+
+ job_config = bigquery.ExtractJobConfig(
+     compression=bigquery.Compression.GZIP,
+     destination_format=bigquery.DestinationFormat.CSV,
+     print_header=True,
+ )
+
+ extract_job = client.extract_table(table_id, destination_uri, job_config=job_config)
+ extract_job.result()
+
+ print(f"Exported {table_id} to {destination_uri}")
+ ```
+
+ ### Export to Cloud Storage (JSON)
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ destination_uri = "gs://my-bucket/export-*.json"
+
+ job_config = bigquery.ExtractJobConfig(
+     destination_format=bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON
+ )
+
+ extract_job = client.extract_table(table_id, destination_uri, job_config=job_config)
+ extract_job.result()
+
+ print(f"Exported {table_id} to {destination_uri}")
+ ```
+
+ ### Export to Cloud Storage (Avro)
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ destination_uri = "gs://my-bucket/export-*.avro"
+
+ job_config = bigquery.ExtractJobConfig(
+     destination_format=bigquery.DestinationFormat.AVRO
+ )
+
+ extract_job = client.extract_table(table_id, destination_uri, job_config=job_config)
+ extract_job.result()
+
+ print(f"Exported {table_id} to {destination_uri}")
+ ```
+
+ ### Export Query Results to Cloud Storage
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ query = """
+     SELECT name, state, year
+     FROM `bigquery-public-data.usa_names.usa_1910_2013`
+     WHERE state = 'TX'
+     LIMIT 1000
+ """
+
+ destination_table_id = f"{client.project}.my_dataset.temp_table"
+
+ job_config = bigquery.QueryJobConfig(
+     destination=destination_table_id,
+     write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
+ )
+
+ query_job = client.query(query, job_config=job_config)
+ query_job.result()
+
+ destination_uri = "gs://my-bucket/query-results-*.csv"
+
+ extract_job = client.extract_table(destination_table_id, destination_uri)
+ extract_job.result()
+
+ print(f"Exported query results to {destination_uri}")
+ ```
+
+ ## Advanced Querying
+
+ ### Query with Destination Table
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ query = """
+     SELECT name, SUM(number) as total
+     FROM `bigquery-public-data.usa_names.usa_1910_2013`
+     WHERE state = 'TX'
+     GROUP BY name
+ """
+
+ table_id = f"{client.project}.my_dataset.results_table"
+
+ job_config = bigquery.QueryJobConfig(
+     destination=table_id,
+     write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
+ )
+
+ query_job = client.query(query, job_config=job_config)
+ results = query_job.result()
+
+ print(f"Query results saved to {table_id}. {results.total_rows} rows.")
+ ```
+
+ ### Query with Caching Disabled
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ query = """
+     SELECT name, state
+     FROM `bigquery-public-data.usa_names.usa_1910_2013`
+     LIMIT 10
+ """
+
+ job_config = bigquery.QueryJobConfig(use_query_cache=False)
+ query_job = client.query(query, job_config=job_config)
+ results = query_job.result()
+
+ for row in results:
+     print(row)
+ ```
+
+ ### Query with Legacy SQL
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ query = """
+     SELECT name, state
+     FROM [bigquery-public-data:usa_names.usa_1910_2013]
+     WHERE state = 'TX'
+     LIMIT 10
+ """
+
+ job_config = bigquery.QueryJobConfig(use_legacy_sql=True)
+ query_job = client.query(query, job_config=job_config)
+ results = query_job.result()
+
+ for row in results:
+     print(row)
+ ```
+
+ ### Query with Maximum Billing Tier
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ query = """
+     SELECT name, COUNT(*) as count
+     FROM `bigquery-public-data.usa_names.usa_1910_2013`
+     GROUP BY name
+ """
+
+ job_config = bigquery.QueryJobConfig(maximum_billing_tier=1)
+ query_job = client.query(query, job_config=job_config)
+ results = query_job.result()
+
+ print(f"Processed {query_job.total_bytes_processed} bytes.")
+ ```
+
+ ### Query with Maximum Bytes Billed
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ query = """
+     SELECT name, state
+     FROM `bigquery-public-data.usa_names.usa_1910_2013`
+     LIMIT 10
+ """
+
+ job_config = bigquery.QueryJobConfig(maximum_bytes_billed=1000000)
+ query_job = client.query(query, job_config=job_config)
+ results = query_job.result()
+
+ for row in results:
+     print(row)
+ ```
+
+ ### Create Clustered Table
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_clustered_table"
+
+ schema = [
+     bigquery.SchemaField("name", "STRING"),
+     bigquery.SchemaField("state", "STRING"),
+     bigquery.SchemaField("year", "INTEGER"),
+     bigquery.SchemaField("number", "INTEGER"),
+ ]
+
+ table = bigquery.Table(table_id, schema=schema)
+ table.clustering_fields = ["state", "year"]
+
+ table = client.create_table(table)
+ print(f"Created clustered table {table.table_id}")
+ ```
+
+ ### Create Partitioned Table
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_partitioned_table"
+
+ schema = [
+     bigquery.SchemaField("name", "STRING"),
+     bigquery.SchemaField("created_date", "DATE"),
+     bigquery.SchemaField("value", "INTEGER"),
+ ]
+
+ table = bigquery.Table(table_id, schema=schema)
+ table.time_partitioning = bigquery.TimePartitioning(
+     type_=bigquery.TimePartitioningType.DAY,
+     field="created_date",
+ )
+
+ table = client.create_table(table)
+ print(f"Created partitioned table {table.table_id}")
+ ```
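+
+ Partitioned tables can also be configured to reject any query that does not filter on the partition column, which guards against accidental full-table scans. A minimal sketch using the table's `require_partition_filter` attribute, applied to the table created above:
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_partitioned_table"
+ table = client.get_table(table_id)
+
+ # Queries against this table must now filter on created_date
+ table.require_partition_filter = True
+ table = client.update_table(table, ["require_partition_filter"])
+
+ print(f"Partition filter required: {table.require_partition_filter}")
+ ```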
+
+ ### Create Partitioned and Clustered Table
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_optimized_table"
+
+ schema = [
+     bigquery.SchemaField("transaction_id", "STRING"),
+     bigquery.SchemaField("transaction_date", "DATE"),
+     bigquery.SchemaField("customer_id", "STRING"),
+     bigquery.SchemaField("amount", "FLOAT"),
+ ]
+
+ table = bigquery.Table(table_id, schema=schema)
+ table.time_partitioning = bigquery.TimePartitioning(
+     type_=bigquery.TimePartitioningType.DAY,
+     field="transaction_date",
+ )
+ table.clustering_fields = ["customer_id"]
+
+ table = client.create_table(table)
+ print(f"Created partitioned and clustered table {table.table_id}")
+ ```
+
+ ## Jobs
+
+ ### List Jobs
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ jobs = list(client.list_jobs(max_results=10))
+
+ print("Jobs:")
+ for job in jobs:
+     print(f"{job.job_id} - {job.state}")
+ ```
+
+ ### List Jobs with Filter
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ jobs = list(client.list_jobs(
+     max_results=10,
+     state_filter="done"  # one of "done", "pending", "running"
+ ))
+
+ print("Completed jobs:")
+ for job in jobs:
+     print(f"{job.job_id} - {job.job_type}")
+ ```
+
+ ### Get Job Details
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ job = client.get_job("my-job-id")
+
+ print(f"Job {job.job_id}")
+ print(f"State: {job.state}")
+ print(f"Created: {job.created}")
+ print(f"Started: {job.started}")
+ print(f"Ended: {job.ended}")
+
+ if hasattr(job, 'total_bytes_processed'):
+     print(f"Bytes processed: {job.total_bytes_processed}")
+ ```
+
+ ### Cancel Job
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ job = client.get_job("my-job-id")
+ job.cancel()
+
+ print(f"Job {job.job_id} cancelled.")
+ ```
+
+ ## Copying Tables
+
+ ### Copy Table
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ source_table_id = f"{client.project}.my_dataset.source_table"
+ dest_table_id = f"{client.project}.my_dataset.dest_table"
+
+ job = client.copy_table(source_table_id, dest_table_id)
+ job.result()
+
+ print(f"Table copied to {dest_table_id}")
+ ```
+
+ ### Copy Table with Write Disposition
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ source_table_id = f"{client.project}.my_dataset.source_table"
+ dest_table_id = f"{client.project}.my_dataset.dest_table"
+
+ job_config = bigquery.CopyJobConfig(
+     write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE
+ )
+
+ job = client.copy_table(source_table_id, dest_table_id, job_config=job_config)
+ job.result()
+
+ print(f"Table copied to {dest_table_id}")
+ ```
+
+ ### Copy Multiple Tables
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ source_tables = [
+     f"{client.project}.my_dataset.table1",
+     f"{client.project}.my_dataset.table2",
+     f"{client.project}.my_dataset.table3",
+ ]
+ dest_table_id = f"{client.project}.my_dataset.merged_table"
+
+ job = client.copy_table(source_tables, dest_table_id)
+ job.result()
+
+ print(f"Tables merged into {dest_table_id}")
+ ```
+
+ ## Row-Level Operations
+
+ ### Get Table Rows
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+
+ rows = client.list_rows(table_id, max_results=10)
+
+ for row in rows:
+     print(row)
+ ```
+
+ ### Get Rows with Selected Fields
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ table = client.get_table(table_id)
+
+ selected_fields = [
+     table.schema[0],  # name
+     table.schema[1],  # age
+ ]
+
+ rows = client.list_rows(table_id, selected_fields=selected_fields, max_results=10)
+
+ for row in rows:
+     print(row)
+ ```
+
+ ### Get Rows with Pagination
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+
+ # page_size controls rows per page; max_results would cap the total instead
+ page_size = 100
+ row_iterator = client.list_rows(table_id, page_size=page_size)
+
+ for page in row_iterator.pages:
+     for row in page:
+         print(row)
+ ```
+
+ ### Convert Rows to DataFrame
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+
+ rows = client.list_rows(table_id, max_results=1000)
+ df = rows.to_dataframe()
+
+ print(df.head())
+ ```
+
+ ## IAM and Access Control
+
+ The client's `get_iam_policy()` and `set_iam_policy()` helpers operate on tables; dataset-level access is managed through the dataset's access entries instead (see the sketch after this section).
+
+ ### Get Table IAM Policy
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ table = client.get_table(table_id)
+
+ policy = client.get_iam_policy(table)
+
+ print("IAM Policy:")
+ for binding in policy.bindings:
+     print(f"Role: {binding['role']}")
+     print(f"Members: {binding['members']}")
+ ```
+
+ ### Set Table IAM Policy
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ table = client.get_table(table_id)
+
+ policy = client.get_iam_policy(table)
+
+ policy.bindings.append({
+     "role": "roles/bigquery.dataViewer",
+     "members": {"user:example@example.com"}
+ })
+
+ policy = client.set_iam_policy(table, policy)
+ print("IAM policy updated.")
+ ```
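+
+ For dataset-level access control, the client uses access entries on the dataset rather than an IAM policy object. A minimal sketch, assuming a user email grantee:
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ dataset = client.get_dataset(f"{client.project}.my_dataset")
+
+ # Append a new access entry and write it back with an explicit field mask
+ entries = list(dataset.access_entries)
+ entries.append(
+     bigquery.AccessEntry(
+         role="READER",
+         entity_type="userByEmail",
+         entity_id="user@example.com",
+     )
+ )
+ dataset.access_entries = entries
+
+ dataset = client.update_dataset(dataset, ["access_entries"])
+ print("Dataset access entries updated.")
+ ```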
+
+ ## Error Handling
+
+ ### Comprehensive Error Handling
+
+ ```python
+ from google.cloud import bigquery
+ from google.api_core import exceptions
+
+ client = bigquery.Client()
+
+ query = "SELECT * FROM invalid_table"
+
+ try:
+     query_job = client.query(query)
+     results = query_job.result()
+ except exceptions.NotFound as e:
+     print(f"Table not found: {e}")
+ except exceptions.BadRequest as e:
+     print(f"Invalid query: {e}")
+ except exceptions.Forbidden as e:
+     print(f"Permission denied: {e}")
+ except exceptions.GoogleAPIError as e:
+     print(f"API error: {e}")
+ ```
+
+ ### Handle Streaming Insert Errors
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+
+ rows_to_insert = [
+     {"name": "Tom", "age": 30},
+     {"name": "Jane", "age": "invalid"},  # Invalid type
+ ]
+
+ errors = client.insert_rows_json(table_id, rows_to_insert)
+
+ if errors:
+     print("Errors encountered:")
+     for error in errors:
+         print(f"Row index: {error['index']}")
+         print(f"Errors: {error['errors']}")
+ else:
+     print("All rows inserted successfully.")
+ ```
+
+ ### Handle Load Job Errors
+
+ ```python
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ table_id = f"{client.project}.my_dataset.my_table"
+ uri = "gs://my-bucket/data.csv"
+
+ job_config = bigquery.LoadJobConfig(
+     source_format=bigquery.SourceFormat.CSV,
+     skip_leading_rows=1,
+     autodetect=True,
+ )
+
+ load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
+
+ try:
+     load_job.result()
+     print(f"Loaded {load_job.output_rows} rows.")
+ except Exception as e:
+     print(f"Load job failed: {e}")
+
+     if load_job.errors:
+         print("Errors:")
+         for error in load_job.errors:
+             print(error)
+ ```
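+
+ Transient server errors (for example, intermittent 500/503 responses) can be retried rather than handled by hand. A minimal sketch that reuses the library's default retry object with a tighter cap; the 60-second deadline is an arbitrary illustrative value:
+
+ ```python
+ from google.cloud import bigquery
+ from google.cloud.bigquery.retry import DEFAULT_RETRY
+
+ client = bigquery.Client()
+
+ # Keep the default predicate for retriable errors, but cap total retry time
+ retry = DEFAULT_RETRY.with_deadline(60.0)
+
+ query_job = client.query("SELECT 1 as value", retry=retry)
+
+ for row in query_job.result():
+     print(row.value)
+ ```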