embulk-output-bigquery 0.3.7 → 0.4.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +47 -17
- data/embulk-output-bigquery.gemspec +1 -1
- data/example/config_delete_in_advance_partitioned_table.yml +33 -0
- data/example/config_progress_log_interval.yml +31 -0
- data/example/config_replace_backup_paritioned_table.yml +34 -0
- data/example/config_replace_paritioned_table.yml +33 -0
- data/lib/embulk/output/bigquery.rb +55 -14
- data/lib/embulk/output/bigquery/bigquery_client.rb +63 -28
- data/lib/embulk/output/bigquery/file_writer.rb +13 -4
- data/lib/embulk/output/bigquery/helper.rb +10 -0
- data/test/test_bigquery_client.rb +41 -0
- data/test/test_configure.rb +17 -0
- data/test/test_example.rb +20 -11
- data/test/test_helper.rb +10 -0
- data/test/test_transaction.rb +169 -32
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 71bc9b253f725436a06e183667cbc87720c3719b
+  data.tar.gz: a32e43da05a4f90ab72c5715ffdf6b08501996d4
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bd3d8aefbc98c2f044b782f807f595603ac7b11052a06b6486803fd2f6871127058a50e9c69ffc1fac92b75de9561c57e99ad9ba3cd8899507e93085d45ed615
+  data.tar.gz: 813b6455f463940968232b4332b8553698b9ef99ad4f3f5af6800b10223c33498fde9f8915604090f85dc7c2f78d16a865cd90da2174447c7f84ab3ef80a4cf8
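For reference, the checksums.yaml inside a gem package records SHA1 and SHA512 digests of the gem's metadata.gz and data.tar.gz members. A minimal sketch of re-checking one digest with Ruby's standard Digest library (the extracted file path is illustrative):

```ruby
require 'digest'

# Recompute the SHA512 of data.tar.gz (extracted from the .gem tarball first,
# e.g. with `tar xf embulk-output-bigquery-0.4.0.gem`) and compare it with the
# value recorded in checksums.yaml above.
expected = '813b6455f463940968232b4332b8553698b9ef99ad4f3f5af6800b10223c33498fde9f8915604090f85dc7c2f78d16a865cd90da2174447c7f84ab3ef80a4cf8'
actual   = Digest::SHA512.file('data.tar.gz').hexdigest
puts actual == expected ? 'checksum OK' : 'checksum mismatch'
```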
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
+## 0.4.0 - 2016-10-01
+
+* [enhancement] Support partitioned table
+* [maintenance] Add `progress_log_interval` option to control the interval of progress logging; progress logging is now off by default
+
 ## 0.3.7 - 2016-08-03
 
 * [maintenance] Fix Thread.new to use thread local variables to avoid nil idx error (thanks to @shyouhei and @umisora)
data/README.md
CHANGED
@@ -44,7 +44,7 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
 | json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
 | project | string | required if json_keyfile is not given | | project_id |
 | dataset | string | required | | dataset |
-| table | string | required | | table name |
+| table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929` |
 | auto_create_dataset | boolean | optional | false | automatically create dataset |
 | auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) |
 | schema_file | string | optional | | /path/to/schema.json |
@@ -63,6 +63,7 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
 | payload_column_index | integer | optional | nil | See [Formatter Performance Issue](#formatter-performance-issue) |
 | gcs_bucket | string | optional | nil | See [GCS Bucket](#gcs-bucket) |
 | auto_create_gcs_bucket | boolean | optional | false | See [GCS Bucket](#gcs-bucket) |
+| progress_log_interval | float | optional | nil (disabled) | Progress log interval. The progress log is disabled when nil (default). NOTE: This option may be removed in the future because a filter plugin can achieve the same goal |
 
 Client or request options
 
@@ -87,18 +88,21 @@ Options for intermediate local files
 
 `source_format` is also used to determine formatter (csv or jsonl).
 
-#### Same options of bq command-line tools or BigQuery job's
+#### Same options as bq command-line tools or BigQuery job's property
 
 Following options are the same as [bq command-line tools](https://cloud.google.com/bigquery/bq-command-line-tool#creatingtablefromfile) or BigQuery [job's property](https://cloud.google.com/bigquery/docs/reference/v2/jobs#resource).
 
-| name
-
-| source_format
-| max_bad_records
-| field_delimiter
-| encoding
-| ignore_unknown_values
-| allow_quoted_newlines
+| name | type | required? | default | description |
+|:----------------------------------|:---------|:----------|:--------|:-----------------------|
+| source_format | string | required | "CSV" | File type (`NEWLINE_DELIMITED_JSON` or `CSV`) |
+| max_bad_records | int | optional | 0 | |
+| field_delimiter | char | optional | "," | |
+| encoding | string | optional | "UTF-8" | `UTF-8` or `ISO-8859-1` |
+| ignore_unknown_values | boolean | optional | false | |
+| allow_quoted_newlines | boolean | optional | false | Set true if data contains newline characters; it may cause slow processing |
+| time_partitioning | hash | optional | nil | See [Time Partitioning](#time-partitioning) |
+| time_partitioning.type | string | required | nil | The only type supported is DAY, which will generate one partition per day based on data loading time. |
+| time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
 
@@ -123,32 +127,32 @@ out:
 ##### append
 
 1. Load to temporary table.
-2. Copy temporary table to destination table. (WRITE_APPEND)
+2. Copy temporary table to destination table (or partition). (WRITE_APPEND)
 
 ##### append_direct
 
-Insert data into existing table directly.
+Insert data into existing table (or partition) directly.
 This is not transactional, i.e., if it fails, the target table could have some rows inserted.
 
 ##### replace
 
 1. Load to temporary table.
-2. Copy temporary table to destination table. (WRITE_TRUNCATE)
+2. Copy temporary table to destination table (or partition). (WRITE_TRUNCATE)
 
 ```is_skip_job_result_check``` must be false when replace mode
 
 ##### replace_backup
 
 1. Load to temporary table.
-2. Copy destination table to backup table. (dataset_old, table_old)
-3. Copy temporary table to destination table. (WRITE_TRUNCATE)
+2. Copy destination table (or partition) to backup table (or partition). (dataset_old, table_old)
+3. Copy temporary table to destination table (or partition). (WRITE_TRUNCATE)
 
 ```is_skip_job_result_check``` must be false when replace_backup mode.
 
 ##### delete_in_advance
 
-1. Delete destination table, if it exists.
-2. Load to destination table.
+1. Delete destination table (or partition), if it exists.
+2. Load to destination table (or partition).
 
 ### Authentication
 
@@ -366,6 +370,32 @@ out:
 
 ToDo: Use https://cloud.google.com/storage/docs/streaming if google-api-ruby-client supports streaming transfers into GCS.
 
+### Time Partitioning
+
+From 0.4.0, embulk-output-bigquery supports loading into a partitioned table.
+See also [Creating and Updating Date-Partitioned Tables](https://cloud.google.com/bigquery/docs/creating-partitioned-tables).
+
+To load into a partition, specify the `table` parameter with a partition decorator as:
+
+```yaml
+out:
+  type: bigquery
+  table: table_name$20160929
+  auto_create_table: true
+```
+
+You may configure the `time_partitioning` parameter together to create the table via the `auto_create_table: true` option as:
+
+```yaml
+out:
+  type: bigquery
+  table: table_name$20160929
+  auto_create_table: true
+  time_partitioning:
+    type: DAY
+    expiration_ms: 259200000
+```
+
 ## Development
 
 ### Run example:
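An aside on the partition decorator introduced in the README above: since the decorator encodes a calendar day, configs that load "today's" partition typically compute it from the load date. A minimal sketch (the `partitioned_table` helper is illustrative, not part of the plugin):

```ruby
# Append a BigQuery partition decorator ($YYYYMMDD) to a base table name.
def partitioned_table(base, date = Time.now.utc)
  "#{base}$#{date.strftime('%Y%m%d')}"
end

puts partitioned_table('table_name', Time.utc(2016, 9, 29))  # => table_name$20160929
```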
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name          = "embulk-output-bigquery"
-  spec.version       = "0.3.7"
+  spec.version       = "0.4.0"
   spec.authors       = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary       = "Google BigQuery output plugin for Embulk"
   spec.description   = "Embulk plugin that insert records to Google BigQuery."
data/example/config_delete_in_advance_partitioned_table.yml
ADDED
@@ -0,0 +1,33 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: delete_in_advance
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_partitioned_table_name$20160929
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  time_partitioning:
+    type: 'DAY'
+    expiration_ms: 100
data/example/config_progress_log_interval.yml
ADDED
@@ -0,0 +1,31 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_table_name
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  progress_log_interval: 0.1
data/example/config_replace_backup_paritioned_table.yml
ADDED
@@ -0,0 +1,34 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace_backup
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_partitioned_table_name$20160929
+  table_old: your_partitioned_table_name_old$20160929
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  time_partitioning:
+    type: 'DAY'
+    expiration_ms: 100
data/example/config_replace_paritioned_table.yml
ADDED
@@ -0,0 +1,33 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_partitioned_table_name$20160929
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  time_partitioning:
+    type: 'DAY'
+    expiration_ms: 100
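A note on `expiration_ms` in these examples: the value 100 keeps a partition's storage for only 100 milliseconds, presumably so test partitions expire almost immediately, while the README's 259200000 is three days expressed in milliseconds:

```ruby
# 3 days, converted to the milliseconds expected by time_partitioning.expiration_ms.
puts 3 * 24 * 60 * 60 * 1000  # => 259200000
```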
data/lib/embulk/output/bigquery.rb
CHANGED
@@ -56,6 +56,7 @@ module Embulk
         'with_rehearsal' => config.param('with_rehearsal', :bool, :default => false),
         'rehearsal_counts' => config.param('rehearsal_counts', :integer, :default => 1000),
         'abort_on_error' => config.param('abort_on_error', :bool, :default => nil),
+        'progress_log_interval' => config.param('progress_log_interval', :float, :default => nil),
 
         'column_options' => config.param('column_options', :array, :default => []),
         'default_timezone' => config.param('default_timezone', :string, :default => ValueConverterFactory::DEFAULT_TIMEZONE),
@@ -84,6 +85,7 @@ module Embulk
         'encoding' => config.param('encoding', :string, :default => 'UTF-8'),
         'ignore_unknown_values' => config.param('ignore_unknown_values', :bool, :default => false),
         'allow_quoted_newlines' => config.param('allow_quoted_newlines', :bool, :default => false),
+        'time_partitioning' => config.param('time_partitioning', :hash, :default => nil),
 
         # for debug
         'skip_load' => config.param('skip_load', :bool, :default => false),
@@ -204,6 +206,8 @@ module Embulk
 
       if %w[replace replace_backup append].include?(task['mode'])
         task['temp_table'] ||= "LOAD_TEMP_#{unique_name}_#{task['table']}"
+      else
+        task['temp_table'] = nil
       end
 
       if task['with_rehearsal']
@@ -218,6 +222,14 @@ module Embulk
         task['abort_on_error'] = (task['max_bad_records'] == 0)
       end
 
+      if task['time_partitioning']
+        unless task['time_partitioning']['type']
+          raise ConfigError.new "`time_partitioning` must have `type` key"
+        end
+      elsif Helper.has_partition_decorator?(task['table'])
+        task['time_partitioning'] = {'type' => 'DAY'}
+      end
+
       task
     end
 
@@ -258,14 +270,7 @@ module Embulk
       }
     end
 
-    def self.transaction(config, schema, task_count, &control)
-      task = self.configure(config, schema, task_count)
-
-      @task = task
-      @schema = schema
-      @bigquery = BigqueryClient.new(task, schema)
-      @converters = ValueConverterFactory.create_converters(task, schema)
-
+    def self.auto_create(task, bigquery)
       if task['auto_create_dataset']
         bigquery.create_dataset(task['dataset'])
       else
@@ -282,18 +287,50 @@ module Embulk
 
       case task['mode']
       when 'delete_in_advance'
-        bigquery.delete_table(task['table'])
-        bigquery.create_table(task['table'])
+        if task['time_partitioning']
+          bigquery.delete_partition(task['table'])
+        else
+          bigquery.delete_table(task['table'])
+        end
+        bigquery.create_table(task['table'], options: task)
       when 'replace', 'replace_backup', 'append'
-        bigquery.create_table(task['temp_table'])
+        bigquery.create_table(task['temp_table'], options: task)
+        if task['time_partitioning']
+          if task['auto_create_table']
+            bigquery.create_table(task['table'], options: task)
+          else
+            bigquery.get_table(task['table']) # raises NotFoundError
+          end
+        end
       else # append_direct
         if task['auto_create_table']
-          bigquery.create_table(task['table'])
+          bigquery.create_table(task['table'], options: task)
        else
           bigquery.get_table(task['table']) # raises NotFoundError
         end
       end
+
+      if task['mode'] == 'replace_backup'
+        if task['time_partitioning'] and Helper.has_partition_decorator?(task['table_old'])
+          if task['auto_create_table']
+            bigquery.create_table(task['table_old'], dataset: task['dataset_old'], options: task)
+          else
+            bigquery.get_table(task['table_old'], dataset: task['dataset_old']) # raises NotFoundError
+          end
+        end
+      end
+    end
+
+    def self.transaction(config, schema, task_count, &control)
+      task = self.configure(config, schema, task_count)
+
+      @task = task
+      @schema = schema
+      @bigquery = BigqueryClient.new(task, schema)
+      @converters = ValueConverterFactory.create_converters(task, schema)
+
+      self.auto_create(@task, @bigquery)
+
       begin
         paths = []
         if task['skip_file_generation']
@@ -346,7 +383,11 @@ module Embulk
       end
 
       if task['mode'] == 'replace_backup'
-        bigquery.copy(task['table'], task['table_old'], task['dataset_old'])
+        begin
+          bigquery.get_table(task['table'])
+          bigquery.copy(task['table'], task['table_old'], task['dataset_old'])
+        rescue NotFoundError
+        end
       end
 
       if task['temp_table']
@@ -359,7 +400,7 @@ module Embulk
       end
     ensure
       begin
-        if task['temp_table'] # replace or replace_backup
+        if task['temp_table'] # append or replace or replace_backup
           bigquery.delete_table(task['temp_table'])
         end
       ensure
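The configure-time behavior added above is worth restating in isolation: an explicit `time_partitioning` hash must carry a `type`, and a bare partition decorator on `table` implies DAY partitioning. A minimal standalone sketch of that defaulting, with a plain Hash and RuntimeError standing in for the plugin's task object and ConfigError:

```ruby
# Default time_partitioning the way configure does above, on a plain task hash.
def apply_time_partitioning_defaults(task)
  if task['time_partitioning']
    raise "`time_partitioning` must have `type` key" unless task['time_partitioning']['type']
  elsif task['table'] =~ /\$.+\z/ # table carries a partition decorator such as table$20160929
    task['time_partitioning'] = { 'type' => 'DAY' }
  end
  task
end

p apply_time_partitioning_defaults('table' => 'table$20160929')
# => {"table"=>"table$20160929", "time_partitioning"=>{"type"=>"DAY"}}
p apply_time_partitioning_defaults('table' => 'table')
# => {"table"=>"table"}
```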
data/lib/embulk/output/bigquery/bigquery_client.rb
CHANGED
@@ -17,6 +17,14 @@ module Embulk
         reset_fields(fields) if fields
         @project = @task['project']
         @dataset = @task['dataset']
+
+        @task['source_format'] ||= 'CSV'
+        @task['max_bad_records'] ||= 0
+        @task['field_delimiter'] ||= ','
+        @task['source_format'] == 'CSV' ? @task['field_delimiter'] : nil
+        @task['encoding'] ||= 'UTF-8'
+        @task['ignore_unknown_values'] = false if @task['ignore_unknown_values'].nil?
+        @task['allow_quoted_newlines'] = false if @task['allow_quoted_newlines'].nil?
       end
 
       def fields
@@ -143,7 +151,7 @@ module Embulk
         responses
       end
 
-      def load(path, table)
+      def load(path, table, write_disposition: 'WRITE_APPEND')
         with_job_retry do
           begin
             if File.exist?(path)
@@ -175,7 +183,7 @@ module Embulk
                 schema: {
                   fields: fields,
                 },
-                write_disposition: 'WRITE_APPEND',
+                write_disposition: write_disposition,
                 source_format: @task['source_format'],
                 max_bad_records: @task['max_bad_records'],
                 field_delimiter: @task['source_format'] == 'CSV' ? @task['field_delimiter'] : nil,
@@ -233,15 +241,15 @@ module Embulk
                 create_deposition: 'CREATE_IF_NEEDED',
                 write_disposition: write_disposition,
                 source_table: {
-
-
-
-
-
-
-
-
+                  project_id: @project,
+                  dataset_id: @dataset,
+                  table_id: source_table,
+                },
+                destination_table: {
+                  project_id: @project,
+                  dataset_id: destination_dataset,
+                  table_id: destination_table,
+                },
               }
             }
           }
@@ -363,9 +371,11 @@ module Embulk
         end
       end
 
-      def create_table(table)
+      def create_table(table, dataset: nil, options: {})
         begin
-          Embulk.logger.info { "embulk-output-bigquery: Create table... #{@project}:#{@dataset}.#{table}" }
+          table = Helper.chomp_partition_decorator(table)
+          dataset ||= @dataset
+          Embulk.logger.info { "embulk-output-bigquery: Create table... #{@project}:#{dataset}.#{table}" }
           body = {
             table_reference: {
               table_id: table,
@@ -374,9 +384,15 @@ module Embulk
               fields: fields,
             }
           }
+          if options['time_partitioning']
+            body[:time_partitioning] = {
+              type: options['time_partitioning']['type'],
+              expiration_ms: options['time_partitioning']['expiration_ms'],
+            }
+          end
          opts = {}
-          Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{@dataset}, #{body}, #{opts})" }
-          with_network_retry { client.insert_table(@project, @dataset, body, opts) }
+          Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{body}, #{opts})" }
+          with_network_retry { client.insert_table(@project, dataset, body, opts) }
         rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
           if e.status_code == 409 && /Already Exists:/ =~ e.message
             # ignore 'Already Exists' error
@@ -385,16 +401,18 @@ module Embulk
 
           response = {status_code: e.status_code, message: e.message, error_class: e.class}
           Embulk.logger.error {
-            "embulk-output-bigquery: insert_table(#{@project}, #{@dataset}, #{body}, #{opts}), response:#{response}"
+            "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{body}, #{opts}), response:#{response}"
           }
-          raise Error, "failed to create table #{@project}:#{@dataset}.#{table}, response:#{response}"
+          raise Error, "failed to create table #{@project}:#{dataset}.#{table}, response:#{response}"
         end
       end
 
-      def delete_table(table)
+      def delete_table(table, dataset: nil)
         begin
-
-
+          table = Helper.chomp_partition_decorator(table)
+          dataset ||= @dataset
+          Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@project}:#{dataset}.#{table}" }
+          with_network_retry { client.delete_table(@project, dataset, table) }
         rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
           if e.status_code == 404 && /Not found:/ =~ e.message
             # ignore 'Not Found' error
@@ -403,26 +421,43 @@ module Embulk
 
           response = {status_code: e.status_code, message: e.message, error_class: e.class}
           Embulk.logger.error {
-            "embulk-output-bigquery: delete_table(#{@project}, #{@dataset}, #{table}), response:#{response}"
+            "embulk-output-bigquery: delete_table(#{@project}, #{dataset}, #{table}), response:#{response}"
           }
-          raise Error, "failed to delete table #{@project}:#{@dataset}.#{table}, response:#{response}"
+          raise Error, "failed to delete table #{@project}:#{dataset}.#{table}, response:#{response}"
         end
       end
 
-      def get_table(table)
+      def get_table(table, dataset: nil)
         begin
-
-
+          table = Helper.chomp_partition_decorator(table)
+          dataset ||= @dataset
+          Embulk.logger.info { "embulk-output-bigquery: Get table... #{@project}:#{dataset}.#{table}" }
+          with_network_retry { client.get_table(@project, dataset, table) }
         rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
           if e.status_code == 404
-            raise NotFoundError, "Table #{@project}:#{@dataset}.#{table} is not found"
+            raise NotFoundError, "Table #{@project}:#{dataset}.#{table} is not found"
           end
 
           response = {status_code: e.status_code, message: e.message, error_class: e.class}
           Embulk.logger.error {
-            "embulk-output-bigquery: get_table(#{@project}, #{@dataset}, #{table}), response:#{response}"
+            "embulk-output-bigquery: get_table(#{@project}, #{dataset}, #{table}), response:#{response}"
          }
-          raise Error, "failed to get table #{@project}:#{@dataset}.#{table}, response:#{response}"
+          raise Error, "failed to get table #{@project}:#{dataset}.#{table}, response:#{response}"
+        end
+      end
+
+      # Is this the only way to drop a partition?
+      def delete_partition(table_with_partition, dataset: nil)
+        dataset ||= @dataset
+        begin
+          table = Helper.chomp_partition_decorator(table_with_partition)
+          get_table(table, dataset: dataset)
+        rescue NotFoundError
+        else
+          Embulk.logger.info { "embulk-output-bigquery: Delete partition... #{@project}:#{dataset}.#{table_with_partition}" }
+          Tempfile.create('embulk_output_bigquery_empty_file_') do |fp|
+            load(fp.path, table_with_partition, write_disposition: 'WRITE_TRUNCATE')
+          end
         end
       end
     end
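The `delete_partition` method above relies on a BigQuery behavior: loading an empty file into `table$YYYYMMDD` with `WRITE_TRUNCATE` empties just that partition. A standalone sketch of the same trick, with a block standing in for the plugin's `load` method:

```ruby
require 'tempfile'

# Truncate a single partition by loading an empty file with WRITE_TRUNCATE.
# The block receives (path, table, write_disposition) and would submit the
# actual BigQuery load job.
def truncate_partition(table_with_partition)
  Tempfile.create('embulk_output_bigquery_empty_file_') do |fp|
    yield fp.path, table_with_partition, 'WRITE_TRUNCATE'
  end
end

truncate_partition('your_table_name$20160929') do |path, table, disposition|
  puts "load #{path.inspect} into #{table} (#{disposition})"
end
```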
data/lib/embulk/output/bigquery/file_writer.rb
CHANGED
@@ -16,8 +16,11 @@ module Embulk
       @converters = converters || ValueConverterFactory.create_converters(task, schema)
 
       @num_rows = 0
-      @progress_log_timer = Time.now
-      @previous_num_rows = 0
+      if @task['progress_log_interval']
+        @progress_log_interval = @task['progress_log_interval']
+        @progress_log_timer = Time.now
+        @previous_num_rows = 0
+      end
 
       if @task['payload_column_index']
         @payload_column_index = @task['payload_column_index']
@@ -103,14 +106,20 @@ module Embulk
           _io.write formatted_record
           @num_rows += 1
         end
+        show_progress if @task['progress_log_interval']
+        @num_rows
+      end
+
+      private
+
+      def show_progress
         now = Time.now
-        if @progress_log_timer < now -
+        if @progress_log_timer < now - @progress_log_interval
           speed = ((@num_rows - @previous_num_rows) / (now - @progress_log_timer).to_f).round(1)
           @progress_log_timer = now
           @previous_num_rows = @num_rows
           Embulk.logger.info { "embulk-output-bigquery: num_rows #{num_format(@num_rows)} (#{num_format(speed)} rows/sec)" }
         end
-        @num_rows
       end
     end
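The throttled progress logging above, extracted as a self-contained sketch: emit at most one line per `interval` seconds and report the incremental rows/sec since the previous line (`puts` stands in for Embulk.logger):

```ruby
# Interval-throttled progress reporting, as in FileWriter#show_progress above.
class ProgressLogger
  def initialize(interval)
    @interval = interval # seconds between progress lines
    @timer = Time.now
    @previous_num_rows = 0
  end

  def show(num_rows)
    now = Time.now
    return unless @timer < now - @interval
    speed = ((num_rows - @previous_num_rows) / (now - @timer).to_f).round(1)
    @timer = now
    @previous_num_rows = num_rows
    puts "num_rows #{num_rows} (#{speed} rows/sec)"
  end
end

logger = ProgressLogger.new(0.1)
1.upto(5) { |i| logger.show(i * 1000); sleep 0.06 }
```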
data/lib/embulk/output/bigquery/helper.rb
CHANGED
@@ -5,6 +5,16 @@ module Embulk
   module Output
     class Bigquery < OutputPlugin
       class Helper
+        PARTITION_DECORATOR_REGEXP = /\$.+\z/
+
+        def self.has_partition_decorator?(table)
+          !!(table =~ PARTITION_DECORATOR_REGEXP)
+        end
+
+        def self.chomp_partition_decorator(table)
+          table.sub(PARTITION_DECORATOR_REGEXP, '')
+        end
+
         def self.bq_type_from_embulk_type(embulk_type)
           case embulk_type
           when :boolean then 'BOOLEAN'
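The two helpers added above are small enough to exercise directly; the regexp anchors on the `$`, so everything from the decorator onward is stripped:

```ruby
PARTITION_DECORATOR_REGEXP = /\$.+\z/ # same pattern as Helper above

p !!('table$20160929' =~ PARTITION_DECORATOR_REGEXP)   # => true
p !!('table' =~ PARTITION_DECORATOR_REGEXP)            # => false
p 'table$20160929'.sub(PARTITION_DECORATOR_REGEXP, '') # => "table"
p 'table'.sub(PARTITION_DECORATOR_REGEXP, '')          # => "table"
```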
data/test/test_bigquery_client.rb
CHANGED
@@ -105,6 +105,15 @@ else
     def test_create_table_already_exists
       assert_nothing_raised { client.create_table('your_table_name') }
     end
+
+    def test_create_partitioned_table
+      client.delete_table('your_table_name')
+      assert_nothing_raised do
+        client.create_table('your_table_name$20160929', options:{
+          'time_partitioning' => {'type'=>'DAY'}
+        })
+      end
+    end
   end
 
   sub_test_case "delete_table" do
@@ -116,6 +125,11 @@ else
     def test_delete_table_not_found
       assert_nothing_raised { client.delete_table('your_table_name') }
     end
+
+    def test_delete_partitioned_table
+      client.create_table('your_table_name')
+      assert_nothing_raised { client.delete_table('your_table_name$20160929') }
+    end
   end
 
   sub_test_case "get_table" do
@@ -130,6 +144,33 @@ else
        client.get_table('your_table_name')
      }
    end
+
+    def test_get_partitioned_table
+      client.create_table('your_table_name')
+      assert_nothing_raised { client.get_table('your_table_name$20160929') }
+    end
+  end
+
+  sub_test_case "delete_partition" do
+    def test_delete_partition
+      client.create_table('your_table_name$20160929', options:{
+        'time_partitioning' => {'type'=>'DAY'}
+      })
+      assert_nothing_raised { client.delete_partition('your_table_name$20160929') }
+    ensure
+      client.delete_table('your_table_name')
+    end
+
+    def test_delete_partition_of_non_partitioned_table
+      client.create_table('your_table_name')
+      assert_raise { client.delete_partition('your_table_name$20160929') }
+    ensure
+      client.delete_table('your_table_name')
+    end
+
+    def test_delete_partition_table_not_found
+      assert_nothing_raised { client.delete_partition('your_table_name$20160929') }
+    end
   end
 
   sub_test_case "fields" do
data/test/test_configure.rb
CHANGED
@@ -84,6 +84,7 @@ module Embulk
       assert_equal "UTF-8", task['encoding']
       assert_equal false, task['ignore_unknown_values']
       assert_equal false, task['allow_quoted_newlines']
+      assert_equal nil, task['time_partitioning']
       assert_equal false, task['skip_load']
     end
 
@@ -249,6 +250,22 @@ module Embulk
       task = Bigquery.configure(config, schema, processor_count)
       assert_equal '.foo', task['file_ext']
     end
+
+    def test_time_partitioning
+      config = least_config.merge('time_partitioning' => {'type' => 'DAY'})
+      assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
+
+      config = least_config.merge('time_partitioning' => {'foo' => 'bar'})
+      assert_raise { Bigquery.configure(config, schema, processor_count) }
+
+      config = least_config.merge('table' => 'table')
+      task = Bigquery.configure(config, schema, processor_count)
+      assert_equal nil, task['time_partitioning']
+
+      config = least_config.merge('table' => 'table_name$20160912')
+      task = Bigquery.configure(config, schema, processor_count)
+      assert_equal 'DAY', task['time_partitioning']['type']
+    end
   end
 end
 end
data/test/test_example.rb
CHANGED
@@ -18,19 +18,28 @@ else
     end
   end
 
-
-
-
+  def embulk_run(config_path)
+    Bundler.with_clean_env do
+      cmd = "#{embulk_path} run -X page_size=1 -b . -l trace #{config_path}"
+      puts "=" * 64
+      puts cmd
+      system(cmd)
+    end
+  end
+
+  files = Dir.glob("#{APP_ROOT}/example/config_*.yml").reject {|file| File.symlink?(file) }.sort
   files.each do |config_path|
-
-
-
-
-
-
-
+    if %w[
+      config_expose_errors.yml
+      config_prevent_duplicate_insert.yml
+    ].include?(File.basename(config_path))
+      define_method(:"test_#{File.basename(config_path, ".yml")}") do
+        assert_false embulk_run(config_path)
+      end
+    else
+      define_method(:"test_#{File.basename(config_path, ".yml")}") do
+        assert_true embulk_run(config_path)
       end
-      assert_true success
     end
   end
 end
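The rewritten test_example.rb generates one test method per example config with `define_method`, asserting failure for the two configs that are expected to error. The pattern in isolation, with a stubbed runner replacing the real `embulk_run` shell-out:

```ruby
require 'test/unit'

class TestExamplePattern < Test::Unit::TestCase
  # Stand-in for embulk_run: pretend every config except the failing ones succeeds.
  def run_example(config_path)
    !%w[config_expose_errors.yml config_prevent_duplicate_insert.yml]
      .include?(File.basename(config_path))
  end

  %w[example/config_csv.yml example/config_expose_errors.yml].each do |config_path|
    define_method(:"test_#{File.basename(config_path, '.yml')}") do
      if File.basename(config_path) == 'config_expose_errors.yml'
        assert_false run_example(config_path)
      else
        assert_true run_example(config_path)
      end
    end
  end
end
```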
data/test/test_helper.rb
CHANGED
@@ -14,6 +14,16 @@ module Embulk
     end
   end
 
+  def has_partition_decorator?
+    assert_true Helper.has_partition_decorator?('table$20160929')
+    assert_false Helper.has_partition_decorator?('table')
+  end
+
+  def chomp_partition_decorator
+    assert_equal 'table', Helper.chomp_partition_decorator?('table$20160929')
+    assert_equal 'table', Helper.chomp_partition_decorator?('table')
+  end
+
   def bq_type_from_embulk_type
     assert_equal 'BOOLEAN', Helper.bq_type_from_embulk_type(:boolean)
     assert_equal 'STRING', Helper.bq_type_from_embulk_type(:string)
data/test/test_transaction.rb
CHANGED
@@ -8,10 +8,12 @@ module Embulk
   class TestTransaction < Test::Unit::TestCase
     def least_config
       DataSource.new({
-        'project'
-        'dataset'
-        'table'
+        'project' => 'your_project_name',
+        'dataset' => 'your_dataset_name',
+        'table'   => 'your_table_name',
         'p12_keyfile' => __FILE__, # fake
+        'temp_table' => 'temp_table', # randomly created is not good for our test
+        'path_prefix' => 'tmp/', # randomly created is not good for our test
       })
     end
 
@@ -38,17 +40,6 @@ module Embulk
       stub(Bigquery).transaction_report { {'num_input_rows' => 1, 'num_output_rows' => 1, 'num_rejected_rows' => 0} }
     end
 
-    def test_append
-      config = least_config.merge('mode' => 'append', 'temp_table' => 'temp_table')
-      any_instance_of(BigqueryClient) do |obj|
-        mock(obj).get_dataset(config['dataset'])
-        mock(obj).create_table(config['temp_table'])
-        mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
-        mock(obj).delete_table(config['temp_table'])
-      end
-      Bigquery.transaction(config, schema, processor_count, &control)
-    end
-
     sub_test_case "append_direct" do
       def test_append_direct
         config = least_config.merge('mode' => 'append_direct')
@@ -61,43 +52,108 @@ module Embulk
 
       def test_append_direct_with_auto_create
         config = least_config.merge('mode' => 'append_direct', 'auto_create_dataset' => true, 'auto_create_table' => true)
+        task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).create_dataset(config['dataset'])
-          mock(obj).create_table(config['table'])
+          mock(obj).create_table(config['table'], options: task)
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_append_direct_with_partition
+        config = least_config.merge('mode' => 'append_direct', 'table' => 'table$20160929')
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).get_table(config['table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_append_direct_with_partition_with_auto_create
+        config = least_config.merge('mode' => 'append_direct', 'table' => 'table$20160929', 'auto_create_dataset' => true, 'auto_create_table' => true)
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).create_dataset(config['dataset'])
+          mock(obj).create_table(config['table'], options: task)
         end
         Bigquery.transaction(config, schema, processor_count, &control)
       end
     end
 
-
-
-
-
-
-
+    sub_test_case "delete_in_advance" do
+      def test_delete_in_advance
+        config = least_config.merge('mode' => 'delete_in_advance')
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).delete_table(config['table'])
+          mock(obj).create_table(config['table'], options: task)
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_delete_in_advance_with_partitioning
+        config = least_config.merge('mode' => 'delete_in_advance', 'table' => 'table$20160929')
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).delete_partition(config['table'])
+          mock(obj).create_table(config['table'], options: task)
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
       end
-      Bigquery.transaction(config, schema, processor_count, &control)
     end
 
-
-
-
-
-
-
-
+    sub_test_case "replace" do
+      def test_replace
+        config = least_config.merge('mode' => 'replace')
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_replace_with_partitioning
+        config = least_config.merge('mode' => 'replace', 'table' => 'table$20160929')
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).get_table(config['table'])
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_replace_with_partitioning_with_auto_create_table
+        config = least_config.merge('mode' => 'replace', 'table' => 'table$20160929', 'auto_create_table' => true)
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).create_table(config['table'], options: task)
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
       end
-      Bigquery.transaction(config, schema, processor_count, &control)
     end
 
     sub_test_case "replace_backup" do
       def test_replace_backup
         config = least_config.merge('mode' => 'replace_backup', 'dataset_old' => 'dataset_old', 'table_old' => 'table_old', 'temp_table' => 'temp_table')
+        task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).get_dataset(config['dataset'])
           mock(obj).get_dataset(config['dataset_old'])
-          mock(obj).create_table(config['temp_table'])
+          mock(obj).create_table(config['temp_table'], options: task)
 
+          mock(obj).get_table(task['table'])
           mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -108,11 +164,51 @@ module Embulk
 
       def test_replace_backup_auto_create_dataset
         config = least_config.merge('mode' => 'replace_backup', 'dataset_old' => 'dataset_old', 'table_old' => 'table_old', 'temp_table' => 'temp_table', 'auto_create_dataset' => true)
+        task = Bigquery.configure(config, schema, processor_count)
         any_instance_of(BigqueryClient) do |obj|
           mock(obj).create_dataset(config['dataset'])
           mock(obj).create_dataset(config['dataset_old'], reference: config['dataset'])
-          mock(obj).create_table(config['temp_table'])
+          mock(obj).create_table(config['temp_table'], options: task)
 
+          mock(obj).get_table(task['table'])
+          mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
+
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_replace_backup_with_partitioning
+        config = least_config.merge('mode' => 'replace_backup', 'table' => 'table$20160929', 'dataset_old' => 'dataset_old', 'table_old' => 'table_old$20190929', 'temp_table' => 'temp_table')
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).get_dataset(config['dataset_old'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).get_table(task['table'])
+          mock(obj).get_table(task['table_old'], dataset: config['dataset_old'])
+
+          mock(obj).get_table(task['table'])
+          mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
+
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_replace_backup_with_partitioning_auto_create_table
+        config = least_config.merge('mode' => 'replace_backup', 'table' => 'table$20160929', 'dataset_old' => 'dataset_old', 'table_old' => 'table_old$20160929', 'temp_table' => 'temp_table', 'auto_create_table' => true)
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).get_dataset(config['dataset_old'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).create_table(task['table'], options: task)
+          mock(obj).create_table(task['table_old'], dataset: config['dataset_old'], options: task)
+
+          mock(obj).get_table(task['table'])
           mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
           mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -121,6 +217,47 @@ module Embulk
         Bigquery.transaction(config, schema, processor_count, &control)
       end
     end
+
+    sub_test_case "append" do
+      def test_append
+        config = least_config.merge('mode' => 'append')
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_append_with_partitioning
+        config = least_config.merge('mode' => 'append', 'table' => 'table$20160929')
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).get_table(config['table'])
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_append_with_partitioning_with_auto_create_table
+        config = least_config.merge('mode' => 'append', 'table' => 'table$20160929', 'auto_create_table' => true)
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).create_table(config['table'], options: task)
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+    end
+
   end
 end
 end
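All of the transaction tests above follow one RR mocking pattern: `any_instance_of` installs an expectation on whichever BigqueryClient instance the transaction creates, so a test fails if the expected call never happens. A minimal sketch of the pattern, assuming the rr and test-unit-rr gems:

```ruby
require 'test/unit'
require 'test/unit/rr' # provided by the test-unit-rr gem

class TestAnyInstanceOfPattern < Test::Unit::TestCase
  class Client
    def get_dataset(name); end
  end

  def test_expected_call_is_verified
    # Expect get_dataset to be called once on any Client instance.
    any_instance_of(Client) do |obj|
      mock(obj).get_dataset('your_dataset_name')
    end
    Client.new.get_dataset('your_dataset_name') # satisfies the expectation
  end
end
```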
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.3.7
+  version: 0.4.0
 platform: ruby
 authors:
 - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-
+date: 2016-10-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: google-api-client
@@ -100,6 +100,7 @@ files:
 - example/config_client_options.yml
 - example/config_csv.yml
 - example/config_delete_in_advance.yml
+- example/config_delete_in_advance_partitioned_table.yml
 - example/config_expose_errors.yml
 - example/config_gcs.yml
 - example/config_guess_from_embulk_schema.yml
@@ -114,8 +115,11 @@ files:
 - example/config_payload_column.yml
 - example/config_payload_column_index.yml
 - example/config_prevent_duplicate_insert.yml
+- example/config_progress_log_interval.yml
 - example/config_replace.yml
 - example/config_replace_backup.yml
+- example/config_replace_backup_paritioned_table.yml
+- example/config_replace_paritioned_table.yml
 - example/config_skip_file_generation.yml
 - example/config_table_strftime.yml
 - example/config_template_table.yml