embulk-output-bigquery 0.3.7 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +47 -17
- data/embulk-output-bigquery.gemspec +1 -1
- data/example/config_delete_in_advance_partitioned_table.yml +33 -0
- data/example/config_progress_log_interval.yml +31 -0
- data/example/config_replace_backup_paritioned_table.yml +34 -0
- data/example/config_replace_paritioned_table.yml +33 -0
- data/lib/embulk/output/bigquery.rb +55 -14
- data/lib/embulk/output/bigquery/bigquery_client.rb +63 -28
- data/lib/embulk/output/bigquery/file_writer.rb +13 -4
- data/lib/embulk/output/bigquery/helper.rb +10 -0
- data/test/test_bigquery_client.rb +41 -0
- data/test/test_configure.rb +17 -0
- data/test/test_example.rb +20 -11
- data/test/test_helper.rb +10 -0
- data/test/test_transaction.rb +169 -32
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 71bc9b253f725436a06e183667cbc87720c3719b
+  data.tar.gz: a32e43da05a4f90ab72c5715ffdf6b08501996d4
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bd3d8aefbc98c2f044b782f807f595603ac7b11052a06b6486803fd2f6871127058a50e9c69ffc1fac92b75de9561c57e99ad9ba3cd8899507e93085d45ed615
+  data.tar.gz: 813b6455f463940968232b4332b8553698b9ef99ad4f3f5af6800b10223c33498fde9f8915604090f85dc7c2f78d16a865cd90da2174447c7f84ab3ef80a4cf8
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
+## 0.4.0 - 2016-10-01
+
+* [enhancement] Support partitioned table
+* [maintenance] Add `progress_log_interval` option to control the interval of showing progress log, and now showing progress log is off by default
+
 ## 0.3.7 - 2016-08-03
 
 * [maintenance] Fix Thread.new to use thread local variables to avoid nil idx error (thanks to @shyouhei and @umisora)
data/README.md
CHANGED
@@ -44,7 +44,7 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
 | json_keyfile | string | required when auth_method is json_key | | Fullpath of json key |
 | project | string | required if json_keyfile is not given | | project_id |
 | dataset | string | required | | dataset |
-| table | string | required | | table name
+| table | string | required | | table name, or table name with a partition decorator such as `table_name$20160929`|
 | auto_create_dataset | boolean | optional | false | automatically create dataset |
 | auto_create_table | boolean | optional | false | See [Dynamic Table Creating](#dynamic-table-creating) |
 | schema_file | string | optional | | /path/to/schema.json |
@@ -63,6 +63,7 @@ v0.3.x has incompatibility changes with v0.2.x. Please see [CHANGELOG.md](CHANGE
 | payload_column_index | integer | optional | nil | See [Formatter Performance Issue](#formatter-performance-issue) |
 | gcs_bucket | stringr | optional | nil | See [GCS Bucket](#gcs-bucket) |
 | auto_create_gcs_bucket | boolean | optional | false | See [GCS Bucket](#gcs-bucket) |
+| progress_log_interval | float | optional | nil (Disabled) | Progress log interval. The progress log is disabled by nil (default). NOTE: This option may be removed in the future because a filter plugin can achieve the same goal |
 
 Client or request options
 
@@ -87,18 +88,21 @@ Options for intermediate local files
 
 `source_format` is also used to determine formatter (csv or jsonl).
 
-#### Same options of bq command-line tools or BigQuery job's
+#### Same options of bq command-line tools or BigQuery job's property
 
 Following options are same as [bq command-line tools](https://cloud.google.com/bigquery/bq-command-line-tool#creatingtablefromfile) or BigQuery [job's property](https://cloud.google.com/bigquery/docs/reference/v2/jobs#resource).
 
-| name
-
-| source_format
-| max_bad_records
-| field_delimiter
-| encoding
-| ignore_unknown_values
-| allow_quoted_newlines
+| name | type | required? | default | description |
+|:----------------------------------|:---------|:----------|:--------|:-----------------------|
+| source_format | string | required | "CSV" | File type (`NEWLINE_DELIMITED_JSON` or `CSV`) |
+| max_bad_records | int | optional | 0 | |
+| field_delimiter | char | optional | "," | |
+| encoding | string | optional | "UTF-8" | `UTF-8` or `ISO-8859-1` |
+| ignore_unknown_values | boolean | optional | false | |
+| allow_quoted_newlines | boolean | optional | false | Set true, if data contains newline characters. It may cause slow processing |
+| time_partitioning | hash | optional | nil | See [Time Partitioning](#time-partitioning) |
+| time_partitioning.type | string | required | nil | The only type supported is DAY, which will generate one partition per day based on data loading time. |
+| time_partitioning.expiration_ms | int | optional | nil | Number of milliseconds for which to keep the storage for a partition. |
 
 ### Example
 
@@ -123,32 +127,32 @@ out:
 ##### append
 
 1. Load to temporary table.
-2. Copy temporary table to destination table. (WRITE_APPEND)
+2. Copy temporary table to destination table (or partition). (WRITE_APPEND)
 
 ##### append_direct
 
-Insert data into existing table directly.
+Insert data into existing table (or partition) directly.
 This is not transactional, i.e., if fails, the target table could have some rows inserted.
 
 ##### replace
 
 1. Load to temporary table.
-2. Copy temporary table to destination table. (WRITE_TRUNCATE)
+2. Copy temporary table to destination table (or partition). (WRITE_TRUNCATE)
 
 ```is_skip_job_result_check``` must be false when replace mode
 
 ##### replace_backup
 
 1. Load to temporary table.
-2. Copy destination table to backup table. (dataset_old, table_old)
-3. Copy temporary table to destination table. (WRITE_TRUNCATE)
+2. Copy destination table (or partition) to backup table (or partition). (dataset_old, table_old)
+3. Copy temporary table to destination table (or partition). (WRITE_TRUNCATE)
 
 ```is_skip_job_result_check``` must be false when replace_backup mode.
 
 ##### delete_in_advance
 
-1. Delete destination table, if it exists.
-2. Load to destination table.
+1. Delete destination table (or partition), if it exists.
+2. Load to destination table (or partition).
 
 ### Authentication
 
@@ -366,6 +370,32 @@ out:
 
 ToDo: Use https://cloud.google.com/storage/docs/streaming if google-api-ruby-client supports streaming transfers into GCS.
 
+### Time Partitioning
+
+From 0.4.0, embulk-output-bigquery supports loading into a partitioned table.
+See also [Creating and Updating Date-Partitioned Tables](https://cloud.google.com/bigquery/docs/creating-partitioned-tables).
+
+To load into a partition, specify `table` parameter with a partition decorator as:
+
+```yaml
+out:
+  type: bigquery
+  table: table_name$20160929
+  auto_create_table: true
+```
+
+You may configure `time_partitioning` parameter together to create table via `auto_create_table: true` option as:
+
+```yaml
+out:
+  type: bigquery
+  table: table_name$20160929
+  auto_create_table: true
+  time_partitioning:
+    type: DAY
+    expiration_ms: 259200000
+```
+
 ## Development
 
 ### Run example:
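
A side note on the decorator format in the README examples above: the `$20160929` suffix is simply the target date rendered as `%Y%m%d` after a `$`. The file list also mentions `example/config_table_strftime.yml`, which suggests the plugin accepts strftime patterns in the `table` option; treat that as an assumption here. A minimal Ruby sketch of composing such a decorator (illustrative only, not plugin code):

```ruby
require 'time'

# Hypothetical helper: build a table name with a daily partition decorator.
# Only meant to show what the `table_name$YYYYMMDD` form expands to.
def partitioned_table(base, time = Time.now)
  "#{base}$#{time.strftime('%Y%m%d')}"
end

puts partitioned_table('table_name', Time.parse('2016-09-29')) #=> table_name$20160929
```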
data/embulk-output-bigquery.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = "embulk-output-bigquery"
-  spec.version = "0.3.7"
+  spec.version = "0.4.0"
   spec.authors = ["Satoshi Akama", "Naotoshi Seo"]
   spec.summary = "Google BigQuery output plugin for Embulk"
   spec.description = "Embulk plugin that insert records to Google BigQuery."
data/example/config_delete_in_advance_partitioned_table.yml
ADDED
@@ -0,0 +1,33 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: delete_in_advance
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_partitioned_table_name$20160929
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  time_partitioning:
+    type: 'DAY'
+    expiration_ms: 100
data/example/config_progress_log_interval.yml
ADDED
@@ -0,0 +1,31 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_table_name
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  progress_log_interval: 0.1
data/example/config_replace_backup_paritioned_table.yml
ADDED
@@ -0,0 +1,34 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace_backup
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_partitioned_table_name$20160929
+  table_old: your_partitioned_table_name_old$20160929
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  time_partitioning:
+    type: 'DAY'
+    expiration_ms: 100
data/example/config_replace_paritioned_table.yml
ADDED
@@ -0,0 +1,33 @@
+in:
+  type: file
+  path_prefix: example/example.csv
+  parser:
+    type: csv
+    charset: UTF-8
+    newline: CRLF
+    null_string: 'NULL'
+    skip_header_lines: 1
+    comment_line_marker: '#'
+    columns:
+      - {name: date, type: string}
+      - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
+      - {name: "null", type: string}
+      - {name: long, type: long}
+      - {name: string, type: string}
+      - {name: double, type: double}
+      - {name: boolean, type: boolean}
+out:
+  type: bigquery
+  mode: replace
+  auth_method: json_key
+  json_keyfile: example/your-project-000.json
+  dataset: your_dataset_name
+  table: your_partitioned_table_name$20160929
+  source_format: NEWLINE_DELIMITED_JSON
+  compression: NONE
+  auto_create_dataset: true
+  auto_create_table: true
+  schema_file: example/schema.json
+  time_partitioning:
+    type: 'DAY'
+    expiration_ms: 100
data/lib/embulk/output/bigquery.rb
CHANGED
@@ -56,6 +56,7 @@ module Embulk
           'with_rehearsal' => config.param('with_rehearsal', :bool, :default => false),
           'rehearsal_counts' => config.param('rehearsal_counts', :integer, :default => 1000),
           'abort_on_error' => config.param('abort_on_error', :bool, :default => nil),
+          'progress_log_interval' => config.param('progress_log_interval', :float, :default => nil),
 
           'column_options' => config.param('column_options', :array, :default => []),
           'default_timezone' => config.param('default_timezone', :string, :default => ValueConverterFactory::DEFAULT_TIMEZONE),
@@ -84,6 +85,7 @@ module Embulk
           'encoding' => config.param('encoding', :string, :default => 'UTF-8'),
           'ignore_unknown_values' => config.param('ignore_unknown_values', :bool, :default => false),
           'allow_quoted_newlines' => config.param('allow_quoted_newlines', :bool, :default => false),
+          'time_partitioning' => config.param('time_partitioning', :hash, :default => nil),
 
           # for debug
           'skip_load' => config.param('skip_load', :bool, :default => false),
@@ -204,6 +206,8 @@ module Embulk
 
         if %w[replace replace_backup append].include?(task['mode'])
           task['temp_table'] ||= "LOAD_TEMP_#{unique_name}_#{task['table']}"
+        else
+          task['temp_table'] = nil
         end
 
         if task['with_rehearsal']
@@ -218,6 +222,14 @@ module Embulk
           task['abort_on_error'] = (task['max_bad_records'] == 0)
         end
 
+        if task['time_partitioning']
+          unless task['time_partitioning']['type']
+            raise ConfigError.new "`time_partitioning` must have `type` key"
+          end
+        elsif Helper.has_partition_decorator?(task['table'])
+          task['time_partitioning'] = {'type' => 'DAY'}
+        end
+
         task
       end
 
@@ -258,14 +270,7 @@ module Embulk
         }
       end
 
-      def self.transaction(config, schema, task_count, &control)
-        task = self.configure(config, schema, task_count)
-
-        @task = task
-        @schema = schema
-        @bigquery = BigqueryClient.new(task, schema)
-        @converters = ValueConverterFactory.create_converters(task, schema)
-
+      def self.auto_create(task, bigquery)
         if task['auto_create_dataset']
           bigquery.create_dataset(task['dataset'])
         else
@@ -282,18 +287,50 @@ module Embulk
 
         case task['mode']
         when 'delete_in_advance'
-          bigquery.delete_table(task['table'])
-          bigquery.create_table(task['table'])
+          if task['time_partitioning']
+            bigquery.delete_partition(task['table'])
+          else
+            bigquery.delete_table(task['table'])
+          end
+          bigquery.create_table(task['table'], options: task)
         when 'replace', 'replace_backup', 'append'
-          bigquery.create_table(task['temp_table'])
+          bigquery.create_table(task['temp_table'], options: task)
+          if task['time_partitioning']
+            if task['auto_create_table']
+              bigquery.create_table(task['table'], options: task)
+            else
+              bigquery.get_table(task['table']) # raises NotFoundError
+            end
+          end
        else # append_direct
          if task['auto_create_table']
-            bigquery.create_table(task['table'])
+            bigquery.create_table(task['table'], options: task)
          else
            bigquery.get_table(task['table']) # raises NotFoundError
          end
        end
+
+        if task['mode'] == 'replace_backup'
+          if task['time_partitioning'] and Helper.has_partition_decorator?(task['table_old'])
+            if task['auto_create_table']
+              bigquery.create_table(task['table_old'], dataset: task['dataset_old'], options: task)
+            else
+              bigquery.get_table(task['table_old'], dataset: task['dataset_old']) # raises NotFoundError
+            end
+          end
+        end
+      end
+
+      def self.transaction(config, schema, task_count, &control)
+        task = self.configure(config, schema, task_count)
+
+        @task = task
+        @schema = schema
+        @bigquery = BigqueryClient.new(task, schema)
+        @converters = ValueConverterFactory.create_converters(task, schema)
+
+        self.auto_create(@task, @bigquery)
+
         begin
           paths = []
           if task['skip_file_generation']
@@ -346,7 +383,11 @@ module Embulk
         end
 
         if task['mode'] == 'replace_backup'
-          bigquery.copy(task['table'], task['table_old'], task['dataset_old'])
+          begin
+            bigquery.get_table(task['table'])
+            bigquery.copy(task['table'], task['table_old'], task['dataset_old'])
+          rescue NotFoundError
+          end
         end
 
         if task['temp_table']
@@ -359,7 +400,7 @@ module Embulk
         end
       ensure
         begin
-          if task['temp_table'] # replace or replace_backup
+          if task['temp_table'] # append or replace or replace_backup
            bigquery.delete_table(task['temp_table'])
          end
        ensure
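
For readers skimming the `configure` change above: an explicit `time_partitioning` hash must carry a `type`, and a bare partition decorator on `table` now implies daily partitioning. A standalone sketch of that defaulting rule (the regexp and method name here are illustrative; the plugin does this via `Helper.has_partition_decorator?`):

```ruby
# Illustrative sketch of the configure-time defaulting added above.
PARTITION_DECORATOR = /\$.+\z/  # same pattern the plugin's Helper uses

def resolve_time_partitioning(task)
  if task['time_partitioning']
    raise ArgumentError, "`time_partitioning` must have `type` key" unless task['time_partitioning']['type']
  elsif task['table'] =~ PARTITION_DECORATOR
    task['time_partitioning'] = {'type' => 'DAY'}
  end
  task
end

p resolve_time_partitioning('table' => 'table_name$20160929')
#=> {"table"=>"table_name$20160929", "time_partitioning"=>{"type"=>"DAY"}}
p resolve_time_partitioning('table' => 'table_name')
#=> {"table"=>"table_name"}
```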
data/lib/embulk/output/bigquery/bigquery_client.rb
CHANGED
@@ -17,6 +17,14 @@ module Embulk
          reset_fields(fields) if fields
          @project = @task['project']
          @dataset = @task['dataset']
+
+          @task['source_format'] ||= 'CSV'
+          @task['max_bad_records'] ||= 0
+          @task['field_delimiter'] ||= ','
+          @task['source_format'] == 'CSV' ? @task['field_delimiter'] : nil
+          @task['encoding'] ||= 'UTF-8'
+          @task['ignore_unknown_values'] = false if @task['ignore_unknown_values'].nil?
+          @task['allow_quoted_newlines'] = false if @task['allow_quoted_newlines'].nil?
        end
 
        def fields
@@ -143,7 +151,7 @@ module Embulk
          responses
        end
 
-        def load(path, table)
+        def load(path, table, write_disposition: 'WRITE_APPEND')
          with_job_retry do
            begin
              if File.exist?(path)
@@ -175,7 +183,7 @@ module Embulk
                  schema: {
                    fields: fields,
                  },
-                  write_disposition: 'WRITE_APPEND',
+                  write_disposition: write_disposition,
                  source_format: @task['source_format'],
                  max_bad_records: @task['max_bad_records'],
                  field_delimiter: @task['source_format'] == 'CSV' ? @task['field_delimiter'] : nil,
@@ -233,15 +241,15 @@ module Embulk
                create_deposition: 'CREATE_IF_NEEDED',
                write_disposition: write_disposition,
                source_table: {
-                  project_id: @project,
-                  dataset_id: @dataset,
-                  table_id: source_table,
-                },
-                destination_table: {
-                  project_id: @project,
-                  dataset_id: @dataset,
-                  table_id: destination_table,
-                },
+                  project_id: @project,
+                  dataset_id: @dataset,
+                  table_id: source_table,
+                },
+                destination_table: {
+                  project_id: @project,
+                  dataset_id: destination_dataset,
+                  table_id: destination_table,
+                },
              }
            }
          }
@@ -363,9 +371,11 @@ module Embulk
          end
        end
 
-        def create_table(table)
+        def create_table(table, dataset: nil, options: {})
          begin
-            Embulk.logger.info { "embulk-output-bigquery: Create table... #{@project}:#{@dataset}.#{table}" }
+            table = Helper.chomp_partition_decorator(table)
+            dataset ||= @dataset
+            Embulk.logger.info { "embulk-output-bigquery: Create table... #{@project}:#{dataset}.#{table}" }
            body = {
              table_reference: {
                table_id: table,
@@ -374,9 +384,15 @@ module Embulk
                fields: fields,
              }
            }
+            if options['time_partitioning']
+              body[:time_partitioning] = {
+                type: options['time_partitioning']['type'],
+                expiration_ms: options['time_partitioning']['expiration_ms'],
+              }
+            end
            opts = {}
-            Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{@dataset}, #{body}, #{opts})" }
-            with_network_retry { client.insert_table(@project, @dataset, body, opts) }
+            Embulk.logger.debug { "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{body}, #{opts})" }
+            with_network_retry { client.insert_table(@project, dataset, body, opts) }
          rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
            if e.status_code == 409 && /Already Exists:/ =~ e.message
              # ignore 'Already Exists' error
@@ -385,16 +401,18 @@ module Embulk
 
            response = {status_code: e.status_code, message: e.message, error_class: e.class}
            Embulk.logger.error {
-              "embulk-output-bigquery: insert_table(#{@project}, #{@dataset}, #{body}, #{opts}), response:#{response}"
+              "embulk-output-bigquery: insert_table(#{@project}, #{dataset}, #{body}, #{opts}), response:#{response}"
            }
-            raise Error, "failed to create table #{@project}:#{@dataset}.#{table}, response:#{response}"
+            raise Error, "failed to create table #{@project}:#{dataset}.#{table}, response:#{response}"
          end
        end
 
-        def delete_table(table)
+        def delete_table(table, dataset: nil)
          begin
-            Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@project}:#{@dataset}.#{table}" }
-            with_network_retry { client.delete_table(@project, @dataset, table) }
+            table = Helper.chomp_partition_decorator(table)
+            dataset ||= @dataset
+            Embulk.logger.info { "embulk-output-bigquery: Delete table... #{@project}:#{dataset}.#{table}" }
+            with_network_retry { client.delete_table(@project, dataset, table) }
          rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
            if e.status_code == 404 && /Not found:/ =~ e.message
              # ignore 'Not Found' error
@@ -403,26 +421,43 @@ module Embulk
 
            response = {status_code: e.status_code, message: e.message, error_class: e.class}
            Embulk.logger.error {
-              "embulk-output-bigquery: delete_table(#{@project}, #{@dataset}, #{table}), response:#{response}"
+              "embulk-output-bigquery: delete_table(#{@project}, #{dataset}, #{table}), response:#{response}"
            }
-            raise Error, "failed to delete table #{@project}:#{@dataset}.#{table}, response:#{response}"
+            raise Error, "failed to delete table #{@project}:#{dataset}.#{table}, response:#{response}"
          end
        end
 
-        def get_table(table)
+        def get_table(table, dataset: nil)
          begin
-            Embulk.logger.info { "embulk-output-bigquery: Get table... #{@project}:#{@dataset}.#{table}" }
-            with_network_retry { client.get_table(@project, @dataset, table) }
+            table = Helper.chomp_partition_decorator(table)
+            dataset ||= @dataset
+            Embulk.logger.info { "embulk-output-bigquery: Get table... #{@project}:#{dataset}.#{table}" }
+            with_network_retry { client.get_table(@project, dataset, table) }
          rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
            if e.status_code == 404
-              raise NotFoundError, "Table #{@project}:#{@dataset}.#{table} is not found"
+              raise NotFoundError, "Table #{@project}:#{dataset}.#{table} is not found"
            end
 
            response = {status_code: e.status_code, message: e.message, error_class: e.class}
            Embulk.logger.error {
-              "embulk-output-bigquery: get_table(#{@project}, #{@dataset}, #{table}), response:#{response}"
+              "embulk-output-bigquery: get_table(#{@project}, #{dataset}, #{table}), response:#{response}"
            }
-            raise Error, "failed to get table #{@project}:#{@dataset}.#{table}, response:#{response}"
+            raise Error, "failed to get table #{@project}:#{dataset}.#{table}, response:#{response}"
+          end
+        end
+
+        # Is this only a way to drop partition?
+        def delete_partition(table_with_partition, dataset: nil)
+          dataset ||= @dataset
+          begin
+            table = Helper.chomp_partition_decorator(table_with_partition)
+            get_table(table, dataset: dataset)
+          rescue NotFoundError
+          else
+            Embulk.logger.info { "embulk-output-bigquery: Delete partition... #{@project}:#{dataset}.#{table_with_partition}" }
+            Tempfile.create('embulk_output_bigquery_empty_file_') do |fp|
+              load(fp.path, table_with_partition, write_disposition: 'WRITE_TRUNCATE')
+            end
          end
        end
      end
data/lib/embulk/output/bigquery/file_writer.rb
CHANGED
@@ -16,8 +16,11 @@ module Embulk
         @converters = converters || ValueConverterFactory.create_converters(task, schema)
 
         @num_rows = 0
-        @progress_log_timer = Time.now
-        @previous_num_rows = 0
+        if @task['progress_log_interval']
+          @progress_log_interval = @task['progress_log_interval']
+          @progress_log_timer = Time.now
+          @previous_num_rows = 0
+        end
 
         if @task['payload_column_index']
           @payload_column_index = @task['payload_column_index']
@@ -103,14 +106,20 @@ module Embulk
           _io.write formatted_record
           @num_rows += 1
         end
+        show_progress if @task['progress_log_interval']
+        @num_rows
+      end
+
+      private
+
+      def show_progress
         now = Time.now
-        if @progress_log_timer < now -
+        if @progress_log_timer < now - @progress_log_interval
           speed = ((@num_rows - @previous_num_rows) / (now - @progress_log_timer).to_f).round(1)
           @progress_log_timer = now
           @previous_num_rows = @num_rows
           Embulk.logger.info { "embulk-output-bigquery: num_rows #{num_format(@num_rows)} (#{num_format(speed)} rows/sec)" }
         end
-        @num_rows
       end
     end
   end
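
The progress log above reports throughput as rows written since the last log divided by the elapsed seconds. A tiny standalone sketch of that arithmetic (names are illustrative, not the plugin's internals):

```ruby
# Same rows/sec formula as show_progress, extracted for illustration.
def rows_per_sec(num_rows, previous_num_rows, now, last_logged_at)
  ((num_rows - previous_num_rows) / (now - last_logged_at).to_f).round(1)
end

last_logged_at = Time.now - 2.5
puts rows_per_sec(15_000, 10_000, Time.now, last_logged_at) # => roughly 2000.0 rows/sec
```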
data/lib/embulk/output/bigquery/helper.rb
CHANGED
@@ -5,6 +5,16 @@ module Embulk
   module Output
     class Bigquery < OutputPlugin
       class Helper
+        PARTITION_DECORATOR_REGEXP = /\$.+\z/
+
+        def self.has_partition_decorator?(table)
+          !!(table =~ PARTITION_DECORATOR_REGEXP)
+        end
+
+        def self.chomp_partition_decorator(table)
+          table.sub(PARTITION_DECORATOR_REGEXP, '')
+        end
+
         def self.bq_type_from_embulk_type(embulk_type)
           case embulk_type
           when :boolean then 'BOOLEAN'
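
The helper treats everything from the first `$` to the end of the name as a partition decorator. A quick sketch of the expected behavior, inlining the same regexp rather than loading the gem:

```ruby
# Mirrors Helper.has_partition_decorator? / Helper.chomp_partition_decorator.
PARTITION_DECORATOR_REGEXP = /\$.+\z/

p !!('table_name$20160929' =~ PARTITION_DECORATOR_REGEXP)   #=> true
p !!('table_name' =~ PARTITION_DECORATOR_REGEXP)            #=> false
p 'table_name$20160929'.sub(PARTITION_DECORATOR_REGEXP, '') #=> "table_name"
```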
data/test/test_bigquery_client.rb
CHANGED
@@ -105,6 +105,15 @@ else
       def test_create_table_already_exists
         assert_nothing_raised { client.create_table('your_table_name') }
       end
+
+      def test_create_partitioned_table
+        client.delete_table('your_table_name')
+        assert_nothing_raised do
+          client.create_table('your_table_name$20160929', options:{
+            'time_partitioning' => {'type'=>'DAY'}
+          })
+        end
+      end
     end
 
     sub_test_case "delete_table" do
@@ -116,6 +125,11 @@ else
       def test_delete_table_not_found
         assert_nothing_raised { client.delete_table('your_table_name') }
       end
+
+      def test_delete_partitioned_table
+        client.create_table('your_table_name')
+        assert_nothing_raised { client.delete_table('your_table_name$20160929') }
+      end
     end
 
     sub_test_case "get_table" do
@@ -130,6 +144,33 @@ else
           client.get_table('your_table_name')
         }
       end
+
+      def test_get_partitioned_table
+        client.create_table('your_table_name')
+        assert_nothing_raised { client.get_table('your_table_name$20160929') }
+      end
+    end
+
+    sub_test_case "delete_partition" do
+      def test_delete_partition
+        client.create_table('your_table_name$20160929', options:{
+          'time_partitioning' => {'type'=>'DAY'}
+        })
+        assert_nothing_raised { client.delete_partition('your_table_name$20160929') }
+      ensure
+        client.delete_table('your_table_name')
+      end
+
+      def test_delete_partition_of_non_partitioned_table
+        client.create_table('your_table_name')
+        assert_raise { client.delete_partition('your_table_name$20160929') }
+      ensure
+        client.delete_table('your_table_name')
+      end
+
+      def test_delete_partition_table_not_found
+        assert_nothing_raised { client.delete_partition('your_table_name$20160929') }
+      end
     end
 
     sub_test_case "fields" do
data/test/test_configure.rb
CHANGED
@@ -84,6 +84,7 @@ module Embulk
       assert_equal "UTF-8", task['encoding']
       assert_equal false, task['ignore_unknown_values']
       assert_equal false, task['allow_quoted_newlines']
+      assert_equal nil, task['time_partitioning']
       assert_equal false, task['skip_load']
     end
 
@@ -249,6 +250,22 @@ module Embulk
       task = Bigquery.configure(config, schema, processor_count)
       assert_equal '.foo', task['file_ext']
     end
+
+    def test_time_partitioning
+      config = least_config.merge('time_partitioning' => {'type' => 'DAY'})
+      assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
+
+      config = least_config.merge('time_partitioning' => {'foo' => 'bar'})
+      assert_raise { Bigquery.configure(config, schema, processor_count) }
+
+      config = least_config.merge('table' => 'table')
+      task = Bigquery.configure(config, schema, processor_count)
+      assert_equal nil, task['time_partitioning']
+
+      config = least_config.merge('table' => 'table_name$20160912')
+      task = Bigquery.configure(config, schema, processor_count)
+      assert_equal 'DAY', task['time_partitioning']['type']
+    end
   end
 end
 end
data/test/test_example.rb
CHANGED
@@ -18,19 +18,28 @@ else
     end
   end
 
-
-
-
+  def embulk_run(config_path)
+    Bundler.with_clean_env do
+      cmd = "#{embulk_path} run -X page_size=1 -b . -l trace #{config_path}"
+      puts "=" * 64
+      puts cmd
+      system(cmd)
+    end
+  end
+
+  files = Dir.glob("#{APP_ROOT}/example/config_*.yml").reject {|file| File.symlink?(file) }.sort
   files.each do |config_path|
-
-
-
-
-
-
-
+    if %w[
+      config_expose_errors.yml
+      config_prevent_duplicate_insert.yml
+    ].include?(File.basename(config_path))
+      define_method(:"test_#{File.basename(config_path, ".yml")}") do
+        assert_false embulk_run(config_path)
+      end
+    else
+      define_method(:"test_#{File.basename(config_path, ".yml")}") do
+        assert_true embulk_run(config_path)
       end
-      assert_true success
     end
   end
 end
data/test/test_helper.rb
CHANGED
@@ -14,6 +14,16 @@ module Embulk
     end
   end
 
+  def has_partition_decorator?
+    assert_true Helper.has_partition_decorator?('table$20160929')
+    assert_false Helper.has_partition_decorator?('table')
+  end
+
+  def chomp_partition_decorator
+    assert_equal 'table', Helper.chomp_partition_decorator?('table$20160929')
+    assert_equal 'table', Helper.chomp_partition_decorator?('table')
+  end
+
   def bq_type_from_embulk_type
     assert_equal 'BOOLEAN', Helper.bq_type_from_embulk_type(:boolean)
     assert_equal 'STRING', Helper.bq_type_from_embulk_type(:string)
data/test/test_transaction.rb
CHANGED
@@ -8,10 +8,12 @@ module Embulk
   class TestTransaction < Test::Unit::TestCase
     def least_config
       DataSource.new({
-        'project'
-        'dataset'
-        'table'
+        'project' => 'your_project_name',
+        'dataset' => 'your_dataset_name',
+        'table' => 'your_table_name',
         'p12_keyfile' => __FILE__, # fake
+        'temp_table' => 'temp_table', # randomly created is not good for our test
+        'path_prefix' => 'tmp/', # randomly created is not good for our test
       })
     end
 
@@ -38,17 +40,6 @@ module Embulk
       stub(Bigquery).transaction_report { {'num_input_rows' => 1, 'num_output_rows' => 1, 'num_rejected_rows' => 0} }
     end
 
-    def test_append
-      config = least_config.merge('mode' => 'append', 'temp_table' => 'temp_table')
-      any_instance_of(BigqueryClient) do |obj|
-        mock(obj).get_dataset(config['dataset'])
-        mock(obj).create_table(config['temp_table'])
-        mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
-        mock(obj).delete_table(config['temp_table'])
-      end
-      Bigquery.transaction(config, schema, processor_count, &control)
-    end
-
     sub_test_case "append_direct" do
       def test_append_direct
         config = least_config.merge('mode' => 'append_direct')
@@ -61,43 +52,108 @@ module Embulk
 
      def test_append_direct_with_auto_create
        config = least_config.merge('mode' => 'append_direct', 'auto_create_dataset' => true, 'auto_create_table' => true)
+        task = Bigquery.configure(config, schema, processor_count)
        any_instance_of(BigqueryClient) do |obj|
          mock(obj).create_dataset(config['dataset'])
-          mock(obj).create_table(config['table'])
+          mock(obj).create_table(config['table'], options: task)
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_append_direct_with_partition
+        config = least_config.merge('mode' => 'append_direct', 'table' => 'table$20160929')
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).get_table(config['table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_append_direct_with_partition_with_auto_create
+        config = least_config.merge('mode' => 'append_direct', 'table' => 'table$20160929', 'auto_create_dataset' => true, 'auto_create_table' => true)
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).create_dataset(config['dataset'])
+          mock(obj).create_table(config['table'], options: task)
        end
        Bigquery.transaction(config, schema, processor_count, &control)
      end
    end
 
-
-
-
-
-
-
+    sub_test_case "delete_in_advance" do
+      def test_delete_in_advance
+        config = least_config.merge('mode' => 'delete_in_advance')
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).delete_table(config['table'])
+          mock(obj).create_table(config['table'], options: task)
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_delete_in_advance_with_partitioning
+        config = least_config.merge('mode' => 'delete_in_advance', 'table' => 'table$20160929')
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).delete_partition(config['table'])
+          mock(obj).create_table(config['table'], options: task)
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
      end
-      Bigquery.transaction(config, schema, processor_count, &control)
    end
 
-
-
-
-
-
-
-
+    sub_test_case "replace" do
+      def test_replace
+        config = least_config.merge('mode' => 'replace')
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_replace_with_partitioning
+        config = least_config.merge('mode' => 'replace', 'table' => 'table$20160929')
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).get_table(config['table'])
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_replace_with_partitioning_with_auto_create_table
+        config = least_config.merge('mode' => 'replace', 'table' => 'table$20160929', 'auto_create_table' => true)
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).create_table(config['table'], options: task)
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
      end
-      Bigquery.transaction(config, schema, processor_count, &control)
    end
 
    sub_test_case "replace_backup" do
      def test_replace_backup
        config = least_config.merge('mode' => 'replace_backup', 'dataset_old' => 'dataset_old', 'table_old' => 'table_old', 'temp_table' => 'temp_table')
+        task = Bigquery.configure(config, schema, processor_count)
        any_instance_of(BigqueryClient) do |obj|
          mock(obj).get_dataset(config['dataset'])
          mock(obj).get_dataset(config['dataset_old'])
-          mock(obj).create_table(config['temp_table'])
+          mock(obj).create_table(config['temp_table'], options: task)
 
+          mock(obj).get_table(task['table'])
          mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -108,11 +164,51 @@ module Embulk
 
      def test_replace_backup_auto_create_dataset
        config = least_config.merge('mode' => 'replace_backup', 'dataset_old' => 'dataset_old', 'table_old' => 'table_old', 'temp_table' => 'temp_table', 'auto_create_dataset' => true)
+        task = Bigquery.configure(config, schema, processor_count)
        any_instance_of(BigqueryClient) do |obj|
          mock(obj).create_dataset(config['dataset'])
          mock(obj).create_dataset(config['dataset_old'], reference: config['dataset'])
-          mock(obj).create_table(config['temp_table'])
+          mock(obj).create_table(config['temp_table'], options: task)
 
+          mock(obj).get_table(task['table'])
+          mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
+
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_replace_backup_with_partitioning
+        config = least_config.merge('mode' => 'replace_backup', 'table' => 'table$20160929', 'dataset_old' => 'dataset_old', 'table_old' => 'table_old$20190929', 'temp_table' => 'temp_table')
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).get_dataset(config['dataset_old'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).get_table(task['table'])
+          mock(obj).get_table(task['table_old'], dataset: config['dataset_old'])
+
+          mock(obj).get_table(task['table'])
+          mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
+
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_replace_backup_with_partitioning_auto_create_table
+        config = least_config.merge('mode' => 'replace_backup', 'table' => 'table$20160929', 'dataset_old' => 'dataset_old', 'table_old' => 'table_old$20160929', 'temp_table' => 'temp_table', 'auto_create_table' => true)
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).get_dataset(config['dataset_old'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).create_table(task['table'], options: task)
+          mock(obj).create_table(task['table_old'], dataset: config['dataset_old'], options: task)
+
+          mock(obj).get_table(task['table'])
          mock(obj).copy(config['table'], config['table_old'], config['dataset_old'])
 
          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_TRUNCATE')
@@ -121,6 +217,47 @@ module Embulk
        Bigquery.transaction(config, schema, processor_count, &control)
      end
    end
+
+    sub_test_case "append" do
+      def test_append
+        config = least_config.merge('mode' => 'append')
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_append_with_partitioning
+        config = least_config.merge('mode' => 'append', 'table' => 'table$20160929')
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).get_table(config['table'])
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+
+      def test_append_with_partitioning_with_auto_create_table
+        config = least_config.merge('mode' => 'append', 'table' => 'table$20160929', 'auto_create_table' => true)
+        task = Bigquery.configure(config, schema, processor_count)
+        any_instance_of(BigqueryClient) do |obj|
+          mock(obj).get_dataset(config['dataset'])
+          mock(obj).create_table(config['temp_table'], options: task)
+          mock(obj).create_table(config['table'], options: task)
+          mock(obj).copy(config['temp_table'], config['table'], write_disposition: 'WRITE_APPEND')
+          mock(obj).delete_table(config['temp_table'])
+        end
+        Bigquery.transaction(config, schema, processor_count, &control)
+      end
+    end
+
   end
 end
 end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.3.7
+  version: 0.4.0
 platform: ruby
 authors:
 - Satoshi Akama
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-
+date: 2016-10-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: google-api-client
@@ -100,6 +100,7 @@ files:
 - example/config_client_options.yml
 - example/config_csv.yml
 - example/config_delete_in_advance.yml
+- example/config_delete_in_advance_partitioned_table.yml
 - example/config_expose_errors.yml
 - example/config_gcs.yml
 - example/config_guess_from_embulk_schema.yml
@@ -114,8 +115,11 @@ files:
 - example/config_payload_column.yml
 - example/config_payload_column_index.yml
 - example/config_prevent_duplicate_insert.yml
+- example/config_progress_log_interval.yml
 - example/config_replace.yml
 - example/config_replace_backup.yml
+- example/config_replace_backup_paritioned_table.yml
+- example/config_replace_paritioned_table.yml
 - example/config_skip_file_generation.yml
 - example/config_table_strftime.yml
 - example/config_template_table.yml