embulk-output-bigquery 0.5.0 → 0.6.4

This diff compares the contents of two publicly released versions of the package as published to their registry. It is provided for informational purposes only and reflects the packages as they appear in that registry.
Files changed (58)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +29 -4
  3. data/README.md +71 -42
  4. data/embulk-output-bigquery.gemspec +10 -3
  5. data/lib/embulk/output/bigquery.rb +11 -20
  6. data/lib/embulk/output/bigquery/auth.rb +35 -0
  7. data/lib/embulk/output/bigquery/google_client.rb +3 -34
  8. data/lib/embulk/output/bigquery/value_converter_factory.rb +31 -0
  9. data/test/test_bigquery_client.rb +1 -5
  10. data/test/test_configure.rb +4 -12
  11. data/test/test_helper.rb +7 -1
  12. data/test/test_transaction.rb +5 -6
  13. data/test/test_value_converter_factory.rb +86 -0
  14. metadata +29 -51
  15. data/example/config_append_direct_schema_update_options.yml +0 -31
  16. data/example/config_client_options.yml +0 -33
  17. data/example/config_csv.yml +0 -30
  18. data/example/config_delete_in_advance.yml +0 -29
  19. data/example/config_delete_in_advance_field_partitioned_table.yml +0 -33
  20. data/example/config_delete_in_advance_partitioned_table.yml +0 -33
  21. data/example/config_expose_errors.yml +0 -30
  22. data/example/config_gcs.yml +0 -32
  23. data/example/config_guess_from_embulk_schema.yml +0 -29
  24. data/example/config_guess_with_column_options.yml +0 -40
  25. data/example/config_gzip.yml +0 -1
  26. data/example/config_jsonl.yml +0 -1
  27. data/example/config_max_threads.yml +0 -34
  28. data/example/config_min_ouput_tasks.yml +0 -34
  29. data/example/config_mode_append.yml +0 -30
  30. data/example/config_mode_append_direct.yml +0 -30
  31. data/example/config_nested_record.yml +0 -1
  32. data/example/config_payload_column.yml +0 -20
  33. data/example/config_payload_column_index.yml +0 -20
  34. data/example/config_progress_log_interval.yml +0 -31
  35. data/example/config_replace.yml +0 -30
  36. data/example/config_replace_backup.yml +0 -32
  37. data/example/config_replace_backup_field_partitioned_table.yml +0 -34
  38. data/example/config_replace_backup_partitioned_table.yml +0 -34
  39. data/example/config_replace_field_partitioned_table.yml +0 -33
  40. data/example/config_replace_partitioned_table.yml +0 -33
  41. data/example/config_replace_schema_update_options.yml +0 -33
  42. data/example/config_skip_file_generation.yml +0 -32
  43. data/example/config_table_strftime.yml +0 -30
  44. data/example/config_template_table.yml +0 -21
  45. data/example/config_uncompressed.yml +0 -1
  46. data/example/config_with_rehearsal.yml +0 -33
  47. data/example/example.csv +0 -17
  48. data/example/example.yml +0 -1
  49. data/example/example2_1.csv +0 -1
  50. data/example/example2_2.csv +0 -1
  51. data/example/example4_1.csv +0 -1
  52. data/example/example4_2.csv +0 -1
  53. data/example/example4_3.csv +0 -1
  54. data/example/example4_4.csv +0 -1
  55. data/example/json_key.json +0 -12
  56. data/example/nested_example.jsonl +0 -16
  57. data/example/schema.json +0 -30
  58. data/example/schema_expose_errors.json +0 -30
data/lib/embulk/output/bigquery/value_converter_factory.rb CHANGED
@@ -203,6 +203,27 @@ module Embulk
  val # Users must care of BQ timestamp format
  }
  end
+ when 'DATE'
+ Proc.new {|val|
+ next nil if val.nil?
+ with_typecast_error(val) do |val|
+ TimeWithZone.set_zone_offset(Time.parse(val), zone_offset).strftime("%Y-%m-%d")
+ end
+ }
+ when 'DATETIME'
+ if @timestamp_format
+ Proc.new {|val|
+ next nil if val.nil?
+ with_typecast_error(val) do |val|
+ Time.strptime(val, @timestamp_format).strftime("%Y-%m-%d %H:%M:%S.%6N")
+ end
+ }
+ else
+ Proc.new {|val|
+ next nil if val.nil?
+ val # Users must care of BQ timestamp format
+ }
+ end
  when 'RECORD'
  Proc.new {|val|
  next nil if val.nil?
@@ -240,6 +261,16 @@ module Embulk
  next nil if val.nil?
  val.strftime("%Y-%m-%d %H:%M:%S.%6N %:z")
  }
+ when 'DATE'
+ Proc.new {|val|
+ next nil if val.nil?
+ val.localtime(zone_offset).strftime("%Y-%m-%d")
+ }
+ when 'DATETIME'
+ Proc.new {|val|
+ next nil if val.nil?
+ val.localtime(zone_offset).strftime("%Y-%m-%d %H:%M:%S.%6N")
+ }
  else
  raise NotSupportedType, "cannot take column type #{type} for timestamp column"
  end
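The two hunks above are the core of the new DATE/DATETIME support. As a rough, standalone illustration only (the helper name below is invented, and the plugin's TimeWithZone.set_zone_offset and with_typecast_error helpers are replaced with plain Time#parse and Time#localtime), the string-to-DATE path boils down to:

require 'time'

# Hypothetical helper approximating the added string-to-DATE converter:
# nil passes through, anything else is parsed, shifted to the configured
# zone offset, and formatted as YYYY-MM-DD.
def string_to_bq_date(val, zone_offset)
  return nil if val.nil?
  Time.parse(val).localtime(zone_offset).strftime("%Y-%m-%d")
end

p string_to_bq_date(nil, "+00:00")                           # => nil
p string_to_bq_date("2016-02-25 15:00:00 +00:00", "+09:00")  # => "2016-02-26"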
data/test/test_bigquery_client.rb CHANGED
@@ -32,7 +32,7 @@ else
  'dataset' => 'your_dataset_name',
  'table' => 'your_table_name',
  'auth_method' => 'json_key',
- 'json_keyfile' => JSON_KEYFILE,
+ 'json_keyfile' => File.read(JSON_KEYFILE),
  'retries' => 3,
  'timeout_sec' => 300,
  'open_timeout_sec' => 300,
@@ -61,10 +61,6 @@ else
  def test_json_keyfile
  assert_nothing_raised { BigqueryClient.new(least_task, schema).client }
  end
-
- def test_p12_keyfile
- # pending
- end
  end

  sub_test_case "create_dataset" do
data/test/test_configure.rb CHANGED
@@ -18,10 +18,9 @@ module Embulk

  def least_config
  DataSource.new({
- 'project' => 'your_project_name',
- 'dataset' => 'your_dataset_name',
- 'table' => 'your_table_name',
- 'p12_keyfile' => __FILE__, # fake
+ 'project' => 'your_project_name',
+ 'dataset' => 'your_dataset_name',
+ 'table' => 'your_table_name',
  })
  end

@@ -43,9 +42,7 @@ module Embulk
  def test_configure_default
  task = Bigquery.configure(least_config, schema, processor_count)
  assert_equal "append", task['mode']
- assert_equal "private_key", task['auth_method']
- assert_equal nil, task['service_account_email']
- assert_equal __FILE__, task['p12_keyfile']
+ assert_equal "application_default", task['auth_method']
  assert_equal nil, task['json_keyfile']
  assert_equal "your_project_name", task['project']
  assert_equal "your_dataset_name", task['dataset']
@@ -132,11 +129,6 @@ module Embulk
  config = least_config.merge('auth_method' => 'foobar')
  assert_raise { Bigquery.configure(config, schema, processor_count) }

- config = least_config.merge('auth_method' => 'private_key').tap {|h| h.delete('p12_keyfile') }
- assert_raise { Bigquery.configure(config, schema, processor_count) }
- config = least_config.merge('auth_method' => 'private_key', 'p12_keyfile' => 'dummy')
- assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
-
  config = least_config.merge('auth_method' => 'json_key').tap {|h| h.delete('json_keyfile') }
  assert_raise { Bigquery.configure(config, schema, processor_count) }
  config = least_config.merge('auth_method' => 'json_key', 'json_keyfile' => "#{EXAMPLE_ROOT}/json_key.json")
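The configure tests above capture the 0.6.x authentication changes: the private_key/p12 path is gone, the default auth_method is now application_default, and json_key auth is exercised with the key file's content rather than its path. A minimal sketch of the two task configs implied by these tests (placeholder project/dataset/table names, hypothetical local variable names), not the plugin's own code:

# Default in 0.6.x: no keyfile, Application Default Credentials are used.
default_auth_config = {
  'project' => 'your_project_name',
  'dataset' => 'your_dataset_name',
  'table'   => 'your_table_name',
  # 'auth_method' defaults to 'application_default'
}

# Explicit JSON key auth, passing the key content as the updated test does.
json_key_path = 'example/your-project-000.json'  # placeholder path
json_key_auth_config = default_auth_config.merge(
  'auth_method'  => 'json_key',
  'json_keyfile' => (File.read(json_key_path) if File.exist?(json_key_path))
)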
data/test/test_helper.rb CHANGED
@@ -62,7 +62,9 @@ module Embulk
  Column.new({index: 2, name: 'double', type: :double}),
  Column.new({index: 3, name: 'string', type: :string}),
  Column.new({index: 4, name: 'timestamp', type: :timestamp}),
- Column.new({index: 5, name: 'json', type: :json}),
+ Column.new({index: 5, name: 'date', type: :timestamp}),
+ Column.new({index: 6, name: 'datetime', type: :timestamp}),
+ Column.new({index: 7, name: 'json', type: :json}),
  ])
  task = {
  'column_options' => [
@@ -71,6 +73,8 @@ module Embulk
  {'name' => 'double', 'type' => 'STRING'},
  {'name' => 'string', 'type' => 'INTEGER'},
  {'name' => 'timestamp', 'type' => 'INTEGER'},
+ {'name' => 'date', 'type' => 'DATE'},
+ {'name' => 'datetime', 'type' => 'DATETIME'},
  {'name' => 'json', 'type' => 'RECORD', 'fields' => [
  { 'name' => 'key1', 'type' => 'STRING' },
  ]},
@@ -82,6 +86,8 @@ module Embulk
  {name: 'double', type: 'STRING'},
  {name: 'string', type: 'INTEGER'},
  {name: 'timestamp', type: 'INTEGER'},
+ {name: 'date', type: 'DATE'},
+ {name: 'datetime', type: 'DATETIME'},
  {name: 'json', type: 'RECORD', fields: [
  {name: 'key1', type: 'STRING'},
  ]},
data/test/test_transaction.rb CHANGED
@@ -8,12 +8,11 @@ module Embulk
  class TestTransaction < Test::Unit::TestCase
  def least_config
  DataSource.new({
- 'project' => 'your_project_name',
- 'dataset' => 'your_dataset_name',
- 'table' => 'your_table_name',
- 'p12_keyfile' => __FILE__, # fake
- 'temp_table' => 'temp_table', # randomly created is not good for our test
- 'path_prefix' => 'tmp/', # randomly created is not good for our test
+ 'project' => 'your_project_name',
+ 'dataset' => 'your_dataset_name',
+ 'table' => 'your_table_name',
+ 'temp_table' => 'temp_table', # randomly created is not good for our test
+ 'path_prefix' => 'tmp/', # randomly created is not good for our test
  })
  end

data/test/test_value_converter_factory.rb CHANGED
@@ -90,6 +90,14 @@ module Embulk
  assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'TIMESTAMP').create_converter }
  end

+ def test_date
+ assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
+ end
+
+ def test_datetime
+ assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
+ end
+
  def test_record
  assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
  end
@@ -130,6 +138,14 @@ module Embulk
  assert_equal 1408452095, converter.call(1408452095)
  end

+ def test_date
+ assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
+ end
+
+ def test_datetime
+ assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
+ end
+
  def test_record
  assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
  end
@@ -166,6 +182,14 @@ module Embulk
  assert_equal 1408452095.188766, converter.call(1408452095.188766)
  end

+ def test_date
+ assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
+ end
+
+ def test_datetime
+ assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
+ end
+
  def test_record
  assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
  end
@@ -216,6 +240,28 @@ module Embulk
  assert_equal "2016-02-26 00:00:00", converter.call("2016-02-26 00:00:00")
  end

+ def test_date
+ converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter
+ assert_equal nil, converter.call(nil)
+ assert_equal "2016-02-26", converter.call("2016-02-26")
+ assert_equal "2016-02-26", converter.call("2016-02-26 00:00:00")
+ assert_raise { converter.call('foo') }
+ end
+
+ def test_datetime
+ converter = ValueConverterFactory.new(
+ SCHEMA_TYPE, 'DATETIME',
+ timestamp_format: '%Y/%m/%d'
+ ).create_converter
+ assert_equal nil, converter.call(nil)
+ assert_equal "2016-02-26 00:00:00.000000", converter.call("2016/02/26")
+
+ # Users must care of BQ datetime format by themselves with no timestamp_format
+ converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter
+ assert_equal nil, converter.call(nil)
+ assert_equal "2016-02-26 00:00:00", converter.call("2016-02-26 00:00:00")
+ end
+
  def test_record
  converter = ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter
  assert_equal({'foo'=>'foo'}, converter.call(%Q[{"foo":"foo"}]))
@@ -268,6 +314,42 @@ module Embulk
  assert_equal expected, converter.call(Time.at(subject).utc)
  end

+ def test_date
+ converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter
+ assert_equal nil, converter.call(nil)
+ timestamp = Time.parse("2016-02-26 00:00:00.500000 +00:00")
+ expected = "2016-02-26"
+ assert_equal expected, converter.call(timestamp)
+
+ converter = ValueConverterFactory.new(
+ SCHEMA_TYPE, 'DATE', timezone: 'Asia/Tokyo'
+ ).create_converter
+ assert_equal nil, converter.call(nil)
+ timestamp = Time.parse("2016-02-25 15:00:00.500000 +00:00")
+ expected = "2016-02-26"
+ assert_equal expected, converter.call(timestamp)
+
+ assert_raise { converter.call('foo') }
+ end
+
+ def test_datetime
+ converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter
+ assert_equal nil, converter.call(nil)
+ timestamp = Time.parse("2016-02-26 00:00:00.500000 +00:00")
+ expected = "2016-02-26 00:00:00.500000"
+ assert_equal expected, converter.call(timestamp)
+
+ converter = ValueConverterFactory.new(
+ SCHEMA_TYPE, 'DATETIME', timezone: 'Asia/Tokyo'
+ ).create_converter
+ assert_equal nil, converter.call(nil)
+ timestamp = Time.parse("2016-02-25 15:00:00.500000 +00:00")
+ expected = "2016-02-26 00:00:00.500000"
+ assert_equal expected, converter.call(timestamp)
+
+ assert_raise { converter.call('foo') }
+ end
+
  def test_record
  assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
  end
@@ -298,6 +380,10 @@ module Embulk
  assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'TIMESTAMP').create_converter }
  end

+ def test_date
+ assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
+ end
+
  def test_record
  converter = ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter
  assert_equal nil, converter.call(nil)
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: embulk-output-bigquery
  version: !ruby/object:Gem::Version
- version: 0.5.0
+ version: 0.6.4
  platform: ruby
  authors:
  - Satoshi Akama
@@ -9,22 +9,42 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-08-10 00:00:00.000000000 Z
+ date: 2019-11-06 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ">="
+ - - "~>"
  - !ruby/object:Gem::Version
- version: '0'
+ version: '0.7'
+ - - "<"
+ - !ruby/object:Gem::Version
+ version: 0.12.0
+ name: signet
+ prerelease: false
+ type: :runtime
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: '0.7'
+ - - "<"
+ - !ruby/object:Gem::Version
+ version: 0.12.0
+ - !ruby/object:Gem::Dependency
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - "<"
+ - !ruby/object:Gem::Version
+ version: 0.33.0
  name: google-api-client
  prerelease: false
  type: :runtime
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ">="
+ - - "<"
  - !ruby/object:Gem::Version
- version: '0'
+ version: 0.33.0
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
  requirements:
@@ -83,51 +103,8 @@ files:
  - README.md
  - Rakefile
  - embulk-output-bigquery.gemspec
- - example/config_append_direct_schema_update_options.yml
- - example/config_client_options.yml
- - example/config_csv.yml
- - example/config_delete_in_advance.yml
- - example/config_delete_in_advance_field_partitioned_table.yml
- - example/config_delete_in_advance_partitioned_table.yml
- - example/config_expose_errors.yml
- - example/config_gcs.yml
- - example/config_guess_from_embulk_schema.yml
- - example/config_guess_with_column_options.yml
- - example/config_gzip.yml
- - example/config_jsonl.yml
- - example/config_max_threads.yml
- - example/config_min_ouput_tasks.yml
- - example/config_mode_append.yml
- - example/config_mode_append_direct.yml
- - example/config_nested_record.yml
- - example/config_payload_column.yml
- - example/config_payload_column_index.yml
- - example/config_progress_log_interval.yml
- - example/config_replace.yml
- - example/config_replace_backup.yml
- - example/config_replace_backup_field_partitioned_table.yml
- - example/config_replace_backup_partitioned_table.yml
- - example/config_replace_field_partitioned_table.yml
- - example/config_replace_partitioned_table.yml
- - example/config_replace_schema_update_options.yml
- - example/config_skip_file_generation.yml
- - example/config_table_strftime.yml
- - example/config_template_table.yml
- - example/config_uncompressed.yml
- - example/config_with_rehearsal.yml
- - example/example.csv
- - example/example.yml
- - example/example2_1.csv
- - example/example2_2.csv
- - example/example4_1.csv
- - example/example4_2.csv
- - example/example4_3.csv
- - example/example4_4.csv
- - example/json_key.json
- - example/nested_example.jsonl
- - example/schema.json
- - example/schema_expose_errors.json
  - lib/embulk/output/bigquery.rb
+ - lib/embulk/output/bigquery/auth.rb
  - lib/embulk/output/bigquery/bigquery_client.rb
  - lib/embulk/output/bigquery/file_writer.rb
  - lib/embulk/output/bigquery/gcs_client.rb
@@ -161,7 +138,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.0.3
+ rubyforge_project:
+ rubygems_version: 2.6.14.1
  signing_key:
  specification_version: 4
  summary: Google BigQuery output plugin for Embulk
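Translated back into gemspec terms (a sketch of the two changed runtime dependencies only, not the gem's actual gemspec file), the dependency pins recorded in the metadata above read roughly as:

Gem::Specification.new do |spec|
  spec.name    = 'embulk-output-bigquery'
  spec.version = '0.6.4'

  # signet is newly pinned: at least 0.7 but below 0.12.0.
  spec.add_runtime_dependency 'signet', '~> 0.7', '< 0.12.0'

  # google-api-client moves from unconstrained (>= 0) to capped below 0.33.0.
  spec.add_runtime_dependency 'google-api-client', '< 0.33.0'
end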
data/example/config_append_direct_schema_update_options.yml DELETED
@@ -1,31 +0,0 @@
- in:
- type: file
- path_prefix: example/example.csv
- parser:
- type: csv
- charset: UTF-8
- newline: CRLF
- null_string: 'NULL'
- skip_header_lines: 1
- comment_line_marker: '#'
- columns:
- - {name: date, type: string}
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
- - {name: "null", type: string}
- - {name: long, type: long}
- - {name: string, type: string}
- - {name: double, type: double}
- - {name: boolean, type: boolean}
- out:
- type: bigquery
- mode: append_direct
- auth_method: json_key
- json_keyfile: example/your-project-000.json
- dataset: your_dataset_name
- table: your_table_name
- source_format: NEWLINE_DELIMITED_JSON
- compression: NONE
- auto_create_dataset: true
- auto_create_table: true
- schema_file: example/schema.json
- schema_update_options: [ALLOW_FIELD_ADDITION, ALLOW_FIELD_RELAXATION]
data/example/config_client_options.yml DELETED
@@ -1,33 +0,0 @@
- in:
- type: file
- path_prefix: example/example.csv
- parser:
- type: csv
- charset: UTF-8
- newline: CRLF
- null_string: 'NULL'
- skip_header_lines: 1
- comment_line_marker: '#'
- columns:
- - {name: date, type: string}
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
- - {name: "null", type: string}
- - {name: long, type: long}
- - {name: string, type: string}
- - {name: double, type: double}
- - {name: boolean, type: boolean}
- out:
- type: bigquery
- mode: replace
- auth_method: json_key
- json_keyfile: example/your-project-000.json
- dataset: your_dataset_name
- table: your_table_name
- source_format: NEWLINE_DELIMITED_JSON
- auto_create_dataset: true
- auto_create_table: true
- schema_file: example/schema.json
- timeout_sec: 400
- open_timeout_sec: 400
- retries: 2
- application_name: "Embulk BigQuery plugin test"