embulk-output-bigquery 0.5.0 → 0.6.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +29 -4
  3. data/README.md +71 -42
  4. data/embulk-output-bigquery.gemspec +10 -3
  5. data/lib/embulk/output/bigquery.rb +11 -20
  6. data/lib/embulk/output/bigquery/auth.rb +35 -0
  7. data/lib/embulk/output/bigquery/google_client.rb +3 -34
  8. data/lib/embulk/output/bigquery/value_converter_factory.rb +31 -0
  9. data/test/test_bigquery_client.rb +1 -5
  10. data/test/test_configure.rb +4 -12
  11. data/test/test_helper.rb +7 -1
  12. data/test/test_transaction.rb +5 -6
  13. data/test/test_value_converter_factory.rb +86 -0
  14. metadata +29 -51
  15. data/example/config_append_direct_schema_update_options.yml +0 -31
  16. data/example/config_client_options.yml +0 -33
  17. data/example/config_csv.yml +0 -30
  18. data/example/config_delete_in_advance.yml +0 -29
  19. data/example/config_delete_in_advance_field_partitioned_table.yml +0 -33
  20. data/example/config_delete_in_advance_partitioned_table.yml +0 -33
  21. data/example/config_expose_errors.yml +0 -30
  22. data/example/config_gcs.yml +0 -32
  23. data/example/config_guess_from_embulk_schema.yml +0 -29
  24. data/example/config_guess_with_column_options.yml +0 -40
  25. data/example/config_gzip.yml +0 -1
  26. data/example/config_jsonl.yml +0 -1
  27. data/example/config_max_threads.yml +0 -34
  28. data/example/config_min_ouput_tasks.yml +0 -34
  29. data/example/config_mode_append.yml +0 -30
  30. data/example/config_mode_append_direct.yml +0 -30
  31. data/example/config_nested_record.yml +0 -1
  32. data/example/config_payload_column.yml +0 -20
  33. data/example/config_payload_column_index.yml +0 -20
  34. data/example/config_progress_log_interval.yml +0 -31
  35. data/example/config_replace.yml +0 -30
  36. data/example/config_replace_backup.yml +0 -32
  37. data/example/config_replace_backup_field_partitioned_table.yml +0 -34
  38. data/example/config_replace_backup_partitioned_table.yml +0 -34
  39. data/example/config_replace_field_partitioned_table.yml +0 -33
  40. data/example/config_replace_partitioned_table.yml +0 -33
  41. data/example/config_replace_schema_update_options.yml +0 -33
  42. data/example/config_skip_file_generation.yml +0 -32
  43. data/example/config_table_strftime.yml +0 -30
  44. data/example/config_template_table.yml +0 -21
  45. data/example/config_uncompressed.yml +0 -1
  46. data/example/config_with_rehearsal.yml +0 -33
  47. data/example/example.csv +0 -17
  48. data/example/example.yml +0 -1
  49. data/example/example2_1.csv +0 -1
  50. data/example/example2_2.csv +0 -1
  51. data/example/example4_1.csv +0 -1
  52. data/example/example4_2.csv +0 -1
  53. data/example/example4_3.csv +0 -1
  54. data/example/example4_4.csv +0 -1
  55. data/example/json_key.json +0 -12
  56. data/example/nested_example.jsonl +0 -16
  57. data/example/schema.json +0 -30
  58. data/example/schema_expose_errors.json +0 -30
@@ -203,6 +203,27 @@ module Embulk
203
203
  val # Users must care of BQ timestamp format
204
204
  }
205
205
  end
206
+ when 'DATE'
207
+ Proc.new {|val|
208
+ next nil if val.nil?
209
+ with_typecast_error(val) do |val|
210
+ TimeWithZone.set_zone_offset(Time.parse(val), zone_offset).strftime("%Y-%m-%d")
211
+ end
212
+ }
213
+ when 'DATETIME'
214
+ if @timestamp_format
215
+ Proc.new {|val|
216
+ next nil if val.nil?
217
+ with_typecast_error(val) do |val|
218
+ Time.strptime(val, @timestamp_format).strftime("%Y-%m-%d %H:%M:%S.%6N")
219
+ end
220
+ }
221
+ else
222
+ Proc.new {|val|
223
+ next nil if val.nil?
224
+ val # Users must care of BQ timestamp format
225
+ }
226
+ end
206
227
  when 'RECORD'
207
228
  Proc.new {|val|
208
229
  next nil if val.nil?
@@ -240,6 +261,16 @@ module Embulk
240
261
  next nil if val.nil?
241
262
  val.strftime("%Y-%m-%d %H:%M:%S.%6N %:z")
242
263
  }
264
+ when 'DATE'
265
+ Proc.new {|val|
266
+ next nil if val.nil?
267
+ val.localtime(zone_offset).strftime("%Y-%m-%d")
268
+ }
269
+ when 'DATETIME'
270
+ Proc.new {|val|
271
+ next nil if val.nil?
272
+ val.localtime(zone_offset).strftime("%Y-%m-%d %H:%M:%S.%6N")
273
+ }
243
274
  else
244
275
  raise NotSupportedType, "cannot take column type #{type} for timestamp column"
245
276
  end
@@ -32,7 +32,7 @@ else
32
32
  'dataset' => 'your_dataset_name',
33
33
  'table' => 'your_table_name',
34
34
  'auth_method' => 'json_key',
35
- 'json_keyfile' => JSON_KEYFILE,
35
+ 'json_keyfile' => File.read(JSON_KEYFILE),
36
36
  'retries' => 3,
37
37
  'timeout_sec' => 300,
38
38
  'open_timeout_sec' => 300,
@@ -61,10 +61,6 @@ else
61
61
  def test_json_keyfile
62
62
  assert_nothing_raised { BigqueryClient.new(least_task, schema).client }
63
63
  end
64
-
65
- def test_p12_keyfile
66
- # pending
67
- end
68
64
  end
69
65
 
70
66
  sub_test_case "create_dataset" do
@@ -18,10 +18,9 @@ module Embulk
18
18
 
19
19
  def least_config
20
20
  DataSource.new({
21
- 'project' => 'your_project_name',
22
- 'dataset' => 'your_dataset_name',
23
- 'table' => 'your_table_name',
24
- 'p12_keyfile' => __FILE__, # fake
21
+ 'project' => 'your_project_name',
22
+ 'dataset' => 'your_dataset_name',
23
+ 'table' => 'your_table_name',
25
24
  })
26
25
  end
27
26
 
@@ -43,9 +42,7 @@ module Embulk
43
42
  def test_configure_default
44
43
  task = Bigquery.configure(least_config, schema, processor_count)
45
44
  assert_equal "append", task['mode']
46
- assert_equal "private_key", task['auth_method']
47
- assert_equal nil, task['service_account_email']
48
- assert_equal __FILE__, task['p12_keyfile']
45
+ assert_equal "application_default", task['auth_method']
49
46
  assert_equal nil, task['json_keyfile']
50
47
  assert_equal "your_project_name", task['project']
51
48
  assert_equal "your_dataset_name", task['dataset']
@@ -132,11 +129,6 @@ module Embulk
132
129
  config = least_config.merge('auth_method' => 'foobar')
133
130
  assert_raise { Bigquery.configure(config, schema, processor_count) }
134
131
 
135
- config = least_config.merge('auth_method' => 'private_key').tap {|h| h.delete('p12_keyfile') }
136
- assert_raise { Bigquery.configure(config, schema, processor_count) }
137
- config = least_config.merge('auth_method' => 'private_key', 'p12_keyfile' => 'dummy')
138
- assert_nothing_raised { Bigquery.configure(config, schema, processor_count) }
139
-
140
132
  config = least_config.merge('auth_method' => 'json_key').tap {|h| h.delete('json_keyfile') }
141
133
  assert_raise { Bigquery.configure(config, schema, processor_count) }
142
134
  config = least_config.merge('auth_method' => 'json_key', 'json_keyfile' => "#{EXAMPLE_ROOT}/json_key.json")
data/test/test_helper.rb CHANGED
@@ -62,7 +62,9 @@ module Embulk
62
62
  Column.new({index: 2, name: 'double', type: :double}),
63
63
  Column.new({index: 3, name: 'string', type: :string}),
64
64
  Column.new({index: 4, name: 'timestamp', type: :timestamp}),
65
- Column.new({index: 5, name: 'json', type: :json}),
65
+ Column.new({index: 5, name: 'date', type: :timestamp}),
66
+ Column.new({index: 6, name: 'datetime', type: :timestamp}),
67
+ Column.new({index: 7, name: 'json', type: :json}),
66
68
  ])
67
69
  task = {
68
70
  'column_options' => [
@@ -71,6 +73,8 @@ module Embulk
71
73
  {'name' => 'double', 'type' => 'STRING'},
72
74
  {'name' => 'string', 'type' => 'INTEGER'},
73
75
  {'name' => 'timestamp', 'type' => 'INTEGER'},
76
+ {'name' => 'date', 'type' => 'DATE'},
77
+ {'name' => 'datetime', 'type' => 'DATETIME'},
74
78
  {'name' => 'json', 'type' => 'RECORD', 'fields' => [
75
79
  { 'name' => 'key1', 'type' => 'STRING' },
76
80
  ]},
@@ -82,6 +86,8 @@ module Embulk
82
86
  {name: 'double', type: 'STRING'},
83
87
  {name: 'string', type: 'INTEGER'},
84
88
  {name: 'timestamp', type: 'INTEGER'},
89
+ {name: 'date', type: 'DATE'},
90
+ {name: 'datetime', type: 'DATETIME'},
85
91
  {name: 'json', type: 'RECORD', fields: [
86
92
  {name: 'key1', type: 'STRING'},
87
93
  ]},
@@ -8,12 +8,11 @@ module Embulk
8
8
  class TestTransaction < Test::Unit::TestCase
9
9
  def least_config
10
10
  DataSource.new({
11
- 'project' => 'your_project_name',
12
- 'dataset' => 'your_dataset_name',
13
- 'table' => 'your_table_name',
14
- 'p12_keyfile' => __FILE__, # fake
15
- 'temp_table' => 'temp_table', # randomly created is not good for our test
16
- 'path_prefix' => 'tmp/', # randomly created is not good for our test
11
+ 'project' => 'your_project_name',
12
+ 'dataset' => 'your_dataset_name',
13
+ 'table' => 'your_table_name',
14
+ 'temp_table' => 'temp_table', # randomly created is not good for our test
15
+ 'path_prefix' => 'tmp/', # randomly created is not good for our test
17
16
  })
18
17
  end
19
18
 
@@ -90,6 +90,14 @@ module Embulk
90
90
  assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'TIMESTAMP').create_converter }
91
91
  end
92
92
 
93
+ def test_date
94
+ assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
95
+ end
96
+
97
+ def test_datetime
98
+ assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
99
+ end
100
+
93
101
  def test_record
94
102
  assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
95
103
  end
@@ -130,6 +138,14 @@ module Embulk
130
138
  assert_equal 1408452095, converter.call(1408452095)
131
139
  end
132
140
 
141
+ def test_date
142
+ assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
143
+ end
144
+
145
+ def test_datetime
146
+ assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
147
+ end
148
+
133
149
  def test_record
134
150
  assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
135
151
  end
@@ -166,6 +182,14 @@ module Embulk
166
182
  assert_equal 1408452095.188766, converter.call(1408452095.188766)
167
183
  end
168
184
 
185
+ def test_date
186
+ assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
187
+ end
188
+
189
+ def test_datetime
190
+ assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter }
191
+ end
192
+
169
193
  def test_record
170
194
  assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
171
195
  end
@@ -216,6 +240,28 @@ module Embulk
216
240
  assert_equal "2016-02-26 00:00:00", converter.call("2016-02-26 00:00:00")
217
241
  end
218
242
 
243
+ def test_date
244
+ converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter
245
+ assert_equal nil, converter.call(nil)
246
+ assert_equal "2016-02-26", converter.call("2016-02-26")
247
+ assert_equal "2016-02-26", converter.call("2016-02-26 00:00:00")
248
+ assert_raise { converter.call('foo') }
249
+ end
250
+
251
+ def test_datetime
252
+ converter = ValueConverterFactory.new(
253
+ SCHEMA_TYPE, 'DATETIME',
254
+ timestamp_format: '%Y/%m/%d'
255
+ ).create_converter
256
+ assert_equal nil, converter.call(nil)
257
+ assert_equal "2016-02-26 00:00:00.000000", converter.call("2016/02/26")
258
+
259
+ # Users must care of BQ datetime format by themselves with no timestamp_format
260
+ converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter
261
+ assert_equal nil, converter.call(nil)
262
+ assert_equal "2016-02-26 00:00:00", converter.call("2016-02-26 00:00:00")
263
+ end
264
+
219
265
  def test_record
220
266
  converter = ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter
221
267
  assert_equal({'foo'=>'foo'}, converter.call(%Q[{"foo":"foo"}]))
@@ -268,6 +314,42 @@ module Embulk
268
314
  assert_equal expected, converter.call(Time.at(subject).utc)
269
315
  end
270
316
 
317
+ def test_date
318
+ converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter
319
+ assert_equal nil, converter.call(nil)
320
+ timestamp = Time.parse("2016-02-26 00:00:00.500000 +00:00")
321
+ expected = "2016-02-26"
322
+ assert_equal expected, converter.call(timestamp)
323
+
324
+ converter = ValueConverterFactory.new(
325
+ SCHEMA_TYPE, 'DATE', timezone: 'Asia/Tokyo'
326
+ ).create_converter
327
+ assert_equal nil, converter.call(nil)
328
+ timestamp = Time.parse("2016-02-25 15:00:00.500000 +00:00")
329
+ expected = "2016-02-26"
330
+ assert_equal expected, converter.call(timestamp)
331
+
332
+ assert_raise { converter.call('foo') }
333
+ end
334
+
335
+ def test_datetime
336
+ converter = ValueConverterFactory.new(SCHEMA_TYPE, 'DATETIME').create_converter
337
+ assert_equal nil, converter.call(nil)
338
+ timestamp = Time.parse("2016-02-26 00:00:00.500000 +00:00")
339
+ expected = "2016-02-26 00:00:00.500000"
340
+ assert_equal expected, converter.call(timestamp)
341
+
342
+ converter = ValueConverterFactory.new(
343
+ SCHEMA_TYPE, 'DATETIME', timezone: 'Asia/Tokyo'
344
+ ).create_converter
345
+ assert_equal nil, converter.call(nil)
346
+ timestamp = Time.parse("2016-02-25 15:00:00.500000 +00:00")
347
+ expected = "2016-02-26 00:00:00.500000"
348
+ assert_equal expected, converter.call(timestamp)
349
+
350
+ assert_raise { converter.call('foo') }
351
+ end
352
+
271
353
  def test_record
272
354
  assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter }
273
355
  end
@@ -298,6 +380,10 @@ module Embulk
298
380
  assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'TIMESTAMP').create_converter }
299
381
  end
300
382
 
383
+ def test_date
384
+ assert_raise { ValueConverterFactory.new(SCHEMA_TYPE, 'DATE').create_converter }
385
+ end
386
+
301
387
  def test_record
302
388
  converter = ValueConverterFactory.new(SCHEMA_TYPE, 'RECORD').create_converter
303
389
  assert_equal nil, converter.call(nil)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
@@ -9,22 +9,42 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-08-10 00:00:00.000000000 Z
12
+ date: 2019-11-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '0.7'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: 0.12.0
23
+ name: signet
24
+ prerelease: false
25
+ type: :runtime
26
+ version_requirements: !ruby/object:Gem::Requirement
27
+ requirements:
28
+ - - "~>"
29
+ - !ruby/object:Gem::Version
30
+ version: '0.7'
31
+ - - "<"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.12.0
34
+ - !ruby/object:Gem::Dependency
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "<"
38
+ - !ruby/object:Gem::Version
39
+ version: 0.33.0
20
40
  name: google-api-client
21
41
  prerelease: false
22
42
  type: :runtime
23
43
  version_requirements: !ruby/object:Gem::Requirement
24
44
  requirements:
25
- - - ">="
45
+ - - "<"
26
46
  - !ruby/object:Gem::Version
27
- version: '0'
47
+ version: 0.33.0
28
48
  - !ruby/object:Gem::Dependency
29
49
  requirement: !ruby/object:Gem::Requirement
30
50
  requirements:
@@ -83,51 +103,8 @@ files:
83
103
  - README.md
84
104
  - Rakefile
85
105
  - embulk-output-bigquery.gemspec
86
- - example/config_append_direct_schema_update_options.yml
87
- - example/config_client_options.yml
88
- - example/config_csv.yml
89
- - example/config_delete_in_advance.yml
90
- - example/config_delete_in_advance_field_partitioned_table.yml
91
- - example/config_delete_in_advance_partitioned_table.yml
92
- - example/config_expose_errors.yml
93
- - example/config_gcs.yml
94
- - example/config_guess_from_embulk_schema.yml
95
- - example/config_guess_with_column_options.yml
96
- - example/config_gzip.yml
97
- - example/config_jsonl.yml
98
- - example/config_max_threads.yml
99
- - example/config_min_ouput_tasks.yml
100
- - example/config_mode_append.yml
101
- - example/config_mode_append_direct.yml
102
- - example/config_nested_record.yml
103
- - example/config_payload_column.yml
104
- - example/config_payload_column_index.yml
105
- - example/config_progress_log_interval.yml
106
- - example/config_replace.yml
107
- - example/config_replace_backup.yml
108
- - example/config_replace_backup_field_partitioned_table.yml
109
- - example/config_replace_backup_partitioned_table.yml
110
- - example/config_replace_field_partitioned_table.yml
111
- - example/config_replace_partitioned_table.yml
112
- - example/config_replace_schema_update_options.yml
113
- - example/config_skip_file_generation.yml
114
- - example/config_table_strftime.yml
115
- - example/config_template_table.yml
116
- - example/config_uncompressed.yml
117
- - example/config_with_rehearsal.yml
118
- - example/example.csv
119
- - example/example.yml
120
- - example/example2_1.csv
121
- - example/example2_2.csv
122
- - example/example4_1.csv
123
- - example/example4_2.csv
124
- - example/example4_3.csv
125
- - example/example4_4.csv
126
- - example/json_key.json
127
- - example/nested_example.jsonl
128
- - example/schema.json
129
- - example/schema_expose_errors.json
130
106
  - lib/embulk/output/bigquery.rb
107
+ - lib/embulk/output/bigquery/auth.rb
131
108
  - lib/embulk/output/bigquery/bigquery_client.rb
132
109
  - lib/embulk/output/bigquery/file_writer.rb
133
110
  - lib/embulk/output/bigquery/gcs_client.rb
@@ -161,7 +138,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
161
138
  - !ruby/object:Gem::Version
162
139
  version: '0'
163
140
  requirements: []
164
- rubygems_version: 3.0.3
141
+ rubyforge_project:
142
+ rubygems_version: 2.6.14.1
165
143
  signing_key:
166
144
  specification_version: 4
167
145
  summary: Google BigQuery output plugin for Embulk
@@ -1,31 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: append_direct
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- compression: NONE
28
- auto_create_dataset: true
29
- auto_create_table: true
30
- schema_file: example/schema.json
31
- schema_update_options: [ALLOW_FIELD_ADDITION, ALLOW_FIELD_RELAXATION]
@@ -1,33 +0,0 @@
1
- in:
2
- type: file
3
- path_prefix: example/example.csv
4
- parser:
5
- type: csv
6
- charset: UTF-8
7
- newline: CRLF
8
- null_string: 'NULL'
9
- skip_header_lines: 1
10
- comment_line_marker: '#'
11
- columns:
12
- - {name: date, type: string}
13
- - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
14
- - {name: "null", type: string}
15
- - {name: long, type: long}
16
- - {name: string, type: string}
17
- - {name: double, type: double}
18
- - {name: boolean, type: boolean}
19
- out:
20
- type: bigquery
21
- mode: replace
22
- auth_method: json_key
23
- json_keyfile: example/your-project-000.json
24
- dataset: your_dataset_name
25
- table: your_table_name
26
- source_format: NEWLINE_DELIMITED_JSON
27
- auto_create_dataset: true
28
- auto_create_table: true
29
- schema_file: example/schema.json
30
- timeout_sec: 400
31
- open_timeout_sec: 400
32
- retries: 2
33
- application_name: "Embulk BigQuery plugin test"