embulk-output-bigquery 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7e6de19fdc976904bd2ac46aa529e030f9ecbe48
4
- data.tar.gz: d279269a8914553203a1d2bc06832ed4201bedf3
3
+ metadata.gz: daa84fac7fd9adf3ad62798ef5b113ed462fc303
4
+ data.tar.gz: fafbd5023df0b9d3539a754b699e089012d4caa1
5
5
  SHA512:
6
- metadata.gz: 3e7f86e4b963012e0a55227b53262c141209d5aaed5c06b82d1d3da844f467c2b0e53c750e31eec38963459e58e02ba54b82fbb39c14084df241428b5245a220
7
- data.tar.gz: 60850b5ce34b7b626cf9766a537196d160ae1c1bd0365f119817f50977f7a171762e6562ac987cdc2655fc75892dd6beafceebd47305818e625330aaabb1911b
6
+ metadata.gz: 5a273f26c8c357b082d94bc97364f18fc73db9f829b12d5bd10df867f625d4619da59c483d56c83cbbdef82383e3a4116f185f9973c246cc4ccd8adc187cadb2
7
+ data.tar.gz: 6bd617783792cd5c4e7d3ab7e0b39ab3f86fa97cb326f3b6978e743e74902130c809f353bc3238f97ab5e6b439ba44cf9972dc2840d37b2b3ad5d8b94f6dfddd
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.2 - 2016-02-15
2
+
3
+ * [new feature] Added template_table option. [#25](https://github.com/embulk/embulk-output-bigquery/pull/25)
4
+
1
5
  ## 0.2.1 - 2016-01-28
2
6
 
3
7
  * [maintenance] Upgraded Embulk version to 0.8.1 [#22](https://github.com/embulk/embulk-output-bigquery/pull/22). @joker1007 thanks!
data/README.md CHANGED
@@ -40,6 +40,7 @@ OAuth flow for installed applications.
40
40
  | table | string | required | | table name |
41
41
  | auto_create_table | boolean | optional | 0 | [See below](#dynamic-table-creating) |
42
42
  | schema_file | string | optional | | /path/to/schema.json |
43
+ | template_table | string | optional | | existing_table_name [See below](#dynamic-table-creating) |
43
44
  | prevent_duplicate_insert | boolean | optional | 0 | [See below](#data-consistency) |
44
45
  | delete_from_local_when_job_end | boolean | optional | 0 | If set to true, delete the local file when the job ends |
45
46
  | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
@@ -195,8 +196,11 @@ When `auto_create_table` is set to true, try to create the table using BigQuery
195
196
 
196
197
  If table already exists, insert into it.
197
198
 
198
- To describe the schema of the target table, please write schema path.
199
+ There are 2 ways to set schema.
199
200
 
201
+ #### Set schema.json
202
+
203
+ Please set the file path of schema.json.
200
204
 
201
205
  ```yaml
202
206
  out:
@@ -206,6 +210,18 @@ out:
206
210
  schema_file: /path/to/schema.json
207
211
  ```
208
212
 
213
+ #### Set template_table in dataset
214
+
215
+ The plugin will try to read the schema from an existing table and use it as a schema template.
216
+
217
+ ```yaml
218
+ out:
219
+ type: bigquery
220
+ auto_create_table: true
221
+ table: table_%Y_%m
222
+ template_table: existing_table_name
223
+ ```
224
+
209
225
  ### Data Consistency
210
226
 
211
227
  When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generates a job ID from the md5 hash of the file and other options to prevent duplicate data insertion.
data/build.gradle CHANGED
@@ -16,7 +16,7 @@ configurations {
16
16
  sourceCompatibility = 1.7
17
17
  targetCompatibility = 1.7
18
18
 
19
- version = "0.2.1"
19
+ version = "0.2.2"
20
20
 
21
21
  dependencies {
22
22
  compile "org.embulk:embulk-core:0.8.1"
@@ -1,27 +1,28 @@
1
1
  package org.embulk.output;
2
2
 
3
- import java.io.File;
4
- import java.io.FileInputStream;
5
- import java.io.IOException;
6
- import java.util.Collections;
7
- import java.security.GeneralSecurityException;
8
-
9
- import com.google.common.base.Optional;
10
- import com.google.common.collect.ImmutableList;
11
3
  import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
12
4
  import com.google.api.client.googleapis.compute.ComputeCredential;
13
5
  import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
6
+ import com.google.api.client.googleapis.json.GoogleJsonResponseException;
7
+ import com.google.api.client.http.HttpRequestInitializer;
14
8
  import com.google.api.client.http.HttpTransport;
15
9
  import com.google.api.client.json.JsonFactory;
16
10
  import com.google.api.client.json.jackson2.JacksonFactory;
17
- import com.google.api.client.http.HttpRequestInitializer;
18
- import com.google.api.client.googleapis.json.GoogleJsonResponseException;
19
11
  import com.google.api.services.bigquery.Bigquery;
20
12
  import com.google.api.services.bigquery.BigqueryScopes;
21
13
  import com.google.api.services.bigquery.model.ProjectList;
14
+ import com.google.common.base.Optional;
15
+ import com.google.common.collect.ImmutableList;
22
16
  import org.embulk.spi.Exec;
23
17
  import org.slf4j.Logger;
24
18
 
19
+ import java.io.File;
20
+ import java.io.FileInputStream;
21
+
22
+ import java.io.IOException;
23
+ import java.security.GeneralSecurityException;
24
+ import java.util.Collections;
25
+
25
26
  public class BigqueryAuthentication
26
27
  {
27
28
  private final Logger log = Exec.getLogger(BigqueryAuthentication.class);
@@ -1,36 +1,36 @@
1
1
  package org.embulk.output;
2
2
 
3
- import java.io.File;
4
- import java.io.FileNotFoundException;
5
- import java.io.FileOutputStream;
6
- import java.io.BufferedOutputStream;
7
- import java.io.IOException;
8
- import java.nio.charset.Charset;
9
- import java.security.NoSuchAlgorithmException;
10
- import java.util.List;
11
- import java.util.concurrent.TimeoutException;
12
3
  import com.google.common.base.Function;
13
4
  import com.google.common.base.Optional;
14
5
  import com.google.common.base.Throwables;
15
- import java.security.GeneralSecurityException;
16
- import org.jruby.embed.ScriptingContainer;
17
-
18
6
  import org.embulk.config.Config;
19
- import org.embulk.config.ConfigException;
20
7
  import org.embulk.config.ConfigDefault;
21
- import org.embulk.config.ConfigSource;
22
8
  import org.embulk.config.ConfigDiff;
23
- import org.embulk.config.TaskReport;
9
+ import org.embulk.config.ConfigException;
10
+ import org.embulk.config.ConfigSource;
24
11
  import org.embulk.config.Task;
12
+ import org.embulk.config.TaskReport;
25
13
  import org.embulk.config.TaskSource;
26
- import org.embulk.spi.unit.LocalFile;
27
14
  import org.embulk.spi.Buffer;
15
+ import org.embulk.spi.Exec;
28
16
  import org.embulk.spi.FileOutputPlugin;
29
17
  import org.embulk.spi.TransactionalFileOutput;
30
- import org.embulk.spi.Exec;
31
-
18
+ import org.embulk.spi.unit.LocalFile;
19
+ import org.jruby.embed.ScriptingContainer;
32
20
  import org.slf4j.Logger;
33
21
 
22
+ import java.io.BufferedOutputStream;
23
+ import java.io.File;
24
+ import java.io.FileNotFoundException;
25
+ import java.io.FileOutputStream;
26
+
27
+ import java.io.IOException;
28
+ import java.nio.charset.Charset;
29
+ import java.security.GeneralSecurityException;
30
+ import java.security.NoSuchAlgorithmException;
31
+ import java.util.List;
32
+ import java.util.concurrent.TimeoutException;
33
+
34
34
  public class BigqueryOutputPlugin
35
35
  implements FileOutputPlugin
36
36
  {
@@ -116,6 +116,10 @@ public class BigqueryOutputPlugin
116
116
  Optional<LocalFile> getSchemaFile();
117
117
  void setSchemaFile(Optional<LocalFile> schemaFile);
118
118
 
119
+ @Config("template_table")
120
+ @ConfigDefault("null")
121
+ Optional<String> getTemplateTable();
122
+
119
123
  @Config("prevent_duplicate_insert")
120
124
  @ConfigDefault("false")
121
125
  boolean getPreventDuplicateInsert();
@@ -209,8 +213,12 @@ public class BigqueryOutputPlugin
209
213
  task.getP12Keyfile().transform(localFileToPathString()),
210
214
  task.getJsonKeyfile().transform(localFileToPathString()),
211
215
  task.getApplicationName())
216
+ .setProject(task.getProject())
217
+ .setDataset(task.getDataset())
218
+ .setTable(task.getTable())
212
219
  .setAutoCreateTable(task.getAutoCreateTable())
213
220
  .setSchemaPath(task.getSchemaFile().transform(localFileToPathString()))
221
+ .setTemplateTable(task.getTemplateTable())
214
222
  .setSourceFormat(task.getSourceFormat().getString())
215
223
  .setFieldDelimiter(String.valueOf(task.getFieldDelimiter()))
216
224
  .setMaxBadRecords(task.getMaxBadrecords())
@@ -222,9 +230,6 @@ public class BigqueryOutputPlugin
222
230
  .setIgnoreUnknownValues(task.getIgnoreUnknownValues())
223
231
  .setAllowQuotedNewlines(task.getAllowQuotedNewlines())
224
232
  .build();
225
-
226
- bigQueryWriter.checkConfig(task.getProject(), task.getDataset(), task.getTable());
227
-
228
233
  }
229
234
  catch (IOException | GeneralSecurityException ex) {
230
235
  throw new ConfigException(ex);
@@ -1,40 +1,52 @@
1
1
  package org.embulk.output;
2
2
 
3
- import java.io.File;
4
- import java.io.IOException;
5
- import java.io.FileNotFoundException;
6
- import java.io.FileInputStream;
7
- import java.io.BufferedInputStream;
3
+ import com.fasterxml.jackson.core.type.TypeReference;
4
+ import com.fasterxml.jackson.databind.ObjectMapper;
5
+ import com.google.api.client.googleapis.json.GoogleJsonResponseException;
6
+ import com.google.api.client.googleapis.media.MediaHttpUploader;
7
+ import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener;
8
8
  import com.google.api.client.http.InputStreamContent;
9
-
10
- import java.security.MessageDigest;
11
- import java.security.NoSuchAlgorithmException;
12
- import java.util.List;
13
- import java.util.concurrent.TimeoutException;
14
-
15
- import com.google.api.services.bigquery.model.*;
9
+ import com.google.api.services.bigquery.Bigquery;
10
+ import com.google.api.services.bigquery.Bigquery.Jobs.Insert;
11
+ import com.google.api.services.bigquery.Bigquery.Tables;
12
+ import com.google.api.services.bigquery.model.ErrorProto;
13
+ import com.google.api.services.bigquery.model.Job;
14
+ import com.google.api.services.bigquery.model.JobConfiguration;
15
+ import com.google.api.services.bigquery.model.JobConfigurationLoad;
16
+ import com.google.api.services.bigquery.model.JobConfigurationTableCopy;
17
+ import com.google.api.services.bigquery.model.JobReference;
18
+ import com.google.api.services.bigquery.model.JobStatistics;
19
+ import com.google.api.services.bigquery.model.Table;
20
+ import com.google.api.services.bigquery.model.TableFieldSchema;
21
+ import com.google.api.services.bigquery.model.TableReference;
22
+ import com.google.api.services.bigquery.model.TableSchema;
16
23
  import com.google.common.base.Optional;
17
- import java.security.GeneralSecurityException;
18
- import com.fasterxml.jackson.databind.ObjectMapper;
19
- import com.fasterxml.jackson.core.type.TypeReference;
20
-
21
24
  import com.google.common.collect.ImmutableList;
22
25
  import org.apache.commons.codec.binary.Hex;
23
26
  import org.embulk.spi.Exec;
24
27
  import org.slf4j.Logger;
25
28
 
26
- import com.google.api.services.bigquery.Bigquery;
27
- import com.google.api.services.bigquery.Bigquery.Tables;
28
- import com.google.api.services.bigquery.Bigquery.Jobs.Insert;
29
- import com.google.api.client.googleapis.json.GoogleJsonResponseException;
30
- import com.google.api.client.googleapis.media.MediaHttpUploader;
31
- import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener;
29
+ import java.io.BufferedInputStream;
30
+ import java.io.File;
31
+ import java.io.FileInputStream;
32
+ import java.io.FileNotFoundException;
33
+
34
+ import java.io.IOException;
35
+ import java.security.GeneralSecurityException;
36
+ import java.security.MessageDigest;
37
+ import java.security.NoSuchAlgorithmException;
38
+ import java.util.List;
39
+ import java.util.concurrent.TimeoutException;
32
40
 
33
41
  public class BigqueryWriter
34
42
  {
35
43
  private final Logger log = Exec.getLogger(BigqueryWriter.class);
44
+ private final String project;
45
+ private final String dataset;
46
+ private final String table;
36
47
  private final boolean autoCreateTable;
37
48
  private final Optional<String> schemaPath;
49
+ private final Optional<String> templateTable;
38
50
  private final TableSchema tableSchema;
39
51
  private final String sourceFormat;
40
52
  private final String fieldDelimiter;
@@ -51,8 +63,12 @@ public class BigqueryWriter
51
63
  public BigqueryWriter(Builder builder)
52
64
  throws IOException, GeneralSecurityException
53
65
  {
66
+ this.project = builder.project;
67
+ this.dataset = builder.dataset;
68
+ this.table = builder.table;
54
69
  this.autoCreateTable = builder.autoCreateTable;
55
70
  this.schemaPath = builder.schemaPath;
71
+ this.templateTable = builder.templateTable;
56
72
  this.sourceFormat = builder.sourceFormat.toUpperCase();
57
73
  this.fieldDelimiter = builder.fieldDelimiter;
58
74
  this.maxBadRecords = builder.maxBadRecords;
@@ -70,8 +86,15 @@ public class BigqueryWriter
70
86
  );
71
87
  this.bigQueryClient = auth.getBigqueryClient();
72
88
 
89
+ checkConfig();
90
+
73
91
  if (autoCreateTable) {
74
- this.tableSchema = createTableSchema();
92
+ if (schemaPath.isPresent()) {
93
+ this.tableSchema = createTableSchema();
94
+ }
95
+ else {
96
+ this.tableSchema = fetchTableSchema();
97
+ }
75
98
  }
76
99
  else {
77
100
  this.tableSchema = null;
@@ -314,6 +337,15 @@ public class BigqueryWriter
314
337
  }
315
338
  }
316
339
 
340
+ public TableSchema fetchTableSchema() throws IOException
341
+ {
342
+ String fetchTarget = templateTable.orNull();
343
+ log.info(String.format("Fetch table schema from project:%s dataset:%s table:%s", project, dataset, fetchTarget));
344
+ Tables tableRequest = bigQueryClient.tables();
345
+ Table tableData = tableRequest.get(project, dataset, fetchTarget).execute();
346
+ return tableData.getSchema();
347
+ }
348
+
317
349
  public boolean isExistTable(String project, String dataset, String table) throws IOException
318
350
  {
319
351
  Tables tableRequest = bigQueryClient.tables();
@@ -326,18 +358,18 @@ public class BigqueryWriter
326
358
  return true;
327
359
  }
328
360
 
329
- public void checkConfig(String project, String dataset, String table) throws IOException
361
+ public void checkConfig() throws IOException
330
362
  {
331
363
  if (autoCreateTable) {
332
- if (!schemaPath.isPresent()) {
333
- throw new FileNotFoundException("schema_file is empty");
334
- }
335
- else {
364
+ if (schemaPath.isPresent()) {
336
365
  File file = new File(schemaPath.orNull());
337
366
  if (!file.exists()) {
338
367
  throw new FileNotFoundException("Can not load schema file.");
339
368
  }
340
369
  }
370
+ else if (!templateTable.isPresent()) {
371
+ throw new FileNotFoundException("schema_file or template_table must be present");
372
+ }
341
373
  }
342
374
  else {
343
375
  if (!isExistTable(project, dataset, table)) {
@@ -404,8 +436,12 @@ public class BigqueryWriter
404
436
  private Optional<String> p12KeyFilePath;
405
437
  private Optional<String> jsonKeyFilePath;
406
438
  private String applicationName;
439
+ private String project;
440
+ private String dataset;
441
+ private String table;
407
442
  private boolean autoCreateTable;
408
443
  private Optional<String> schemaPath;
444
+ private Optional<String> templateTable;
409
445
  private String sourceFormat;
410
446
  private String fieldDelimiter;
411
447
  private int maxBadRecords;
@@ -427,6 +463,24 @@ public class BigqueryWriter
427
463
  this.applicationName = applicationName;
428
464
  }
429
465
 
466
+ public Builder setProject(String project)
467
+ {
468
+ this.project = project;
469
+ return this;
470
+ }
471
+
472
+ public Builder setDataset(String dataset)
473
+ {
474
+ this.dataset = dataset;
475
+ return this;
476
+ }
477
+
478
+ public Builder setTable(String table)
479
+ {
480
+ this.table = table;
481
+ return this;
482
+ }
483
+
430
484
  public Builder setAutoCreateTable(boolean autoCreateTable)
431
485
  {
432
486
  this.autoCreateTable = autoCreateTable;
@@ -439,6 +493,12 @@ public class BigqueryWriter
439
493
  return this;
440
494
  }
441
495
 
496
+ public Builder setTemplateTable(Optional<String> templateTable)
497
+ {
498
+ this.templateTable = templateTable;
499
+ return this;
500
+ }
501
+
442
502
  public Builder setSourceFormat(String sourceFormat)
443
503
  {
444
504
  this.sourceFormat = sourceFormat;
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-28 00:00:00.000000000 Z
11
+ date: 2016-02-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -65,7 +65,7 @@ files:
65
65
  - src/test/java/org/embulk/output/TestBigqueryWriter.java
66
66
  - classpath/commons-codec-1.3.jar
67
67
  - classpath/commons-logging-1.1.1.jar
68
- - classpath/embulk-output-bigquery-0.2.1.jar
68
+ - classpath/embulk-output-bigquery-0.2.2.jar
69
69
  - classpath/google-api-client-1.20.0.jar
70
70
  - classpath/google-api-services-bigquery-v2-rev205-1.20.0.jar
71
71
  - classpath/google-http-client-1.20.0.jar