embulk-output-bigquery 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7e6de19fdc976904bd2ac46aa529e030f9ecbe48
4
- data.tar.gz: d279269a8914553203a1d2bc06832ed4201bedf3
3
+ metadata.gz: daa84fac7fd9adf3ad62798ef5b113ed462fc303
4
+ data.tar.gz: fafbd5023df0b9d3539a754b699e089012d4caa1
5
5
  SHA512:
6
- metadata.gz: 3e7f86e4b963012e0a55227b53262c141209d5aaed5c06b82d1d3da844f467c2b0e53c750e31eec38963459e58e02ba54b82fbb39c14084df241428b5245a220
7
- data.tar.gz: 60850b5ce34b7b626cf9766a537196d160ae1c1bd0365f119817f50977f7a171762e6562ac987cdc2655fc75892dd6beafceebd47305818e625330aaabb1911b
6
+ metadata.gz: 5a273f26c8c357b082d94bc97364f18fc73db9f829b12d5bd10df867f625d4619da59c483d56c83cbbdef82383e3a4116f185f9973c246cc4ccd8adc187cadb2
7
+ data.tar.gz: 6bd617783792cd5c4e7d3ab7e0b39ab3f86fa97cb326f3b6978e743e74902130c809f353bc3238f97ab5e6b439ba44cf9972dc2840d37b2b3ad5d8b94f6dfddd
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.2 - 2016-02-15
2
+
3
+ * [new feature] Added template_table option. [#25](https://github.com/embulk/embulk-output-bigquery/pull/25)
4
+
1
5
  ## 0.2.1 - 2016-01-28
2
6
 
3
7
  * [maintenance] Upgraded Embulk version to 0.8.1 [#22](https://github.com/embulk/embulk-output-bigquery/pull/22). @joker1007 thanks!
data/README.md CHANGED
@@ -40,6 +40,7 @@ OAuth flow for installed applications.
40
40
  | table | string | required | | table name |
41
41
  | auto_create_table | boolean | optional | 0 | [See below](#dynamic-table-creating) |
42
42
  | schema_file | string | optional | | /path/to/schema.json |
43
+ | template_table | string | optional | | existing_table_name [See below](#dynamic-table-creating) |
43
44
  | prevent_duplicate_insert | boolean | optional | 0 | [See below](#data-consistency) |
44
45
  | delete_from_local_when_job_end | boolean | optional | 0 | If set to true, delete the local file when the job ends |
45
46
  | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
@@ -195,8 +196,11 @@ When `auto_create_table` is set to true, try to create the table using BigQuery
195
196
 
196
197
  If the table already exists, data is inserted into it.
197
198
 
198
- To describe the schema of the target table, please write schema path.
199
+ There are two ways to set the schema.
199
200
 
201
+ #### Set schema.json
202
+
203
+ Please set the file path of schema.json.
200
204
 
201
205
  ```yaml
202
206
  out:
@@ -206,6 +210,18 @@ out:
206
210
  schema_file: /path/to/schema.json
207
211
  ```
208
212
 
213
+ #### Set template_table in dataset
214
+
215
+ The plugin will try to read the schema from an existing table and use it as the schema template.
216
+
217
+ ```yaml
218
+ out:
219
+ type: bigquery
220
+ auto_create_table: true
221
+ table: table_%Y_%m
222
+ template_table: existing_table_name
223
+ ```
224
+
209
225
  ### Data Consistency
210
226
 
211
227
  When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generates the job ID from the MD5 hash of the file and other options to prevent duplicate data insertion.
data/build.gradle CHANGED
@@ -16,7 +16,7 @@ configurations {
16
16
  sourceCompatibility = 1.7
17
17
  targetCompatibility = 1.7
18
18
 
19
- version = "0.2.1"
19
+ version = "0.2.2"
20
20
 
21
21
  dependencies {
22
22
  compile "org.embulk:embulk-core:0.8.1"
@@ -1,27 +1,28 @@
1
1
  package org.embulk.output;
2
2
 
3
- import java.io.File;
4
- import java.io.FileInputStream;
5
- import java.io.IOException;
6
- import java.util.Collections;
7
- import java.security.GeneralSecurityException;
8
-
9
- import com.google.common.base.Optional;
10
- import com.google.common.collect.ImmutableList;
11
3
  import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
12
4
  import com.google.api.client.googleapis.compute.ComputeCredential;
13
5
  import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
6
+ import com.google.api.client.googleapis.json.GoogleJsonResponseException;
7
+ import com.google.api.client.http.HttpRequestInitializer;
14
8
  import com.google.api.client.http.HttpTransport;
15
9
  import com.google.api.client.json.JsonFactory;
16
10
  import com.google.api.client.json.jackson2.JacksonFactory;
17
- import com.google.api.client.http.HttpRequestInitializer;
18
- import com.google.api.client.googleapis.json.GoogleJsonResponseException;
19
11
  import com.google.api.services.bigquery.Bigquery;
20
12
  import com.google.api.services.bigquery.BigqueryScopes;
21
13
  import com.google.api.services.bigquery.model.ProjectList;
14
+ import com.google.common.base.Optional;
15
+ import com.google.common.collect.ImmutableList;
22
16
  import org.embulk.spi.Exec;
23
17
  import org.slf4j.Logger;
24
18
 
19
+ import java.io.File;
20
+ import java.io.FileInputStream;
21
+
22
+ import java.io.IOException;
23
+ import java.security.GeneralSecurityException;
24
+ import java.util.Collections;
25
+
25
26
  public class BigqueryAuthentication
26
27
  {
27
28
  private final Logger log = Exec.getLogger(BigqueryAuthentication.class);
@@ -1,36 +1,36 @@
1
1
  package org.embulk.output;
2
2
 
3
- import java.io.File;
4
- import java.io.FileNotFoundException;
5
- import java.io.FileOutputStream;
6
- import java.io.BufferedOutputStream;
7
- import java.io.IOException;
8
- import java.nio.charset.Charset;
9
- import java.security.NoSuchAlgorithmException;
10
- import java.util.List;
11
- import java.util.concurrent.TimeoutException;
12
3
  import com.google.common.base.Function;
13
4
  import com.google.common.base.Optional;
14
5
  import com.google.common.base.Throwables;
15
- import java.security.GeneralSecurityException;
16
- import org.jruby.embed.ScriptingContainer;
17
-
18
6
  import org.embulk.config.Config;
19
- import org.embulk.config.ConfigException;
20
7
  import org.embulk.config.ConfigDefault;
21
- import org.embulk.config.ConfigSource;
22
8
  import org.embulk.config.ConfigDiff;
23
- import org.embulk.config.TaskReport;
9
+ import org.embulk.config.ConfigException;
10
+ import org.embulk.config.ConfigSource;
24
11
  import org.embulk.config.Task;
12
+ import org.embulk.config.TaskReport;
25
13
  import org.embulk.config.TaskSource;
26
- import org.embulk.spi.unit.LocalFile;
27
14
  import org.embulk.spi.Buffer;
15
+ import org.embulk.spi.Exec;
28
16
  import org.embulk.spi.FileOutputPlugin;
29
17
  import org.embulk.spi.TransactionalFileOutput;
30
- import org.embulk.spi.Exec;
31
-
18
+ import org.embulk.spi.unit.LocalFile;
19
+ import org.jruby.embed.ScriptingContainer;
32
20
  import org.slf4j.Logger;
33
21
 
22
+ import java.io.BufferedOutputStream;
23
+ import java.io.File;
24
+ import java.io.FileNotFoundException;
25
+ import java.io.FileOutputStream;
26
+
27
+ import java.io.IOException;
28
+ import java.nio.charset.Charset;
29
+ import java.security.GeneralSecurityException;
30
+ import java.security.NoSuchAlgorithmException;
31
+ import java.util.List;
32
+ import java.util.concurrent.TimeoutException;
33
+
34
34
  public class BigqueryOutputPlugin
35
35
  implements FileOutputPlugin
36
36
  {
@@ -116,6 +116,10 @@ public class BigqueryOutputPlugin
116
116
  Optional<LocalFile> getSchemaFile();
117
117
  void setSchemaFile(Optional<LocalFile> schemaFile);
118
118
 
119
+ @Config("template_table")
120
+ @ConfigDefault("null")
121
+ Optional<String> getTemplateTable();
122
+
119
123
  @Config("prevent_duplicate_insert")
120
124
  @ConfigDefault("false")
121
125
  boolean getPreventDuplicateInsert();
@@ -209,8 +213,12 @@ public class BigqueryOutputPlugin
209
213
  task.getP12Keyfile().transform(localFileToPathString()),
210
214
  task.getJsonKeyfile().transform(localFileToPathString()),
211
215
  task.getApplicationName())
216
+ .setProject(task.getProject())
217
+ .setDataset(task.getDataset())
218
+ .setTable(task.getTable())
212
219
  .setAutoCreateTable(task.getAutoCreateTable())
213
220
  .setSchemaPath(task.getSchemaFile().transform(localFileToPathString()))
221
+ .setTemplateTable(task.getTemplateTable())
214
222
  .setSourceFormat(task.getSourceFormat().getString())
215
223
  .setFieldDelimiter(String.valueOf(task.getFieldDelimiter()))
216
224
  .setMaxBadRecords(task.getMaxBadrecords())
@@ -222,9 +230,6 @@ public class BigqueryOutputPlugin
222
230
  .setIgnoreUnknownValues(task.getIgnoreUnknownValues())
223
231
  .setAllowQuotedNewlines(task.getAllowQuotedNewlines())
224
232
  .build();
225
-
226
- bigQueryWriter.checkConfig(task.getProject(), task.getDataset(), task.getTable());
227
-
228
233
  }
229
234
  catch (IOException | GeneralSecurityException ex) {
230
235
  throw new ConfigException(ex);
@@ -1,40 +1,52 @@
1
1
  package org.embulk.output;
2
2
 
3
- import java.io.File;
4
- import java.io.IOException;
5
- import java.io.FileNotFoundException;
6
- import java.io.FileInputStream;
7
- import java.io.BufferedInputStream;
3
+ import com.fasterxml.jackson.core.type.TypeReference;
4
+ import com.fasterxml.jackson.databind.ObjectMapper;
5
+ import com.google.api.client.googleapis.json.GoogleJsonResponseException;
6
+ import com.google.api.client.googleapis.media.MediaHttpUploader;
7
+ import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener;
8
8
  import com.google.api.client.http.InputStreamContent;
9
-
10
- import java.security.MessageDigest;
11
- import java.security.NoSuchAlgorithmException;
12
- import java.util.List;
13
- import java.util.concurrent.TimeoutException;
14
-
15
- import com.google.api.services.bigquery.model.*;
9
+ import com.google.api.services.bigquery.Bigquery;
10
+ import com.google.api.services.bigquery.Bigquery.Jobs.Insert;
11
+ import com.google.api.services.bigquery.Bigquery.Tables;
12
+ import com.google.api.services.bigquery.model.ErrorProto;
13
+ import com.google.api.services.bigquery.model.Job;
14
+ import com.google.api.services.bigquery.model.JobConfiguration;
15
+ import com.google.api.services.bigquery.model.JobConfigurationLoad;
16
+ import com.google.api.services.bigquery.model.JobConfigurationTableCopy;
17
+ import com.google.api.services.bigquery.model.JobReference;
18
+ import com.google.api.services.bigquery.model.JobStatistics;
19
+ import com.google.api.services.bigquery.model.Table;
20
+ import com.google.api.services.bigquery.model.TableFieldSchema;
21
+ import com.google.api.services.bigquery.model.TableReference;
22
+ import com.google.api.services.bigquery.model.TableSchema;
16
23
  import com.google.common.base.Optional;
17
- import java.security.GeneralSecurityException;
18
- import com.fasterxml.jackson.databind.ObjectMapper;
19
- import com.fasterxml.jackson.core.type.TypeReference;
20
-
21
24
  import com.google.common.collect.ImmutableList;
22
25
  import org.apache.commons.codec.binary.Hex;
23
26
  import org.embulk.spi.Exec;
24
27
  import org.slf4j.Logger;
25
28
 
26
- import com.google.api.services.bigquery.Bigquery;
27
- import com.google.api.services.bigquery.Bigquery.Tables;
28
- import com.google.api.services.bigquery.Bigquery.Jobs.Insert;
29
- import com.google.api.client.googleapis.json.GoogleJsonResponseException;
30
- import com.google.api.client.googleapis.media.MediaHttpUploader;
31
- import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener;
29
+ import java.io.BufferedInputStream;
30
+ import java.io.File;
31
+ import java.io.FileInputStream;
32
+ import java.io.FileNotFoundException;
33
+
34
+ import java.io.IOException;
35
+ import java.security.GeneralSecurityException;
36
+ import java.security.MessageDigest;
37
+ import java.security.NoSuchAlgorithmException;
38
+ import java.util.List;
39
+ import java.util.concurrent.TimeoutException;
32
40
 
33
41
  public class BigqueryWriter
34
42
  {
35
43
  private final Logger log = Exec.getLogger(BigqueryWriter.class);
44
+ private final String project;
45
+ private final String dataset;
46
+ private final String table;
36
47
  private final boolean autoCreateTable;
37
48
  private final Optional<String> schemaPath;
49
+ private final Optional<String> templateTable;
38
50
  private final TableSchema tableSchema;
39
51
  private final String sourceFormat;
40
52
  private final String fieldDelimiter;
@@ -51,8 +63,12 @@ public class BigqueryWriter
51
63
  public BigqueryWriter(Builder builder)
52
64
  throws IOException, GeneralSecurityException
53
65
  {
66
+ this.project = builder.project;
67
+ this.dataset = builder.dataset;
68
+ this.table = builder.table;
54
69
  this.autoCreateTable = builder.autoCreateTable;
55
70
  this.schemaPath = builder.schemaPath;
71
+ this.templateTable = builder.templateTable;
56
72
  this.sourceFormat = builder.sourceFormat.toUpperCase();
57
73
  this.fieldDelimiter = builder.fieldDelimiter;
58
74
  this.maxBadRecords = builder.maxBadRecords;
@@ -70,8 +86,15 @@ public class BigqueryWriter
70
86
  );
71
87
  this.bigQueryClient = auth.getBigqueryClient();
72
88
 
89
+ checkConfig();
90
+
73
91
  if (autoCreateTable) {
74
- this.tableSchema = createTableSchema();
92
+ if (schemaPath.isPresent()) {
93
+ this.tableSchema = createTableSchema();
94
+ }
95
+ else {
96
+ this.tableSchema = fetchTableSchema();
97
+ }
75
98
  }
76
99
  else {
77
100
  this.tableSchema = null;
@@ -314,6 +337,15 @@ public class BigqueryWriter
314
337
  }
315
338
  }
316
339
 
340
+ public TableSchema fetchTableSchema() throws IOException
341
+ {
342
+ String fetchTarget = templateTable.orNull();
343
+ log.info(String.format("Fetch table schema from project:%s dataset:%s table:%s", project, dataset, fetchTarget));
344
+ Tables tableRequest = bigQueryClient.tables();
345
+ Table tableData = tableRequest.get(project, dataset, fetchTarget).execute();
346
+ return tableData.getSchema();
347
+ }
348
+
317
349
  public boolean isExistTable(String project, String dataset, String table) throws IOException
318
350
  {
319
351
  Tables tableRequest = bigQueryClient.tables();
@@ -326,18 +358,18 @@ public class BigqueryWriter
326
358
  return true;
327
359
  }
328
360
 
329
- public void checkConfig(String project, String dataset, String table) throws IOException
361
+ public void checkConfig() throws IOException
330
362
  {
331
363
  if (autoCreateTable) {
332
- if (!schemaPath.isPresent()) {
333
- throw new FileNotFoundException("schema_file is empty");
334
- }
335
- else {
364
+ if (schemaPath.isPresent()) {
336
365
  File file = new File(schemaPath.orNull());
337
366
  if (!file.exists()) {
338
367
  throw new FileNotFoundException("Can not load schema file.");
339
368
  }
340
369
  }
370
+ else if (!templateTable.isPresent()) {
371
+ throw new FileNotFoundException("schema_file or template_table must be present");
372
+ }
341
373
  }
342
374
  else {
343
375
  if (!isExistTable(project, dataset, table)) {
@@ -404,8 +436,12 @@ public class BigqueryWriter
404
436
  private Optional<String> p12KeyFilePath;
405
437
  private Optional<String> jsonKeyFilePath;
406
438
  private String applicationName;
439
+ private String project;
440
+ private String dataset;
441
+ private String table;
407
442
  private boolean autoCreateTable;
408
443
  private Optional<String> schemaPath;
444
+ private Optional<String> templateTable;
409
445
  private String sourceFormat;
410
446
  private String fieldDelimiter;
411
447
  private int maxBadRecords;
@@ -427,6 +463,24 @@ public class BigqueryWriter
427
463
  this.applicationName = applicationName;
428
464
  }
429
465
 
466
+ public Builder setProject(String project)
467
+ {
468
+ this.project = project;
469
+ return this;
470
+ }
471
+
472
+ public Builder setDataset(String dataset)
473
+ {
474
+ this.dataset = dataset;
475
+ return this;
476
+ }
477
+
478
+ public Builder setTable(String table)
479
+ {
480
+ this.table = table;
481
+ return this;
482
+ }
483
+
430
484
  public Builder setAutoCreateTable(boolean autoCreateTable)
431
485
  {
432
486
  this.autoCreateTable = autoCreateTable;
@@ -439,6 +493,12 @@ public class BigqueryWriter
439
493
  return this;
440
494
  }
441
495
 
496
+ public Builder setTemplateTable(Optional<String> templateTable)
497
+ {
498
+ this.templateTable = templateTable;
499
+ return this;
500
+ }
501
+
442
502
  public Builder setSourceFormat(String sourceFormat)
443
503
  {
444
504
  this.sourceFormat = sourceFormat;
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-28 00:00:00.000000000 Z
11
+ date: 2016-02-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -65,7 +65,7 @@ files:
65
65
  - src/test/java/org/embulk/output/TestBigqueryWriter.java
66
66
  - classpath/commons-codec-1.3.jar
67
67
  - classpath/commons-logging-1.1.1.jar
68
- - classpath/embulk-output-bigquery-0.2.1.jar
68
+ - classpath/embulk-output-bigquery-0.2.2.jar
69
69
  - classpath/google-api-client-1.20.0.jar
70
70
  - classpath/google-api-services-bigquery-v2-rev205-1.20.0.jar
71
71
  - classpath/google-http-client-1.20.0.jar