embulk-output-bigquery 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +17 -1
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/output/BigqueryAuthentication.java +11 -10
- data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java +26 -21
- data/src/main/java/org/embulk/output/BigqueryWriter.java +88 -28
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: daa84fac7fd9adf3ad62798ef5b113ed462fc303
|
4
|
+
data.tar.gz: fafbd5023df0b9d3539a754b699e089012d4caa1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a273f26c8c357b082d94bc97364f18fc73db9f829b12d5bd10df867f625d4619da59c483d56c83cbbdef82383e3a4116f185f9973c246cc4ccd8adc187cadb2
|
7
|
+
data.tar.gz: 6bd617783792cd5c4e7d3ab7e0b39ab3f86fa97cb326f3b6978e743e74902130c809f353bc3238f97ab5e6b439ba44cf9972dc2840d37b2b3ad5d8b94f6dfddd
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## 0.2.2 - 2016-02-15
|
2
|
+
|
3
|
+
* [new feature] Added template_table option. [#25](https://github.com/embulk/embulk-output-bigquery/pull/25)
|
4
|
+
|
1
5
|
## 0.2.1 - 2016-01-28
|
2
6
|
|
3
7
|
* [maintenance] Upgraded Embulk version to 0.8.1 [#22](https://github.com/embulk/embulk-output-bigquery/pull/22). @joker1007 thanks!
|
data/README.md
CHANGED
@@ -40,6 +40,7 @@ OAuth flow for installed applications.
|
|
40
40
|
| table | string | required | | table name |
|
41
41
|
| auto_create_table | boolean | optional | 0 | [See below](#dynamic-table-creating) |
|
42
42
|
| schema_file | string | optional | | /path/to/schema.json |
|
43
|
+
| template_table | string | optional | | existing_table_name [See below](#dynamic-table-creating) |
|
43
44
|
| prevent_duplicate_insert | boolean | optional | 0 | [See below](#data-consistency) |
|
44
45
|
| delete_from_local_when_job_end | boolean | optional | 0 | If set to true, delete local file when job is end |
|
45
46
|
| job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
|
@@ -195,8 +196,11 @@ When `auto_create_table` is set to true, try to create the table using BigQuery
|
|
195
196
|
|
196
197
|
If table already exists, insert into it.
|
197
198
|
|
198
|
-
|
199
|
+
There are 2 ways to set schema.
|
199
200
|
|
201
|
+
#### Set schema.json
|
202
|
+
|
203
|
+
Please set file path of schema.json.
|
200
204
|
|
201
205
|
```yaml
|
202
206
|
out:
|
@@ -206,6 +210,18 @@ out:
|
|
206
210
|
schema_file: /path/to/schema.json
|
207
211
|
```
|
208
212
|
|
213
|
+
#### Set template_table in dataset
|
214
|
+
|
215
|
+
Plugin will try to read schema from existing table and use it as schema template.
|
216
|
+
|
217
|
+
```yaml
|
218
|
+
out:
|
219
|
+
type: bigquery
|
220
|
+
auto_create_table: true
|
221
|
+
table: table_%Y_%m
|
222
|
+
template_table: existing_table_name
|
223
|
+
```
|
224
|
+
|
209
225
|
### Data Consistency
|
210
226
|
|
211
227
|
When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generate job ID from md5 hash of file and other options to prevent duplicate data insertion.
|
data/build.gradle
CHANGED
data/src/main/java/org/embulk/output/BigqueryAuthentication.java
CHANGED
@@ -1,27 +1,28 @@
|
|
1
1
|
package org.embulk.output;
|
2
2
|
|
3
|
-
import java.io.File;
|
4
|
-
import java.io.FileInputStream;
|
5
|
-
import java.io.IOException;
|
6
|
-
import java.util.Collections;
|
7
|
-
import java.security.GeneralSecurityException;
|
8
|
-
|
9
|
-
import com.google.common.base.Optional;
|
10
|
-
import com.google.common.collect.ImmutableList;
|
11
3
|
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
|
12
4
|
import com.google.api.client.googleapis.compute.ComputeCredential;
|
13
5
|
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
|
6
|
+
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
|
7
|
+
import com.google.api.client.http.HttpRequestInitializer;
|
14
8
|
import com.google.api.client.http.HttpTransport;
|
15
9
|
import com.google.api.client.json.JsonFactory;
|
16
10
|
import com.google.api.client.json.jackson2.JacksonFactory;
|
17
|
-
import com.google.api.client.http.HttpRequestInitializer;
|
18
|
-
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
|
19
11
|
import com.google.api.services.bigquery.Bigquery;
|
20
12
|
import com.google.api.services.bigquery.BigqueryScopes;
|
21
13
|
import com.google.api.services.bigquery.model.ProjectList;
|
14
|
+
import com.google.common.base.Optional;
|
15
|
+
import com.google.common.collect.ImmutableList;
|
22
16
|
import org.embulk.spi.Exec;
|
23
17
|
import org.slf4j.Logger;
|
24
18
|
|
19
|
+
import java.io.File;
|
20
|
+
import java.io.FileInputStream;
|
21
|
+
|
22
|
+
import java.io.IOException;
|
23
|
+
import java.security.GeneralSecurityException;
|
24
|
+
import java.util.Collections;
|
25
|
+
|
25
26
|
public class BigqueryAuthentication
|
26
27
|
{
|
27
28
|
private final Logger log = Exec.getLogger(BigqueryAuthentication.class);
|
data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java
CHANGED
@@ -1,36 +1,36 @@
|
|
1
1
|
package org.embulk.output;
|
2
2
|
|
3
|
-
import java.io.File;
|
4
|
-
import java.io.FileNotFoundException;
|
5
|
-
import java.io.FileOutputStream;
|
6
|
-
import java.io.BufferedOutputStream;
|
7
|
-
import java.io.IOException;
|
8
|
-
import java.nio.charset.Charset;
|
9
|
-
import java.security.NoSuchAlgorithmException;
|
10
|
-
import java.util.List;
|
11
|
-
import java.util.concurrent.TimeoutException;
|
12
3
|
import com.google.common.base.Function;
|
13
4
|
import com.google.common.base.Optional;
|
14
5
|
import com.google.common.base.Throwables;
|
15
|
-
import java.security.GeneralSecurityException;
|
16
|
-
import org.jruby.embed.ScriptingContainer;
|
17
|
-
|
18
6
|
import org.embulk.config.Config;
|
19
|
-
import org.embulk.config.ConfigException;
|
20
7
|
import org.embulk.config.ConfigDefault;
|
21
|
-
import org.embulk.config.ConfigSource;
|
22
8
|
import org.embulk.config.ConfigDiff;
|
23
|
-
import org.embulk.config.TaskReport;
|
9
|
+
import org.embulk.config.ConfigException;
|
10
|
+
import org.embulk.config.ConfigSource;
|
24
11
|
import org.embulk.config.Task;
|
12
|
+
import org.embulk.config.TaskReport;
|
25
13
|
import org.embulk.config.TaskSource;
|
26
|
-
import org.embulk.spi.unit.LocalFile;
|
27
14
|
import org.embulk.spi.Buffer;
|
15
|
+
import org.embulk.spi.Exec;
|
28
16
|
import org.embulk.spi.FileOutputPlugin;
|
29
17
|
import org.embulk.spi.TransactionalFileOutput;
|
30
|
-
import org.embulk.spi.Exec;
|
31
|
-
|
18
|
+
import org.embulk.spi.unit.LocalFile;
|
19
|
+
import org.jruby.embed.ScriptingContainer;
|
32
20
|
import org.slf4j.Logger;
|
33
21
|
|
22
|
+
import java.io.BufferedOutputStream;
|
23
|
+
import java.io.File;
|
24
|
+
import java.io.FileNotFoundException;
|
25
|
+
import java.io.FileOutputStream;
|
26
|
+
|
27
|
+
import java.io.IOException;
|
28
|
+
import java.nio.charset.Charset;
|
29
|
+
import java.security.GeneralSecurityException;
|
30
|
+
import java.security.NoSuchAlgorithmException;
|
31
|
+
import java.util.List;
|
32
|
+
import java.util.concurrent.TimeoutException;
|
33
|
+
|
34
34
|
public class BigqueryOutputPlugin
|
35
35
|
implements FileOutputPlugin
|
36
36
|
{
|
@@ -116,6 +116,10 @@ public class BigqueryOutputPlugin
|
|
116
116
|
Optional<LocalFile> getSchemaFile();
|
117
117
|
void setSchemaFile(Optional<LocalFile> schemaFile);
|
118
118
|
|
119
|
+
@Config("template_table")
|
120
|
+
@ConfigDefault("null")
|
121
|
+
Optional<String> getTemplateTable();
|
122
|
+
|
119
123
|
@Config("prevent_duplicate_insert")
|
120
124
|
@ConfigDefault("false")
|
121
125
|
boolean getPreventDuplicateInsert();
|
@@ -209,8 +213,12 @@ public class BigqueryOutputPlugin
|
|
209
213
|
task.getP12Keyfile().transform(localFileToPathString()),
|
210
214
|
task.getJsonKeyfile().transform(localFileToPathString()),
|
211
215
|
task.getApplicationName())
|
216
|
+
.setProject(task.getProject())
|
217
|
+
.setDataset(task.getDataset())
|
218
|
+
.setTable(task.getTable())
|
212
219
|
.setAutoCreateTable(task.getAutoCreateTable())
|
213
220
|
.setSchemaPath(task.getSchemaFile().transform(localFileToPathString()))
|
221
|
+
.setTemplateTable(task.getTemplateTable())
|
214
222
|
.setSourceFormat(task.getSourceFormat().getString())
|
215
223
|
.setFieldDelimiter(String.valueOf(task.getFieldDelimiter()))
|
216
224
|
.setMaxBadRecords(task.getMaxBadrecords())
|
@@ -222,9 +230,6 @@ public class BigqueryOutputPlugin
|
|
222
230
|
.setIgnoreUnknownValues(task.getIgnoreUnknownValues())
|
223
231
|
.setAllowQuotedNewlines(task.getAllowQuotedNewlines())
|
224
232
|
.build();
|
225
|
-
|
226
|
-
bigQueryWriter.checkConfig(task.getProject(), task.getDataset(), task.getTable());
|
227
|
-
|
228
233
|
}
|
229
234
|
catch (IOException | GeneralSecurityException ex) {
|
230
235
|
throw new ConfigException(ex);
|
data/src/main/java/org/embulk/output/BigqueryWriter.java
CHANGED
@@ -1,40 +1,52 @@
|
|
1
1
|
package org.embulk.output;
|
2
2
|
|
3
|
-
import
|
4
|
-
import
|
5
|
-
import
|
6
|
-
import
|
7
|
-
import
|
3
|
+
import com.fasterxml.jackson.core.type.TypeReference;
|
4
|
+
import com.fasterxml.jackson.databind.ObjectMapper;
|
5
|
+
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
|
6
|
+
import com.google.api.client.googleapis.media.MediaHttpUploader;
|
7
|
+
import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener;
|
8
8
|
import com.google.api.client.http.InputStreamContent;
|
9
|
-
|
10
|
-
import
|
11
|
-
import
|
12
|
-
import
|
13
|
-
import
|
14
|
-
|
15
|
-
import com.google.api.services.bigquery.model
|
9
|
+
import com.google.api.services.bigquery.Bigquery;
|
10
|
+
import com.google.api.services.bigquery.Bigquery.Jobs.Insert;
|
11
|
+
import com.google.api.services.bigquery.Bigquery.Tables;
|
12
|
+
import com.google.api.services.bigquery.model.ErrorProto;
|
13
|
+
import com.google.api.services.bigquery.model.Job;
|
14
|
+
import com.google.api.services.bigquery.model.JobConfiguration;
|
15
|
+
import com.google.api.services.bigquery.model.JobConfigurationLoad;
|
16
|
+
import com.google.api.services.bigquery.model.JobConfigurationTableCopy;
|
17
|
+
import com.google.api.services.bigquery.model.JobReference;
|
18
|
+
import com.google.api.services.bigquery.model.JobStatistics;
|
19
|
+
import com.google.api.services.bigquery.model.Table;
|
20
|
+
import com.google.api.services.bigquery.model.TableFieldSchema;
|
21
|
+
import com.google.api.services.bigquery.model.TableReference;
|
22
|
+
import com.google.api.services.bigquery.model.TableSchema;
|
16
23
|
import com.google.common.base.Optional;
|
17
|
-
import java.security.GeneralSecurityException;
|
18
|
-
import com.fasterxml.jackson.databind.ObjectMapper;
|
19
|
-
import com.fasterxml.jackson.core.type.TypeReference;
|
20
|
-
|
21
24
|
import com.google.common.collect.ImmutableList;
|
22
25
|
import org.apache.commons.codec.binary.Hex;
|
23
26
|
import org.embulk.spi.Exec;
|
24
27
|
import org.slf4j.Logger;
|
25
28
|
|
26
|
-
import
|
27
|
-
import
|
28
|
-
import
|
29
|
-
import
|
30
|
-
|
31
|
-
import
|
29
|
+
import java.io.BufferedInputStream;
|
30
|
+
import java.io.File;
|
31
|
+
import java.io.FileInputStream;
|
32
|
+
import java.io.FileNotFoundException;
|
33
|
+
|
34
|
+
import java.io.IOException;
|
35
|
+
import java.security.GeneralSecurityException;
|
36
|
+
import java.security.MessageDigest;
|
37
|
+
import java.security.NoSuchAlgorithmException;
|
38
|
+
import java.util.List;
|
39
|
+
import java.util.concurrent.TimeoutException;
|
32
40
|
|
33
41
|
public class BigqueryWriter
|
34
42
|
{
|
35
43
|
private final Logger log = Exec.getLogger(BigqueryWriter.class);
|
44
|
+
private final String project;
|
45
|
+
private final String dataset;
|
46
|
+
private final String table;
|
36
47
|
private final boolean autoCreateTable;
|
37
48
|
private final Optional<String> schemaPath;
|
49
|
+
private final Optional<String> templateTable;
|
38
50
|
private final TableSchema tableSchema;
|
39
51
|
private final String sourceFormat;
|
40
52
|
private final String fieldDelimiter;
|
@@ -51,8 +63,12 @@ public class BigqueryWriter
|
|
51
63
|
public BigqueryWriter(Builder builder)
|
52
64
|
throws IOException, GeneralSecurityException
|
53
65
|
{
|
66
|
+
this.project = builder.project;
|
67
|
+
this.dataset = builder.dataset;
|
68
|
+
this.table = builder.table;
|
54
69
|
this.autoCreateTable = builder.autoCreateTable;
|
55
70
|
this.schemaPath = builder.schemaPath;
|
71
|
+
this.templateTable = builder.templateTable;
|
56
72
|
this.sourceFormat = builder.sourceFormat.toUpperCase();
|
57
73
|
this.fieldDelimiter = builder.fieldDelimiter;
|
58
74
|
this.maxBadRecords = builder.maxBadRecords;
|
@@ -70,8 +86,15 @@ public class BigqueryWriter
|
|
70
86
|
);
|
71
87
|
this.bigQueryClient = auth.getBigqueryClient();
|
72
88
|
|
89
|
+
checkConfig();
|
90
|
+
|
73
91
|
if (autoCreateTable) {
|
74
|
-
|
92
|
+
if (schemaPath.isPresent()) {
|
93
|
+
this.tableSchema = createTableSchema();
|
94
|
+
}
|
95
|
+
else {
|
96
|
+
this.tableSchema = fetchTableSchema();
|
97
|
+
}
|
75
98
|
}
|
76
99
|
else {
|
77
100
|
this.tableSchema = null;
|
@@ -314,6 +337,15 @@ public class BigqueryWriter
|
|
314
337
|
}
|
315
338
|
}
|
316
339
|
|
340
|
+
public TableSchema fetchTableSchema() throws IOException
|
341
|
+
{
|
342
|
+
String fetchTarget = templateTable.orNull();
|
343
|
+
log.info(String.format("Fetch table schema from project:%s dataset:%s table:%s", project, dataset, fetchTarget));
|
344
|
+
Tables tableRequest = bigQueryClient.tables();
|
345
|
+
Table tableData = tableRequest.get(project, dataset, fetchTarget).execute();
|
346
|
+
return tableData.getSchema();
|
347
|
+
}
|
348
|
+
|
317
349
|
public boolean isExistTable(String project, String dataset, String table) throws IOException
|
318
350
|
{
|
319
351
|
Tables tableRequest = bigQueryClient.tables();
|
@@ -326,18 +358,18 @@ public class BigqueryWriter
|
|
326
358
|
return true;
|
327
359
|
}
|
328
360
|
|
329
|
-
public void checkConfig(String project, String dataset, String table) throws IOException
|
361
|
+
public void checkConfig() throws IOException
|
330
362
|
{
|
331
363
|
if (autoCreateTable) {
|
332
|
-
if (!schemaPath.isPresent()) {
|
333
|
-
throw new FileNotFoundException("schema_file is empty");
|
334
|
-
}
|
335
|
-
else {
|
364
|
+
if (schemaPath.isPresent()) {
|
336
365
|
File file = new File(schemaPath.orNull());
|
337
366
|
if (!file.exists()) {
|
338
367
|
throw new FileNotFoundException("Can not load schema file.");
|
339
368
|
}
|
340
369
|
}
|
370
|
+
else if (!templateTable.isPresent()) {
|
371
|
+
throw new FileNotFoundException("schema_file or template_table must be present");
|
372
|
+
}
|
341
373
|
}
|
342
374
|
else {
|
343
375
|
if (!isExistTable(project, dataset, table)) {
|
@@ -404,8 +436,12 @@ public class BigqueryWriter
|
|
404
436
|
private Optional<String> p12KeyFilePath;
|
405
437
|
private Optional<String> jsonKeyFilePath;
|
406
438
|
private String applicationName;
|
439
|
+
private String project;
|
440
|
+
private String dataset;
|
441
|
+
private String table;
|
407
442
|
private boolean autoCreateTable;
|
408
443
|
private Optional<String> schemaPath;
|
444
|
+
private Optional<String> templateTable;
|
409
445
|
private String sourceFormat;
|
410
446
|
private String fieldDelimiter;
|
411
447
|
private int maxBadRecords;
|
@@ -427,6 +463,24 @@ public class BigqueryWriter
|
|
427
463
|
this.applicationName = applicationName;
|
428
464
|
}
|
429
465
|
|
466
|
+
public Builder setProject(String project)
|
467
|
+
{
|
468
|
+
this.project = project;
|
469
|
+
return this;
|
470
|
+
}
|
471
|
+
|
472
|
+
public Builder setDataset(String dataset)
|
473
|
+
{
|
474
|
+
this.dataset = dataset;
|
475
|
+
return this;
|
476
|
+
}
|
477
|
+
|
478
|
+
public Builder setTable(String table)
|
479
|
+
{
|
480
|
+
this.table = table;
|
481
|
+
return this;
|
482
|
+
}
|
483
|
+
|
430
484
|
public Builder setAutoCreateTable(boolean autoCreateTable)
|
431
485
|
{
|
432
486
|
this.autoCreateTable = autoCreateTable;
|
@@ -439,6 +493,12 @@ public class BigqueryWriter
|
|
439
493
|
return this;
|
440
494
|
}
|
441
495
|
|
496
|
+
public Builder setTemplateTable(Optional<String> templateTable)
|
497
|
+
{
|
498
|
+
this.templateTable = templateTable;
|
499
|
+
return this;
|
500
|
+
}
|
501
|
+
|
442
502
|
public Builder setSourceFormat(String sourceFormat)
|
443
503
|
{
|
444
504
|
this.sourceFormat = sourceFormat;
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-28 00:00:00.000000000 Z
|
11
|
+
date: 2016-02-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -65,7 +65,7 @@ files:
|
|
65
65
|
- src/test/java/org/embulk/output/TestBigqueryWriter.java
|
66
66
|
- classpath/commons-codec-1.3.jar
|
67
67
|
- classpath/commons-logging-1.1.1.jar
|
68
|
-
- classpath/embulk-output-bigquery-0.2.1.jar
|
68
|
+
- classpath/embulk-output-bigquery-0.2.2.jar
|
69
69
|
- classpath/google-api-client-1.20.0.jar
|
70
70
|
- classpath/google-api-services-bigquery-v2-rev205-1.20.0.jar
|
71
71
|
- classpath/google-http-client-1.20.0.jar
|