embulk-output-bigquery 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +17 -1
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/output/BigqueryAuthentication.java +11 -10
- data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java +26 -21
- data/src/main/java/org/embulk/output/BigqueryWriter.java +88 -28
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: daa84fac7fd9adf3ad62798ef5b113ed462fc303
|
4
|
+
data.tar.gz: fafbd5023df0b9d3539a754b699e089012d4caa1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a273f26c8c357b082d94bc97364f18fc73db9f829b12d5bd10df867f625d4619da59c483d56c83cbbdef82383e3a4116f185f9973c246cc4ccd8adc187cadb2
|
7
|
+
data.tar.gz: 6bd617783792cd5c4e7d3ab7e0b39ab3f86fa97cb326f3b6978e743e74902130c809f353bc3238f97ab5e6b439ba44cf9972dc2840d37b2b3ad5d8b94f6dfddd
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## 0.2.2 - 2016-02-15
|
2
|
+
|
3
|
+
* [new feature] Added template_table option. [#25](https://github.com/embulk/embulk-output-bigquery/pull/25)
|
4
|
+
|
1
5
|
## 0.2.1 - 2016-01-28
|
2
6
|
|
3
7
|
* [maintenance] Upgraded Embulk version to 0.8.1 [#22](https://github.com/embulk/embulk-output-bigquery/pull/22). @joker1007 thanks!
|
data/README.md
CHANGED
@@ -40,6 +40,7 @@ OAuth flow for installed applications.
|
|
40
40
|
| table | string | required | | table name |
|
41
41
|
| auto_create_table | boolean | optional | 0 | [See below](#dynamic-table-creating) |
|
42
42
|
| schema_file | string | optional | | /path/to/schema.json |
|
43
|
+
| template_table | string | optional | | existing_table_name [See below](#dynamic-table-creating) |
|
43
44
|
| prevent_duplicate_insert | boolean | optional | 0 | [See below](#data-consistency) |
|
44
45
|
| delete_from_local_when_job_end | boolean | optional | 0 | If set to true, delete local file when job is end |
|
45
46
|
| job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |
|
@@ -195,8 +196,11 @@ When `auto_create_table` is set to true, try to create the table using BigQuery
|
|
195
196
|
|
196
197
|
If table already exists, insert into it.
|
197
198
|
|
198
|
-
|
199
|
+
There are 2 ways to set schema.
|
199
200
|
|
201
|
+
#### Set schema.json
|
202
|
+
|
203
|
+
Please set file path of schema.json.
|
200
204
|
|
201
205
|
```yaml
|
202
206
|
out:
|
@@ -206,6 +210,18 @@ out:
|
|
206
210
|
schema_file: /path/to/schema.json
|
207
211
|
```
|
208
212
|
|
213
|
+
#### Set template_table in dataset
|
214
|
+
|
215
|
+
Plugin will try to read schema from existing table and use it as schema template.
|
216
|
+
|
217
|
+
```yaml
|
218
|
+
out:
|
219
|
+
type: bigquery
|
220
|
+
auto_create_table: true
|
221
|
+
table: table_%Y_%m
|
222
|
+
template_table: existing_table_name
|
223
|
+
```
|
224
|
+
|
209
225
|
### Data Consistency
|
210
226
|
|
211
227
|
When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generate job ID from md5 hash of file and other options to prevent duplicate data insertion.
|
data/build.gradle
CHANGED
data/src/main/java/org/embulk/output/BigqueryAuthentication.java
CHANGED
@@ -1,27 +1,28 @@
|
|
1
1
|
package org.embulk.output;
|
2
2
|
|
3
|
-
import java.io.File;
|
4
|
-
import java.io.FileInputStream;
|
5
|
-
import java.io.IOException;
|
6
|
-
import java.util.Collections;
|
7
|
-
import java.security.GeneralSecurityException;
|
8
|
-
|
9
|
-
import com.google.common.base.Optional;
|
10
|
-
import com.google.common.collect.ImmutableList;
|
11
3
|
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
|
12
4
|
import com.google.api.client.googleapis.compute.ComputeCredential;
|
13
5
|
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
|
6
|
+
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
|
7
|
+
import com.google.api.client.http.HttpRequestInitializer;
|
14
8
|
import com.google.api.client.http.HttpTransport;
|
15
9
|
import com.google.api.client.json.JsonFactory;
|
16
10
|
import com.google.api.client.json.jackson2.JacksonFactory;
|
17
|
-
import com.google.api.client.http.HttpRequestInitializer;
|
18
|
-
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
|
19
11
|
import com.google.api.services.bigquery.Bigquery;
|
20
12
|
import com.google.api.services.bigquery.BigqueryScopes;
|
21
13
|
import com.google.api.services.bigquery.model.ProjectList;
|
14
|
+
import com.google.common.base.Optional;
|
15
|
+
import com.google.common.collect.ImmutableList;
|
22
16
|
import org.embulk.spi.Exec;
|
23
17
|
import org.slf4j.Logger;
|
24
18
|
|
19
|
+
import java.io.File;
|
20
|
+
import java.io.FileInputStream;
|
21
|
+
|
22
|
+
import java.io.IOException;
|
23
|
+
import java.security.GeneralSecurityException;
|
24
|
+
import java.util.Collections;
|
25
|
+
|
25
26
|
public class BigqueryAuthentication
|
26
27
|
{
|
27
28
|
private final Logger log = Exec.getLogger(BigqueryAuthentication.class);
|
data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java
CHANGED
@@ -1,36 +1,36 @@
|
|
1
1
|
package org.embulk.output;
|
2
2
|
|
3
|
-
import java.io.File;
|
4
|
-
import java.io.FileNotFoundException;
|
5
|
-
import java.io.FileOutputStream;
|
6
|
-
import java.io.BufferedOutputStream;
|
7
|
-
import java.io.IOException;
|
8
|
-
import java.nio.charset.Charset;
|
9
|
-
import java.security.NoSuchAlgorithmException;
|
10
|
-
import java.util.List;
|
11
|
-
import java.util.concurrent.TimeoutException;
|
12
3
|
import com.google.common.base.Function;
|
13
4
|
import com.google.common.base.Optional;
|
14
5
|
import com.google.common.base.Throwables;
|
15
|
-
import java.security.GeneralSecurityException;
|
16
|
-
import org.jruby.embed.ScriptingContainer;
|
17
|
-
|
18
6
|
import org.embulk.config.Config;
|
19
|
-
import org.embulk.config.ConfigException;
|
20
7
|
import org.embulk.config.ConfigDefault;
|
21
|
-
import org.embulk.config.ConfigSource;
|
22
8
|
import org.embulk.config.ConfigDiff;
|
23
|
-
import org.embulk.config.TaskReport;
|
9
|
+
import org.embulk.config.ConfigException;
|
10
|
+
import org.embulk.config.ConfigSource;
|
24
11
|
import org.embulk.config.Task;
|
12
|
+
import org.embulk.config.TaskReport;
|
25
13
|
import org.embulk.config.TaskSource;
|
26
|
-
import org.embulk.spi.unit.LocalFile;
|
27
14
|
import org.embulk.spi.Buffer;
|
15
|
+
import org.embulk.spi.Exec;
|
28
16
|
import org.embulk.spi.FileOutputPlugin;
|
29
17
|
import org.embulk.spi.TransactionalFileOutput;
|
30
|
-
import org.embulk.spi.Exec;
|
31
|
-
|
18
|
+
import org.embulk.spi.unit.LocalFile;
|
19
|
+
import org.jruby.embed.ScriptingContainer;
|
32
20
|
import org.slf4j.Logger;
|
33
21
|
|
22
|
+
import java.io.BufferedOutputStream;
|
23
|
+
import java.io.File;
|
24
|
+
import java.io.FileNotFoundException;
|
25
|
+
import java.io.FileOutputStream;
|
26
|
+
|
27
|
+
import java.io.IOException;
|
28
|
+
import java.nio.charset.Charset;
|
29
|
+
import java.security.GeneralSecurityException;
|
30
|
+
import java.security.NoSuchAlgorithmException;
|
31
|
+
import java.util.List;
|
32
|
+
import java.util.concurrent.TimeoutException;
|
33
|
+
|
34
34
|
public class BigqueryOutputPlugin
|
35
35
|
implements FileOutputPlugin
|
36
36
|
{
|
@@ -116,6 +116,10 @@ public class BigqueryOutputPlugin
|
|
116
116
|
Optional<LocalFile> getSchemaFile();
|
117
117
|
void setSchemaFile(Optional<LocalFile> schemaFile);
|
118
118
|
|
119
|
+
@Config("template_table")
|
120
|
+
@ConfigDefault("null")
|
121
|
+
Optional<String> getTemplateTable();
|
122
|
+
|
119
123
|
@Config("prevent_duplicate_insert")
|
120
124
|
@ConfigDefault("false")
|
121
125
|
boolean getPreventDuplicateInsert();
|
@@ -209,8 +213,12 @@ public class BigqueryOutputPlugin
|
|
209
213
|
task.getP12Keyfile().transform(localFileToPathString()),
|
210
214
|
task.getJsonKeyfile().transform(localFileToPathString()),
|
211
215
|
task.getApplicationName())
|
216
|
+
.setProject(task.getProject())
|
217
|
+
.setDataset(task.getDataset())
|
218
|
+
.setTable(task.getTable())
|
212
219
|
.setAutoCreateTable(task.getAutoCreateTable())
|
213
220
|
.setSchemaPath(task.getSchemaFile().transform(localFileToPathString()))
|
221
|
+
.setTemplateTable(task.getTemplateTable())
|
214
222
|
.setSourceFormat(task.getSourceFormat().getString())
|
215
223
|
.setFieldDelimiter(String.valueOf(task.getFieldDelimiter()))
|
216
224
|
.setMaxBadRecords(task.getMaxBadrecords())
|
@@ -222,9 +230,6 @@ public class BigqueryOutputPlugin
|
|
222
230
|
.setIgnoreUnknownValues(task.getIgnoreUnknownValues())
|
223
231
|
.setAllowQuotedNewlines(task.getAllowQuotedNewlines())
|
224
232
|
.build();
|
225
|
-
|
226
|
-
bigQueryWriter.checkConfig(task.getProject(), task.getDataset(), task.getTable());
|
227
|
-
|
228
233
|
}
|
229
234
|
catch (IOException | GeneralSecurityException ex) {
|
230
235
|
throw new ConfigException(ex);
|
data/src/main/java/org/embulk/output/BigqueryWriter.java
CHANGED
@@ -1,40 +1,52 @@
|
|
1
1
|
package org.embulk.output;
|
2
2
|
|
3
|
-
import
|
4
|
-
import
|
5
|
-
import
|
6
|
-
import
|
7
|
-
import
|
3
|
+
import com.fasterxml.jackson.core.type.TypeReference;
|
4
|
+
import com.fasterxml.jackson.databind.ObjectMapper;
|
5
|
+
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
|
6
|
+
import com.google.api.client.googleapis.media.MediaHttpUploader;
|
7
|
+
import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener;
|
8
8
|
import com.google.api.client.http.InputStreamContent;
|
9
|
-
|
10
|
-
import
|
11
|
-
import
|
12
|
-
import
|
13
|
-
import
|
14
|
-
|
15
|
-
import com.google.api.services.bigquery.model
|
9
|
+
import com.google.api.services.bigquery.Bigquery;
|
10
|
+
import com.google.api.services.bigquery.Bigquery.Jobs.Insert;
|
11
|
+
import com.google.api.services.bigquery.Bigquery.Tables;
|
12
|
+
import com.google.api.services.bigquery.model.ErrorProto;
|
13
|
+
import com.google.api.services.bigquery.model.Job;
|
14
|
+
import com.google.api.services.bigquery.model.JobConfiguration;
|
15
|
+
import com.google.api.services.bigquery.model.JobConfigurationLoad;
|
16
|
+
import com.google.api.services.bigquery.model.JobConfigurationTableCopy;
|
17
|
+
import com.google.api.services.bigquery.model.JobReference;
|
18
|
+
import com.google.api.services.bigquery.model.JobStatistics;
|
19
|
+
import com.google.api.services.bigquery.model.Table;
|
20
|
+
import com.google.api.services.bigquery.model.TableFieldSchema;
|
21
|
+
import com.google.api.services.bigquery.model.TableReference;
|
22
|
+
import com.google.api.services.bigquery.model.TableSchema;
|
16
23
|
import com.google.common.base.Optional;
|
17
|
-
import java.security.GeneralSecurityException;
|
18
|
-
import com.fasterxml.jackson.databind.ObjectMapper;
|
19
|
-
import com.fasterxml.jackson.core.type.TypeReference;
|
20
|
-
|
21
24
|
import com.google.common.collect.ImmutableList;
|
22
25
|
import org.apache.commons.codec.binary.Hex;
|
23
26
|
import org.embulk.spi.Exec;
|
24
27
|
import org.slf4j.Logger;
|
25
28
|
|
26
|
-
import
|
27
|
-
import
|
28
|
-
import
|
29
|
-
import
|
30
|
-
|
31
|
-
import
|
29
|
+
import java.io.BufferedInputStream;
|
30
|
+
import java.io.File;
|
31
|
+
import java.io.FileInputStream;
|
32
|
+
import java.io.FileNotFoundException;
|
33
|
+
|
34
|
+
import java.io.IOException;
|
35
|
+
import java.security.GeneralSecurityException;
|
36
|
+
import java.security.MessageDigest;
|
37
|
+
import java.security.NoSuchAlgorithmException;
|
38
|
+
import java.util.List;
|
39
|
+
import java.util.concurrent.TimeoutException;
|
32
40
|
|
33
41
|
public class BigqueryWriter
|
34
42
|
{
|
35
43
|
private final Logger log = Exec.getLogger(BigqueryWriter.class);
|
44
|
+
private final String project;
|
45
|
+
private final String dataset;
|
46
|
+
private final String table;
|
36
47
|
private final boolean autoCreateTable;
|
37
48
|
private final Optional<String> schemaPath;
|
49
|
+
private final Optional<String> templateTable;
|
38
50
|
private final TableSchema tableSchema;
|
39
51
|
private final String sourceFormat;
|
40
52
|
private final String fieldDelimiter;
|
@@ -51,8 +63,12 @@ public class BigqueryWriter
|
|
51
63
|
public BigqueryWriter(Builder builder)
|
52
64
|
throws IOException, GeneralSecurityException
|
53
65
|
{
|
66
|
+
this.project = builder.project;
|
67
|
+
this.dataset = builder.dataset;
|
68
|
+
this.table = builder.table;
|
54
69
|
this.autoCreateTable = builder.autoCreateTable;
|
55
70
|
this.schemaPath = builder.schemaPath;
|
71
|
+
this.templateTable = builder.templateTable;
|
56
72
|
this.sourceFormat = builder.sourceFormat.toUpperCase();
|
57
73
|
this.fieldDelimiter = builder.fieldDelimiter;
|
58
74
|
this.maxBadRecords = builder.maxBadRecords;
|
@@ -70,8 +86,15 @@ public class BigqueryWriter
|
|
70
86
|
);
|
71
87
|
this.bigQueryClient = auth.getBigqueryClient();
|
72
88
|
|
89
|
+
checkConfig();
|
90
|
+
|
73
91
|
if (autoCreateTable) {
|
74
|
-
|
92
|
+
if (schemaPath.isPresent()) {
|
93
|
+
this.tableSchema = createTableSchema();
|
94
|
+
}
|
95
|
+
else {
|
96
|
+
this.tableSchema = fetchTableSchema();
|
97
|
+
}
|
75
98
|
}
|
76
99
|
else {
|
77
100
|
this.tableSchema = null;
|
@@ -314,6 +337,15 @@ public class BigqueryWriter
|
|
314
337
|
}
|
315
338
|
}
|
316
339
|
|
340
|
+
public TableSchema fetchTableSchema() throws IOException
|
341
|
+
{
|
342
|
+
String fetchTarget = templateTable.orNull();
|
343
|
+
log.info(String.format("Fetch table schema from project:%s dataset:%s table:%s", project, dataset, fetchTarget));
|
344
|
+
Tables tableRequest = bigQueryClient.tables();
|
345
|
+
Table tableData = tableRequest.get(project, dataset, fetchTarget).execute();
|
346
|
+
return tableData.getSchema();
|
347
|
+
}
|
348
|
+
|
317
349
|
public boolean isExistTable(String project, String dataset, String table) throws IOException
|
318
350
|
{
|
319
351
|
Tables tableRequest = bigQueryClient.tables();
|
@@ -326,18 +358,18 @@ public class BigqueryWriter
|
|
326
358
|
return true;
|
327
359
|
}
|
328
360
|
|
329
|
-
public void checkConfig(String project, String dataset, String table) throws IOException
|
361
|
+
public void checkConfig() throws IOException
|
330
362
|
{
|
331
363
|
if (autoCreateTable) {
|
332
|
-
if (!schemaPath.isPresent()) {
|
333
|
-
throw new FileNotFoundException("schema_file is empty");
|
334
|
-
}
|
335
|
-
else {
|
364
|
+
if (schemaPath.isPresent()) {
|
336
365
|
File file = new File(schemaPath.orNull());
|
337
366
|
if (!file.exists()) {
|
338
367
|
throw new FileNotFoundException("Can not load schema file.");
|
339
368
|
}
|
340
369
|
}
|
370
|
+
else if (!templateTable.isPresent()) {
|
371
|
+
throw new FileNotFoundException("schema_file or template_table must be present");
|
372
|
+
}
|
341
373
|
}
|
342
374
|
else {
|
343
375
|
if (!isExistTable(project, dataset, table)) {
|
@@ -404,8 +436,12 @@ public class BigqueryWriter
|
|
404
436
|
private Optional<String> p12KeyFilePath;
|
405
437
|
private Optional<String> jsonKeyFilePath;
|
406
438
|
private String applicationName;
|
439
|
+
private String project;
|
440
|
+
private String dataset;
|
441
|
+
private String table;
|
407
442
|
private boolean autoCreateTable;
|
408
443
|
private Optional<String> schemaPath;
|
444
|
+
private Optional<String> templateTable;
|
409
445
|
private String sourceFormat;
|
410
446
|
private String fieldDelimiter;
|
411
447
|
private int maxBadRecords;
|
@@ -427,6 +463,24 @@ public class BigqueryWriter
|
|
427
463
|
this.applicationName = applicationName;
|
428
464
|
}
|
429
465
|
|
466
|
+
public Builder setProject(String project)
|
467
|
+
{
|
468
|
+
this.project = project;
|
469
|
+
return this;
|
470
|
+
}
|
471
|
+
|
472
|
+
public Builder setDataset(String dataset)
|
473
|
+
{
|
474
|
+
this.dataset = dataset;
|
475
|
+
return this;
|
476
|
+
}
|
477
|
+
|
478
|
+
public Builder setTable(String table)
|
479
|
+
{
|
480
|
+
this.table = table;
|
481
|
+
return this;
|
482
|
+
}
|
483
|
+
|
430
484
|
public Builder setAutoCreateTable(boolean autoCreateTable)
|
431
485
|
{
|
432
486
|
this.autoCreateTable = autoCreateTable;
|
@@ -439,6 +493,12 @@ public class BigqueryWriter
|
|
439
493
|
return this;
|
440
494
|
}
|
441
495
|
|
496
|
+
public Builder setTemplateTable(Optional<String> templateTable)
|
497
|
+
{
|
498
|
+
this.templateTable = templateTable;
|
499
|
+
return this;
|
500
|
+
}
|
501
|
+
|
442
502
|
public Builder setSourceFormat(String sourceFormat)
|
443
503
|
{
|
444
504
|
this.sourceFormat = sourceFormat;
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-28 00:00:00.000000000 Z
|
11
|
+
date: 2016-02-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -65,7 +65,7 @@ files:
|
|
65
65
|
- src/test/java/org/embulk/output/TestBigqueryWriter.java
|
66
66
|
- classpath/commons-codec-1.3.jar
|
67
67
|
- classpath/commons-logging-1.1.1.jar
|
68
|
-
- classpath/embulk-output-bigquery-0.2.1.jar
|
68
|
+
- classpath/embulk-output-bigquery-0.2.2.jar
|
69
69
|
- classpath/google-api-client-1.20.0.jar
|
70
70
|
- classpath/google-api-services-bigquery-v2-rev205-1.20.0.jar
|
71
71
|
- classpath/google-http-client-1.20.0.jar
|