embulk-output-bigquery 0.1.7 → 0.1.8
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +6 -6
- data/build.gradle +4 -4
- data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java +130 -40
- data/src/main/java/org/embulk/output/BigqueryWriter.java +68 -108
- metadata +4 -4
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 48a9a0add9223ccfca3e1e48f360ebf38cfe08d5
+  data.tar.gz: b6d9d0b0635c9d0728238873094122b4a72648de
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7ee81bbbc5b65c34d86014e3da5a3d848ded06766a3b58e92f5918070cc89eadd8740cf16f2b36c864c7a50f5476a0be2460fe2b722f2edfeb23f612f721df91
+  data.tar.gz: 605003c7364982e0f4fc391ef51517ef16b201b0dc0c3f43833f1cd98a5110b7ae2476f64ebbf3d9284e3b781a2ed85b68855d3b585cdda4b68e72f223974b14
data/README.md CHANGED

@@ -30,14 +30,14 @@ OAuth flow for installed applications.
 |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
 | auth_method | string | optional | "private_key" | `private_key` or `compute_engine`
 | service_account_email | string | required when auth_method is private_key | | Your Google service account email
-| p12_keyfile_path | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
+| p12_keyfile | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
 | sequence_format | string | optional | %03d.%02d | |
 | file_ext | string | optional | | e.g. ".csv.gz" ".json.gz" |
 | project | string | required | | project_id |
 | dataset | string | required | | dataset |
 | table | string | required | | table name |
 | auto_create_table | boolean | optional | 0 | [See below](#dynamic-table-creating) |
-| schema_path | string | optional | | /path/to/schema.json |
+| schema_file | string | optional | | /path/to/schema.json |
 | prevent_duplicate_insert | boolean | optional | 0 | [See below](#data-consistency) |
 | delete_from_local_when_job_end | boolean | optional | 0 | If set to true, delete local file when job is end |
 | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |

@@ -53,7 +53,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
 | source_format | string | required | "CSV" | File type (`NEWLINE_DELIMITED_JSON` or `CSV`) |
 | max_bad_records | int | optional | 0 | |
-| field_delimiter | string | optional | "," | |
+| field_delimiter | char | optional | "," | |
 | encoding | string | optional | "UTF-8" | `UTF-8` or `ISO-8859-1` |
 | ignore_unknown_values | boolean | optional | 0 | |
 | allow_quoted_newlines | boolean | optional | 0 | Set true, if data contains newline characters. It may cause slow procsssing |

@@ -65,7 +65,7 @@ out:
   type: bigquery
   auth_method: private_key   # default
   service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
-  p12_keyfile_path: /path/to/p12_keyfile.p12
+  p12_keyfile: /path/to/p12_keyfile.p12
   path_prefix: /path/to/output
   file_ext: csv.gz
   source_format: CSV

@@ -130,14 +130,14 @@ out:
   type: bigquery
   auto_create_table: true
   table: table_%Y_%m
-  schema_path: /path/to/schema.json
+  schema_file: /path/to/schema.json
 ```
 
 ### Data Consistency
 
 When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generate job ID from md5 hash of file and other options to prevent duplicate data insertion.
 
-`job ID = md5(md5(file) + dataset + table + schema + source_format + file_delimiter + max_bad_records + encoding + ignore_unknown_values)`
+`job ID = md5(md5(file) + dataset + table + schema + source_format + file_delimiter + max_bad_records + encoding + ignore_unknown_values + allow_quoted_newlines)`
 
 [job ID must be unique(including failures)](https://cloud.google.com/bigquery/loading-data-into-bigquery#consistency). So same data can't insert with same settings.
 
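To make the formula above concrete, here is a minimal, self-contained sketch of the md5-over-concatenated-options scheme. The class and method names are illustrative, not the plugin's API; the `embulk_job_` prefix matches what `BigqueryWriter.createJobId` does in the Java diff further below.

import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

public class JobIdSketch
{
    // Concatenate the option values, md5 the result, and hex-encode it.
    static String jobId(String... elements) throws NoSuchAlgorithmException
    {
        StringBuilder sb = new StringBuilder();
        for (String element : elements) {
            sb.append(element);
        }
        MessageDigest md = MessageDigest.getInstance("MD5");
        StringBuilder hex = new StringBuilder("embulk_job_");
        for (byte b : md.digest(sb.toString().getBytes())) {
            hex.append(String.format("%02x", b));
        }
        return hex.toString();
    }

    public static void main(String[] args) throws NoSuchAlgorithmException
    {
        // Identical inputs always produce the same job ID, so a second
        // attempt to load the same file with the same settings is rejected
        // by BigQuery's unique-job-ID rule.
        System.out.println(jobId("md5-of-file", "my_dataset", "my_table", "CSV"));
    }
}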
data/build.gradle CHANGED

@@ -15,11 +15,11 @@ configurations {
 sourceCompatibility = 1.7
 targetCompatibility = 1.7
 
-version = "0.1.7"
+version = "0.1.8"
 
 dependencies {
-    compile "org.embulk:embulk-core:0.6.
-    provided "org.embulk:embulk-core:0.6.
+    compile "org.embulk:embulk-core:0.6.22"
+    provided "org.embulk:embulk-core:0.6.22"
 
     compile "com.google.http-client:google-http-client-jackson2:1.20.0"
     compile "com.google.apis:google-api-services-bigquery:v2-rev205-1.20.0"

@@ -49,7 +49,7 @@ Gem::Specification.new do |spec|
   spec.description = %[Embulk plugin that insert records to Google BigQuery.]
   spec.email = ["satoshiakama@gmail.com"]
   spec.licenses = ["Apache-2.0"]
-  spec.homepage = "https://github.com/
+  spec.homepage = "https://github.com/embulk/embulk-output-bigquery"
 
   spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
   spec.test_files = spec.files.grep(%r"^(test|spec)/")
data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java CHANGED

@@ -5,9 +5,11 @@ import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.BufferedOutputStream;
 import java.io.IOException;
+import java.nio.charset.Charset;
 import java.security.NoSuchAlgorithmException;
 import java.util.List;
 import java.util.concurrent.TimeoutException;
+import com.google.common.base.Function;
 import com.google.common.base.Optional;
 import com.google.common.base.Throwables;
 import java.security.GeneralSecurityException;

@@ -21,6 +23,7 @@ import org.embulk.config.ConfigDiff;
 import org.embulk.config.CommitReport;
 import org.embulk.config.Task;
 import org.embulk.config.TaskSource;
+import org.embulk.spi.unit.LocalFile;
 import org.embulk.spi.Buffer;
 import org.embulk.spi.FileOutputPlugin;
 import org.embulk.spi.TransactionalFileOutput;

@@ -36,114 +39,147 @@ public class BigqueryOutputPlugin
     {
         @Config("auth_method")
         @ConfigDefault("\"private_key\"")
-
+        AuthMethod getAuthMethod();
 
         @Config("service_account_email")
         @ConfigDefault("null")
-
+        Optional<String> getServiceAccountEmail();
 
+        // kept for backward compatibility
         @Config("p12_keyfile_path")
         @ConfigDefault("null")
-
+        Optional<String> getP12KeyfilePath();
+
+        @Config("p12_keyfile")
+        @ConfigDefault("null")
+        Optional<LocalFile> getP12Keyfile();
+        void setP12Keyfile(Optional<LocalFile> p12Keyfile);
 
         @Config("application_name")
         @ConfigDefault("\"Embulk BigQuery plugin\"")
-
+        String getApplicationName();
 
         @Config("path_prefix")
-
+        String getPathPrefix();
 
         @Config("sequence_format")
         @ConfigDefault("\".%03d.%02d\"")
-
+        String getSequenceFormat();
 
         @Config("file_ext")
-
+        String getFileNameExtension();
 
         @Config("source_format")
         @ConfigDefault("\"CSV\"")
-
+        SourceFormat getSourceFormat();
 
         @Config("field_delimiter")
         @ConfigDefault("\",\"")
-
+        char getFieldDelimiter();
 
         @Config("max_bad_records")
         @ConfigDefault("0")
-
+        int getMaxBadrecords();
 
         @Config("encoding")
         @ConfigDefault("\"UTF-8\"")
-
+        Charset getEncoding();
 
         @Config("delete_from_local_when_job_end")
         @ConfigDefault("false")
-
+        boolean getDeleteFromLocalWhenJobEnd();
 
         @Config("project")
-
+        String getProject();
 
         @Config("dataset")
-
+        String getDataset();
 
         @Config("table")
-
+        String getTable();
 
         @Config("auto_create_table")
         @ConfigDefault("false")
-
+        boolean getAutoCreateTable();
 
+        // kept for backward compatibility
         @Config("schema_path")
         @ConfigDefault("null")
-
+        Optional<String> getSchemaPath();
+
+        @Config("schema_file")
+        @ConfigDefault("null")
+        Optional<LocalFile> getSchemaFile();
+        void setSchemaFile(Optional<LocalFile> schemaFile);
 
         @Config("prevent_duplicate_insert")
         @ConfigDefault("false")
-
+        boolean getPreventDuplicateInsert();
 
         @Config("job_status_max_polling_time")
         @ConfigDefault("3600")
-
+        int getJobStatusMaxPollingTime();
 
         @Config("job_status_polling_interval")
         @ConfigDefault("10")
-
+        int getJobStatusPollingInterval();
 
         @Config("is_skip_job_result_check")
         @ConfigDefault("false")
-
+        boolean getIsSkipJobResultCheck();
 
         @Config("ignore_unknown_values")
         @ConfigDefault("false")
-
+        boolean getIgnoreUnknownValues();
 
         @Config("allow_quoted_newlines")
         @ConfigDefault("false")
-
+        boolean getAllowQuotedNewlines();
     }
 
     private final Logger log = Exec.getLogger(BigqueryOutputPlugin.class);
     private static BigqueryWriter bigQueryWriter;
 
+    @Override
     public ConfigDiff transaction(ConfigSource config, int taskCount,
             FileOutputPlugin.Control control)
     {
         final PluginTask task = config.loadConfig(PluginTask.class);
 
+        if (task.getP12KeyfilePath().isPresent()) {
+            if (task.getP12Keyfile().isPresent()) {
+                throw new ConfigException("Setting both p12_keyfile_path and p12_keyfile is invalid");
+            }
+            try {
+                task.setP12Keyfile(Optional.of(LocalFile.of(task.getP12KeyfilePath().get())));
+            } catch (IOException ex) {
+                throw Throwables.propagate(ex);
+            }
+        }
+
+        if (task.getSchemaPath().isPresent()) {
+            if (task.getSchemaFile().isPresent()) {
+                throw new ConfigException("Setting both p12_keyfile_path and p12_keyfile is invalid");
+            }
+            try {
+                task.setSchemaFile(Optional.of(LocalFile.of(task.getSchemaPath().get())));
+            } catch (IOException ex) {
+                throw Throwables.propagate(ex);
+            }
+        }
+
         try {
-            bigQueryWriter = new BigqueryWriter.Builder(
-                    .
-
-                    .
-
-                    .setDataset(task.getDataset())
-                    .setTable(generateTableName(task.getTable()))
+            bigQueryWriter = new BigqueryWriter.Builder (
+                    task.getAuthMethod().getString(),
+                    task.getServiceAccountEmail(),
+                    task.getP12Keyfile().transform(localFileToPathString()),
+                    task.getApplicationName())
                     .setAutoCreateTable(task.getAutoCreateTable())
-                    .setSchemaPath(task.
-                    .setSourceFormat(task.getSourceFormat())
-                    .setFieldDelimiter(task.getFieldDelimiter())
-                    .
-                    .setEncoding(task.getEncoding())
+                    .setSchemaPath(task.getSchemaFile().transform(localFileToPathString()))
+                    .setSourceFormat(task.getSourceFormat().getString())
+                    .setFieldDelimiter(String.valueOf(task.getFieldDelimiter()))
+                    .setMaxBadRecords(task.getMaxBadrecords())
+                    .setEncoding(String.valueOf(task.getEncoding()))
                     .setPreventDuplicateInsert(task.getPreventDuplicateInsert())
                     .setJobStatusMaxPollingTime(task.getJobStatusMaxPollingTime())
                     .setJobStatusPollingInterval(task.getJobStatusPollingInterval())

@@ -151,8 +187,9 @@ public class BigqueryOutputPlugin
                     .setIgnoreUnknownValues(task.getIgnoreUnknownValues())
                     .setAllowQuotedNewlines(task.getAllowQuotedNewlines())
                     .build();
-
-
+
+            bigQueryWriter.checkConfig(task.getProject(), task.getDataset(), task.getTable());
+
         } catch (IOException | GeneralSecurityException ex) {
             throw new ConfigException(ex);
         }

@@ -160,6 +197,7 @@ public class BigqueryOutputPlugin
         return resume(task.dump(), taskCount, control);
     }
 
+    @Override
     public ConfigDiff resume(TaskSource taskSource,
             int taskCount,
             FileOutputPlugin.Control control)

@@ -176,6 +214,17 @@ public class BigqueryOutputPlugin
     {
     }
 
+    private Function<LocalFile, String> localFileToPathString()
+    {
+        return new Function<LocalFile, String>()
+        {
+            public String apply(LocalFile file)
+            {
+                return file.getPath().toString();
+            }
+        };
+    }
+
     @Override
     public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
     {
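The `localFileToPathString()` helper added above is a Guava `Function` applied via `Optional.transform`, so an absent `p12_keyfile` or `schema_file` stays absent with no null checks. A minimal sketch of the same pattern, using `java.nio.file.Path` as a stand-in for Embulk's `LocalFile`:

import com.google.common.base.Function;
import com.google.common.base.Optional;

import java.nio.file.Path;
import java.nio.file.Paths;

public class TransformSketch
{
    // Mirrors localFileToPathString(): present values are mapped,
    // absent values pass through untouched.
    static Function<Path, String> pathToString()
    {
        return new Function<Path, String>()
        {
            public String apply(Path path)
            {
                return path.toString();
            }
        };
    }

    public static void main(String[] args)
    {
        Optional<Path> keyfile = Optional.of(Paths.get("/path/to/p12_keyfile.p12"));
        System.out.println(keyfile.transform(pathToString()).or("(absent)"));
        System.out.println(Optional.<Path>absent().transform(pathToString()).or("(absent)"));
    }
}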
@@ -186,6 +235,11 @@ public class BigqueryOutputPlugin
         final String pathSuffix = task.getFileNameExtension();
 
         return new TransactionalFileOutput() {
+            private final String project = task.getProject();
+            private final String dataset = task.getDataset();
+            private final String table = generateTableName(task.getTable());
+            private final boolean deleteFromLocalWhenJobEnd = task.getDeleteFromLocalWhenJobEnd();
+
             private int fileIndex = 0;
             private BufferedOutputStream output = null;
             private File file;

@@ -243,9 +297,9 @@ public class BigqueryOutputPlugin
                 closeFile();
                 if (filePath != null) {
                     try {
-                        bigQueryWriter.executeLoad(filePath);
+                        bigQueryWriter.executeLoad(project, dataset, table, filePath);
 
-                        if (
+                        if (deleteFromLocalWhenJobEnd) {
                             log.info(String.format("Delete local file [%s]", filePath));
                             file.delete();
                         }

@@ -281,4 +335,40 @@ public class BigqueryOutputPlugin
 
         return result.toString();
     }
-}
+
+    public enum SourceFormat
+    {
+        CSV("CSV"),
+        NEWLINE_DELIMITED_JSON("NEWLINE_DELIMITED_JSON");
+
+        private final String string;
+
+        SourceFormat(String string)
+        {
+            this.string = string;
+        }
+
+        public String getString()
+        {
+            return string;
+        }
+    }
+
+    public enum AuthMethod
+    {
+        private_key("private_key"),
+        compute_engine("compute_engine");
+
+        private final String string;
+
+        AuthMethod(String string)
+        {
+            this.string = string;
+        }
+
+        public String getString()
+        {
+            return string;
+        }
+    }
+}
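A note on the lower_snake_case `AuthMethod` constants above: Embulk materializes `PluginTask` through Jackson, and Jackson resolves a config string against the enum constant's exact name, so `auth_method: private_key` in YAML maps straight onto `AuthMethod.private_key`. A standalone sketch of that mapping, using plain Jackson and an illustrative copy of the enum:

import com.fasterxml.jackson.databind.ObjectMapper;

public class EnumMappingSketch
{
    // Lower-case constants so the raw config value matches the constant name.
    public enum AuthMethod { private_key, compute_engine }

    public static void main(String[] args) throws Exception
    {
        ObjectMapper mapper = new ObjectMapper();
        // Jackson maps the JSON string onto the enum constant by name.
        AuthMethod method = mapper.readValue("\"compute_engine\"", AuthMethod.class);
        System.out.println(method);  // compute_engine
    }
}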
data/src/main/java/org/embulk/output/BigqueryWriter.java CHANGED

@@ -6,17 +6,17 @@ import java.io.FileNotFoundException;
 import java.io.FileInputStream;
 import java.io.BufferedInputStream;
 import com.google.api.client.http.InputStreamContent;
+
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 import java.util.List;
 import java.util.concurrent.TimeoutException;
 import com.google.common.base.Optional;
-import com.google.api.client.util.Base64;
-import com.google.common.base.Throwables;
 import java.security.GeneralSecurityException;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.core.type.TypeReference;
+import com.google.common.collect.ImmutableList;
 import org.apache.commons.codec.binary.Hex;
 import org.embulk.spi.Exec;
 import org.slf4j.Logger;

@@ -40,17 +40,13 @@ import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener;
 
 public class BigqueryWriter
 {
-
     private final Logger log = Exec.getLogger(BigqueryWriter.class);
-    private final String project;
-    private final String dataset;
-    private final String table;
     private final boolean autoCreateTable;
     private final Optional<String> schemaPath;
     private final TableSchema tableSchema;
     private final String sourceFormat;
     private final String fieldDelimiter;
-    private final int maxBadrecords;
+    private final int maxBadRecords;
     private final String encoding;
     private final boolean preventDuplicateInsert;
     private final long jobStatusMaxPollingTime;

@@ -60,16 +56,14 @@ public class BigqueryWriter
     private final boolean allowQuotedNewlines;
     private final Bigquery bigQueryClient;
 
-    public BigqueryWriter(Builder builder)
+    public BigqueryWriter(Builder builder)
+            throws IOException, GeneralSecurityException
     {
-        this.project = builder.project;
-        this.dataset = builder.dataset;
-        this.table = builder.table;
         this.autoCreateTable = builder.autoCreateTable;
         this.schemaPath = builder.schemaPath;
         this.sourceFormat = builder.sourceFormat.toUpperCase();
         this.fieldDelimiter = builder.fieldDelimiter;
-        this.maxBadrecords = builder.maxBadrecords;
+        this.maxBadRecords = builder.maxBadRecords;
         this.encoding = builder.encoding.toUpperCase();
         this.preventDuplicateInsert = builder.preventDuplicateInsert;
         this.jobStatusMaxPollingTime = builder.jobStatusMaxPollingTime;

@@ -81,15 +75,14 @@ public class BigqueryWriter
         BigqueryAuthentication auth = new BigqueryAuthentication(builder.authMethod, builder.serviceAccountEmail, builder.p12KeyFilePath, builder.applicationName);
         this.bigQueryClient = auth.getBigqueryClient();
 
-        checkConfig();
         if (autoCreateTable) {
-            this.tableSchema = createTableSchema(
+            this.tableSchema = createTableSchema();
         } else {
             this.tableSchema = null;
         }
     }
 
-    private String getJobStatus(JobReference jobRef) throws JobFailedException
+    private String getJobStatus(String project, JobReference jobRef) throws JobFailedException
     {
         try {
             Job job = bigQueryClient.jobs().get(project, jobRef.getJobId()).execute();

@@ -108,7 +101,6 @@ public class BigqueryWriter
             String jobStatus = job.getStatus().getState();
             if (jobStatus.equals("DONE")) {
                 JobStatistics statistics = job.getStatistics();
-                //log.info(String.format("Job end. create:[%s] end:[%s]", statistics.getCreationTime(), statistics.getEndTime()));
                 log.info(String.format("Job statistics [%s]", statistics.getLoad()));
             }
             return jobStatus;

@@ -118,14 +110,14 @@ public class BigqueryWriter
         }
     }
 
-    private void getJobStatusUntilDone(JobReference jobRef) throws TimeoutException, JobFailedException
+    private void getJobStatusUntilDone(String project, JobReference jobRef) throws TimeoutException, JobFailedException
     {
         long startTime = System.currentTimeMillis();
         long elapsedTime;
 
         try {
             while (true) {
-                String jobStatus = getJobStatus(jobRef);
+                String jobStatus = getJobStatus(project, jobRef);
                 elapsedTime = System.currentTimeMillis() - startTime;
                 if (jobStatus.equals("DONE")) {
                     log.info(String.format("Job completed successfully. job id:[%s] elapsed_time:%dms status:[%s]", jobRef.getJobId(), elapsedTime, "SUCCESS"));
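The hunk above only threads `project` through the polling path; the loop itself is unchanged: poll, sleep, and give up once the configured maximum is exceeded. A minimal sketch of that loop shape, with `fetchStatus()` standing in for the `bigQueryClient.jobs().get(...)` call (the 3600 s / 10 s values are the plugin's config defaults):

import java.util.concurrent.TimeoutException;

public class PollSketch
{
    // Stand-in for the BigQuery jobs().get(...).execute() status lookup.
    static String fetchStatus()
    {
        return "DONE";
    }

    static void waitUntilDone(long maxPollingTimeSec, long pollingIntervalSec)
            throws TimeoutException, InterruptedException
    {
        long startTime = System.currentTimeMillis();
        while (true) {
            if (fetchStatus().equals("DONE")) {
                return;
            }
            if (System.currentTimeMillis() - startTime > maxPollingTimeSec * 1000) {
                throw new TimeoutException("job status polling exceeded the limit");
            }
            Thread.sleep(pollingIntervalSec * 1000);
        }
    }

    public static void main(String[] args) throws Exception
    {
        // job_status_max_polling_time / job_status_polling_interval defaults
        waitUntilDone(3600, 10);
    }
}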
@@ -142,44 +134,28 @@ public class BigqueryWriter
         }
     }
 
-    public void executeLoad(String
-            TimeoutException, JobFailedException, IOException
+    public void executeLoad(String project, String dataset, String table, String localFilePath)
+            throws NoSuchAlgorithmException, TimeoutException, JobFailedException, IOException
     {
         log.info(String.format("Job preparing... project:%s dataset:%s table:%s", project, dataset, table));
 
         Job job = new Job();
         JobReference jobRef = new JobReference();
-        JobConfiguration jobConfig = new JobConfiguration();
-        JobConfigurationLoad loadConfig = new JobConfigurationLoad();
-        jobConfig.setLoad(loadConfig);
+        JobConfiguration jobConfig = new JobConfiguration().setLoad(setLoadConfig(project, dataset, table));
         job.setConfiguration(jobConfig);
 
         if (preventDuplicateInsert) {
-            String
+            ImmutableList<String> elements = ImmutableList.of(
+                    getLocalMd5hash(localFilePath), dataset, table,
+                    String.valueOf(tableSchema), sourceFormat, fieldDelimiter, String.valueOf(maxBadRecords),
+                    encoding, String.valueOf(ignoreUnknownValues), String.valueOf(allowQuotedNewlines)
+            );
+            String jobId = createJobId(elements);
+
             jobRef.setJobId(jobId);
             job.setJobReference(jobRef);
         }
 
-        loadConfig.setAllowQuotedNewlines(allowQuotedNewlines);
-        loadConfig.setEncoding(encoding);
-        loadConfig.setMaxBadRecords(maxBadrecords);
-        if (sourceFormat.equals("NEWLINE_DELIMITED_JSON")) {
-            loadConfig.setSourceFormat("NEWLINE_DELIMITED_JSON");
-        } else {
-            loadConfig.setFieldDelimiter(fieldDelimiter);
-        }
-        loadConfig.setWriteDisposition("WRITE_APPEND");
-        if (autoCreateTable) {
-            loadConfig.setSchema(tableSchema);
-            loadConfig.setCreateDisposition("CREATE_IF_NEEDED");
-            log.info(String.format("table:[%s] will be create if not exists", table));
-        } else {
-            loadConfig.setCreateDisposition("CREATE_NEVER");
-        }
-        loadConfig.setIgnoreUnknownValues(ignoreUnknownValues);
-
-        loadConfig.setDestinationTable(createTableReference());
-
         File file = new File(localFilePath);
         InputStreamContent mediaContent = new InputStreamContent("application/octet-stream",
                 new BufferedInputStream(

@@ -206,31 +182,52 @@ public class BigqueryWriter
         if (isSkipJobResultCheck) {
             log.info(String.format("Skip job status check. job id:[%s]", jobRef.getJobId()));
         } else {
-            getJobStatusUntilDone(jobRef);
+            getJobStatusUntilDone(project, jobRef);
         }
     }
 
-    private
+    private JobConfigurationLoad setLoadConfig(String project, String dataset, String table)
+    {
+        JobConfigurationLoad config = new JobConfigurationLoad();
+        config.setAllowQuotedNewlines(allowQuotedNewlines)
+                .setEncoding(encoding)
+                .setMaxBadRecords(maxBadRecords)
+                .setSourceFormat(sourceFormat)
+                .setIgnoreUnknownValues(ignoreUnknownValues)
+                .setDestinationTable(createTableReference(project, dataset, table))
+                .setWriteDisposition("WRITE_APPEND");
+
+        if (sourceFormat.equals("CSV")) {
+            config.setFieldDelimiter(String.valueOf(fieldDelimiter));
+        }
+        if (autoCreateTable) {
+            config.setSchema(tableSchema);
+            config.setCreateDisposition("CREATE_IF_NEEDED");
+            log.info(String.format("table:[%s] will be create if not exists", table));
+        } else {
+            config.setCreateDisposition("CREATE_NEVER");
+        }
+        return config;
+    }
+
+    private String createJobId(ImmutableList<String> elements) throws NoSuchAlgorithmException, IOException
     {
         StringBuilder sb = new StringBuilder();
-
-
-
-        sb.append(tableSchema);
-        sb.append(sourceFormat);
-        sb.append(fieldDelimiter);
-        sb.append(maxBadrecords);
-        sb.append(encoding);
-        sb.append(ignoreUnknownValues);
+        for (String element : elements) {
+            sb.append(element);
+        }
 
         MessageDigest md = MessageDigest.getInstance("MD5");
-
-        byte[] digest = md.digest(str.getBytes());
+        byte[] digest = md.digest(new String(sb).getBytes());
         String hash = new String(Hex.encodeHex(digest));
-
+
+        StringBuilder jobId = new StringBuilder();
+        jobId.append("embulk_job_");
+        jobId.append(hash);
+        return jobId.toString();
     }
 
-    private TableReference createTableReference()
+    private TableReference createTableReference(String project, String dataset, String table)
     {
         return new TableReference()
                 .setProjectId(project)

@@ -238,7 +235,7 @@ public class BigqueryWriter
                 .setTableId(table);
     }
 
-
+    public TableSchema createTableSchema() throws IOException
     {
         String path = schemaPath.orNull();
         File file = new File(path);

@@ -247,8 +244,7 @@ public class BigqueryWriter
             stream = new FileInputStream(file);
             ObjectMapper mapper = new ObjectMapper();
             List<TableFieldSchema> fields = mapper.readValue(stream, new TypeReference<List<TableFieldSchema>>() {});
-
-            return tableSchema;
+            return new TableSchema().setFields(fields);
         } finally {
             if (stream != null) {
                 stream.close();
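For context on the `createTableSchema()` hunk above: the `schema_file` it reads is a bare JSON array of column definitions (for example a file containing `[{"name": "id", "type": "INTEGER"}]`), which Jackson can map directly onto the BigQuery model classes. A condensed, self-contained sketch of that parsing, with an illustrative path:

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableSchema;

import java.io.File;
import java.io.IOException;
import java.util.List;

public class SchemaFileSketch
{
    static TableSchema load(String path) throws IOException
    {
        ObjectMapper mapper = new ObjectMapper();
        // Deserialize the JSON array into BigQuery field definitions.
        List<TableFieldSchema> fields = mapper.readValue(
                new File(path), new TypeReference<List<TableFieldSchema>>() {});
        return new TableSchema().setFields(fields);
    }

    public static void main(String[] args) throws IOException
    {
        System.out.println(load("/path/to/schema.json").getFields());
    }
}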
@@ -256,22 +252,22 @@ public class BigqueryWriter
             }
         }
     }
 
-    public boolean isExistTable(String
+    public boolean isExistTable(String project, String dataset, String table) throws IOException
     {
         Tables tableRequest = bigQueryClient.tables();
         try {
-            Table tableData = tableRequest.get(project, dataset,
+            Table tableData = tableRequest.get(project, dataset, table).execute();
         } catch (GoogleJsonResponseException ex) {
             return false;
         }
         return true;
     }
 
-    public void checkConfig() throws
+    public void checkConfig(String project, String dataset, String table) throws IOException
     {
         if (autoCreateTable) {
             if (!schemaPath.isPresent()) {
-                throw new FileNotFoundException("
+                throw new FileNotFoundException("schema_file is empty");
             } else {
                 File file = new File(schemaPath.orNull());
                 if (!file.exists()) {

@@ -279,7 +275,7 @@ public class BigqueryWriter
                 }
             }
         } else {
-            if (!isExistTable(table)) {
+            if (!isExistTable(project, dataset, table)) {
                 throw new IOException(String.format("table [%s] is not exists", table));
             }
         }

@@ -341,14 +337,11 @@ public class BigqueryWriter
         private Optional<String> serviceAccountEmail;
         private Optional<String> p12KeyFilePath;
         private String applicationName;
-        private String project;
-        private String dataset;
-        private String table;
         private boolean autoCreateTable;
         private Optional<String> schemaPath;
         private String sourceFormat;
         private String fieldDelimiter;
-        private int maxBadrecords;
+        private int maxBadRecords;
         private String encoding;
         private boolean preventDuplicateInsert;
         private int jobStatusMaxPollingTime;

@@ -357,45 +350,12 @@ public class BigqueryWriter
         private boolean ignoreUnknownValues;
         private boolean allowQuotedNewlines;
 
-        public Builder(String authMethod)
+        public Builder(String authMethod, Optional<String> serviceAccountEmail, Optional<String> p12KeyFilePath, String applicationName)
         {
             this.authMethod = authMethod;
-        }
-
-        public Builder setServiceAccountEmail(Optional<String> serviceAccountEmail)
-        {
             this.serviceAccountEmail = serviceAccountEmail;
-            return this;
-        }
-
-        public Builder setP12KeyFilePath(Optional<String> p12KeyFilePath)
-        {
             this.p12KeyFilePath = p12KeyFilePath;
-            return this;
-        }
-
-        public Builder setApplicationName(String applicationName)
-        {
             this.applicationName = applicationName;
-            return this;
-        }
-
-        public Builder setProject(String project)
-        {
-            this.project = project;
-            return this;
-        }
-
-        public Builder setDataset(String dataset)
-        {
-            this.dataset = dataset;
-            return this;
-        }
-
-        public Builder setTable(String table)
-        {
-            this.table = table;
-            return this;
         }
 
         public Builder setAutoCreateTable(boolean autoCreateTable)

@@ -422,9 +382,9 @@ public class BigqueryWriter
             return this;
         }
 
-        public Builder setMaxBadrecords(int maxBadrecords)
+        public Builder setMaxBadRecords(int maxBadRecords)
         {
-            this.maxBadrecords = maxBadrecords;
+            this.maxBadRecords = maxBadRecords;
             return this;
         }
 

@@ -482,4 +442,4 @@ public class BigqueryWriter
             super(message);
         }
     }
-}
+}
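Taken together, the 0.1.8 `BigqueryWriter` receives the auth arguments up front in the Builder constructor, while `checkConfig` and `executeLoad` take the table coordinates per call. A condensed, illustrative wiring under those signatures (placeholder values; only setters visible in this diff are used, and actually running it would contact the live BigQuery API):

import com.google.common.base.Optional;

import org.embulk.output.BigqueryWriter;

public class WriterWiringSketch
{
    public static void main(String[] args) throws Exception
    {
        BigqueryWriter writer = new BigqueryWriter.Builder(
                "private_key",
                Optional.of("ABCXYZ123ABCXYZ123.gserviceaccount.com"),
                Optional.of("/path/to/p12_keyfile.p12"),
                "Embulk BigQuery plugin")
                .setAutoCreateTable(false)
                .setSchemaPath(Optional.<String>absent())
                .setSourceFormat("CSV")
                .setFieldDelimiter(",")
                .setMaxBadRecords(0)
                .setEncoding("UTF-8")
                .setPreventDuplicateInsert(false)
                .setJobStatusMaxPollingTime(3600)
                .setJobStatusPollingInterval(10)
                .setIgnoreUnknownValues(false)
                .setAllowQuotedNewlines(false)
                .build();

        // project/dataset/table now travel with each call instead of the instance:
        writer.checkConfig("my-project", "my_dataset", "my_table");
        writer.executeLoad("my-project", "my_dataset", "my_table", "/path/to/output.000.00.csv.gz");
    }
}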
metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.1.7
+  version: 0.1.8
 platform: ruby
 authors:
 - Satoshi Akama
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-
+date: 2015-08-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement

@@ -62,7 +62,7 @@ files:
 - src/test/java/org/embulk/output/TestBigqueryWriter.java
 - classpath/commons-codec-1.3.jar
 - classpath/commons-logging-1.1.1.jar
-- classpath/embulk-output-bigquery-0.1.7.jar
+- classpath/embulk-output-bigquery-0.1.8.jar
 - classpath/google-api-client-1.20.0.jar
 - classpath/google-api-services-bigquery-v2-rev205-1.20.0.jar
 - classpath/google-http-client-1.20.0.jar

@@ -72,7 +72,7 @@ files:
 - classpath/httpclient-4.0.1.jar
 - classpath/httpcore-4.0.1.jar
 - classpath/jsr305-1.3.9.jar
-homepage: https://github.com/
+homepage: https://github.com/embulk/embulk-output-bigquery
 licenses:
 - Apache-2.0
 metadata: {}