embulk-output-bigquery 0.1.7 → 0.1.8
- checksums.yaml +4 -4
- data/README.md +6 -6
- data/build.gradle +4 -4
- data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java +130 -40
- data/src/main/java/org/embulk/output/BigqueryWriter.java +68 -108
- metadata +4 -4
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 48a9a0add9223ccfca3e1e48f360ebf38cfe08d5
+  data.tar.gz: b6d9d0b0635c9d0728238873094122b4a72648de
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7ee81bbbc5b65c34d86014e3da5a3d848ded06766a3b58e92f5918070cc89eadd8740cf16f2b36c864c7a50f5476a0be2460fe2b722f2edfeb23f612f721df91
+  data.tar.gz: 605003c7364982e0f4fc391ef51517ef16b201b0dc0c3f43833f1cd98a5110b7ae2476f64ebbf3d9284e3b781a2ed85b68855d3b585cdda4b68e72f223974b14
data/README.md
CHANGED

@@ -30,14 +30,14 @@ OAuth flow for installed applications.
 |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
 | auth_method | string | optional | "private_key" | `private_key` or `compute_engine`
 | service_account_email | string | required when auth_method is private_key | | Your Google service account email
-|
+| p12_keyfile | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
 | sequence_format | string | optional | %03d.%02d | |
 | file_ext | string | optional | | e.g. ".csv.gz" ".json.gz" |
 | project | string | required | | project_id |
 | dataset | string | required | | dataset |
 | table | string | required | | table name |
 | auto_create_table | boolean | optional | 0 | [See below](#dynamic-table-creating) |
-|
+| schema_file | string | optional | | /path/to/schema.json |
 | prevent_duplicate_insert | boolean | optional | 0 | [See below](#data-consistency) |
 | delete_from_local_when_job_end | boolean | optional | 0 | If set to true, delete the local file when the job ends |
 | job_status_max_polling_time | int | optional | 3600 sec | Max job status polling time |

@@ -53,7 +53,7 @@ Following options are same as [bq command-line tools](https://cloud.google.com/b
 |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
 | source_format | string | required | "CSV" | File type (`NEWLINE_DELIMITED_JSON` or `CSV`) |
 | max_bad_records | int | optional | 0 | |
-| field_delimiter |
+| field_delimiter | char | optional | "," | |
 | encoding | string | optional | "UTF-8" | `UTF-8` or `ISO-8859-1` |
 | ignore_unknown_values | boolean | optional | 0 | |
 | allow_quoted_newlines | boolean | optional | 0 | Set true, if data contains newline characters. It may cause slow processing |

@@ -65,7 +65,7 @@ out:
   type: bigquery
   auth_method: private_key   # default
   service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
-
+  p12_keyfile: /path/to/p12_keyfile.p12
   path_prefix: /path/to/output
   file_ext: csv.gz
   source_format: CSV

@@ -130,14 +130,14 @@ out:
   type: bigquery
   auto_create_table: true
   table: table_%Y_%m
-
+  schema_file: /path/to/schema.json
 ```

 ### Data Consistency

 When `prevent_duplicate_insert` is set to true, embulk-output-bigquery generates a job ID from the md5 hash of the file and other options to prevent duplicate data insertion.

-`job ID = md5(md5(file) + dataset + table + schema + source_format + field_delimiter + max_bad_records + encoding + ignore_unknown_values)`
+`job ID = md5(md5(file) + dataset + table + schema + source_format + field_delimiter + max_bad_records + encoding + ignore_unknown_values + allow_quoted_newlines)`

 [A job ID must be unique (including failures)](https://cloud.google.com/bigquery/loading-data-into-bigquery#consistency), so the same data can't be inserted twice with the same settings.
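The formula above maps directly onto the `createJobId()` method added to `BigqueryWriter` in this release. Below is a minimal, self-contained sketch of the same derivation: concatenate the load settings (now including `allow_quoted_newlines`), MD5 the result, and prefix it with `embulk_job_`, so retrying the same file with the same settings reuses the same job ID. The class name `JobIdSketch` and the inline hex loop (standing in for commons-codec's `Hex.encodeHex`) are illustrative, not part of the plugin.

```java
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;

public class JobIdSketch
{
    // Concatenate all load-relevant settings and hash them, as the
    // Data Consistency section describes. Equal inputs yield an equal
    // job ID, which is what lets BigQuery reject duplicate loads.
    static String createJobId(Iterable<String> elements) throws NoSuchAlgorithmException
    {
        StringBuilder sb = new StringBuilder();
        for (String element : elements) {
            sb.append(element);
        }
        MessageDigest md = MessageDigest.getInstance("MD5");
        byte[] digest = md.digest(sb.toString().getBytes(StandardCharsets.UTF_8));

        StringBuilder jobId = new StringBuilder("embulk_job_");
        for (byte b : digest) {
            jobId.append(String.format("%02x", b & 0xff)); // hex-encode the digest
        }
        return jobId.toString();
    }

    public static void main(String[] args) throws NoSuchAlgorithmException
    {
        // Placeholder values; in the plugin the first element is md5(file).
        System.out.println(createJobId(Arrays.asList(
                "md5-of-file", "my_dataset", "my_table", "CSV", ",",
                "0", "UTF-8", "false", "false")));
    }
}
```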
data/build.gradle
CHANGED

@@ -15,11 +15,11 @@ configurations {
 sourceCompatibility = 1.7
 targetCompatibility = 1.7

-version = "0.1.7"
+version = "0.1.8"

 dependencies {
-    compile "org.embulk:embulk-core:0.6.
-    provided "org.embulk:embulk-core:0.6.
+    compile "org.embulk:embulk-core:0.6.22"
+    provided "org.embulk:embulk-core:0.6.22"

     compile "com.google.http-client:google-http-client-jackson2:1.20.0"
     compile "com.google.apis:google-api-services-bigquery:v2-rev205-1.20.0"

@@ -49,7 +49,7 @@ Gem::Specification.new do |spec|
   spec.description = %[Embulk plugin that insert records to Google BigQuery.]
   spec.email = ["satoshiakama@gmail.com"]
   spec.licenses = ["Apache-2.0"]
-  spec.homepage = "https://github.com/
+  spec.homepage = "https://github.com/embulk/embulk-output-bigquery"

   spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
   spec.test_files = spec.files.grep(%r"^(test|spec)/")
data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java
CHANGED

@@ -5,9 +5,11 @@ import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.BufferedOutputStream;
 import java.io.IOException;
+import java.nio.charset.Charset;
 import java.security.NoSuchAlgorithmException;
 import java.util.List;
 import java.util.concurrent.TimeoutException;
+import com.google.common.base.Function;
 import com.google.common.base.Optional;
 import com.google.common.base.Throwables;
 import java.security.GeneralSecurityException;

@@ -21,6 +23,7 @@ import org.embulk.config.ConfigDiff;
 import org.embulk.config.CommitReport;
 import org.embulk.config.Task;
 import org.embulk.config.TaskSource;
+import org.embulk.spi.unit.LocalFile;
 import org.embulk.spi.Buffer;
 import org.embulk.spi.FileOutputPlugin;
 import org.embulk.spi.TransactionalFileOutput;

@@ -36,114 +39,147 @@ public class BigqueryOutputPlugin
     {
         @Config("auth_method")
         @ConfigDefault("\"private_key\"")
-
+        AuthMethod getAuthMethod();

         @Config("service_account_email")
         @ConfigDefault("null")
-
+        Optional<String> getServiceAccountEmail();

+        // kept for backward compatibility
         @Config("p12_keyfile_path")
         @ConfigDefault("null")
-
+        Optional<String> getP12KeyfilePath();
+
+        @Config("p12_keyfile")
+        @ConfigDefault("null")
+        Optional<LocalFile> getP12Keyfile();
+        void setP12Keyfile(Optional<LocalFile> p12Keyfile);

         @Config("application_name")
         @ConfigDefault("\"Embulk BigQuery plugin\"")
-
+        String getApplicationName();

         @Config("path_prefix")
-
+        String getPathPrefix();

         @Config("sequence_format")
         @ConfigDefault("\".%03d.%02d\"")
-
+        String getSequenceFormat();

         @Config("file_ext")
-
+        String getFileNameExtension();

         @Config("source_format")
         @ConfigDefault("\"CSV\"")
-
+        SourceFormat getSourceFormat();

         @Config("field_delimiter")
         @ConfigDefault("\",\"")
-
+        char getFieldDelimiter();

         @Config("max_bad_records")
         @ConfigDefault("0")
-
+        int getMaxBadrecords();

         @Config("encoding")
         @ConfigDefault("\"UTF-8\"")
-
+        Charset getEncoding();

         @Config("delete_from_local_when_job_end")
         @ConfigDefault("false")
-
+        boolean getDeleteFromLocalWhenJobEnd();

         @Config("project")
-
+        String getProject();

         @Config("dataset")
-
+        String getDataset();

         @Config("table")
-
+        String getTable();

         @Config("auto_create_table")
         @ConfigDefault("false")
-
+        boolean getAutoCreateTable();

+        // kept for backward compatibility
         @Config("schema_path")
         @ConfigDefault("null")
-
+        Optional<String> getSchemaPath();
+
+        @Config("schema_file")
+        @ConfigDefault("null")
+        Optional<LocalFile> getSchemaFile();
+        void setSchemaFile(Optional<LocalFile> schemaFile);

         @Config("prevent_duplicate_insert")
         @ConfigDefault("false")
-
+        boolean getPreventDuplicateInsert();

         @Config("job_status_max_polling_time")
         @ConfigDefault("3600")
-
+        int getJobStatusMaxPollingTime();

         @Config("job_status_polling_interval")
         @ConfigDefault("10")
-
+        int getJobStatusPollingInterval();

         @Config("is_skip_job_result_check")
         @ConfigDefault("false")
-
+        boolean getIsSkipJobResultCheck();

         @Config("ignore_unknown_values")
         @ConfigDefault("false")
-
+        boolean getIgnoreUnknownValues();

         @Config("allow_quoted_newlines")
         @ConfigDefault("false")
-
+        boolean getAllowQuotedNewlines();
     }

     private final Logger log = Exec.getLogger(BigqueryOutputPlugin.class);
     private static BigqueryWriter bigQueryWriter;

+    @Override
     public ConfigDiff transaction(ConfigSource config, int taskCount,
             FileOutputPlugin.Control control)
     {
         final PluginTask task = config.loadConfig(PluginTask.class);

+        if (task.getP12KeyfilePath().isPresent()) {
+            if (task.getP12Keyfile().isPresent()) {
+                throw new ConfigException("Setting both p12_keyfile_path and p12_keyfile is invalid");
+            }
+            try {
+                task.setP12Keyfile(Optional.of(LocalFile.of(task.getP12KeyfilePath().get())));
+            } catch (IOException ex) {
+                throw Throwables.propagate(ex);
+            }
+        }
+
+        if (task.getSchemaPath().isPresent()) {
+            if (task.getSchemaFile().isPresent()) {
+                throw new ConfigException("Setting both schema_path and schema_file is invalid");
+            }
+            try {
+                task.setSchemaFile(Optional.of(LocalFile.of(task.getSchemaPath().get())));
+            } catch (IOException ex) {
+                throw Throwables.propagate(ex);
+            }
+        }
+
         try {
-            bigQueryWriter = new BigqueryWriter.Builder(
-                    .
-
-                    .
-
-                    .setDataset(task.getDataset())
-                    .setTable(generateTableName(task.getTable()))
+            bigQueryWriter = new BigqueryWriter.Builder(
+                    task.getAuthMethod().getString(),
+                    task.getServiceAccountEmail(),
+                    task.getP12Keyfile().transform(localFileToPathString()),
+                    task.getApplicationName())
                     .setAutoCreateTable(task.getAutoCreateTable())
-                    .setSchemaPath(task.
-                    .setSourceFormat(task.getSourceFormat())
-                    .setFieldDelimiter(task.getFieldDelimiter())
-                    .
-                    .setEncoding(task.getEncoding())
+                    .setSchemaPath(task.getSchemaFile().transform(localFileToPathString()))
+                    .setSourceFormat(task.getSourceFormat().getString())
+                    .setFieldDelimiter(String.valueOf(task.getFieldDelimiter()))
+                    .setMaxBadRecords(task.getMaxBadrecords())
+                    .setEncoding(String.valueOf(task.getEncoding()))
                     .setPreventDuplicateInsert(task.getPreventDuplicateInsert())
                     .setJobStatusMaxPollingTime(task.getJobStatusMaxPollingTime())
                     .setJobStatusPollingInterval(task.getJobStatusPollingInterval())

@@ -151,8 +187,9 @@ public class BigqueryOutputPlugin
                     .setIgnoreUnknownValues(task.getIgnoreUnknownValues())
                     .setAllowQuotedNewlines(task.getAllowQuotedNewlines())
                     .build();
-
-
+
+            bigQueryWriter.checkConfig(task.getProject(), task.getDataset(), task.getTable());
+
         } catch (IOException | GeneralSecurityException ex) {
             throw new ConfigException(ex);
         }

@@ -160,6 +197,7 @@ public class BigqueryOutputPlugin
         return resume(task.dump(), taskCount, control);
     }

+    @Override
     public ConfigDiff resume(TaskSource taskSource,
             int taskCount,
             FileOutputPlugin.Control control)

@@ -176,6 +214,17 @@ public class BigqueryOutputPlugin
     {
     }

+    private Function<LocalFile, String> localFileToPathString()
+    {
+        return new Function<LocalFile, String>()
+        {
+            public String apply(LocalFile file)
+            {
+                return file.getPath().toString();
+            }
+        };
+    }
+
     @Override
     public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
     {

@@ -186,6 +235,11 @@ public class BigqueryOutputPlugin
         final String pathSuffix = task.getFileNameExtension();

         return new TransactionalFileOutput() {
+            private final String project = task.getProject();
+            private final String dataset = task.getDataset();
+            private final String table = generateTableName(task.getTable());
+            private final boolean deleteFromLocalWhenJobEnd = task.getDeleteFromLocalWhenJobEnd();
+
             private int fileIndex = 0;
             private BufferedOutputStream output = null;
             private File file;

@@ -243,9 +297,9 @@ public class BigqueryOutputPlugin
                 closeFile();
                 if (filePath != null) {
                     try {
-                        bigQueryWriter.executeLoad(filePath);
+                        bigQueryWriter.executeLoad(project, dataset, table, filePath);

-                        if (
+                        if (deleteFromLocalWhenJobEnd) {
                             log.info(String.format("Delete local file [%s]", filePath));
                             file.delete();
                         }

@@ -281,4 +335,40 @@ public class BigqueryOutputPlugin

         return result.toString();
     }
-
+
+    public enum SourceFormat
+    {
+        CSV("CSV"),
+        NEWLINE_DELIMITED_JSON("NEWLINE_DELIMITED_JSON");
+
+        private final String string;
+
+        SourceFormat(String string)
+        {
+            this.string = string;
+        }
+
+        public String getString()
+        {
+            return string;
+        }
+    }
+
+    public enum AuthMethod
+    {
+        private_key("private_key"),
+        compute_engine("compute_engine");
+
+        private final String string;
+
+        AuthMethod(String string)
+        {
+            this.string = string;
+        }
+
+        public String getString()
+        {
+            return string;
+        }
+    }
+}
data/src/main/java/org/embulk/output/BigqueryWriter.java
CHANGED

@@ -6,17 +6,17 @@ import java.io.FileNotFoundException;
 import java.io.FileInputStream;
 import java.io.BufferedInputStream;
 import com.google.api.client.http.InputStreamContent;
+
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 import java.util.List;
 import java.util.concurrent.TimeoutException;
 import com.google.common.base.Optional;
-import com.google.api.client.util.Base64;
-import com.google.common.base.Throwables;
 import java.security.GeneralSecurityException;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.core.type.TypeReference;

+import com.google.common.collect.ImmutableList;
 import org.apache.commons.codec.binary.Hex;
 import org.embulk.spi.Exec;
 import org.slf4j.Logger;

@@ -40,17 +40,13 @@ import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener;

 public class BigqueryWriter
 {
-
     private final Logger log = Exec.getLogger(BigqueryWriter.class);
-    private final String project;
-    private final String dataset;
-    private final String table;
     private final boolean autoCreateTable;
     private final Optional<String> schemaPath;
     private final TableSchema tableSchema;
     private final String sourceFormat;
     private final String fieldDelimiter;
-    private final int
+    private final int maxBadRecords;
     private final String encoding;
     private final boolean preventDuplicateInsert;
     private final long jobStatusMaxPollingTime;

@@ -60,16 +56,14 @@ public class BigqueryWriter
     private final boolean allowQuotedNewlines;
     private final Bigquery bigQueryClient;

-    public BigqueryWriter(Builder builder)
+    public BigqueryWriter(Builder builder)
+            throws IOException, GeneralSecurityException
     {
-        this.project = builder.project;
-        this.dataset = builder.dataset;
-        this.table = builder.table;
         this.autoCreateTable = builder.autoCreateTable;
         this.schemaPath = builder.schemaPath;
         this.sourceFormat = builder.sourceFormat.toUpperCase();
         this.fieldDelimiter = builder.fieldDelimiter;
-        this.
+        this.maxBadRecords = builder.maxBadRecords;
         this.encoding = builder.encoding.toUpperCase();
         this.preventDuplicateInsert = builder.preventDuplicateInsert;
         this.jobStatusMaxPollingTime = builder.jobStatusMaxPollingTime;

@@ -81,15 +75,14 @@ public class BigqueryWriter
         BigqueryAuthentication auth = new BigqueryAuthentication(builder.authMethod, builder.serviceAccountEmail, builder.p12KeyFilePath, builder.applicationName);
         this.bigQueryClient = auth.getBigqueryClient();

-        checkConfig();
         if (autoCreateTable) {
-            this.tableSchema = createTableSchema(
+            this.tableSchema = createTableSchema();
         } else {
             this.tableSchema = null;
         }
     }

-    private String getJobStatus(JobReference jobRef) throws JobFailedException
+    private String getJobStatus(String project, JobReference jobRef) throws JobFailedException
     {
         try {
             Job job = bigQueryClient.jobs().get(project, jobRef.getJobId()).execute();

@@ -108,7 +101,6 @@ public class BigqueryWriter
             String jobStatus = job.getStatus().getState();
             if (jobStatus.equals("DONE")) {
                 JobStatistics statistics = job.getStatistics();
-                //log.info(String.format("Job end. create:[%s] end:[%s]", statistics.getCreationTime(), statistics.getEndTime()));
                 log.info(String.format("Job statistics [%s]", statistics.getLoad()));
             }
             return jobStatus;

@@ -118,14 +110,14 @@ public class BigqueryWriter
         }
     }

-    private void getJobStatusUntilDone(JobReference jobRef) throws TimeoutException, JobFailedException
+    private void getJobStatusUntilDone(String project, JobReference jobRef) throws TimeoutException, JobFailedException
     {
         long startTime = System.currentTimeMillis();
         long elapsedTime;

         try {
             while (true) {
-                String jobStatus = getJobStatus(jobRef);
+                String jobStatus = getJobStatus(project, jobRef);
                 elapsedTime = System.currentTimeMillis() - startTime;
                 if (jobStatus.equals("DONE")) {
                     log.info(String.format("Job completed successfully. job id:[%s] elapsed_time:%dms status:[%s]", jobRef.getJobId(), elapsedTime, "SUCCESS"));

@@ -142,44 +134,28 @@ public class BigqueryWriter
         }
     }

-    public void executeLoad(String
-            TimeoutException, JobFailedException, IOException
+    public void executeLoad(String project, String dataset, String table, String localFilePath)
+            throws NoSuchAlgorithmException, TimeoutException, JobFailedException, IOException
     {
         log.info(String.format("Job preparing... project:%s dataset:%s table:%s", project, dataset, table));

         Job job = new Job();
         JobReference jobRef = new JobReference();
-        JobConfiguration jobConfig = new JobConfiguration();
-        JobConfigurationLoad loadConfig = new JobConfigurationLoad();
-        jobConfig.setLoad(loadConfig);
+        JobConfiguration jobConfig = new JobConfiguration().setLoad(setLoadConfig(project, dataset, table));
         job.setConfiguration(jobConfig);

         if (preventDuplicateInsert) {
-            String
+            ImmutableList<String> elements = ImmutableList.of(
+                    getLocalMd5hash(localFilePath), dataset, table,
+                    String.valueOf(tableSchema), sourceFormat, fieldDelimiter, String.valueOf(maxBadRecords),
+                    encoding, String.valueOf(ignoreUnknownValues), String.valueOf(allowQuotedNewlines)
+            );
+            String jobId = createJobId(elements);
+
             jobRef.setJobId(jobId);
             job.setJobReference(jobRef);
         }

-        loadConfig.setAllowQuotedNewlines(allowQuotedNewlines);
-        loadConfig.setEncoding(encoding);
-        loadConfig.setMaxBadRecords(maxBadrecords);
-        if (sourceFormat.equals("NEWLINE_DELIMITED_JSON")) {
-            loadConfig.setSourceFormat("NEWLINE_DELIMITED_JSON");
-        } else {
-            loadConfig.setFieldDelimiter(fieldDelimiter);
-        }
-        loadConfig.setWriteDisposition("WRITE_APPEND");
-        if (autoCreateTable) {
-            loadConfig.setSchema(tableSchema);
-            loadConfig.setCreateDisposition("CREATE_IF_NEEDED");
-            log.info(String.format("table:[%s] will be create if not exists", table));
-        } else {
-            loadConfig.setCreateDisposition("CREATE_NEVER");
-        }
-        loadConfig.setIgnoreUnknownValues(ignoreUnknownValues);
-
-        loadConfig.setDestinationTable(createTableReference());
-
         File file = new File(localFilePath);
         InputStreamContent mediaContent = new InputStreamContent("application/octet-stream",
                 new BufferedInputStream(

@@ -206,31 +182,52 @@ public class BigqueryWriter
         if (isSkipJobResultCheck) {
             log.info(String.format("Skip job status check. job id:[%s]", jobRef.getJobId()));
         } else {
-            getJobStatusUntilDone(jobRef);
+            getJobStatusUntilDone(project, jobRef);
         }
     }

-    private
+    private JobConfigurationLoad setLoadConfig(String project, String dataset, String table)
+    {
+        JobConfigurationLoad config = new JobConfigurationLoad();
+        config.setAllowQuotedNewlines(allowQuotedNewlines)
+                .setEncoding(encoding)
+                .setMaxBadRecords(maxBadRecords)
+                .setSourceFormat(sourceFormat)
+                .setIgnoreUnknownValues(ignoreUnknownValues)
+                .setDestinationTable(createTableReference(project, dataset, table))
+                .setWriteDisposition("WRITE_APPEND");
+
+        if (sourceFormat.equals("CSV")) {
+            config.setFieldDelimiter(String.valueOf(fieldDelimiter));
+        }
+        if (autoCreateTable) {
+            config.setSchema(tableSchema);
+            config.setCreateDisposition("CREATE_IF_NEEDED");
+            log.info(String.format("table:[%s] will be create if not exists", table));
+        } else {
+            config.setCreateDisposition("CREATE_NEVER");
+        }
+        return config;
+    }
+
+    private String createJobId(ImmutableList<String> elements) throws NoSuchAlgorithmException, IOException
     {
         StringBuilder sb = new StringBuilder();
-
-
-
-        sb.append(tableSchema);
-        sb.append(sourceFormat);
-        sb.append(fieldDelimiter);
-        sb.append(maxBadrecords);
-        sb.append(encoding);
-        sb.append(ignoreUnknownValues);
+        for (String element : elements) {
+            sb.append(element);
+        }

         MessageDigest md = MessageDigest.getInstance("MD5");
-
-        byte[] digest = md.digest(str.getBytes());
+        byte[] digest = md.digest(new String(sb).getBytes());
         String hash = new String(Hex.encodeHex(digest));
-
+
+        StringBuilder jobId = new StringBuilder();
+        jobId.append("embulk_job_");
+        jobId.append(hash);
+        return jobId.toString();
     }

-    private TableReference createTableReference()
+    private TableReference createTableReference(String project, String dataset, String table)
     {
         return new TableReference()
                 .setProjectId(project)

@@ -238,7 +235,7 @@ public class BigqueryWriter
                 .setTableId(table);
     }

-
+    public TableSchema createTableSchema() throws IOException
     {
         String path = schemaPath.orNull();
         File file = new File(path);

@@ -247,8 +244,7 @@ public class BigqueryWriter
             stream = new FileInputStream(file);
             ObjectMapper mapper = new ObjectMapper();
             List<TableFieldSchema> fields = mapper.readValue(stream, new TypeReference<List<TableFieldSchema>>() {});
-
-            return tableSchema;
+            return new TableSchema().setFields(fields);
         } finally {
             if (stream != null) {
                 stream.close();

@@ -256,22 +252,22 @@ public class BigqueryWriter
         }
     }

-    public boolean isExistTable(String
+    public boolean isExistTable(String project, String dataset, String table) throws IOException
     {
         Tables tableRequest = bigQueryClient.tables();
         try {
-            Table tableData = tableRequest.get(project, dataset,
+            Table tableData = tableRequest.get(project, dataset, table).execute();
         } catch (GoogleJsonResponseException ex) {
             return false;
         }
         return true;
     }

-    public void checkConfig() throws
+    public void checkConfig(String project, String dataset, String table) throws IOException
     {
         if (autoCreateTable) {
             if (!schemaPath.isPresent()) {
-                throw new FileNotFoundException("
+                throw new FileNotFoundException("schema_file is empty");
             } else {
                 File file = new File(schemaPath.orNull());
                 if (!file.exists()) {

@@ -279,7 +275,7 @@ public class BigqueryWriter
                 }
             }
         } else {
-            if (!isExistTable(table)) {
+            if (!isExistTable(project, dataset, table)) {
                 throw new IOException(String.format("table [%s] is not exists", table));
             }
         }

@@ -341,14 +337,11 @@ public class BigqueryWriter
         private Optional<String> serviceAccountEmail;
         private Optional<String> p12KeyFilePath;
         private String applicationName;
-        private String project;
-        private String dataset;
-        private String table;
         private boolean autoCreateTable;
         private Optional<String> schemaPath;
         private String sourceFormat;
         private String fieldDelimiter;
-        private int
+        private int maxBadRecords;
         private String encoding;
         private boolean preventDuplicateInsert;
         private int jobStatusMaxPollingTime;

@@ -357,45 +350,12 @@ public class BigqueryWriter
         private boolean ignoreUnknownValues;
         private boolean allowQuotedNewlines;

-        public Builder(String authMethod)
+        public Builder(String authMethod, Optional<String> serviceAccountEmail, Optional<String> p12KeyFilePath, String applicationName)
         {
             this.authMethod = authMethod;
-        }
-
-        public Builder setServiceAccountEmail(Optional<String> serviceAccountEmail)
-        {
             this.serviceAccountEmail = serviceAccountEmail;
-            return this;
-        }
-
-        public Builder setP12KeyFilePath(Optional<String> p12KeyFilePath)
-        {
             this.p12KeyFilePath = p12KeyFilePath;
-            return this;
-        }
-
-        public Builder setApplicationName(String applicationName)
-        {
             this.applicationName = applicationName;
-            return this;
-        }
-
-        public Builder setProject(String project)
-        {
-            this.project = project;
-            return this;
-        }
-
-        public Builder setDataset(String dataset)
-        {
-            this.dataset = dataset;
-            return this;
-        }
-
-        public Builder setTable(String table)
-        {
-            this.table = table;
-            return this;
         }

         public Builder setAutoCreateTable(boolean autoCreateTable)

@@ -422,9 +382,9 @@ public class BigqueryWriter
             return this;
         }

-        public Builder
+        public Builder setMaxBadRecords(int maxBadRecords)
         {
-            this.
+            this.maxBadRecords = maxBadRecords;
             return this;
         }

@@ -482,4 +442,4 @@ public class BigqueryWriter
             super(message);
         }
     }
-}
+}
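The net effect of the `BigqueryWriter` changes is a split between connection setup and load targets: credentials and format options are fixed once in the `Builder`, while `project`, `dataset`, and `table` now travel with each `checkConfig()`/`executeLoad()` call. A hypothetical caller, sketched against the signatures shown in this diff (all project, dataset, table, and path values below are placeholders), would wire it up like this:

```java
import com.google.common.base.Optional;
import org.embulk.output.BigqueryWriter;

public class WriterUsageSketch
{
    public static void main(String[] args) throws Exception
    {
        // Auth and load options are bound once at construction time.
        BigqueryWriter writer = new BigqueryWriter.Builder(
                "private_key",
                Optional.of("ABCXYZ123ABCXYZ123.gserviceaccount.com"),
                Optional.of("/path/to/p12_keyfile.p12"),
                "Embulk BigQuery plugin")
                .setSourceFormat("CSV")
                .setFieldDelimiter(",")
                .setMaxBadRecords(0)
                .setEncoding("UTF-8")
                .build();

        // The destination is now passed per call, not stored in the writer.
        writer.checkConfig("my-project", "my_dataset", "my_table");
        writer.executeLoad("my-project", "my_dataset", "my_table",
                "/path/to/output.000.00.csv.gz");
    }
}
```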
metadata
CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.1.7
+  version: 0.1.8
 platform: ruby
 authors:
 - Satoshi Akama
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-
+date: 2015-08-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement

@@ -62,7 +62,7 @@ files:
 - src/test/java/org/embulk/output/TestBigqueryWriter.java
 - classpath/commons-codec-1.3.jar
 - classpath/commons-logging-1.1.1.jar
-- classpath/embulk-output-bigquery-0.1.7.jar
+- classpath/embulk-output-bigquery-0.1.8.jar
 - classpath/google-api-client-1.20.0.jar
 - classpath/google-api-services-bigquery-v2-rev205-1.20.0.jar
 - classpath/google-http-client-1.20.0.jar

@@ -72,7 +72,7 @@ files:
 - classpath/httpclient-4.0.1.jar
 - classpath/httpcore-4.0.1.jar
 - classpath/jsr305-1.3.9.jar
-homepage: https://github.com/
+homepage: https://github.com/embulk/embulk-output-bigquery
 licenses:
 - Apache-2.0
 metadata: {}