embulk-output-bigquery 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/LICENSE.txt +21 -0
- data/README.md +87 -0
- data/build.gradle +64 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/output/bigquery.rb +3 -0
- data/settings.gradle +2 -0
- data/src/main/java/org/embulk/output/BigqueryAuthentication.java +99 -0
- data/src/main/java/org/embulk/output/BigqueryGcsWriter.java +201 -0
- data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java +293 -0
- data/src/main/java/org/embulk/output/BigqueryWriter.java +432 -0
- data/src/test/java/org/embulk/output/TestBigqueryAuthentication.java +5 -0
- data/src/test/java/org/embulk/output/TestBigqueryGcsWriter.java +5 -0
- data/src/test/java/org/embulk/output/TestBigqueryOutputPlugin.java +5 -0
- data/src/test/java/org/embulk/output/TestBigqueryWriter.java +5 -0
- metadata +104 -0
data/settings.gradle
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import java.io.File;
|
4
|
+
import java.io.FileNotFoundException;
|
5
|
+
import java.io.FileInputStream;
|
6
|
+
import java.io.IOException;
|
7
|
+
import java.util.ArrayList;
|
8
|
+
import java.util.List;
|
9
|
+
import java.util.IllegalFormatException;
|
10
|
+
import com.google.api.client.auth.oauth2.Credential;
|
11
|
+
import com.google.api.client.auth.oauth2.CredentialRefreshListener;
|
12
|
+
import com.google.api.client.auth.oauth2.TokenErrorResponse;
|
13
|
+
import com.google.api.client.auth.oauth2.TokenResponse;
|
14
|
+
import com.google.common.collect.ImmutableList;
|
15
|
+
import java.security.GeneralSecurityException;
|
16
|
+
|
17
|
+
import org.embulk.spi.Exec;
|
18
|
+
import org.slf4j.Logger;
|
19
|
+
|
20
|
+
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
|
21
|
+
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
|
22
|
+
import com.google.api.client.http.HttpTransport;
|
23
|
+
import com.google.api.client.http.InputStreamContent;
|
24
|
+
import com.google.api.client.json.JsonFactory;
|
25
|
+
import com.google.api.client.json.jackson2.JacksonFactory;
|
26
|
+
import com.google.api.services.storage.Storage;
|
27
|
+
import com.google.api.services.storage.StorageScopes;
|
28
|
+
import com.google.api.services.bigquery.Bigquery;
|
29
|
+
import com.google.api.services.bigquery.BigqueryScopes;
|
30
|
+
import com.google.api.services.bigquery.model.ProjectList;
|
31
|
+
|
32
|
+
public class BigqueryAuthentication
|
33
|
+
{
|
34
|
+
|
35
|
+
private final Logger log = Exec.getLogger(BigqueryAuthentication.class);
|
36
|
+
private final String serviceAccountEmail;
|
37
|
+
private final String p12KeyFilePath;
|
38
|
+
private final String applicationName;
|
39
|
+
private final HttpTransport httpTransport;
|
40
|
+
private final JsonFactory jsonFactory;
|
41
|
+
private final GoogleCredential credentials;
|
42
|
+
|
43
|
+
public BigqueryAuthentication(String serviceAccountEmail, String p12KeyFilePath, String applicationName) throws IOException, GeneralSecurityException
|
44
|
+
{
|
45
|
+
this.serviceAccountEmail = serviceAccountEmail;
|
46
|
+
this.p12KeyFilePath = p12KeyFilePath;
|
47
|
+
this.applicationName = applicationName;
|
48
|
+
|
49
|
+
this.httpTransport = GoogleNetHttpTransport.newTrustedTransport();
|
50
|
+
this.jsonFactory = new JacksonFactory();
|
51
|
+
this.credentials = getCredentialProvider();
|
52
|
+
}
|
53
|
+
|
54
|
+
/**
|
55
|
+
* @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#authorizingrequests
|
56
|
+
*/
|
57
|
+
private GoogleCredential getCredentialProvider() throws IOException, GeneralSecurityException
|
58
|
+
{
|
59
|
+
// @see https://cloud.google.com/compute/docs/api/how-tos/authorization
|
60
|
+
// @see https://developers.google.com/resources/api-libraries/documentation/storage/v1/java/latest/com/google/api/services/storage/STORAGE_SCOPE.html
|
61
|
+
GoogleCredential cred = new GoogleCredential.Builder()
|
62
|
+
.setTransport(httpTransport)
|
63
|
+
.setJsonFactory(jsonFactory)
|
64
|
+
.setServiceAccountId(serviceAccountEmail)
|
65
|
+
.setServiceAccountScopes(
|
66
|
+
ImmutableList.of(
|
67
|
+
BigqueryScopes.DEVSTORAGE_READ_WRITE,
|
68
|
+
BigqueryScopes.BIGQUERY
|
69
|
+
)
|
70
|
+
)
|
71
|
+
.setServiceAccountPrivateKeyFromP12File(new File(p12KeyFilePath))
|
72
|
+
.build();
|
73
|
+
return cred;
|
74
|
+
}
|
75
|
+
|
76
|
+
public Bigquery getBigqueryClient() throws IOException
|
77
|
+
{
|
78
|
+
Bigquery client = new Bigquery.Builder(httpTransport, jsonFactory, credentials)
|
79
|
+
.setHttpRequestInitializer(credentials)
|
80
|
+
.setApplicationName(applicationName)
|
81
|
+
.build();
|
82
|
+
|
83
|
+
// For throw IOException when authentication is failed.
|
84
|
+
long maxResults = 1;
|
85
|
+
Bigquery.Projects.List req = client.projects().list().setMaxResults(maxResults);
|
86
|
+
ProjectList projectList = req.execute();
|
87
|
+
|
88
|
+
return client;
|
89
|
+
}
|
90
|
+
|
91
|
+
public Storage getGcsClient() throws IOException
|
92
|
+
{
|
93
|
+
Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
|
94
|
+
.setApplicationName(applicationName)
|
95
|
+
.build();
|
96
|
+
|
97
|
+
return client;
|
98
|
+
}
|
99
|
+
}
|
@@ -0,0 +1,201 @@
|
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import java.io.File;
|
4
|
+
import java.io.FileNotFoundException;
|
5
|
+
import java.io.FileInputStream;
|
6
|
+
import java.io.IOException;
|
7
|
+
import java.util.ArrayList;
|
8
|
+
import java.util.List;
|
9
|
+
import java.util.Collection;
|
10
|
+
import java.util.Iterator;
|
11
|
+
import java.util.IllegalFormatException;
|
12
|
+
import java.nio.charset.Charset;
|
13
|
+
import java.nio.charset.StandardCharsets;
|
14
|
+
import com.google.common.base.Optional;
|
15
|
+
import com.google.common.collect.ImmutableList;
|
16
|
+
//import eu.medsea.mimeutil.MimeType;
|
17
|
+
//import eu.medsea.mimeutil.MimeUtil;
|
18
|
+
//import eu.medsea.mimeutil.detector.MimeDetector;
|
19
|
+
import org.apache.commons.lang3.StringUtils;
|
20
|
+
import org.apache.commons.codec.binary.Base64;
|
21
|
+
import java.security.GeneralSecurityException;
|
22
|
+
|
23
|
+
import org.embulk.spi.Exec;
|
24
|
+
import org.slf4j.Logger;
|
25
|
+
|
26
|
+
import com.google.api.services.storage.Storage;
|
27
|
+
import com.google.api.services.storage.StorageScopes;
|
28
|
+
import com.google.api.services.storage.model.Bucket;
|
29
|
+
import com.google.api.services.storage.model.Objects;
|
30
|
+
import com.google.api.services.storage.model.StorageObject;
|
31
|
+
|
32
|
+
import com.google.api.client.http.InputStreamContent;
|
33
|
+
|
34
|
+
public class BigqueryGcsWriter
|
35
|
+
{
|
36
|
+
|
37
|
+
private final Logger log = Exec.getLogger(BigqueryGcsWriter.class);
|
38
|
+
private final String bucket;
|
39
|
+
private final String sourceFormat;
|
40
|
+
private final boolean isFileCompressed;
|
41
|
+
private final boolean deleteFromBucketWhenJobEnd;
|
42
|
+
private Storage storageClient;
|
43
|
+
|
44
|
+
public BigqueryGcsWriter(Builder builder) throws IOException, GeneralSecurityException
|
45
|
+
{
|
46
|
+
this.bucket = builder.bucket;
|
47
|
+
this.sourceFormat = builder.sourceFormat.toUpperCase();
|
48
|
+
this.isFileCompressed = builder.isFileCompressed;
|
49
|
+
this.deleteFromBucketWhenJobEnd = builder.deleteFromBucketWhenJobEnd;
|
50
|
+
|
51
|
+
BigqueryAuthentication auth = new BigqueryAuthentication(builder.serviceAccountEmail, builder.p12KeyFilePath, builder.applicationName);
|
52
|
+
this.storageClient = auth.getGcsClient();
|
53
|
+
}
|
54
|
+
|
55
|
+
public void uploadFile(String localFilePath, String fileName, Optional<String> remotePath) throws IOException
|
56
|
+
{
|
57
|
+
FileInputStream stream = null;
|
58
|
+
|
59
|
+
try {
|
60
|
+
String path;
|
61
|
+
if (remotePath.isPresent()) {
|
62
|
+
path = remotePath.get();
|
63
|
+
} else {
|
64
|
+
path = "";
|
65
|
+
}
|
66
|
+
String gcsPath = getRemotePath(path, fileName);
|
67
|
+
StorageObject objectMetadata = new StorageObject().setName(gcsPath);
|
68
|
+
log.info(String.format("Uploading file [%s] to [gs://%s/%s]", localFilePath, bucket, gcsPath));
|
69
|
+
|
70
|
+
File file = new File(localFilePath);
|
71
|
+
stream = new FileInputStream(file);
|
72
|
+
InputStreamContent content = new InputStreamContent(getContentType(), stream);
|
73
|
+
Storage.Objects.Insert insertObject = storageClient.objects().insert(bucket, objectMetadata, content);
|
74
|
+
insertObject.setDisableGZipContent(true);
|
75
|
+
|
76
|
+
StorageObject response = insertObject.execute();
|
77
|
+
log.info(String.format("Upload completed [%s] to [gs://%s/%s]", localFilePath, bucket, gcsPath));
|
78
|
+
} finally {
|
79
|
+
stream.close();
|
80
|
+
}
|
81
|
+
}
|
82
|
+
|
83
|
+
private String getRemotePath(String remotePath, String fileName)
|
84
|
+
{
|
85
|
+
if (remotePath.isEmpty()) {
|
86
|
+
return fileName;
|
87
|
+
}
|
88
|
+
String[] pathList = StringUtils.split(remotePath, '/');
|
89
|
+
String path = StringUtils.join(pathList) + "/";
|
90
|
+
if (!path.endsWith("/")) {
|
91
|
+
path = path + "/";
|
92
|
+
}
|
93
|
+
return path + fileName;
|
94
|
+
}
|
95
|
+
|
96
|
+
public void deleteFile(String remotePath, String fileName) throws IOException
|
97
|
+
{
|
98
|
+
String path = getRemotePath(remotePath, fileName);
|
99
|
+
storageClient.objects().delete(bucket, path).execute();
|
100
|
+
log.info(String.format("Delete remote file [gs://%s/%s]", bucket, path));
|
101
|
+
}
|
102
|
+
|
103
|
+
public boolean getDeleteFromBucketWhenJobEnd()
|
104
|
+
{
|
105
|
+
return this.deleteFromBucketWhenJobEnd;
|
106
|
+
}
|
107
|
+
|
108
|
+
private String getContentType()
|
109
|
+
{
|
110
|
+
if (isFileCompressed) {
|
111
|
+
return "application/x-gzip";
|
112
|
+
} else {
|
113
|
+
if (sourceFormat.equals("NEWLINE_DELIMITED_JSON)")) {
|
114
|
+
return "application/json";
|
115
|
+
} else {
|
116
|
+
return "text/csv";
|
117
|
+
}
|
118
|
+
}
|
119
|
+
}
|
120
|
+
|
121
|
+
/*
|
122
|
+
private void registerMimeDetector()
|
123
|
+
{
|
124
|
+
String mimeDetector = "eu.medsea.mimeutil.detector.MagicMimeMimeDetector";
|
125
|
+
MimeDetector registeredMimeDetector = MimeUtil.getMimeDetector(mimeDetector);
|
126
|
+
MimeUtil.registerMimeDetector(mimeDetector);
|
127
|
+
}
|
128
|
+
|
129
|
+
public String detectMimeType(File file)
|
130
|
+
{
|
131
|
+
try {
|
132
|
+
Collection<?> mimeTypes = MimeUtil.getMimeTypes(file);
|
133
|
+
if (!mimeTypes.isEmpty()) {
|
134
|
+
Iterator<?> iterator = mimeTypes.iterator();
|
135
|
+
MimeType mimeType = (MimeType) iterator.next();
|
136
|
+
return mimeType.getMediaType() + "/" + mimeType.getSubType();
|
137
|
+
}
|
138
|
+
} catch (Exception ex) {
|
139
|
+
}
|
140
|
+
return "application/octet-stream";
|
141
|
+
}
|
142
|
+
*/
|
143
|
+
|
144
|
+
public static class Builder
|
145
|
+
{
|
146
|
+
private final String serviceAccountEmail;
|
147
|
+
private String p12KeyFilePath;
|
148
|
+
private String applicationName;
|
149
|
+
private String bucket;
|
150
|
+
private String sourceFormat;
|
151
|
+
private boolean isFileCompressed;
|
152
|
+
private boolean deleteFromBucketWhenJobEnd;
|
153
|
+
private boolean enableMd5hashCheck;
|
154
|
+
|
155
|
+
public Builder(String serviceAccountEmail)
|
156
|
+
{
|
157
|
+
this.serviceAccountEmail = serviceAccountEmail;
|
158
|
+
}
|
159
|
+
|
160
|
+
public Builder setP12KeyFilePath(String p12KeyFilePath)
|
161
|
+
{
|
162
|
+
this.p12KeyFilePath = p12KeyFilePath;
|
163
|
+
return this;
|
164
|
+
}
|
165
|
+
|
166
|
+
public Builder setApplicationName(String applicationName)
|
167
|
+
{
|
168
|
+
this.applicationName = applicationName;
|
169
|
+
return this;
|
170
|
+
}
|
171
|
+
|
172
|
+
public Builder setBucket(String bucket)
|
173
|
+
{
|
174
|
+
this.bucket = bucket;
|
175
|
+
return this;
|
176
|
+
}
|
177
|
+
|
178
|
+
public Builder setSourceFormat(String sourceFormat)
|
179
|
+
{
|
180
|
+
this.sourceFormat = sourceFormat;
|
181
|
+
return this;
|
182
|
+
}
|
183
|
+
|
184
|
+
public Builder setIsFileCompressed(boolean isFileCompressed)
|
185
|
+
{
|
186
|
+
this.isFileCompressed = isFileCompressed;
|
187
|
+
return this;
|
188
|
+
}
|
189
|
+
|
190
|
+
public Builder setDeleteFromBucketWhenJobEnd(boolean deleteFromBucketWhenJobEnd)
|
191
|
+
{
|
192
|
+
this.deleteFromBucketWhenJobEnd = deleteFromBucketWhenJobEnd;
|
193
|
+
return this;
|
194
|
+
}
|
195
|
+
|
196
|
+
public BigqueryGcsWriter build() throws IOException, GeneralSecurityException
|
197
|
+
{
|
198
|
+
return new BigqueryGcsWriter(this);
|
199
|
+
}
|
200
|
+
}
|
201
|
+
}
|
@@ -0,0 +1,293 @@
|
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import java.io.File;
|
4
|
+
import java.io.FileWriter;
|
5
|
+
import java.io.FileNotFoundException;
|
6
|
+
import java.io.FileOutputStream;
|
7
|
+
import java.io.BufferedOutputStream;
|
8
|
+
import java.io.IOException;
|
9
|
+
import java.util.List;
|
10
|
+
import java.util.ArrayList;
|
11
|
+
import java.util.HashMap;
|
12
|
+
import java.util.concurrent.TimeoutException;
|
13
|
+
import com.google.common.base.Optional;
|
14
|
+
import com.google.common.base.Throwables;
|
15
|
+
import java.security.GeneralSecurityException;
|
16
|
+
|
17
|
+
import org.embulk.config.Config;
|
18
|
+
import org.embulk.config.ConfigException;
|
19
|
+
import org.embulk.config.ConfigDefault;
|
20
|
+
import org.embulk.config.ConfigSource;
|
21
|
+
import org.embulk.config.ConfigDiff;
|
22
|
+
import org.embulk.config.CommitReport;
|
23
|
+
import org.embulk.config.Task;
|
24
|
+
import org.embulk.config.TaskSource;
|
25
|
+
import org.embulk.spi.Buffer;
|
26
|
+
import org.embulk.spi.FileOutputPlugin;
|
27
|
+
import org.embulk.spi.TransactionalFileOutput;
|
28
|
+
import org.embulk.spi.Exec;
|
29
|
+
|
30
|
+
import org.slf4j.Logger;
|
31
|
+
|
32
|
+
public class BigqueryOutputPlugin
|
33
|
+
implements FileOutputPlugin
|
34
|
+
{
|
35
|
+
public interface PluginTask
|
36
|
+
extends Task
|
37
|
+
{
|
38
|
+
@Config("service_account_email")
|
39
|
+
public String getServiceAccountEmail();
|
40
|
+
|
41
|
+
@Config("p12_keyfile_path")
|
42
|
+
public String getP12KeyfilePath();
|
43
|
+
|
44
|
+
@Config("application_name")
|
45
|
+
@ConfigDefault("\"Embulk BigQuery plugin\"")
|
46
|
+
public String getApplicationName();
|
47
|
+
|
48
|
+
@Config("path_prefix")
|
49
|
+
public String getPathPrefix();
|
50
|
+
|
51
|
+
@Config("sequence_format")
|
52
|
+
@ConfigDefault("\".%03d.%02d\"")
|
53
|
+
public String getSequenceFormat();
|
54
|
+
|
55
|
+
@Config("file_ext")
|
56
|
+
public String getFileNameExtension();
|
57
|
+
|
58
|
+
@Config("source_format")
|
59
|
+
@ConfigDefault("\"CSV\"")
|
60
|
+
public String getSourceFormat();
|
61
|
+
|
62
|
+
@Config("is_file_compressed")
|
63
|
+
@ConfigDefault("true")
|
64
|
+
public boolean getIsFileCompressed();
|
65
|
+
|
66
|
+
@Config("field_delimiter")
|
67
|
+
@ConfigDefault("\",\"")
|
68
|
+
public String getFieldDelimiter();
|
69
|
+
|
70
|
+
@Config("max_bad_records")
|
71
|
+
@ConfigDefault("0")
|
72
|
+
public int getMaxBadrecords();
|
73
|
+
|
74
|
+
@Config("delete_from_local_when_upload_end")
|
75
|
+
@ConfigDefault("false")
|
76
|
+
public boolean getDeleteFromLocalWhenUploadEnd();
|
77
|
+
|
78
|
+
@Config("delete_from_bucket_when_job_end")
|
79
|
+
@ConfigDefault("false")
|
80
|
+
public boolean getDeleteFromBucketWhenJobEnd();
|
81
|
+
|
82
|
+
@Config("bucket")
|
83
|
+
public String getBucket();
|
84
|
+
|
85
|
+
@Config("remote_path")
|
86
|
+
@ConfigDefault("null")
|
87
|
+
public Optional<String> getRemotePath();
|
88
|
+
|
89
|
+
@Config("project")
|
90
|
+
public String getProject();
|
91
|
+
|
92
|
+
@Config("dataset")
|
93
|
+
public String getDataset();
|
94
|
+
|
95
|
+
@Config("table")
|
96
|
+
public String getTable();
|
97
|
+
|
98
|
+
@Config("auto_create_table")
|
99
|
+
@ConfigDefault("false")
|
100
|
+
public boolean getAutoCreateTable();
|
101
|
+
|
102
|
+
@Config("schema_path")
|
103
|
+
@ConfigDefault("null")
|
104
|
+
public Optional<String> getSchemaPath();
|
105
|
+
|
106
|
+
@Config("job_status_max_polling_time")
|
107
|
+
@ConfigDefault("3600")
|
108
|
+
public int getJobStatusMaxPollingTime();
|
109
|
+
|
110
|
+
@Config("job_status_polling_interval")
|
111
|
+
@ConfigDefault("10")
|
112
|
+
public int getJobStatusPollingInterval();
|
113
|
+
|
114
|
+
@Config("is_skip_job_result_check")
|
115
|
+
@ConfigDefault("0")
|
116
|
+
public boolean getIsSkipJobResultCheck();
|
117
|
+
}
|
118
|
+
|
119
|
+
private final Logger log = Exec.getLogger(BigqueryOutputPlugin.class);
|
120
|
+
private static BigqueryGcsWriter bigQueryGcsWriter;
|
121
|
+
private static BigqueryWriter bigQueryWriter;
|
122
|
+
|
123
|
+
public ConfigDiff transaction(ConfigSource config, int taskCount,
|
124
|
+
FileOutputPlugin.Control control)
|
125
|
+
{
|
126
|
+
final PluginTask task = config.loadConfig(PluginTask.class);
|
127
|
+
|
128
|
+
try {
|
129
|
+
bigQueryGcsWriter = new BigqueryGcsWriter.Builder(task.getServiceAccountEmail())
|
130
|
+
.setP12KeyFilePath(task.getP12KeyfilePath())
|
131
|
+
.setApplicationName(task.getApplicationName())
|
132
|
+
.setBucket(task.getBucket())
|
133
|
+
.setSourceFormat(task.getSourceFormat())
|
134
|
+
.setIsFileCompressed(task.getIsFileCompressed())
|
135
|
+
.setDeleteFromBucketWhenJobEnd(task.getDeleteFromBucketWhenJobEnd())
|
136
|
+
.build();
|
137
|
+
|
138
|
+
bigQueryWriter = new BigqueryWriter.Builder(task.getServiceAccountEmail())
|
139
|
+
.setP12KeyFilePath(task.getP12KeyfilePath())
|
140
|
+
.setApplicationName(task.getApplicationName())
|
141
|
+
.setProject(task.getProject())
|
142
|
+
.setDataset(task.getDataset())
|
143
|
+
.setTable(task.getTable())
|
144
|
+
.setAutoCreateTable(task.getAutoCreateTable())
|
145
|
+
.setSchemaPath(task.getSchemaPath())
|
146
|
+
.setBucket(task.getBucket())
|
147
|
+
.setSourceFormat(task.getSourceFormat())
|
148
|
+
.setFieldDelimiter(task.getFieldDelimiter())
|
149
|
+
.setMaxBadrecords(task.getMaxBadrecords())
|
150
|
+
.setJobStatusMaxPollingTime(task.getJobStatusMaxPollingTime())
|
151
|
+
.setJobStatusPollingInterval(task.getJobStatusPollingInterval())
|
152
|
+
.setIsSkipJobResultCheck(task.getIsSkipJobResultCheck())
|
153
|
+
.build();
|
154
|
+
} catch (IOException | GeneralSecurityException ex) {
|
155
|
+
log.warn("Google Authentication was failed. Please Check your configurations.");
|
156
|
+
throw new ConfigException(ex);
|
157
|
+
}
|
158
|
+
// non-retryable (non-idempotent) output:
|
159
|
+
return resume(task.dump(), taskCount, control);
|
160
|
+
}
|
161
|
+
|
162
|
+
public ConfigDiff resume(TaskSource taskSource,
|
163
|
+
int taskCount,
|
164
|
+
FileOutputPlugin.Control control)
|
165
|
+
{
|
166
|
+
control.run(taskSource);
|
167
|
+
|
168
|
+
try {
|
169
|
+
bigQueryWriter.executeJob();
|
170
|
+
// TODO refactor
|
171
|
+
if (bigQueryGcsWriter.getDeleteFromBucketWhenJobEnd()) {
|
172
|
+
ArrayList<HashMap<String, String>> fileList = bigQueryWriter.getFileList();
|
173
|
+
for (HashMap<String, String> file : fileList) {
|
174
|
+
bigQueryGcsWriter.deleteFile(file.get("remote_path"), file.get("file_name"));
|
175
|
+
}
|
176
|
+
}
|
177
|
+
} catch (IOException | TimeoutException | BigqueryWriter.JobFailedException ex) {
|
178
|
+
log.warn(ex.getMessage());
|
179
|
+
throw Throwables.propagate(ex);
|
180
|
+
}
|
181
|
+
return Exec.newConfigDiff();
|
182
|
+
}
|
183
|
+
|
184
|
+
@Override
|
185
|
+
public void cleanup(TaskSource taskSource,
|
186
|
+
int taskCount,
|
187
|
+
List<CommitReport> successCommitReports)
|
188
|
+
{
|
189
|
+
}
|
190
|
+
|
191
|
+
@Override
|
192
|
+
public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
|
193
|
+
{
|
194
|
+
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
195
|
+
|
196
|
+
final String pathPrefix = task.getPathPrefix();
|
197
|
+
final String sequenceFormat = task.getSequenceFormat();
|
198
|
+
final String pathSuffix = task.getFileNameExtension();
|
199
|
+
final Optional<String> remotePath = task.getRemotePath();
|
200
|
+
|
201
|
+
return new TransactionalFileOutput() {
|
202
|
+
private int fileIndex = 0;
|
203
|
+
private BufferedOutputStream output = null;
|
204
|
+
private File file;
|
205
|
+
private String filePath;
|
206
|
+
private String fileName;
|
207
|
+
private long fileSize;
|
208
|
+
|
209
|
+
public void nextFile()
|
210
|
+
{
|
211
|
+
closeFile();
|
212
|
+
|
213
|
+
try {
|
214
|
+
String suffix = pathSuffix;
|
215
|
+
if (!suffix.startsWith(".")) {
|
216
|
+
suffix = "." + suffix;
|
217
|
+
}
|
218
|
+
filePath = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + suffix;
|
219
|
+
file = new File(filePath);
|
220
|
+
fileName = file.getName();
|
221
|
+
fileSize = file.length();
|
222
|
+
|
223
|
+
String parentPath = file.getParent();
|
224
|
+
File dir = new File(parentPath);
|
225
|
+
if (!dir.exists()) {
|
226
|
+
dir.mkdir();
|
227
|
+
}
|
228
|
+
log.info(String.format("Writing file [%s]", filePath));
|
229
|
+
output = new BufferedOutputStream(new FileOutputStream(filePath));
|
230
|
+
} catch (FileNotFoundException ex) {
|
231
|
+
throw Throwables.propagate(ex);
|
232
|
+
}
|
233
|
+
fileIndex++;
|
234
|
+
}
|
235
|
+
|
236
|
+
private void closeFile()
|
237
|
+
{
|
238
|
+
if (output != null) {
|
239
|
+
try {
|
240
|
+
output.close();
|
241
|
+
} catch (IOException ex) {
|
242
|
+
throw Throwables.propagate(ex);
|
243
|
+
}
|
244
|
+
}
|
245
|
+
}
|
246
|
+
|
247
|
+
public void add(Buffer buffer)
|
248
|
+
{
|
249
|
+
try {
|
250
|
+
output.write(buffer.array(), buffer.offset(), buffer.limit());
|
251
|
+
} catch (IOException ex) {
|
252
|
+
throw Throwables.propagate(ex);
|
253
|
+
} finally {
|
254
|
+
buffer.release();
|
255
|
+
}
|
256
|
+
}
|
257
|
+
|
258
|
+
public void finish()
|
259
|
+
{
|
260
|
+
closeFile();
|
261
|
+
if (fileName != null) {
|
262
|
+
try {
|
263
|
+
bigQueryGcsWriter.uploadFile(filePath, fileName, remotePath);
|
264
|
+
|
265
|
+
if (task.getDeleteFromLocalWhenUploadEnd()) {
|
266
|
+
log.info(String.format("Delete local file [%s]", filePath));
|
267
|
+
file.delete();
|
268
|
+
}
|
269
|
+
|
270
|
+
bigQueryWriter.addTask(remotePath, fileName, fileSize);
|
271
|
+
} catch (IOException ex) {
|
272
|
+
throw Throwables.propagate(ex);
|
273
|
+
}
|
274
|
+
}
|
275
|
+
}
|
276
|
+
|
277
|
+
public void close()
|
278
|
+
{
|
279
|
+
closeFile();
|
280
|
+
}
|
281
|
+
|
282
|
+
public void abort()
|
283
|
+
{
|
284
|
+
}
|
285
|
+
|
286
|
+
public CommitReport commit()
|
287
|
+
{
|
288
|
+
CommitReport report = Exec.newCommitReport();
|
289
|
+
return report;
|
290
|
+
}
|
291
|
+
};
|
292
|
+
}
|
293
|
+
}
|