embulk-output-bigquery 0.1.1 → 0.1.2
- checksums.yaml +4 -4
- data/README.md +24 -25
- data/build.gradle +1 -3
- data/src/main/java/org/embulk/output/BigqueryAuthentication.java +0 -12
- data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java +23 -51
- data/src/main/java/org/embulk/output/BigqueryWriter.java +136 -151
- metadata +3 -7
- data/src/main/java/org/embulk/output/BigqueryGcsWriter.java +0 -201
- data/src/test/java/org/embulk/output/TestBigqueryGcsWriter.java +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 46c61dd1c73ff99c3c69bd217ca772f07b2e1127
+  data.tar.gz: ba184360972884260c1fe90264af7d5386791804
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: aa693e59cb4b45c2d43f07479f3d61e63242185be9964d4f00b83a4a784a0443ae270a63760f3f2f188e74deb77cbb94a89a18db49d2c5cd4621f18b73363ab3
+  data.tar.gz: 7c0ea783220de28befd7c565ff83ec5ff58f13af0db16b3d341a12c3e415adeacba375e5688a42fcbb26d0402a48071622ed5b161fa52fd08b1f56444faf66e1
data/README.md
CHANGED
@@ -1,17 +1,17 @@
 
 # embulk-output-bigquery
 
-[Embulk](https://github.com/embulk/embulk/) output plugin to load/insert data into [Google BigQuery](https://cloud.google.com/bigquery/)
+[Embulk](https://github.com/embulk/embulk/) output plugin to load/insert data into [Google BigQuery](https://cloud.google.com/bigquery/)
 
 ## Overview
 
-load data into Google BigQuery as batch jobs
+load data into Google BigQuery as batch jobs for big amount of data
 https://developers.google.com/bigquery/loading-data-into-bigquery
 
 * **Plugin type**: output
 * **Resume supported**: no
 * **Cleanup supported**: no
-* **Dynamic table creating**:
+* **Dynamic table creating**: yes
 
 ### NOT IMPLEMENTED
 * insert data over streaming inserts
@@ -30,32 +30,19 @@ OAuth flow for installed applications.
 - **sequence_format**: (string, optional, default is %03d.%02d)
 - **file_ext**: (string, required)
 - **source_format**: file type (NEWLINE_DELIMITED_JSON or CSV) (string, required, default is CSV)
-- **is_file_compressed**: upload file is gzip compressed or not. (boolean, optional, default is 1)
-- **bucket**: Google Cloud Storage output bucket name (string, required)
-- **remote_path**: folder name in GCS bucket (string, optional)
 - **project**: project_id (string, required)
 - **dataset**: dataset (string, required)
 - **table**: table name (string, required)
+- **auto_create_table**: (boolean, optional default is 0)
+- **schema_path**: (string, optional)
 - **application_name**: application name anything you like (string, optional)
-- **
-- **delete_from_bucket_when_job_end**: (boolean, optional, default is 0)
+- **delete_from_local_when_job_end**: (boolean, optional, default is 0)
 - **job_status_max_polling_time**: max job status polling time. (int, optional, default is 3600 sec)
 - **job_status_polling_interval**: job status polling interval. (int, optional, default is 10 sec)
 - **is_skip_job_result_check**: (boolean, optional, default is 0)
-
-
-
-
-* Supported
-  * Maximum size per load job: 1TB across all input files
-  * Maximum number of files per load job: 10,000
-  * embulk-output-bigquery divides a file into more than one job, like below.
-    * job1: file1(1GB) file2(1GB)...file10(1GB)
-    * job2: file11(1GB) file12(1GB)
-
-* Not Supported
-  * Daily limit: 1,000 load jobs per table per day (including failures)
-  * 10,000 load jobs per project per day (including failures)
+- **field_delimiter**: (string, optional, default is ",")
+- **max_bad_records**: (int, optional, default is 0)
+- **encoding**: (UTF-8 or ISO-8859-1) (string, optional, default is "UTF-8")
 
 ## Example
 
@@ -67,10 +54,7 @@ out:
   path_prefix: /path/to/output
   file_ext: csv.gz
   source_format: CSV
-  is_file_compressed: 1
   project: your-project-000
-  bucket: output_bucket_name
-  remote_path: folder_name
   dataset: your_dataset_name
   table: your_table_name
   formatter:
@@ -80,6 +64,21 @@ out:
     - {type: gzip}
 ```
 
+## Dynamic table creating
+
+When `auto_create_table` is set to true, try to create the table using BigQuery API.
+
+To describe the schema of the target table, please write schema path.
+
+`table` option accept [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
+format of ruby to construct table name.
+
+```
+auto_create_table: true
+table: table_%Y_%m
+schema_path: /path/to/schema.json
+```
+
 ## Build
 
 ```
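The new `schema_path` option points at a JSON file that `BigqueryWriter.createTableSchema()` (in the diff further down) deserializes straight into a `List<TableFieldSchema>` with Jackson, so the file is simply a JSON array of BigQuery field descriptors. A minimal sketch of such a file, with hypothetical field names and types:

```
[
  {"name": "id",         "type": "INTEGER"},
  {"name": "name",       "type": "STRING"},
  {"name": "created_at", "type": "TIMESTAMP"}
]
```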
data/build.gradle
CHANGED
@@ -15,16 +15,14 @@ configurations {
 sourceCompatibility = 1.7
 targetCompatibility = 1.7
 
-version = "0.1.
+version = "0.1.2"
 
 dependencies {
     compile "org.embulk:embulk-core:0.5.1"
     provided "org.embulk:embulk-core:0.5.1"
 
     compile "com.google.http-client:google-http-client-jackson2:1.19.0"
-    compile ("com.google.apis:google-api-services-storage:v1-rev27-1.19.1") {exclude module: "guava-jdk5"}
     compile "com.google.apis:google-api-services-bigquery:v2-rev193-1.19.1"
-    compile "eu.medsea.mimeutil:mime-util:2.1.3"
 
     testCompile "junit:junit:4.+"
 }
data/src/main/java/org/embulk/output/BigqueryAuthentication.java
CHANGED
@@ -23,8 +23,6 @@ import com.google.api.client.http.HttpTransport;
 import com.google.api.client.http.InputStreamContent;
 import com.google.api.client.json.JsonFactory;
 import com.google.api.client.json.jackson2.JacksonFactory;
-import com.google.api.services.storage.Storage;
-import com.google.api.services.storage.StorageScopes;
 import com.google.api.services.bigquery.Bigquery;
 import com.google.api.services.bigquery.BigqueryScopes;
 import com.google.api.services.bigquery.model.ProjectList;
@@ -64,7 +62,6 @@ public class BigqueryAuthentication
                 .setServiceAccountId(serviceAccountEmail)
                 .setServiceAccountScopes(
                         ImmutableList.of(
-                                BigqueryScopes.DEVSTORAGE_READ_WRITE,
                                 BigqueryScopes.BIGQUERY
                         )
                 )
@@ -87,13 +84,4 @@ public class BigqueryAuthentication
 
         return client;
     }
-
-    public Storage getGcsClient() throws IOException
-    {
-        Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
-                .setApplicationName(applicationName)
-                .build();
-
-        return client;
-    }
 }
data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java
CHANGED
@@ -13,6 +13,7 @@ import java.util.concurrent.TimeoutException;
 import com.google.common.base.Optional;
 import com.google.common.base.Throwables;
 import java.security.GeneralSecurityException;
+import org.jruby.embed.ScriptingContainer;
 
 import org.embulk.config.Config;
 import org.embulk.config.ConfigException;
@@ -59,10 +60,6 @@ public class BigqueryOutputPlugin
         @ConfigDefault("\"CSV\"")
         public String getSourceFormat();
 
-        @Config("is_file_compressed")
-        @ConfigDefault("true")
-        public boolean getIsFileCompressed();
-
         @Config("field_delimiter")
         @ConfigDefault("\",\"")
         public String getFieldDelimiter();
@@ -71,20 +68,13 @@ public class BigqueryOutputPlugin
         @ConfigDefault("0")
         public int getMaxBadrecords();
 
-        @Config("
-        @ConfigDefault("
-        public
+        @Config("encoding")
+        @ConfigDefault("\"UTF-8\"")
+        public String getEncoding();
 
-        @Config("
+        @Config("delete_from_local_when_job_end")
         @ConfigDefault("false")
-        public boolean
-
-        @Config("bucket")
-        public String getBucket();
-
-        @Config("remote_path")
-        @ConfigDefault("null")
-        public Optional<String> getRemotePath();
+        public boolean getDeleteFromLocalWhenJobEnd();
 
         @Config("project")
         public String getProject();
@@ -117,7 +107,6 @@ public class BigqueryOutputPlugin
     }
 
     private final Logger log = Exec.getLogger(BigqueryOutputPlugin.class);
-    private static BigqueryGcsWriter bigQueryGcsWriter;
     private static BigqueryWriter bigQueryWriter;
 
     public ConfigDiff transaction(ConfigSource config, int taskCount,
@@ -126,33 +115,25 @@ public class BigqueryOutputPlugin
         final PluginTask task = config.loadConfig(PluginTask.class);
 
         try {
-            bigQueryGcsWriter = new BigqueryGcsWriter.Builder(task.getServiceAccountEmail())
-                    .setP12KeyFilePath(task.getP12KeyfilePath())
-                    .setApplicationName(task.getApplicationName())
-                    .setBucket(task.getBucket())
-                    .setSourceFormat(task.getSourceFormat())
-                    .setIsFileCompressed(task.getIsFileCompressed())
-                    .setDeleteFromBucketWhenJobEnd(task.getDeleteFromBucketWhenJobEnd())
-                    .build();
-
             bigQueryWriter = new BigqueryWriter.Builder(task.getServiceAccountEmail())
                     .setP12KeyFilePath(task.getP12KeyfilePath())
                    .setApplicationName(task.getApplicationName())
                     .setProject(task.getProject())
                     .setDataset(task.getDataset())
-                    .setTable(task.getTable())
+                    .setTable(generateTableName(task.getTable()))
                     .setAutoCreateTable(task.getAutoCreateTable())
                     .setSchemaPath(task.getSchemaPath())
-                    .setBucket(task.getBucket())
                     .setSourceFormat(task.getSourceFormat())
                     .setFieldDelimiter(task.getFieldDelimiter())
                     .setMaxBadrecords(task.getMaxBadrecords())
+                    .setEncoding(task.getEncoding())
                     .setJobStatusMaxPollingTime(task.getJobStatusMaxPollingTime())
                     .setJobStatusPollingInterval(task.getJobStatusPollingInterval())
                     .setIsSkipJobResultCheck(task.getIsSkipJobResultCheck())
                     .build();
+        } catch (FileNotFoundException ex) {
+            throw new ConfigException(ex);
         } catch (IOException | GeneralSecurityException ex) {
-            log.warn("Google Authentication was failed. Please Check your configurations.");
             throw new ConfigException(ex);
         }
         // non-retryable (non-idempotent) output:
@@ -165,19 +146,6 @@ public class BigqueryOutputPlugin
     {
         control.run(taskSource);
 
-        try {
-            bigQueryWriter.executeJob();
-            // TODO refactor
-            if (bigQueryGcsWriter.getDeleteFromBucketWhenJobEnd()) {
-                ArrayList<HashMap<String, String>> fileList = bigQueryWriter.getFileList();
-                for (HashMap<String, String> file : fileList) {
-                    bigQueryGcsWriter.deleteFile(file.get("remote_path"), file.get("file_name"));
-                }
-            }
-        } catch (IOException | TimeoutException | BigqueryWriter.JobFailedException ex) {
-            log.warn(ex.getMessage());
-            throw Throwables.propagate(ex);
-        }
         return Exec.newConfigDiff();
     }
 
@@ -196,7 +164,6 @@ public class BigqueryOutputPlugin
         final String pathPrefix = task.getPathPrefix();
         final String sequenceFormat = task.getSequenceFormat();
         final String pathSuffix = task.getFileNameExtension();
-        final Optional<String> remotePath = task.getRemotePath();
 
         return new TransactionalFileOutput() {
             private int fileIndex = 0;
@@ -217,7 +184,6 @@ public class BigqueryOutputPlugin
                 }
                 filePath = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + suffix;
                 file = new File(filePath);
-                fileName = file.getName();
 
                 String parentPath = file.getParent();
                 File dir = new File(parentPath);
@@ -257,18 +223,15 @@ public class BigqueryOutputPlugin
             public void finish()
             {
                 closeFile();
-                if (
-                    fileSize = file.length();
+                if (filePath != null) {
                     try {
-
+                        bigQueryWriter.executeLoad(filePath);
 
-                        if (task.
+                        if (task.getDeleteFromLocalWhenJobEnd()) {
                            log.info(String.format("Delete local file [%s]", filePath));
                            file.delete();
                         }
-
-                        bigQueryWriter.addTask(remotePath, fileName, fileSize);
-                    } catch (IOException ex) {
+                    } catch (IOException | TimeoutException | BigqueryWriter.JobFailedException ex) {
                         throw Throwables.propagate(ex);
                     }
                 }
@@ -290,4 +253,13 @@ public class BigqueryOutputPlugin
             }
         };
     }
+
+    // Parse like "table_%Y_%m"(include pattern or not) format using Java is difficult. So use jRuby.
+    public String generateTableName(String tableName)
+    {
+        ScriptingContainer jruby = new ScriptingContainer();
+        Object result = jruby.runScriptlet("Time.now.strftime('" + tableName + "')");
+
+        return result.toString();
+    }
 }
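The new `generateTableName()` above leans on JRuby because `Time#strftime` patterns such as `table_%Y_%m` have no direct Java counterpart here. A standalone sketch of the same call (assuming a JRuby runtime such as `org.jruby:jruby-complete` is on the classpath, which it is inside an Embulk process) looks like this; a table name without `%` directives simply comes back unchanged:

```
import org.jruby.embed.ScriptingContainer;

public class TableNameExample
{
    public static void main(String[] args)
    {
        // Same scriptlet that BigqueryOutputPlugin.generateTableName() builds:
        // Ruby's Time.now.strftime expands the % directives in the table name.
        ScriptingContainer jruby = new ScriptingContainer();
        Object result = jruby.runScriptlet("Time.now.strftime('table_%Y_%m')");
        System.out.println(result.toString()); // e.g. table_2015_04
    }
}
```

Note that the pattern is spliced into a single-quoted Ruby string and evaluated once per run, at transaction time.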
data/src/main/java/org/embulk/output/BigqueryWriter.java
CHANGED
@@ -1,6 +1,11 @@
 package org.embulk.output;
 
+import java.io.File;
 import java.io.IOException;
+import java.io.FileNotFoundException;
+import java.io.FileInputStream;
+import java.io.BufferedInputStream;
+import com.google.api.client.http.InputStreamContent;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Iterator;
@@ -11,14 +16,19 @@ import java.util.concurrent.TimeoutException;
 import org.apache.commons.lang3.StringUtils;
 import com.google.common.base.Optional;
 import com.google.common.collect.ImmutableSet;
+import com.google.common.base.Throwables;
 import java.security.GeneralSecurityException;
 
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.core.type.TypeReference;
+
 import org.embulk.spi.Exec;
 import org.slf4j.Logger;
 
 import com.google.api.services.bigquery.Bigquery;
 import com.google.api.services.bigquery.BigqueryScopes;
 import com.google.api.services.bigquery.Bigquery.Datasets;
+import com.google.api.services.bigquery.Bigquery.Tables;
 import com.google.api.services.bigquery.Bigquery.Jobs.Insert;
 import com.google.api.services.bigquery.Bigquery.Jobs.GetQueryResults;
 import com.google.api.services.bigquery.model.Job;
@@ -28,11 +38,19 @@ import com.google.api.services.bigquery.model.JobStatus;
 import com.google.api.services.bigquery.model.JobStatistics;
 import com.google.api.services.bigquery.model.JobReference;
 import com.google.api.services.bigquery.model.DatasetList;
+import com.google.api.services.bigquery.model.Table;
+import com.google.api.services.bigquery.model.TableList;
 import com.google.api.services.bigquery.model.TableSchema;
 import com.google.api.services.bigquery.model.TableReference;
 import com.google.api.services.bigquery.model.TableFieldSchema;
 import com.google.api.services.bigquery.model.TableCell;
 import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.ErrorProto;
+import com.google.api.client.googleapis.json.GoogleJsonResponseException;
+
+import com.google.api.client.googleapis.media.MediaHttpUploader;
+import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener;
+import com.google.api.client.googleapis.media.MediaHttpUploader.UploadState;
 
 public class BigqueryWriter
 {
@@ -43,43 +61,58 @@ public class BigqueryWriter
     private final String table;
     private final boolean autoCreateTable;
     private final Optional<String> schemaPath;
-    private final
+    private final TableSchema tableSchema;
     private final String sourceFormat;
     private final String fieldDelimiter;
     private final int maxBadrecords;
+    private final String encoding;
     private final long jobStatusMaxPollingTime;
     private final long jobStatusPollingInterval;
     private final boolean isSkipJobResultCheck;
     private final Bigquery bigQueryClient;
-    private final EmbulkBigqueryTask writerTask;
 
-    public BigqueryWriter(Builder builder) throws IOException, GeneralSecurityException
+    public BigqueryWriter(Builder builder) throws FileNotFoundException, IOException, GeneralSecurityException
     {
         this.project = builder.project;
         this.dataset = builder.dataset;
         this.table = builder.table;
         this.autoCreateTable = builder.autoCreateTable;
         this.schemaPath = builder.schemaPath;
-        this.bucket = builder.bucket;
         this.sourceFormat = builder.sourceFormat.toUpperCase();
         this.fieldDelimiter = builder.fieldDelimiter;
         this.maxBadrecords = builder.maxBadrecords;
+        this.encoding = builder.encoding.toUpperCase();
         this.jobStatusMaxPollingTime = builder.jobStatusMaxPollingTime;
         this.jobStatusPollingInterval = builder.jobStatusPollingInterval;
         this.isSkipJobResultCheck = builder.isSkipJobResultCheck;
 
         BigqueryAuthentication auth = new BigqueryAuthentication(builder.serviceAccountEmail, builder.p12KeyFilePath, builder.applicationName);
         this.bigQueryClient = auth.getBigqueryClient();
-
+
+        checkConfig();
+        if (autoCreateTable) {
+            this.tableSchema = createTableSchema(builder.schemaPath);
+        } else {
+            this.tableSchema = null;
+        }
     }
 
     private String getJobStatus(JobReference jobRef) throws JobFailedException
     {
         try {
             Job job = bigQueryClient.jobs().get(project, jobRef.getJobId()).execute();
-
-
+
+            ErrorProto fatalError = job.getStatus().getErrorResult();
+            if (fatalError != null) {
+                throw new JobFailedException(String.format("Job failed. job id:[%s] reason:[%s][%s] status:[FAILED]", jobRef.getJobId(), fatalError.getReason(), fatalError.getMessage()));
+            }
+            List<ErrorProto> errors = job.getStatus().getErrors();
+            if (errors != null) {
+                for (ErrorProto error : errors) {
+                    log.warn(String.format("Error: job id:[%s] reason[%s][%s] location:[%s]", jobRef.getJobId(), error.getReason(), error.getMessage(), error.getLocation()));
+                }
             }
+
             String jobStatus = job.getStatus().getState();
             if (jobStatus.equals("DONE")) {
                 JobStatistics statistics = job.getStatistics();
@@ -117,59 +150,68 @@ public class BigqueryWriter
         }
     }
 
-    public void
-    {
-        // TODO: refactor
-        ArrayList<ArrayList<HashMap<String, String>>> taskList = writerTask.createJobList();
-        for (ArrayList<HashMap<String, String>> task : taskList) {
-            Job job = createJob(task);
-            // TODO: multi-threading
-            new EmbulkBigqueryJob(job).call();
-        }
-    }
-
-    private Job createJob(ArrayList<HashMap<String, String>> task)
+    public void executeLoad(String localFilePath) throws GoogleJsonResponseException, IOException, TimeoutException, JobFailedException
     {
         log.info(String.format("Job preparing... project:%s dataset:%s table:%s", project, dataset, table));
 
         Job job = new Job();
+        JobReference jobRef = null;
         JobConfiguration jobConfig = new JobConfiguration();
         JobConfigurationLoad loadConfig = new JobConfigurationLoad();
         jobConfig.setLoad(loadConfig);
         job.setConfiguration(jobConfig);
 
         loadConfig.setAllowQuotedNewlines(false);
+        loadConfig.setEncoding(encoding);
+        loadConfig.setMaxBadRecords(maxBadrecords);
         if (sourceFormat.equals("NEWLINE_DELIMITED_JSON")) {
             loadConfig.setSourceFormat("NEWLINE_DELIMITED_JSON");
         } else {
             loadConfig.setFieldDelimiter(fieldDelimiter);
         }
+        loadConfig.setWriteDisposition("WRITE_APPEND");
         if (autoCreateTable) {
-            loadConfig.setSchema(
-            loadConfig.setWriteDisposition("WRITE_EMPTY");
+            loadConfig.setSchema(tableSchema);
             loadConfig.setCreateDisposition("CREATE_IF_NEEDED");
-            log.info(String.format("table:[%s] will be create
+            log.info(String.format("table:[%s] will be create if not exists", table));
         } else {
-            loadConfig.setWriteDisposition("WRITE_APPEND");
             loadConfig.setCreateDisposition("CREATE_NEVER");
         }
-        loadConfig.setMaxBadRecords(maxBadrecords);
 
-
-
-
-
-
-
-
-        }
-        loadConfig.setSourceUris(sources);
-        loadConfig.setDestinationTable(getTableReference());
+        loadConfig.setDestinationTable(createTableReference());
+
+        File file = new File(localFilePath);
+        InputStreamContent mediaContent = new InputStreamContent("application/octet-stream",
+                new BufferedInputStream(
+                        new FileInputStream(file)));
+        mediaContent.setLength(file.length());
 
-
+        Insert insert = bigQueryClient.jobs().insert(project, job, mediaContent);
+        insert.setProjectId(project);
+        insert.setDisableGZipContent(true);
+
+        // @see https://code.google.com/p/google-api-java-client/wiki/MediaUpload
+        UploadProgressListener listner = new UploadProgressListener();
+        listner.setFileName(localFilePath);
+        insert.getMediaHttpUploader()
+                .setProgressListener(listner)
+                .setDirectUploadEnabled(false);
+
+        try {
+            jobRef = insert.execute().getJobReference();
+        } catch (Exception ex) {
+            log.warn("Job execution was failed. Please check your settings or data... like data matches schema");
+            throw Throwables.propagate(ex);
+        }
+        log.info(String.format("Job executed. job id:[%s] file:[%s]", jobRef.getJobId(), localFilePath));
+        if (isSkipJobResultCheck) {
+            log.info(String.format("Skip job status check. job id:[%s]", jobRef.getJobId()));
+        } else {
+            getJobStatusUntilDone(jobRef);
+        }
     }
 
-    private TableReference
+    private TableReference createTableReference()
     {
         return new TableReference()
                 .setProjectId(project)
@@ -177,135 +219,78 @@ public class BigqueryWriter
                 .setTableId(table);
     }
 
-    private TableSchema
+    private TableSchema createTableSchema(Optional<String> schemaPath) throws FileNotFoundException, IOException
     {
-
-
-
-
-
-
-
-
-
-
+        String path = schemaPath.orNull();
+        File file = new File(path);
+        FileInputStream stream = null;
+        try {
+            stream = new FileInputStream(file);
+            ObjectMapper mapper = new ObjectMapper();
+            List<TableFieldSchema> fields = mapper.readValue(stream, new TypeReference<List<TableFieldSchema>>() {});
+            TableSchema tableSchema = new TableSchema().setFields(fields);
+            return tableSchema;
+        } finally {
+            if (stream != null) {
+                stream.close();
+            }
         }
-        */
-
-        tableSchema.setFields(fields);
-        return tableSchema;
     }
 
-
+    public boolean isExistTable(String tableName) throws IOException
     {
-
-
-
-
-
-        path = bucket + "/" + StringUtils.join(pathList) + "/" + fileName;
+        Tables tableRequest = bigQueryClient.tables();
+        try {
+            Table tableData = tableRequest.get(project, dataset, tableName).execute();
+        } catch (GoogleJsonResponseException ex) {
+            return false;
         }
-        return
-    }
-
-    public void addTask(Optional<String> remotePath, String fileName, long fileSize)
-    {
-        writerTask.addTaskFile(remotePath, fileName, fileSize);
-    }
-
-    public ArrayList<HashMap<String, String>> getFileList()
-    {
-        return writerTask.getFileList();
+        return true;
    }
 
-
+    public void checkConfig() throws FileNotFoundException, IOException
    {
-
-
-
-        {
-            this.job = job;
-        }
-
-        public Void call() throws IOException, TimeoutException, JobFailedException
-        {
-            Insert insert = bigQueryClient.jobs().insert(project, job);
-            insert.setProjectId(project);
-            JobReference jobRef = insert.execute().getJobReference();
-            log.info(String.format("Job executed. job id:[%s]", jobRef.getJobId()));
-            if (isSkipJobResultCheck) {
-                log.info(String.format("Skip job status check. job id:[%s]", jobRef.getJobId()));
+        if (autoCreateTable) {
+            if (!schemaPath.isPresent()) {
+                throw new IOException("schema_path is empty");
             } else {
-
+                File file = new File(schemaPath.orNull());
+                if (!file.exists()) {
+                    throw new FileNotFoundException("Can not load schema file.");
+                }
+            }
+        } else {
+            if (!isExistTable(table)) {
+                throw new IOException(String.format("table [%s] is not exists", table));
            }
-            return null;
         }
    }
 
-    private class
+    private class UploadProgressListener implements MediaHttpUploaderProgressListener
    {
-
-        private final long MAX_SIZE_PER_LOAD_JOB = 1000 * 1024 * 1024 * 1024L; // 1TB
-        private final int MAX_NUMBER_OF_FILES_PER_LOAD_JOB = 10000;
+        private String fileName;
 
-
-
-
-        public void addTaskFile(Optional<String> remotePath, String fileName, long fileSize)
+        @Override
+        public void progressChanged(MediaHttpUploader uploader) throws IOException
        {
-
-
-
-
-
-
-
-
-
-
-
-
-        {
-            long currentBundleSize = 0;
-            int currentFileCount = 0;
-            ArrayList<HashMap<String, String>> job = new ArrayList<HashMap<String, String>>();
-            for (HashMap<String, String> task : taskList) {
-                boolean isNeedNextJobList = false;
-                long fileSize = Long.valueOf(task.get("file_size")).longValue();
-
-                if (currentBundleSize + fileSize > MAX_SIZE_PER_LOAD_JOB) {
-                    isNeedNextJobList = true;
-                }
-
-                if (currentFileCount >= MAX_NUMBER_OF_FILES_PER_LOAD_JOB) {
-                    isNeedNextJobList = true;
-                }
-
-                if (isNeedNextJobList) {
-                    jobList.add(job);
-                    job = new ArrayList<HashMap<String, String>>();
-                    job.add(task);
-                    currentBundleSize = 0;
-                } else {
-                    job.add(task);
-                }
-                currentBundleSize += fileSize;
-                currentFileCount++;
-
-                log.debug(String.format("currentBundleSize:%s currentFileCount:%s", currentBundleSize, currentFileCount));
-                log.debug(String.format("fileSize:%s, MAX_SIZE_PER_LOAD_JOB:%s MAX_NUMBER_OF_FILES_PER_LOAD_JOB:%s",
-                        fileSize, MAX_SIZE_PER_LOAD_JOB, MAX_NUMBER_OF_FILES_PER_LOAD_JOB));
-
-            }
-            if (job.size() > 0) {
-                jobList.add(job);
+            switch (uploader.getUploadState()) {
+                case INITIATION_STARTED:
+                    log.info(String.format("Upload start [%s]", fileName));
+                    break;
+                case INITIATION_COMPLETE:
+                    //log.info(String.format("Upload initiation completed file [%s]", fileName));
+                    break;
+                case MEDIA_IN_PROGRESS:
+                    log.debug(String.format("Uploading [%s] progress %3.0f", fileName, uploader.getProgress() * 100) + "%");
+                    break;
+                case MEDIA_COMPLETE:
+                    log.info(String.format("Upload completed [%s]", fileName));
             }
-            return jobList;
         }
 
-        public
+        public void setFileName(String fileName)
        {
-
+            this.fileName = fileName;
        }
    }
 
@@ -319,10 +304,10 @@ public class BigqueryWriter
         private String table;
         private boolean autoCreateTable;
         private Optional<String> schemaPath;
-        private String bucket;
         private String sourceFormat;
         private String fieldDelimiter;
         private int maxBadrecords;
+        private String encoding;
         private int jobStatusMaxPollingTime;
         private int jobStatusPollingInterval;
         private boolean isSkipJobResultCheck;
@@ -375,12 +360,6 @@ public class BigqueryWriter
             return this;
         }
 
-        public Builder setBucket(String bucket)
-        {
-            this.bucket = bucket;
-            return this;
-        }
-
         public Builder setSourceFormat(String sourceFormat)
         {
             this.sourceFormat = sourceFormat;
@@ -399,6 +378,12 @@ public class BigqueryWriter
             return this;
         }
 
+        public Builder setEncoding(String encoding)
+        {
+            this.encoding = encoding;
+            return this;
+        }
+
         public Builder setJobStatusMaxPollingTime(int jobStatusMaxPollingTime)
         {
             this.jobStatusMaxPollingTime = jobStatusMaxPollingTime;
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.1.
+  version: 0.1.2
 platform: ruby
 authors:
 - Satoshi Akama
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-
+date: 2015-04-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -56,19 +56,16 @@ files:
 - lib/embulk/output/bigquery.rb
 - settings.gradle
 - src/main/java/org/embulk/output/BigqueryAuthentication.java
-- src/main/java/org/embulk/output/BigqueryGcsWriter.java
 - src/main/java/org/embulk/output/BigqueryOutputPlugin.java
 - src/main/java/org/embulk/output/BigqueryWriter.java
 - src/test/java/org/embulk/output/TestBigqueryAuthentication.java
-- src/test/java/org/embulk/output/TestBigqueryGcsWriter.java
 - src/test/java/org/embulk/output/TestBigqueryOutputPlugin.java
 - src/test/java/org/embulk/output/TestBigqueryWriter.java
 - classpath/commons-codec-1.3.jar
 - classpath/commons-logging-1.1.1.jar
-- classpath/embulk-output-bigquery-0.1.
+- classpath/embulk-output-bigquery-0.1.2.jar
 - classpath/google-api-client-1.19.1.jar
 - classpath/google-api-services-bigquery-v2-rev193-1.19.1.jar
-- classpath/google-api-services-storage-v1-rev27-1.19.1.jar
 - classpath/google-http-client-1.19.0.jar
 - classpath/google-http-client-jackson2-1.19.0.jar
 - classpath/google-oauth-client-1.19.0.jar
@@ -76,7 +73,6 @@ files:
 - classpath/httpclient-4.0.1.jar
 - classpath/httpcore-4.0.1.jar
 - classpath/jsr305-1.3.9.jar
-- classpath/mime-util-2.1.3.jar
 homepage: https://github.com/sakama/embulk-output-bigquery
 licenses:
 - Apache-2.0
data/src/main/java/org/embulk/output/BigqueryGcsWriter.java
DELETED
@@ -1,201 +0,0 @@
-package org.embulk.output;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.IllegalFormatException;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
-import com.google.common.base.Optional;
-import com.google.common.collect.ImmutableList;
-//import eu.medsea.mimeutil.MimeType;
-//import eu.medsea.mimeutil.MimeUtil;
-//import eu.medsea.mimeutil.detector.MimeDetector;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.commons.codec.binary.Base64;
-import java.security.GeneralSecurityException;
-
-import org.embulk.spi.Exec;
-import org.slf4j.Logger;
-
-import com.google.api.services.storage.Storage;
-import com.google.api.services.storage.StorageScopes;
-import com.google.api.services.storage.model.Bucket;
-import com.google.api.services.storage.model.Objects;
-import com.google.api.services.storage.model.StorageObject;
-
-import com.google.api.client.http.InputStreamContent;
-
-public class BigqueryGcsWriter
-{
-
-    private final Logger log = Exec.getLogger(BigqueryGcsWriter.class);
-    private final String bucket;
-    private final String sourceFormat;
-    private final boolean isFileCompressed;
-    private final boolean deleteFromBucketWhenJobEnd;
-    private Storage storageClient;
-
-    public BigqueryGcsWriter(Builder builder) throws IOException, GeneralSecurityException
-    {
-        this.bucket = builder.bucket;
-        this.sourceFormat = builder.sourceFormat.toUpperCase();
-        this.isFileCompressed = builder.isFileCompressed;
-        this.deleteFromBucketWhenJobEnd = builder.deleteFromBucketWhenJobEnd;
-
-        BigqueryAuthentication auth = new BigqueryAuthentication(builder.serviceAccountEmail, builder.p12KeyFilePath, builder.applicationName);
-        this.storageClient = auth.getGcsClient();
-    }
-
-    public void uploadFile(String localFilePath, String fileName, Optional<String> remotePath) throws IOException
-    {
-        FileInputStream stream = null;
-
-        try {
-            String path;
-            if (remotePath.isPresent()) {
-                path = remotePath.get();
-            } else {
-                path = "";
-            }
-            String gcsPath = getRemotePath(path, fileName);
-            StorageObject objectMetadata = new StorageObject().setName(gcsPath);
-            log.info(String.format("Uploading file [%s] to [gs://%s/%s]", localFilePath, bucket, gcsPath));
-
-            File file = new File(localFilePath);
-            stream = new FileInputStream(file);
-            InputStreamContent content = new InputStreamContent(getContentType(), stream);
-            Storage.Objects.Insert insertObject = storageClient.objects().insert(bucket, objectMetadata, content);
-            insertObject.setDisableGZipContent(true);
-
-            StorageObject response = insertObject.execute();
-            log.info(String.format("Upload completed [%s] to [gs://%s/%s]", localFilePath, bucket, gcsPath));
-        } finally {
-            stream.close();
-        }
-    }
-
-    private String getRemotePath(String remotePath, String fileName)
-    {
-        if (remotePath.isEmpty()) {
-            return fileName;
-        }
-        String[] pathList = StringUtils.split(remotePath, '/');
-        String path = StringUtils.join(pathList) + "/";
-        if (!path.endsWith("/")) {
-            path = path + "/";
-        }
-        return path + fileName;
-    }
-
-    public void deleteFile(String remotePath, String fileName) throws IOException
-    {
-        String path = getRemotePath(remotePath, fileName);
-        storageClient.objects().delete(bucket, path).execute();
-        log.info(String.format("Delete remote file [gs://%s/%s]", bucket, path));
-    }
-
-    public boolean getDeleteFromBucketWhenJobEnd()
-    {
-        return this.deleteFromBucketWhenJobEnd;
-    }
-
-    private String getContentType()
-    {
-        if (isFileCompressed) {
-            return "application/x-gzip";
-        } else {
-            if (sourceFormat.equals("NEWLINE_DELIMITED_JSON)")) {
-                return "application/json";
-            } else {
-                return "text/csv";
-            }
-        }
-    }
-
-    /*
-    private void registerMimeDetector()
-    {
-        String mimeDetector = "eu.medsea.mimeutil.detector.MagicMimeMimeDetector";
-        MimeDetector registeredMimeDetector = MimeUtil.getMimeDetector(mimeDetector);
-        MimeUtil.registerMimeDetector(mimeDetector);
-    }
-
-    public String detectMimeType(File file)
-    {
-        try {
-            Collection<?> mimeTypes = MimeUtil.getMimeTypes(file);
-            if (!mimeTypes.isEmpty()) {
-                Iterator<?> iterator = mimeTypes.iterator();
-                MimeType mimeType = (MimeType) iterator.next();
-                return mimeType.getMediaType() + "/" + mimeType.getSubType();
-            }
-        } catch (Exception ex) {
-        }
-        return "application/octet-stream";
-    }
-    */
-
-    public static class Builder
-    {
-        private final String serviceAccountEmail;
-        private String p12KeyFilePath;
-        private String applicationName;
-        private String bucket;
-        private String sourceFormat;
-        private boolean isFileCompressed;
-        private boolean deleteFromBucketWhenJobEnd;
-        private boolean enableMd5hashCheck;
-
-        public Builder(String serviceAccountEmail)
-        {
-            this.serviceAccountEmail = serviceAccountEmail;
-        }
-
-        public Builder setP12KeyFilePath(String p12KeyFilePath)
-        {
-            this.p12KeyFilePath = p12KeyFilePath;
-            return this;
-        }
-
-        public Builder setApplicationName(String applicationName)
-        {
-            this.applicationName = applicationName;
-            return this;
-        }
-
-        public Builder setBucket(String bucket)
-        {
-            this.bucket = bucket;
-            return this;
-        }
-
-        public Builder setSourceFormat(String sourceFormat)
-        {
-            this.sourceFormat = sourceFormat;
-            return this;
-        }
-
-        public Builder setIsFileCompressed(boolean isFileCompressed)
-        {
-            this.isFileCompressed = isFileCompressed;
-            return this;
-        }
-
-        public Builder setDeleteFromBucketWhenJobEnd(boolean deleteFromBucketWhenJobEnd)
-        {
-            this.deleteFromBucketWhenJobEnd = deleteFromBucketWhenJobEnd;
-            return this;
-        }
-
-        public BigqueryGcsWriter build() throws IOException, GeneralSecurityException
-        {
-            return new BigqueryGcsWriter(this);
-        }
-    }
-}