embulk-output-bigquery 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +24 -25
- data/build.gradle +1 -3
- data/src/main/java/org/embulk/output/BigqueryAuthentication.java +0 -12
- data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java +23 -51
- data/src/main/java/org/embulk/output/BigqueryWriter.java +136 -151
- metadata +3 -7
- data/src/main/java/org/embulk/output/BigqueryGcsWriter.java +0 -201
- data/src/test/java/org/embulk/output/TestBigqueryGcsWriter.java +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 46c61dd1c73ff99c3c69bd217ca772f07b2e1127
+  data.tar.gz: ba184360972884260c1fe90264af7d5386791804
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: aa693e59cb4b45c2d43f07479f3d61e63242185be9964d4f00b83a4a784a0443ae270a63760f3f2f188e74deb77cbb94a89a18db49d2c5cd4621f18b73363ab3
+  data.tar.gz: 7c0ea783220de28befd7c565ff83ec5ff58f13af0db16b3d341a12c3e415adeacba375e5688a42fcbb26d0402a48071622ed5b161fa52fd08b1f56444faf66e1
data/README.md
CHANGED
@@ -1,17 +1,17 @@
 
 # embulk-output-bigquery
 
-[Embulk](https://github.com/embulk/embulk/) output plugin to load/insert data into [Google BigQuery](https://cloud.google.com/bigquery/)
+[Embulk](https://github.com/embulk/embulk/) output plugin to load/insert data into [Google BigQuery](https://cloud.google.com/bigquery/)
 
 ## Overview
 
-load data into Google BigQuery as batch jobs
+load data into Google BigQuery as batch jobs for big amount of data
 https://developers.google.com/bigquery/loading-data-into-bigquery
 
 * **Plugin type**: output
 * **Resume supported**: no
 * **Cleanup supported**: no
-* **Dynamic table creating**:
+* **Dynamic table creating**: yes
 
 ### NOT IMPLEMENTED
 * insert data over streaming inserts
@@ -30,32 +30,19 @@ OAuth flow for installed applications.
 - **sequence_format**: (string, optional, default is %03d.%02d)
 - **file_ext**: (string, required)
 - **source_format**: file type (NEWLINE_DELIMITED_JSON or CSV) (string, required, default is CSV)
-- **is_file_compressed**: upload file is gzip compressed or not. (boolean, optional, default is 1)
-- **bucket**: Google Cloud Storage output bucket name (string, required)
-- **remote_path**: folder name in GCS bucket (string, optional)
 - **project**: project_id (string, required)
 - **dataset**: dataset (string, required)
 - **table**: table name (string, required)
+- **auto_create_table**: (boolean, optional default is 0)
+- **schema_path**: (string, optional)
 - **application_name**: application name anything you like (string, optional)
-- **
-- **delete_from_bucket_when_job_end**: (boolean, optional, default is 0)
+- **delete_from_local_when_job_end**: (boolean, optional, default is 0)
 - **job_status_max_polling_time**: max job status polling time. (int, optional, default is 3600 sec)
 - **job_status_polling_interval**: job status polling interval. (int, optional, default is 10 sec)
 - **is_skip_job_result_check**: (boolean, optional, default is 0)
-
-
-
-
-* Supported
-* Maximum size per load job: 1TB across all input files
-* Maximum number of files per load job: 10,000
-* embulk-output-bigquery divides a file into more than one job, like below.
-* job1: file1(1GB) file2(1GB)...file10(1GB)
-* job2: file11(1GB) file12(1GB)
-
-* Not Supported
-* Daily limit: 1,000 load jobs per table per day (including failures)
-* 10,000 load jobs per project per day (including failures)
+- **field_delimiter**: (string, optional, default is ",")
+- **max_bad_records**: (int, optional, default is 0)
+- **encoding**: (UTF-8 or ISO-8859-1) (string, optional, default is "UTF-8")
 
 ## Example
 
@@ -67,10 +54,7 @@ out:
   path_prefix: /path/to/output
   file_ext: csv.gz
   source_format: CSV
-  is_file_compressed: 1
   project: your-project-000
-  bucket: output_bucket_name
-  remote_path: folder_name
   dataset: your_dataset_name
   table: your_table_name
   formatter:
@@ -80,6 +64,21 @@ out:
   - {type: gzip}
 ```
 
+## Dynamic table creating
+
+When `auto_create_table` is set to true, try to create the table using BigQuery API.
+
+To describe the schema of the target table, please write schema path.
+
+`table` option accept [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
+format of ruby to construct table name.
+
+```
+auto_create_table: true
+table: table_%Y_%m
+schema_path: /path/to/schema.json
+```
+
 ## Build
 
 ```
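The new "Dynamic table creating" section leaves the format of the file behind `schema_path` implicit. The BigqueryWriter change later in this diff parses that file with Jackson into a `List<TableFieldSchema>`, so a JSON array of BigQuery field definitions is what it expects. Below is a minimal sketch of reading such a file the same way; the path and field names are made up for illustration.

```java
import java.io.File;
import java.util.List;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableSchema;

public class SchemaFileSketch
{
    public static void main(String[] args) throws Exception
    {
        // Assumed contents of /path/to/schema.json (hypothetical field names):
        // [
        //   {"name": "id",         "type": "INTEGER"},
        //   {"name": "name",       "type": "STRING"},
        //   {"name": "created_at", "type": "TIMESTAMP"}
        // ]
        ObjectMapper mapper = new ObjectMapper();
        List<TableFieldSchema> fields = mapper.readValue(
                new File("/path/to/schema.json"),
                new TypeReference<List<TableFieldSchema>>() {});

        // The plugin wraps the parsed fields in a TableSchema before attaching
        // them to the load job configuration.
        TableSchema schema = new TableSchema().setFields(fields);
        System.out.println("Parsed " + schema.getFields().size() + " fields");
    }
}
```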
data/build.gradle
CHANGED
@@ -15,16 +15,14 @@ configurations {
 sourceCompatibility = 1.7
 targetCompatibility = 1.7
 
-version = "0.1.1"
+version = "0.1.2"
 
 dependencies {
     compile "org.embulk:embulk-core:0.5.1"
     provided "org.embulk:embulk-core:0.5.1"
 
     compile "com.google.http-client:google-http-client-jackson2:1.19.0"
-    compile ("com.google.apis:google-api-services-storage:v1-rev27-1.19.1") {exclude module: "guava-jdk5"}
     compile "com.google.apis:google-api-services-bigquery:v2-rev193-1.19.1"
-    compile "eu.medsea.mimeutil:mime-util:2.1.3"
 
     testCompile "junit:junit:4.+"
 }
data/src/main/java/org/embulk/output/BigqueryAuthentication.java
CHANGED
@@ -23,8 +23,6 @@ import com.google.api.client.http.HttpTransport;
 import com.google.api.client.http.InputStreamContent;
 import com.google.api.client.json.JsonFactory;
 import com.google.api.client.json.jackson2.JacksonFactory;
-import com.google.api.services.storage.Storage;
-import com.google.api.services.storage.StorageScopes;
 import com.google.api.services.bigquery.Bigquery;
 import com.google.api.services.bigquery.BigqueryScopes;
 import com.google.api.services.bigquery.model.ProjectList;
@@ -64,7 +62,6 @@ public class BigqueryAuthentication
                 .setServiceAccountId(serviceAccountEmail)
                 .setServiceAccountScopes(
                         ImmutableList.of(
-                                BigqueryScopes.DEVSTORAGE_READ_WRITE,
                                 BigqueryScopes.BIGQUERY
                         )
                 )
@@ -87,13 +84,4 @@ public class BigqueryAuthentication
 
         return client;
     }
-
-    public Storage getGcsClient() throws IOException
-    {
-        Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
-                .setApplicationName(applicationName)
-                .build();
-
-        return client;
-    }
 }
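With the Cloud Storage scope and `getGcsClient()` removed, the authentication class only has to hand back a BigQuery client. A minimal sketch of what that service-account flow amounts to, assuming P12 key authentication as the plugin uses; the account email, key path and application name are placeholders:

```java
import java.io.File;

import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.BigqueryScopes;
import com.google.common.collect.ImmutableList;

public class BigqueryClientSketch
{
    public static Bigquery buildClient() throws Exception
    {
        HttpTransport transport = GoogleNetHttpTransport.newTrustedTransport();
        JsonFactory jsonFactory = new JacksonFactory();

        // BigQuery is now the only scope requested.
        GoogleCredential credential = new GoogleCredential.Builder()
                .setTransport(transport)
                .setJsonFactory(jsonFactory)
                .setServiceAccountId("xxx@developer.gserviceaccount.com")
                .setServiceAccountScopes(ImmutableList.of(BigqueryScopes.BIGQUERY))
                .setServiceAccountPrivateKeyFromP12File(new File("/path/to/key.p12"))
                .build();

        return new Bigquery.Builder(transport, jsonFactory, credential)
                .setApplicationName("Embulk BigQuery plugin")
                .build();
    }
}
```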
data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java
CHANGED
@@ -13,6 +13,7 @@ import java.util.concurrent.TimeoutException;
 import com.google.common.base.Optional;
 import com.google.common.base.Throwables;
 import java.security.GeneralSecurityException;
+import org.jruby.embed.ScriptingContainer;
 
 import org.embulk.config.Config;
 import org.embulk.config.ConfigException;
@@ -59,10 +60,6 @@ public class BigqueryOutputPlugin
         @ConfigDefault("\"CSV\"")
         public String getSourceFormat();
 
-        @Config("is_file_compressed")
-        @ConfigDefault("true")
-        public boolean getIsFileCompressed();
-
         @Config("field_delimiter")
         @ConfigDefault("\",\"")
         public String getFieldDelimiter();
@@ -71,20 +68,13 @@ public class BigqueryOutputPlugin
         @ConfigDefault("0")
         public int getMaxBadrecords();
 
-        @Config("
-        @ConfigDefault("
-        public
+        @Config("encoding")
+        @ConfigDefault("\"UTF-8\"")
+        public String getEncoding();
 
-        @Config("
+        @Config("delete_from_local_when_job_end")
         @ConfigDefault("false")
-        public boolean
-
-        @Config("bucket")
-        public String getBucket();
-
-        @Config("remote_path")
-        @ConfigDefault("null")
-        public Optional<String> getRemotePath();
+        public boolean getDeleteFromLocalWhenJobEnd();
 
         @Config("project")
         public String getProject();
@@ -117,7 +107,6 @@ public class BigqueryOutputPlugin
     }
 
     private final Logger log = Exec.getLogger(BigqueryOutputPlugin.class);
-    private static BigqueryGcsWriter bigQueryGcsWriter;
     private static BigqueryWriter bigQueryWriter;
 
     public ConfigDiff transaction(ConfigSource config, int taskCount,
@@ -126,33 +115,25 @@ public class BigqueryOutputPlugin
         final PluginTask task = config.loadConfig(PluginTask.class);
 
         try {
-            bigQueryGcsWriter = new BigqueryGcsWriter.Builder(task.getServiceAccountEmail())
-                    .setP12KeyFilePath(task.getP12KeyfilePath())
-                    .setApplicationName(task.getApplicationName())
-                    .setBucket(task.getBucket())
-                    .setSourceFormat(task.getSourceFormat())
-                    .setIsFileCompressed(task.getIsFileCompressed())
-                    .setDeleteFromBucketWhenJobEnd(task.getDeleteFromBucketWhenJobEnd())
-                    .build();
-
             bigQueryWriter = new BigqueryWriter.Builder(task.getServiceAccountEmail())
                     .setP12KeyFilePath(task.getP12KeyfilePath())
                     .setApplicationName(task.getApplicationName())
                     .setProject(task.getProject())
                     .setDataset(task.getDataset())
-                    .setTable(task.getTable())
+                    .setTable(generateTableName(task.getTable()))
                     .setAutoCreateTable(task.getAutoCreateTable())
                     .setSchemaPath(task.getSchemaPath())
-                    .setBucket(task.getBucket())
                     .setSourceFormat(task.getSourceFormat())
                     .setFieldDelimiter(task.getFieldDelimiter())
                     .setMaxBadrecords(task.getMaxBadrecords())
+                    .setEncoding(task.getEncoding())
                    .setJobStatusMaxPollingTime(task.getJobStatusMaxPollingTime())
                     .setJobStatusPollingInterval(task.getJobStatusPollingInterval())
                     .setIsSkipJobResultCheck(task.getIsSkipJobResultCheck())
                     .build();
+        } catch (FileNotFoundException ex) {
+            throw new ConfigException(ex);
         } catch (IOException | GeneralSecurityException ex) {
-            log.warn("Google Authentication was failed. Please Check your configurations.");
             throw new ConfigException(ex);
         }
         // non-retryable (non-idempotent) output:
@@ -165,19 +146,6 @@ public class BigqueryOutputPlugin
     {
         control.run(taskSource);
 
-        try {
-            bigQueryWriter.executeJob();
-            // TODO refactor
-            if (bigQueryGcsWriter.getDeleteFromBucketWhenJobEnd()) {
-                ArrayList<HashMap<String, String>> fileList = bigQueryWriter.getFileList();
-                for (HashMap<String, String> file : fileList) {
-                    bigQueryGcsWriter.deleteFile(file.get("remote_path"), file.get("file_name"));
-                }
-            }
-        } catch (IOException | TimeoutException | BigqueryWriter.JobFailedException ex) {
-            log.warn(ex.getMessage());
-            throw Throwables.propagate(ex);
-        }
         return Exec.newConfigDiff();
     }
 
@@ -196,7 +164,6 @@ public class BigqueryOutputPlugin
         final String pathPrefix = task.getPathPrefix();
         final String sequenceFormat = task.getSequenceFormat();
         final String pathSuffix = task.getFileNameExtension();
-        final Optional<String> remotePath = task.getRemotePath();
 
         return new TransactionalFileOutput() {
             private int fileIndex = 0;
@@ -217,7 +184,6 @@ public class BigqueryOutputPlugin
                 }
                 filePath = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + suffix;
                 file = new File(filePath);
-                fileName = file.getName();
 
                 String parentPath = file.getParent();
                 File dir = new File(parentPath);
@@ -257,18 +223,15 @@ public class BigqueryOutputPlugin
             public void finish()
             {
                 closeFile();
-                if (
-                    fileSize = file.length();
+                if (filePath != null) {
                     try {
-
+                        bigQueryWriter.executeLoad(filePath);
 
-                        if (task.
+                        if (task.getDeleteFromLocalWhenJobEnd()) {
                             log.info(String.format("Delete local file [%s]", filePath));
                             file.delete();
                         }
-
-                        bigQueryWriter.addTask(remotePath, fileName, fileSize);
-                    } catch (IOException ex) {
+                    } catch (IOException | TimeoutException | BigqueryWriter.JobFailedException ex) {
                         throw Throwables.propagate(ex);
                     }
                 }
@@ -290,4 +253,13 @@ public class BigqueryOutputPlugin
             }
         };
     }
+
+    // Parse like "table_%Y_%m"(include pattern or not) format using Java is difficult. So use jRuby.
+    public String generateTableName(String tableName)
+    {
+        ScriptingContainer jruby = new ScriptingContainer();
+        Object result = jruby.runScriptlet("Time.now.strftime('" + tableName + "')");
+
+        return result.toString();
+    }
 }
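The `generateTableName()` method added above delegates to JRuby rather than reimplementing Ruby's `Time#strftime` in Java. A standalone sketch of the same evaluation, assuming `org.jruby.embed.ScriptingContainer` is available on the classpath (Embulk itself runs on JRuby, so the plugin gets it for free):

```java
import org.jruby.embed.ScriptingContainer;

public class TableNameSketch
{
    public static void main(String[] args)
    {
        // Evaluate the table name through Ruby's Time#strftime, as the plugin does.
        ScriptingContainer jruby = new ScriptingContainer();
        Object resolved = jruby.runScriptlet("Time.now.strftime('table_%Y_%m')");
        System.out.println(resolved); // e.g. table_2015_04
    }
}
```

A table name with no `%` directives passes through `strftime` unchanged, which is why the plugin can apply it unconditionally to whatever `table` is configured.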
data/src/main/java/org/embulk/output/BigqueryWriter.java
CHANGED
@@ -1,6 +1,11 @@
 package org.embulk.output;
 
+import java.io.File;
 import java.io.IOException;
+import java.io.FileNotFoundException;
+import java.io.FileInputStream;
+import java.io.BufferedInputStream;
+import com.google.api.client.http.InputStreamContent;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Iterator;
@@ -11,14 +16,19 @@ import java.util.concurrent.TimeoutException;
 import org.apache.commons.lang3.StringUtils;
 import com.google.common.base.Optional;
 import com.google.common.collect.ImmutableSet;
+import com.google.common.base.Throwables;
 import java.security.GeneralSecurityException;
 
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.core.type.TypeReference;
+
 import org.embulk.spi.Exec;
 import org.slf4j.Logger;
 
 import com.google.api.services.bigquery.Bigquery;
 import com.google.api.services.bigquery.BigqueryScopes;
 import com.google.api.services.bigquery.Bigquery.Datasets;
+import com.google.api.services.bigquery.Bigquery.Tables;
 import com.google.api.services.bigquery.Bigquery.Jobs.Insert;
 import com.google.api.services.bigquery.Bigquery.Jobs.GetQueryResults;
 import com.google.api.services.bigquery.model.Job;
@@ -28,11 +38,19 @@ import com.google.api.services.bigquery.model.JobStatus;
 import com.google.api.services.bigquery.model.JobStatistics;
 import com.google.api.services.bigquery.model.JobReference;
 import com.google.api.services.bigquery.model.DatasetList;
+import com.google.api.services.bigquery.model.Table;
+import com.google.api.services.bigquery.model.TableList;
 import com.google.api.services.bigquery.model.TableSchema;
 import com.google.api.services.bigquery.model.TableReference;
 import com.google.api.services.bigquery.model.TableFieldSchema;
 import com.google.api.services.bigquery.model.TableCell;
 import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.ErrorProto;
+import com.google.api.client.googleapis.json.GoogleJsonResponseException;
+
+import com.google.api.client.googleapis.media.MediaHttpUploader;
+import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener;
+import com.google.api.client.googleapis.media.MediaHttpUploader.UploadState;
 
 public class BigqueryWriter
 {
@@ -43,43 +61,58 @@ public class BigqueryWriter
     private final String table;
     private final boolean autoCreateTable;
     private final Optional<String> schemaPath;
-    private final
+    private final TableSchema tableSchema;
     private final String sourceFormat;
     private final String fieldDelimiter;
     private final int maxBadrecords;
+    private final String encoding;
     private final long jobStatusMaxPollingTime;
     private final long jobStatusPollingInterval;
     private final boolean isSkipJobResultCheck;
     private final Bigquery bigQueryClient;
-    private final EmbulkBigqueryTask writerTask;
 
-    public BigqueryWriter(Builder builder) throws IOException, GeneralSecurityException
+    public BigqueryWriter(Builder builder) throws FileNotFoundException, IOException, GeneralSecurityException
     {
         this.project = builder.project;
         this.dataset = builder.dataset;
         this.table = builder.table;
         this.autoCreateTable = builder.autoCreateTable;
         this.schemaPath = builder.schemaPath;
-        this.bucket = builder.bucket;
         this.sourceFormat = builder.sourceFormat.toUpperCase();
         this.fieldDelimiter = builder.fieldDelimiter;
         this.maxBadrecords = builder.maxBadrecords;
+        this.encoding = builder.encoding.toUpperCase();
         this.jobStatusMaxPollingTime = builder.jobStatusMaxPollingTime;
         this.jobStatusPollingInterval = builder.jobStatusPollingInterval;
         this.isSkipJobResultCheck = builder.isSkipJobResultCheck;
 
         BigqueryAuthentication auth = new BigqueryAuthentication(builder.serviceAccountEmail, builder.p12KeyFilePath, builder.applicationName);
         this.bigQueryClient = auth.getBigqueryClient();
-
+
+        checkConfig();
+        if (autoCreateTable) {
+            this.tableSchema = createTableSchema(builder.schemaPath);
+        } else {
+            this.tableSchema = null;
+        }
     }
 
     private String getJobStatus(JobReference jobRef) throws JobFailedException
     {
         try {
             Job job = bigQueryClient.jobs().get(project, jobRef.getJobId()).execute();
-
-
+
+            ErrorProto fatalError = job.getStatus().getErrorResult();
+            if (fatalError != null) {
+                throw new JobFailedException(String.format("Job failed. job id:[%s] reason:[%s][%s] status:[FAILED]", jobRef.getJobId(), fatalError.getReason(), fatalError.getMessage()));
+            }
+            List<ErrorProto> errors = job.getStatus().getErrors();
+            if (errors != null) {
+                for (ErrorProto error : errors) {
+                    log.warn(String.format("Error: job id:[%s] reason[%s][%s] location:[%s]", jobRef.getJobId(), error.getReason(), error.getMessage(), error.getLocation()));
+                }
             }
+
             String jobStatus = job.getStatus().getState();
             if (jobStatus.equals("DONE")) {
                 JobStatistics statistics = job.getStatistics();
@@ -117,59 +150,68 @@ public class BigqueryWriter
         }
     }
 
-    public void
-    {
-        // TODO: refactor
-        ArrayList<ArrayList<HashMap<String, String>>> taskList = writerTask.createJobList();
-        for (ArrayList<HashMap<String, String>> task : taskList) {
-            Job job = createJob(task);
-            // TODO: multi-threading
-            new EmbulkBigqueryJob(job).call();
-        }
-    }
-
-    private Job createJob(ArrayList<HashMap<String, String>> task)
+    public void executeLoad(String localFilePath) throws GoogleJsonResponseException, IOException, TimeoutException, JobFailedException
     {
         log.info(String.format("Job preparing... project:%s dataset:%s table:%s", project, dataset, table));
 
         Job job = new Job();
+        JobReference jobRef = null;
         JobConfiguration jobConfig = new JobConfiguration();
         JobConfigurationLoad loadConfig = new JobConfigurationLoad();
         jobConfig.setLoad(loadConfig);
         job.setConfiguration(jobConfig);
 
         loadConfig.setAllowQuotedNewlines(false);
+        loadConfig.setEncoding(encoding);
+        loadConfig.setMaxBadRecords(maxBadrecords);
         if (sourceFormat.equals("NEWLINE_DELIMITED_JSON")) {
             loadConfig.setSourceFormat("NEWLINE_DELIMITED_JSON");
         } else {
             loadConfig.setFieldDelimiter(fieldDelimiter);
         }
+        loadConfig.setWriteDisposition("WRITE_APPEND");
        if (autoCreateTable) {
-            loadConfig.setSchema(
-            loadConfig.setWriteDisposition("WRITE_EMPTY");
+            loadConfig.setSchema(tableSchema);
             loadConfig.setCreateDisposition("CREATE_IF_NEEDED");
-            log.info(String.format("table:[%s] will be create
+            log.info(String.format("table:[%s] will be create if not exists", table));
         } else {
-            loadConfig.setWriteDisposition("WRITE_APPEND");
             loadConfig.setCreateDisposition("CREATE_NEVER");
         }
-        loadConfig.setMaxBadRecords(maxBadrecords);
 
-
-
-
-
-
-
-
-        }
-        loadConfig.setSourceUris(sources);
-        loadConfig.setDestinationTable(getTableReference());
+        loadConfig.setDestinationTable(createTableReference());
+
+        File file = new File(localFilePath);
+        InputStreamContent mediaContent = new InputStreamContent("application/octet-stream",
+                new BufferedInputStream(
+                        new FileInputStream(file)));
+        mediaContent.setLength(file.length());
 
-
+        Insert insert = bigQueryClient.jobs().insert(project, job, mediaContent);
+        insert.setProjectId(project);
+        insert.setDisableGZipContent(true);
+
+        // @see https://code.google.com/p/google-api-java-client/wiki/MediaUpload
+        UploadProgressListener listner = new UploadProgressListener();
+        listner.setFileName(localFilePath);
+        insert.getMediaHttpUploader()
+                .setProgressListener(listner)
+                .setDirectUploadEnabled(false);
+
+        try {
+            jobRef = insert.execute().getJobReference();
+        } catch (Exception ex) {
+            log.warn("Job execution was failed. Please check your settings or data... like data matches schema");
+            throw Throwables.propagate(ex);
+        }
+        log.info(String.format("Job executed. job id:[%s] file:[%s]", jobRef.getJobId(), localFilePath));
+        if (isSkipJobResultCheck) {
+            log.info(String.format("Skip job status check. job id:[%s]", jobRef.getJobId()));
+        } else {
+            getJobStatusUntilDone(jobRef);
+        }
     }
 
-    private TableReference
+    private TableReference createTableReference()
     {
         return new TableReference()
             .setProjectId(project)
@@ -177,135 +219,78 @@ public class BigqueryWriter
             .setTableId(table);
     }
 
-    private TableSchema
+    private TableSchema createTableSchema(Optional<String> schemaPath) throws FileNotFoundException, IOException
     {
-
-
-
-
-
-
-
-
-
-
+        String path = schemaPath.orNull();
+        File file = new File(path);
+        FileInputStream stream = null;
+        try {
+            stream = new FileInputStream(file);
+            ObjectMapper mapper = new ObjectMapper();
+            List<TableFieldSchema> fields = mapper.readValue(stream, new TypeReference<List<TableFieldSchema>>() {});
+            TableSchema tableSchema = new TableSchema().setFields(fields);
+            return tableSchema;
+        } finally {
+            if (stream != null) {
+                stream.close();
+            }
         }
-        */
-
-        tableSchema.setFields(fields);
-        return tableSchema;
     }
 
-
+    public boolean isExistTable(String tableName) throws IOException
     {
-
-
-
-
-
-        path = bucket + "/" + StringUtils.join(pathList) + "/" + fileName;
+        Tables tableRequest = bigQueryClient.tables();
+        try {
+            Table tableData = tableRequest.get(project, dataset, tableName).execute();
+        } catch (GoogleJsonResponseException ex) {
+            return false;
         }
-        return
-    }
-
-    public void addTask(Optional<String> remotePath, String fileName, long fileSize)
-    {
-        writerTask.addTaskFile(remotePath, fileName, fileSize);
-    }
-
-    public ArrayList<HashMap<String, String>> getFileList()
-    {
-        return writerTask.getFileList();
+        return true;
    }
 
-
+    public void checkConfig() throws FileNotFoundException, IOException
     {
-
-
-
-        {
-            this.job = job;
-        }
-
-        public Void call() throws IOException, TimeoutException, JobFailedException
-        {
-            Insert insert = bigQueryClient.jobs().insert(project, job);
-            insert.setProjectId(project);
-            JobReference jobRef = insert.execute().getJobReference();
-            log.info(String.format("Job executed. job id:[%s]", jobRef.getJobId()));
-            if (isSkipJobResultCheck) {
-                log.info(String.format("Skip job status check. job id:[%s]", jobRef.getJobId()));
+        if (autoCreateTable) {
+            if (!schemaPath.isPresent()) {
+                throw new IOException("schema_path is empty");
             } else {
-
+                File file = new File(schemaPath.orNull());
+                if (!file.exists()) {
+                    throw new FileNotFoundException("Can not load schema file.");
+                }
+            }
+        } else {
+            if (!isExistTable(table)) {
+                throw new IOException(String.format("table [%s] is not exists", table));
             }
-            return null;
         }
     }
 
-    private class
+    private class UploadProgressListener implements MediaHttpUploaderProgressListener
     {
-
-        private final long MAX_SIZE_PER_LOAD_JOB = 1000 * 1024 * 1024 * 1024L; // 1TB
-        private final int MAX_NUMBER_OF_FILES_PER_LOAD_JOB = 10000;
+        private String fileName;
 
-
-
-
-        public void addTaskFile(Optional<String> remotePath, String fileName, long fileSize)
+        @Override
+        public void progressChanged(MediaHttpUploader uploader) throws IOException
        {
-
-
-
-
-
-
-
-
-
-
-
-
-        {
-            long currentBundleSize = 0;
-            int currentFileCount = 0;
-            ArrayList<HashMap<String, String>> job = new ArrayList<HashMap<String, String>>();
-            for (HashMap<String, String> task : taskList) {
-                boolean isNeedNextJobList = false;
-                long fileSize = Long.valueOf(task.get("file_size")).longValue();
-
-                if (currentBundleSize + fileSize > MAX_SIZE_PER_LOAD_JOB) {
-                    isNeedNextJobList = true;
-                }
-
-                if (currentFileCount >= MAX_NUMBER_OF_FILES_PER_LOAD_JOB) {
-                    isNeedNextJobList = true;
-                }
-
-                if (isNeedNextJobList) {
-                    jobList.add(job);
-                    job = new ArrayList<HashMap<String, String>>();
-                    job.add(task);
-                    currentBundleSize = 0;
-                } else {
-                    job.add(task);
-                }
-                currentBundleSize += fileSize;
-                currentFileCount++;
-
-                log.debug(String.format("currentBundleSize:%s currentFileCount:%s", currentBundleSize, currentFileCount));
-                log.debug(String.format("fileSize:%s, MAX_SIZE_PER_LOAD_JOB:%s MAX_NUMBER_OF_FILES_PER_LOAD_JOB:%s",
-                        fileSize, MAX_SIZE_PER_LOAD_JOB, MAX_NUMBER_OF_FILES_PER_LOAD_JOB));
-
-            }
-            if (job.size() > 0) {
-                jobList.add(job);
+            switch (uploader.getUploadState()) {
+                case INITIATION_STARTED:
+                    log.info(String.format("Upload start [%s]", fileName));
+                    break;
+                case INITIATION_COMPLETE:
+                    //log.info(String.format("Upload initiation completed file [%s]", fileName));
+                    break;
+                case MEDIA_IN_PROGRESS:
+                    log.debug(String.format("Uploading [%s] progress %3.0f", fileName, uploader.getProgress() * 100) + "%");
+                    break;
+                case MEDIA_COMPLETE:
+                    log.info(String.format("Upload completed [%s]", fileName));
             }
-            return jobList;
         }
 
-        public
+        public void setFileName(String fileName)
        {
-
+            this.fileName = fileName;
        }
     }
 
@@ -319,10 +304,10 @@ public class BigqueryWriter
         private String table;
         private boolean autoCreateTable;
         private Optional<String> schemaPath;
-        private String bucket;
         private String sourceFormat;
         private String fieldDelimiter;
         private int maxBadrecords;
+        private String encoding;
         private int jobStatusMaxPollingTime;
         private int jobStatusPollingInterval;
         private boolean isSkipJobResultCheck;
@@ -375,12 +360,6 @@ public class BigqueryWriter
             return this;
         }
 
-        public Builder setBucket(String bucket)
-        {
-            this.bucket = bucket;
-            return this;
-        }
-
         public Builder setSourceFormat(String sourceFormat)
         {
             this.sourceFormat = sourceFormat;
@@ -399,6 +378,12 @@ public class BigqueryWriter
             return this;
         }
 
+        public Builder setEncoding(String encoding)
+        {
+            this.encoding = encoding;
+            return this;
+        }
+
         public Builder setJobStatusMaxPollingTime(int jobStatusMaxPollingTime)
         {
             this.jobStatusMaxPollingTime = jobStatusMaxPollingTime;
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-output-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - Satoshi Akama
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-
+date: 2015-04-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -56,19 +56,16 @@ files:
 - lib/embulk/output/bigquery.rb
 - settings.gradle
 - src/main/java/org/embulk/output/BigqueryAuthentication.java
-- src/main/java/org/embulk/output/BigqueryGcsWriter.java
 - src/main/java/org/embulk/output/BigqueryOutputPlugin.java
 - src/main/java/org/embulk/output/BigqueryWriter.java
 - src/test/java/org/embulk/output/TestBigqueryAuthentication.java
-- src/test/java/org/embulk/output/TestBigqueryGcsWriter.java
 - src/test/java/org/embulk/output/TestBigqueryOutputPlugin.java
 - src/test/java/org/embulk/output/TestBigqueryWriter.java
 - classpath/commons-codec-1.3.jar
 - classpath/commons-logging-1.1.1.jar
-- classpath/embulk-output-bigquery-0.1.1.jar
+- classpath/embulk-output-bigquery-0.1.2.jar
 - classpath/google-api-client-1.19.1.jar
 - classpath/google-api-services-bigquery-v2-rev193-1.19.1.jar
-- classpath/google-api-services-storage-v1-rev27-1.19.1.jar
 - classpath/google-http-client-1.19.0.jar
 - classpath/google-http-client-jackson2-1.19.0.jar
 - classpath/google-oauth-client-1.19.0.jar
@@ -76,7 +73,6 @@ files:
 - classpath/httpclient-4.0.1.jar
 - classpath/httpcore-4.0.1.jar
 - classpath/jsr305-1.3.9.jar
-- classpath/mime-util-2.1.3.jar
 homepage: https://github.com/sakama/embulk-output-bigquery
 licenses:
 - Apache-2.0
data/src/main/java/org/embulk/output/BigqueryGcsWriter.java
DELETED
@@ -1,201 +0,0 @@
-package org.embulk.output;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.IllegalFormatException;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
-import com.google.common.base.Optional;
-import com.google.common.collect.ImmutableList;
-//import eu.medsea.mimeutil.MimeType;
-//import eu.medsea.mimeutil.MimeUtil;
-//import eu.medsea.mimeutil.detector.MimeDetector;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.commons.codec.binary.Base64;
-import java.security.GeneralSecurityException;
-
-import org.embulk.spi.Exec;
-import org.slf4j.Logger;
-
-import com.google.api.services.storage.Storage;
-import com.google.api.services.storage.StorageScopes;
-import com.google.api.services.storage.model.Bucket;
-import com.google.api.services.storage.model.Objects;
-import com.google.api.services.storage.model.StorageObject;
-
-import com.google.api.client.http.InputStreamContent;
-
-public class BigqueryGcsWriter
-{
-
-    private final Logger log = Exec.getLogger(BigqueryGcsWriter.class);
-    private final String bucket;
-    private final String sourceFormat;
-    private final boolean isFileCompressed;
-    private final boolean deleteFromBucketWhenJobEnd;
-    private Storage storageClient;
-
-    public BigqueryGcsWriter(Builder builder) throws IOException, GeneralSecurityException
-    {
-        this.bucket = builder.bucket;
-        this.sourceFormat = builder.sourceFormat.toUpperCase();
-        this.isFileCompressed = builder.isFileCompressed;
-        this.deleteFromBucketWhenJobEnd = builder.deleteFromBucketWhenJobEnd;
-
-        BigqueryAuthentication auth = new BigqueryAuthentication(builder.serviceAccountEmail, builder.p12KeyFilePath, builder.applicationName);
-        this.storageClient = auth.getGcsClient();
-    }
-
-    public void uploadFile(String localFilePath, String fileName, Optional<String> remotePath) throws IOException
-    {
-        FileInputStream stream = null;
-
-        try {
-            String path;
-            if (remotePath.isPresent()) {
-                path = remotePath.get();
-            } else {
-                path = "";
-            }
-            String gcsPath = getRemotePath(path, fileName);
-            StorageObject objectMetadata = new StorageObject().setName(gcsPath);
-            log.info(String.format("Uploading file [%s] to [gs://%s/%s]", localFilePath, bucket, gcsPath));
-
-            File file = new File(localFilePath);
-            stream = new FileInputStream(file);
-            InputStreamContent content = new InputStreamContent(getContentType(), stream);
-            Storage.Objects.Insert insertObject = storageClient.objects().insert(bucket, objectMetadata, content);
-            insertObject.setDisableGZipContent(true);
-
-            StorageObject response = insertObject.execute();
-            log.info(String.format("Upload completed [%s] to [gs://%s/%s]", localFilePath, bucket, gcsPath));
-        } finally {
-            stream.close();
-        }
-    }
-
-    private String getRemotePath(String remotePath, String fileName)
-    {
-        if (remotePath.isEmpty()) {
-            return fileName;
-        }
-        String[] pathList = StringUtils.split(remotePath, '/');
-        String path = StringUtils.join(pathList) + "/";
-        if (!path.endsWith("/")) {
-            path = path + "/";
-        }
-        return path + fileName;
-    }
-
-    public void deleteFile(String remotePath, String fileName) throws IOException
-    {
-        String path = getRemotePath(remotePath, fileName);
-        storageClient.objects().delete(bucket, path).execute();
-        log.info(String.format("Delete remote file [gs://%s/%s]", bucket, path));
-    }
-
-    public boolean getDeleteFromBucketWhenJobEnd()
-    {
-        return this.deleteFromBucketWhenJobEnd;
-    }
-
-    private String getContentType()
-    {
-        if (isFileCompressed) {
-            return "application/x-gzip";
-        } else {
-            if (sourceFormat.equals("NEWLINE_DELIMITED_JSON)")) {
-                return "application/json";
-            } else {
-                return "text/csv";
-            }
-        }
-    }
-
-    /*
-    private void registerMimeDetector()
-    {
-        String mimeDetector = "eu.medsea.mimeutil.detector.MagicMimeMimeDetector";
-        MimeDetector registeredMimeDetector = MimeUtil.getMimeDetector(mimeDetector);
-        MimeUtil.registerMimeDetector(mimeDetector);
-    }
-
-    public String detectMimeType(File file)
-    {
-        try {
-            Collection<?> mimeTypes = MimeUtil.getMimeTypes(file);
-            if (!mimeTypes.isEmpty()) {
-                Iterator<?> iterator = mimeTypes.iterator();
-                MimeType mimeType = (MimeType) iterator.next();
-                return mimeType.getMediaType() + "/" + mimeType.getSubType();
-            }
-        } catch (Exception ex) {
-        }
-        return "application/octet-stream";
-    }
-    */
-
-    public static class Builder
-    {
-        private final String serviceAccountEmail;
-        private String p12KeyFilePath;
-        private String applicationName;
-        private String bucket;
-        private String sourceFormat;
-        private boolean isFileCompressed;
-        private boolean deleteFromBucketWhenJobEnd;
-        private boolean enableMd5hashCheck;
-
-        public Builder(String serviceAccountEmail)
-        {
-            this.serviceAccountEmail = serviceAccountEmail;
-        }
-
-        public Builder setP12KeyFilePath(String p12KeyFilePath)
-        {
-            this.p12KeyFilePath = p12KeyFilePath;
-            return this;
-        }
-
-        public Builder setApplicationName(String applicationName)
-        {
-            this.applicationName = applicationName;
-            return this;
-        }
-
-        public Builder setBucket(String bucket)
-        {
-            this.bucket = bucket;
-            return this;
-        }
-
-        public Builder setSourceFormat(String sourceFormat)
-        {
-            this.sourceFormat = sourceFormat;
-            return this;
-        }
-
-        public Builder setIsFileCompressed(boolean isFileCompressed)
-        {
-            this.isFileCompressed = isFileCompressed;
-            return this;
-        }
-
-        public Builder setDeleteFromBucketWhenJobEnd(boolean deleteFromBucketWhenJobEnd)
-        {
-            this.deleteFromBucketWhenJobEnd = deleteFromBucketWhenJobEnd;
-            return this;
-        }
-
-        public BigqueryGcsWriter build() throws IOException, GeneralSecurityException
-        {
-            return new BigqueryGcsWriter(this);
-        }
-    }
-}