embulk-input-bigquery_extract_files 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/config.yml +2 -1
- data/src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportGcsFileInputPlugin.java +51 -16
- data/src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportUtils.java +43 -16
- data/src/main/java/org/embulk/input/bigquery_export_gcs/PHASE.java +7 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 85c5df6d2dabf2727e267cabd33a2fd163e22942
|
4
|
+
data.tar.gz: 7b24ecc581b1b5c789ad4f33ab73303bc7c6ca8d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7b868edc052a4eec8cdd57fc5ae5899d25bb7fa65ab1289785e6c8e6f05e7d280a79e43ea608e31d9c883fc72776ea91b1a7cc1b381a0f4d898c651ebc203fae
|
7
|
+
data.tar.gz: 61c5c9616ca9b96ab7587144b25a16b7bdc59e34ebf9b6f38def677e080c8b7f5df26b75b725009df18cd35a96434d224a8644bba88510af7d307d429f1e0a54
|
data/build.gradle
CHANGED
data/config.yml
CHANGED
@@ -25,6 +25,7 @@ out:
|
|
25
25
|
port: 10315
|
26
26
|
database: dbname
|
27
27
|
table: test_table
|
28
|
-
|
28
|
+
# https://dev.mysql.com/doc/connector-j/5.1/en/connector-j-reference-configuration-properties.html
|
29
|
+
options: {connectTimeout: 0, enableQueryTimeouts: false, waitTimeout: 0}
|
29
30
|
mode: insert_direct
|
30
31
|
|
data/src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportGcsFileInputPlugin.java
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
package org.embulk.input.bigquery_export_gcs;
|
2
2
|
|
3
3
|
import java.io.File;
|
4
|
+
import java.io.FileNotFoundException;
|
4
5
|
import java.io.IOException;
|
5
6
|
import java.io.InputStream;
|
6
7
|
import java.nio.file.Path;
|
@@ -132,6 +133,15 @@ public class BigqueryExportGcsFileInputPlugin
|
|
132
133
|
@ConfigDefault("true")
|
133
134
|
public boolean getCleanupTempTable();
|
134
135
|
|
136
|
+
@Config("cleanup_gcs_before_executing")
|
137
|
+
@ConfigDefault("true")
|
138
|
+
public boolean getCleanupGcsBeforeExcuting();
|
139
|
+
|
140
|
+
|
141
|
+
@Config("start_phase")
|
142
|
+
@ConfigDefault("0")
|
143
|
+
public int getStartPhase();
|
144
|
+
|
135
145
|
public List<String> getFiles();
|
136
146
|
public void setFiles(List<String> files);
|
137
147
|
|
@@ -156,7 +166,7 @@ public class BigqueryExportGcsFileInputPlugin
|
|
156
166
|
//public Schema getSchemaConfig();
|
157
167
|
//public void setSchameConfig(SchemaConfig schema);
|
158
168
|
}
|
159
|
-
|
169
|
+
|
160
170
|
@Override
|
161
171
|
public ConfigDiff guess(ConfigSource execConfig, ConfigSource inputConfig) {
|
162
172
|
|
@@ -192,20 +202,39 @@ public class BigqueryExportGcsFileInputPlugin
|
|
192
202
|
|
193
203
|
public void executeBigqueryApi(PluginTask task) {
|
194
204
|
|
205
|
+
log.info("[0] Initialize Settings ... ");
|
206
|
+
|
195
207
|
BigqueryExportUtils.parseGcsUri(task);
|
196
208
|
|
197
|
-
|
209
|
+
if(task.getCleanupGcsBeforeExcuting()){
|
210
|
+
log.info("clean up before executing. delete all file in : {}",task.getGcsUri());
|
211
|
+
BigqueryExportUtils.removeGcsFilesBeforeExecuting(task);
|
212
|
+
}
|
213
|
+
|
214
|
+
PHASE phase = BigqueryExportUtils.initTask(task);
|
215
|
+
log.info("Configuration : {}",task.toString());
|
198
216
|
|
217
|
+
Bigquery bigquery = BigqueryExportUtils.newBigqueryClient(task);
|
218
|
+
|
219
|
+
if(phase == PHASE.QUERY){
|
220
|
+
log.info("[1] Query to Table");
|
221
|
+
extractQueryToTable(bigquery, task);
|
222
|
+
|
223
|
+
}
|
224
|
+
log.info("[2] Table to GCS");
|
225
|
+
Schema schema = extractTableToGcs(bigquery, task);
|
199
226
|
log.info("Schema : {}",schema.toString());
|
200
227
|
|
228
|
+
log.info("[3] Write Schema ");
|
201
229
|
writeSchemaFileIfSpecified(schema, task);
|
202
230
|
|
231
|
+
log.info("[4] read file list in gcs ");
|
203
232
|
List<String> files = listFilesOfGcs(task);
|
204
233
|
|
205
234
|
task.setFiles(files);
|
206
235
|
|
207
236
|
}
|
208
|
-
|
237
|
+
|
209
238
|
public void writeSchemaFileIfSpecified(Schema schema, PluginTask task) {
|
210
239
|
if(task.getTempSchemaFilePath().isPresent()) {
|
211
240
|
log.info("generate temp {} schema file to ... {}", task.getTempSchemaFileType().or(""), task.getTempSchemaFilePath().orNull());
|
@@ -213,16 +242,22 @@ public class BigqueryExportGcsFileInputPlugin
|
|
213
242
|
}
|
214
243
|
}
|
215
244
|
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
245
|
+
public void extractQueryToTable(Bigquery bigquery, PluginTask task){
|
246
|
+
try {
|
247
|
+
BigqueryExportUtils.executeQueryToDestinationWorkTable(bigquery, task);
|
248
|
+
} catch (IOException e) {
|
249
|
+
log.error("bigquery io error",e);
|
250
|
+
throw new RuntimeIOException(e);
|
251
|
+
} catch (InterruptedException e) {
|
252
|
+
log.error("bigquery job error",e);
|
253
|
+
throw new RuntimeException(e);
|
254
|
+
}
|
255
|
+
}
|
256
|
+
|
257
|
+
public Schema extractTableToGcs(Bigquery bigquery, PluginTask task){
|
258
|
+
try {
|
223
259
|
// extract table and get schema
|
224
260
|
Schema schema = BigqueryExportUtils.extractWorkTable(bigquery, task);
|
225
|
-
|
226
261
|
return schema;
|
227
262
|
} catch (IOException e) {
|
228
263
|
log.error("bigquery io error",e);
|
@@ -231,11 +266,12 @@ public class BigqueryExportGcsFileInputPlugin
|
|
231
266
|
log.error("bigquery job error",e);
|
232
267
|
throw new RuntimeException(e);
|
233
268
|
}
|
234
|
-
|
235
|
-
|
269
|
+
}
|
270
|
+
|
271
|
+
// usually, you have an method to create list of files
|
236
272
|
List<String> listFilesOfGcs(PluginTask task)
|
237
273
|
{
|
238
|
-
log.info("get file list in to gcs of ... {}.{} -> gs://{}/{}", task.
|
274
|
+
log.info("get file list in to gcs of ... {}.{} -> gs://{}/{}", task.getWorkDataset(), task.getWorkTable(),task.getGcsBucket(),task.getGcsBlobNamePrefix());
|
239
275
|
|
240
276
|
try {
|
241
277
|
return BigqueryExportUtils.getFileListFromGcs(task);
|
@@ -298,8 +334,7 @@ public class BigqueryExportGcsFileInputPlugin
|
|
298
334
|
p.toFile().delete();
|
299
335
|
|
300
336
|
if(task.getCleanupGcsTempFile()){
|
301
|
-
|
302
|
-
log.info("delete temp gcs file... {} ... not now... ", file);
|
337
|
+
BigqueryExportUtils.removeTempGcsFiles(task, file);
|
303
338
|
}
|
304
339
|
|
305
340
|
//
|
data/src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportUtils.java
CHANGED
@@ -25,6 +25,7 @@ import org.slf4j.Logger;
|
|
25
25
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
26
26
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
27
27
|
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
|
28
|
+
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
|
28
29
|
import com.google.api.client.http.HttpTransport;
|
29
30
|
import com.google.api.client.http.javanet.NetHttpTransport;
|
30
31
|
import com.google.api.client.json.JsonFactory;
|
@@ -52,6 +53,8 @@ import com.google.common.base.Optional;
|
|
52
53
|
import com.google.common.collect.ImmutableList;
|
53
54
|
import com.google.common.collect.Lists;
|
54
55
|
|
56
|
+
import io.airlift.slice.RuntimeIOException;
|
57
|
+
|
55
58
|
/**
|
56
59
|
*
|
57
60
|
*
|
@@ -108,7 +111,9 @@ public class BigqueryExportUtils
|
|
108
111
|
|
109
112
|
public static void executeQueryToDestinationWorkTable(Bigquery bigquery, PluginTask task) throws IOException, InterruptedException {
|
110
113
|
|
111
|
-
|
114
|
+
log.info("execute Query to Table ");
|
115
|
+
log.info("# Query # {}",task.getQuery().get());
|
116
|
+
log.info("# Table # {}.{} ",task.getWorkDataset(), task.getWorkTable());
|
112
117
|
|
113
118
|
JobConfigurationQuery queryConfig = new JobConfigurationQuery();
|
114
119
|
queryConfig.setQuery(task.getQuery().get());
|
@@ -130,8 +135,7 @@ public class BigqueryExportUtils
|
|
130
135
|
JobReference jobRef = jobRes.getJobReference();
|
131
136
|
String jobId = jobRef.getJobId();
|
132
137
|
|
133
|
-
log.info("query to Table jobId : {}",jobId);
|
134
|
-
log.info("waiting for job end....... ");
|
138
|
+
log.info("query to Table jobId : {} : waiting for job end...",jobId);
|
135
139
|
|
136
140
|
Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getBigqueryJobWaitingSecond().get());
|
137
141
|
|
@@ -158,10 +162,15 @@ public class BigqueryExportUtils
|
|
158
162
|
* @throws IOException
|
159
163
|
* @throws FileNotFoundException
|
160
164
|
*/
|
161
|
-
public static Bigquery newBigqueryClient(PluginTask task)
|
165
|
+
public static Bigquery newBigqueryClient(PluginTask task){
|
162
166
|
log.debug("# Starting Google BigQuery API ... ");
|
163
|
-
|
164
|
-
|
167
|
+
try {
|
168
|
+
GoogleCredentialSet set = googleCredential(task);
|
169
|
+
return new Bigquery.Builder(set.transport, set.jsonFactory, set.googleCredential).setApplicationName("embulk-input-bigquey-export-gcs").build();
|
170
|
+
} catch (Exception e) {
|
171
|
+
throw new RuntimeException("bigquery connect fail",e);
|
172
|
+
}
|
173
|
+
|
165
174
|
}
|
166
175
|
|
167
176
|
public static Storage newGcsClient(PluginTask task) throws FileNotFoundException, IOException{
|
@@ -244,7 +253,7 @@ public class BigqueryExportUtils
|
|
244
253
|
return builder.build();
|
245
254
|
}
|
246
255
|
|
247
|
-
public static
|
256
|
+
public static PHASE initTask(PluginTask task) {
|
248
257
|
|
249
258
|
if(task.getQuery().isPresent()){
|
250
259
|
task.setWorkId(generateTempTableName(task.getQuery().get()));
|
@@ -259,17 +268,18 @@ public class BigqueryExportUtils
|
|
259
268
|
// actual target table setting
|
260
269
|
task.setWorkDataset(task.getTempDataset().get());
|
261
270
|
task.setWorkTable(task.getTempTable().get());
|
262
|
-
|
263
|
-
// call google api
|
264
|
-
executeQueryToDestinationWorkTable(bigquery, task);
|
265
271
|
|
272
|
+
return PHASE.QUERY;
|
266
273
|
}else if(task.getTable().isPresent() && task.getDataset().isPresent()){
|
267
274
|
task.setWorkId(generateTempTableName(null, task.getTable().get()));
|
268
275
|
// actual target table setting
|
269
276
|
task.setWorkDataset(task.getDataset().get());
|
270
277
|
task.setWorkTable(task.getTable().get());
|
278
|
+
|
279
|
+
return PHASE.TABLE;
|
280
|
+
|
271
281
|
}else{
|
272
|
-
throw new
|
282
|
+
throw new RuntimeException("please set config file [dataset]+[table] or [query]");
|
273
283
|
}
|
274
284
|
}
|
275
285
|
|
@@ -301,7 +311,10 @@ public class BigqueryExportUtils
|
|
301
311
|
log.info("extract jobId : {}",jobId);
|
302
312
|
log.debug("waiting for job end....... ");
|
303
313
|
|
304
|
-
waitForJob(bigquery, task.getProject(), jobId, task.getBigqueryJobWaitingSecond().get());
|
314
|
+
Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getBigqueryJobWaitingSecond().get());
|
315
|
+
|
316
|
+
log.info("table extract result : {}",lastJob.toPrettyString());
|
317
|
+
|
305
318
|
return embulkSchema;
|
306
319
|
}
|
307
320
|
|
@@ -437,14 +450,28 @@ public class BigqueryExportUtils
|
|
437
450
|
}
|
438
451
|
}
|
439
452
|
|
453
|
+
public static void removeGcsFilesBeforeExecuting(PluginTask task){
|
454
|
+
try {
|
455
|
+
Storage gcs = BigqueryExportUtils.newGcsClient(task);
|
456
|
+
gcs.objects().delete(task.getGcsBucket(), task.getGcsBlobNamePrefix()).execute();
|
457
|
+
} catch (GoogleJsonResponseException e) {
|
458
|
+
if(e.getStatusCode() == 404){
|
459
|
+
log.info("file not found in gs://{}/{} :: it's ok ",task.getGcsBucket(), task.getGcsBlobNamePrefix());
|
460
|
+
}else{
|
461
|
+
throw new RuntimeException("# Remove GCS files gs://" + task.getGcsBucket() + "/" + task.getGcsBlobNamePrefix(),e);
|
462
|
+
}
|
463
|
+
} catch (Exception e) {
|
464
|
+
throw new RuntimeException("# Remove GCS files gs://" + task.getGcsBucket() + "/" + task.getGcsBlobNamePrefix(),e);
|
465
|
+
}
|
466
|
+
}
|
467
|
+
|
440
468
|
public static void removeTempGcsFiles(PluginTask task, String file){
|
441
469
|
try {
|
442
470
|
Storage gcs = BigqueryExportUtils.newGcsClient(task);
|
443
|
-
|
444
|
-
|
445
|
-
|
471
|
+
log.info("delete finish file gs://{}{}", task.getGcsBucket(), file);
|
472
|
+
gcs.objects().delete(task.getGcsBucket(), file).execute();
|
446
473
|
} catch (Exception e) {
|
447
|
-
log.error("# Remove temp
|
474
|
+
log.error("# Remove temp gcs file FAIL : " + file,e);
|
448
475
|
}
|
449
476
|
}
|
450
477
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-bigquery_extract_files
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jo8937
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-11-
|
11
|
+
date: 2017-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -59,12 +59,13 @@ files:
|
|
59
59
|
- lib/embulk/input/bigquery_extract_files.rb
|
60
60
|
- src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportGcsFileInputPlugin.java
|
61
61
|
- src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportUtils.java
|
62
|
+
- src/main/java/org/embulk/input/bigquery_export_gcs/PHASE.java
|
62
63
|
- src/test/java/org/embulk/input/bigquery_export_gcs/TestGoogleCloudAccessData.java
|
63
64
|
- src/test/java/org/embulk/input/bigquery_export_gcs/TestPluginFunctions.java
|
64
65
|
- src/test/java/org/embulk/input/bigquery_export_gcs/UnitTestInitializer.java
|
65
66
|
- classpath/commons-codec-1.3.jar
|
66
67
|
- classpath/commons-logging-1.1.1.jar
|
67
|
-
- classpath/embulk-input-bigquery_extract_files-0.0.4.jar
|
68
|
+
- classpath/embulk-input-bigquery_extract_files-0.0.5.jar
|
68
69
|
- classpath/google-api-client-1.23.0.jar
|
69
70
|
- classpath/google-api-services-bigquery-v2-rev363-1.23.0.jar
|
70
71
|
- classpath/google-api-services-storage-v1-rev59-1.21.0.jar
|