embulk-input-bigquery_extract_files 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 23ea87433e2ed5453de192fc5ea73c4e1b7499b6
4
- data.tar.gz: f28b4f9530974d7f2614392f951823af1213ed4a
3
+ metadata.gz: 85c5df6d2dabf2727e267cabd33a2fd163e22942
4
+ data.tar.gz: 7b24ecc581b1b5c789ad4f33ab73303bc7c6ca8d
5
5
  SHA512:
6
- metadata.gz: 54384d90f6fab9406c1d56aa4f8eff7fb3ed78aff8564bf910f8aa36610447bfc878e4f025a8a466e4e14afdca0457a5ca087e8cb62151744b786635179bfd8c
7
- data.tar.gz: 775aff297708f182088b36ad48664d70ae0a4a411184ba6ae47cf719651c32b9ef582f3037b1d75d6dac7288511cc9420179d02d052553a50748f404bcbebda3
6
+ metadata.gz: 7b868edc052a4eec8cdd57fc5ae5899d25bb7fa65ab1289785e6c8e6f05e7d280a79e43ea608e31d9c883fc72776ea91b1a7cc1b381a0f4d898c651ebc203fae
7
+ data.tar.gz: 61c5c9616ca9b96ab7587144b25a16b7bdc59e34ebf9b6f38def677e080c8b7f5df26b75b725009df18cd35a96434d224a8644bba88510af7d307d429f1e0a54
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.0.4"
16
+ version = "0.0.5"
17
17
 
18
18
  sourceCompatibility = 1.7
19
19
  targetCompatibility = 1.7
data/config.yml CHANGED
@@ -25,6 +25,7 @@ out:
25
25
  port: 10315
26
26
  database: dbname
27
27
  table: test_table
28
- options: {connectTimeout: 200000}
28
+ # https://dev.mysql.com/doc/connector-j/5.1/en/connector-j-reference-configuration-properties.html
29
+ options: {connectTimeout: 0, enableQueryTimeouts: false, waitTimeout: 0}
29
30
  mode: insert_direct
30
31
 
@@ -1,6 +1,7 @@
1
1
  package org.embulk.input.bigquery_export_gcs;
2
2
 
3
3
  import java.io.File;
4
+ import java.io.FileNotFoundException;
4
5
  import java.io.IOException;
5
6
  import java.io.InputStream;
6
7
  import java.nio.file.Path;
@@ -132,6 +133,15 @@ public class BigqueryExportGcsFileInputPlugin
132
133
  @ConfigDefault("true")
133
134
  public boolean getCleanupTempTable();
134
135
 
136
+ @Config("cleanup_gcs_before_executing")
137
+ @ConfigDefault("true")
138
+ public boolean getCleanupGcsBeforeExcuting();
139
+
140
+
141
+ @Config("start_phase")
142
+ @ConfigDefault("0")
143
+ public int getStartPhase();
144
+
135
145
  public List<String> getFiles();
136
146
  public void setFiles(List<String> files);
137
147
 
@@ -156,7 +166,7 @@ public class BigqueryExportGcsFileInputPlugin
156
166
  //public Schema getSchemaConfig();
157
167
  //public void setSchameConfig(SchemaConfig schema);
158
168
  }
159
-
169
+
160
170
  @Override
161
171
  public ConfigDiff guess(ConfigSource execConfig, ConfigSource inputConfig) {
162
172
 
@@ -192,20 +202,39 @@ public class BigqueryExportGcsFileInputPlugin
192
202
 
193
203
  public void executeBigqueryApi(PluginTask task) {
194
204
 
205
+ log.info("[0] Initialize Settings ... ");
206
+
195
207
  BigqueryExportUtils.parseGcsUri(task);
196
208
 
197
- Schema schema = extractBigqueryToGcs(task);
209
+ if(task.getCleanupGcsBeforeExcuting()){
210
+ log.info("clean up before executing. delete all file in : {}",task.getGcsUri());
211
+ BigqueryExportUtils.removeGcsFilesBeforeExecuting(task);
212
+ }
213
+
214
+ PHASE phase = BigqueryExportUtils.initTask(task);
215
+ log.info("Configuration : {}",task.toString());
198
216
 
217
+ Bigquery bigquery = BigqueryExportUtils.newBigqueryClient(task);
218
+
219
+ if(phase == PHASE.QUERY){
220
+ log.info("[1] Query to Table");
221
+ extractQueryToTable(bigquery, task);
222
+
223
+ }
224
+ log.info("[2] Table to GCS");
225
+ Schema schema = extractTableToGcs(bigquery, task);
199
226
  log.info("Schema : {}",schema.toString());
200
227
 
228
+ log.info("[3] Write Schema ");
201
229
  writeSchemaFileIfSpecified(schema, task);
202
230
 
231
+ log.info("[4] read file list in gcs ");
203
232
  List<String> files = listFilesOfGcs(task);
204
233
 
205
234
  task.setFiles(files);
206
235
 
207
236
  }
208
-
237
+
209
238
  public void writeSchemaFileIfSpecified(Schema schema, PluginTask task) {
210
239
  if(task.getTempSchemaFilePath().isPresent()) {
211
240
  log.info("generate temp {} schema file to ... {}", task.getTempSchemaFileType().or(""), task.getTempSchemaFilePath().orNull());
@@ -213,16 +242,22 @@ public class BigqueryExportGcsFileInputPlugin
213
242
  }
214
243
  }
215
244
 
216
- public Schema extractBigqueryToGcs(PluginTask task){
217
- try {
218
- Bigquery bigquery = BigqueryExportUtils.newBigqueryClient(task);
219
-
220
- // query init or execute query
221
- BigqueryExportUtils.initWorkTableWithExecuteQuery(bigquery,task);
222
-
245
+ public void extractQueryToTable(Bigquery bigquery, PluginTask task){
246
+ try {
247
+ BigqueryExportUtils.executeQueryToDestinationWorkTable(bigquery, task);
248
+ } catch (IOException e) {
249
+ log.error("bigquery io error",e);
250
+ throw new RuntimeIOException(e);
251
+ } catch (InterruptedException e) {
252
+ log.error("bigquery job error",e);
253
+ throw new RuntimeException(e);
254
+ }
255
+ }
256
+
257
+ public Schema extractTableToGcs(Bigquery bigquery, PluginTask task){
258
+ try {
223
259
  // extract table and get schema
224
260
  Schema schema = BigqueryExportUtils.extractWorkTable(bigquery, task);
225
-
226
261
  return schema;
227
262
  } catch (IOException e) {
228
263
  log.error("bigquery io error",e);
@@ -231,11 +266,12 @@ public class BigqueryExportGcsFileInputPlugin
231
266
  log.error("bigquery job error",e);
232
267
  throw new RuntimeException(e);
233
268
  }
234
- }
235
- // usually, you have an method to create list of files
269
+ }
270
+
271
+ // usually, you have an method to create list of files
236
272
  List<String> listFilesOfGcs(PluginTask task)
237
273
  {
238
- log.info("get file list in to gcs of ... {}.{} -> gs://{}/{}", task.getDataset(), task.getWorkTable(),task.getGcsBucket(),task.getGcsBlobNamePrefix());
274
+ log.info("get file list in to gcs of ... {}.{} -> gs://{}/{}", task.getWorkDataset(), task.getWorkTable(),task.getGcsBucket(),task.getGcsBlobNamePrefix());
239
275
 
240
276
  try {
241
277
  return BigqueryExportUtils.getFileListFromGcs(task);
@@ -298,8 +334,7 @@ public class BigqueryExportGcsFileInputPlugin
298
334
  p.toFile().delete();
299
335
 
300
336
  if(task.getCleanupGcsTempFile()){
301
- //TODO : delete temp file in gcs
302
- log.info("delete temp gcs file... {} ... not now... ", file);
337
+ BigqueryExportUtils.removeTempGcsFiles(task, file);
303
338
  }
304
339
 
305
340
  //
@@ -25,6 +25,7 @@ import org.slf4j.Logger;
25
25
  import com.fasterxml.jackson.core.JsonProcessingException;
26
26
  import com.fasterxml.jackson.databind.ObjectMapper;
27
27
  import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
28
+ import com.google.api.client.googleapis.json.GoogleJsonResponseException;
28
29
  import com.google.api.client.http.HttpTransport;
29
30
  import com.google.api.client.http.javanet.NetHttpTransport;
30
31
  import com.google.api.client.json.JsonFactory;
@@ -52,6 +53,8 @@ import com.google.common.base.Optional;
52
53
  import com.google.common.collect.ImmutableList;
53
54
  import com.google.common.collect.Lists;
54
55
 
56
+ import io.airlift.slice.RuntimeIOException;
57
+
55
58
  /**
56
59
  *
57
60
  *
@@ -108,7 +111,9 @@ public class BigqueryExportUtils
108
111
 
109
112
  public static void executeQueryToDestinationWorkTable(Bigquery bigquery, PluginTask task) throws IOException, InterruptedException {
110
113
 
111
- log.info("extract query result {} => {}.{} ",task.getQuery().get(), task.getWorkDataset(), task.getWorkTable());
114
+ log.info("execute Query to Table ");
115
+ log.info("# Query # {}",task.getQuery().get());
116
+ log.info("# Table # {}.{} ",task.getWorkDataset(), task.getWorkTable());
112
117
 
113
118
  JobConfigurationQuery queryConfig = new JobConfigurationQuery();
114
119
  queryConfig.setQuery(task.getQuery().get());
@@ -130,8 +135,7 @@ public class BigqueryExportUtils
130
135
  JobReference jobRef = jobRes.getJobReference();
131
136
  String jobId = jobRef.getJobId();
132
137
 
133
- log.info("query to Table jobId : {}",jobId);
134
- log.info("waiting for job end....... ");
138
+ log.info("query to Table jobId : {} : waiting for job end...",jobId);
135
139
 
136
140
  Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getBigqueryJobWaitingSecond().get());
137
141
 
@@ -158,10 +162,15 @@ public class BigqueryExportUtils
158
162
  * @throws IOException
159
163
  * @throws FileNotFoundException
160
164
  */
161
- public static Bigquery newBigqueryClient(PluginTask task) throws FileNotFoundException, IOException{
165
+ public static Bigquery newBigqueryClient(PluginTask task){
162
166
  log.debug("# Starting Google BigQuery API ... ");
163
- GoogleCredentialSet set = googleCredential(task);
164
- return new Bigquery.Builder(set.transport, set.jsonFactory, set.googleCredential).setApplicationName("embulk-input-bigquey-export-gcs").build();
167
+ try {
168
+ GoogleCredentialSet set = googleCredential(task);
169
+ return new Bigquery.Builder(set.transport, set.jsonFactory, set.googleCredential).setApplicationName("embulk-input-bigquey-export-gcs").build();
170
+ } catch (Exception e) {
171
+ throw new RuntimeException("bigquery connect fail",e);
172
+ }
173
+
165
174
  }
166
175
 
167
176
  public static Storage newGcsClient(PluginTask task) throws FileNotFoundException, IOException{
@@ -244,7 +253,7 @@ public class BigqueryExportUtils
244
253
  return builder.build();
245
254
  }
246
255
 
247
- public static void initWorkTableWithExecuteQuery(Bigquery bigquery, PluginTask task) throws FileNotFoundException, IOException, InterruptedException{
256
+ public static PHASE initTask(PluginTask task) {
248
257
 
249
258
  if(task.getQuery().isPresent()){
250
259
  task.setWorkId(generateTempTableName(task.getQuery().get()));
@@ -259,17 +268,18 @@ public class BigqueryExportUtils
259
268
  // actual target table setting
260
269
  task.setWorkDataset(task.getTempDataset().get());
261
270
  task.setWorkTable(task.getTempTable().get());
262
-
263
- // call google api
264
- executeQueryToDestinationWorkTable(bigquery, task);
265
271
 
272
+ return PHASE.QUERY;
266
273
  }else if(task.getTable().isPresent() && task.getDataset().isPresent()){
267
274
  task.setWorkId(generateTempTableName(null, task.getTable().get()));
268
275
  // actual target table setting
269
276
  task.setWorkDataset(task.getDataset().get());
270
277
  task.setWorkTable(task.getTable().get());
278
+
279
+ return PHASE.TABLE;
280
+
271
281
  }else{
272
- throw new IOException("please set config file [dataset]+[table] or [query]");
282
+ throw new RuntimeException("please set config file [dataset]+[table] or [query]");
273
283
  }
274
284
  }
275
285
 
@@ -301,7 +311,10 @@ public class BigqueryExportUtils
301
311
  log.info("extract jobId : {}",jobId);
302
312
  log.debug("waiting for job end....... ");
303
313
 
304
- waitForJob(bigquery, task.getProject(), jobId, task.getBigqueryJobWaitingSecond().get());
314
+ Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getBigqueryJobWaitingSecond().get());
315
+
316
+ log.info("table extract result : {}",lastJob.toPrettyString());
317
+
305
318
  return embulkSchema;
306
319
  }
307
320
 
@@ -437,14 +450,28 @@ public class BigqueryExportUtils
437
450
  }
438
451
  }
439
452
 
453
+ public static void removeGcsFilesBeforeExecuting(PluginTask task){
454
+ try {
455
+ Storage gcs = BigqueryExportUtils.newGcsClient(task);
456
+ gcs.objects().delete(task.getGcsBucket(), task.getGcsBlobNamePrefix()).execute();
457
+ } catch (GoogleJsonResponseException e) {
458
+ if(e.getStatusCode() == 404){
459
+ log.info("file not found in gs://{}/{} :: it's ok ",task.getGcsBucket(), task.getGcsBlobNamePrefix());
460
+ }else{
461
+ throw new RuntimeException("# Remove GCS files gs://" + task.getGcsBucket() + "/" + task.getGcsBlobNamePrefix(),e);
462
+ }
463
+ } catch (Exception e) {
464
+ throw new RuntimeException("# Remove GCS files gs://" + task.getGcsBucket() + "/" + task.getGcsBlobNamePrefix(),e);
465
+ }
466
+ }
467
+
440
468
  public static void removeTempGcsFiles(PluginTask task, String file){
441
469
  try {
442
470
  Storage gcs = BigqueryExportUtils.newGcsClient(task);
443
- Bucket bucket = gcs.buckets().get(task.getGcsBucket()).execute();
444
- List<String> fileList = task.getFiles();
445
-
471
+ log.info("delete finish file gs://{}{}", task.getGcsBucket(), file);
472
+ gcs.objects().delete(task.getGcsBucket(), file).execute();
446
473
  } catch (Exception e) {
447
- log.error("# Remove temp table FAIL : " + task.getTempDataset().orNull() + "." + task.getTempTable().orNull(),e);
474
+ log.error("# Remove temp gcs file FAIL : " + file,e);
448
475
  }
449
476
  }
450
477
  }
@@ -0,0 +1,7 @@
1
+ package org.embulk.input.bigquery_export_gcs;
2
+
3
+ public enum PHASE {
4
+ QUERY,
5
+ TABLE,
6
+ GCS
7
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-bigquery_extract_files
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - jo8937
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-11-20 00:00:00.000000000 Z
11
+ date: 2017-11-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -59,12 +59,13 @@ files:
59
59
  - lib/embulk/input/bigquery_extract_files.rb
60
60
  - src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportGcsFileInputPlugin.java
61
61
  - src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportUtils.java
62
+ - src/main/java/org/embulk/input/bigquery_export_gcs/PHASE.java
62
63
  - src/test/java/org/embulk/input/bigquery_export_gcs/TestGoogleCloudAccessData.java
63
64
  - src/test/java/org/embulk/input/bigquery_export_gcs/TestPluginFunctions.java
64
65
  - src/test/java/org/embulk/input/bigquery_export_gcs/UnitTestInitializer.java
65
66
  - classpath/commons-codec-1.3.jar
66
67
  - classpath/commons-logging-1.1.1.jar
67
- - classpath/embulk-input-bigquery_extract_files-0.0.4.jar
68
+ - classpath/embulk-input-bigquery_extract_files-0.0.5.jar
68
69
  - classpath/google-api-client-1.23.0.jar
69
70
  - classpath/google-api-services-bigquery-v2-rev363-1.23.0.jar
70
71
  - classpath/google-api-services-storage-v1-rev59-1.21.0.jar