embulk-input-bigquery_extract_files 0.0.13 → 0.0.14

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bdb3cb2064c1b19c0748adb981d2593fd30950af
4
- data.tar.gz: 4c54a7075d3b4877e0ad66d2e91125fbd2b1af61
3
+ metadata.gz: be7d0d070196d522edcb8bf289e5d5156acfef52
4
+ data.tar.gz: da58c00653e0a96db45475fc2c8f91012d620c2b
5
5
  SHA512:
6
- metadata.gz: 8e4c7646e18867ef61a6488151eed43bda4001b1935bcffc1d5cb5ef545eaf731f3aa94f3f663a07d8d9e363f60b407072dcfea4c41722c5cd8c4dcb807e48cf
7
- data.tar.gz: c5f6d6843afb3848714be3bb0ae9dc249ea68f40cf92db75911222c7d72adf0bd55f4b65c8cb1971a8eb357e8678425d0a06befc6d9fb7e0a6668c93da0c04a3
6
+ metadata.gz: 1076f9ac8e7fca9c6ec6e4558310700591c31249d9ea6bd76022ad2787a877f908338df9c64c77f23edb005cfb1f651a449f580affc09b49081acb8bb05b4053
7
+ data.tar.gz: 5510bc7e0b676b152dcd6ab31bc56ee54a2a5fc8009e93fb1b09c928fd08c3ef1242ce85bfa7a6fb1423cf64998414d1a295def64776a6b43f39f1dff8a2c36b
data/README.md CHANGED
@@ -4,9 +4,9 @@ embulk file input plugin.
4
4
 
5
5
  - embulk : http://www.embulk.org/docs/
6
6
 
7
- - embulk plugins : http://www.embulk.org/plugins/
7
+ - embulk plugins : https://plugins.embulk.org/
8
8
 
9
- Read files stored in Google Cloud Storage that extracted from Google Cloud Bigquery's table or query result.
9
+ Reads files stored on Google Cloud Storage that were extracted from a BigQuery table or query result.
10
10
 
11
11
  ## Overview
12
12
 
@@ -16,9 +16,9 @@ Read files stored in Google Cloud Storage that extracted from Google Cloud Bigqu
16
16
 
17
17
  ### Detail
18
18
 
19
- Read files stored in Google Cloud Storage, that exported from Google Cloud Bigquery's table or query result.
19
+ Reads files stored on Google Cloud Storage that were extracted from a BigQuery table or query result.
20
20
 
21
- Maybe solution for very big data in bigquery.
21
+ This may be a solution for downloading very large data from BigQuery.
22
22
 
23
23
  If you set **table** config without **query** config,
24
24
  then just extract table to Google Cloud Storage.
@@ -26,6 +26,7 @@ then just extract table to Google Cloud Storage.
26
26
  If you set **query** config,
27
27
  then the query result is saved to a temp table, and that temp table is then extracted to the Google Cloud Storage uri.
28
28
  see : https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract
29
+
29
30
 
30
31
  ## Usage
31
32
 
@@ -35,6 +36,12 @@ see : https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuratio
35
36
  embulk gem install embulk-input-bigquery_extract_files
36
37
  ```
37
38
 
39
+ ### Update plugin (latest version : 0.0.14)
40
+
41
+ ```bash
42
+ embulk gem update embulk-input-bigquery_extract_files
43
+ ```
44
+
38
45
  * rubygem url : https://rubygems.org/profiles/jo8937
39
46
 
40
47
 
@@ -64,6 +71,8 @@ embulk gem install embulk-input-bigquery_extract_files
64
71
 
65
72
  - **bigquery_job_wait_second**: bigquery job waiting second. (Optional) (string, default: `600`)
66
73
 
74
+ - **throw_bigquery_job_wait_timeout**: throw an exception when waiting for the bigquery job exceeds **bigquery_job_wait_second**. (Optional) (string, default: `false`)
75
+
67
76
  - **cleanup_gcs_before_executing**: delete all files in the gcs temp path before the process starts (Optional) (string, default: `true`)
68
77
 
69
78
  - **cleanup_gcs_files**: delete all files in the gcs temp path after the process ends (Optional) (string, default: `false`)
@@ -164,6 +173,20 @@ out:
164
173
  $ ./gradlew gem # -t to watch change of files and rebuild continuously
165
174
  ```
166
175
 
176
+
177
+ ## Plugin maintenance
178
+
179
+ For users of older versions.
180
+
181
+ ### Remove plugin specific version
182
+
183
+ ```bash
184
+ embulk gem uninstall embulk-input-bigquery_extract_files --version 0.0.13
185
+ ```
186
+
187
+ * rubygem url : https://rubygems.org/profiles/jo8937
188
+
189
+
167
190
  # Another choice
168
191
 
169
192
  This plugin is useful as a file-input type, but it may be complicated to use.
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.0.13"
16
+ version = "0.0.14"
17
17
 
18
18
  sourceCompatibility = 1.7
19
19
  targetCompatibility = 1.7
@@ -70,7 +70,8 @@ public class BigqueryExportGcsFileInputPlugin implements FileInputPlugin
70
70
  @Config("query")
71
71
  @ConfigDefault("null")
72
72
  public Optional<String> getQuery();
73
-
73
+ public void setQuery(Optional<String> tempDataset);
74
+
74
75
  @Config("file_format")
75
76
  @ConfigDefault("\"CSV\"")
76
77
  public Optional<String> getFileFormat();
@@ -185,6 +186,17 @@ public class BigqueryExportGcsFileInputPlugin implements FileInputPlugin
185
186
  public boolean getThrowBigqueryJobWaitTimeout();
186
187
  public void setThrowBigqueryJobWaitTimeout(boolean toThrow);
187
188
 
189
+ /**
190
+ * 2020.11.18: sometimes a bigquery job returns "DONE" but still includes errors.
191
+ * DONE does not mean job success.
192
+ * https://cloud.google.com/bigquery/docs/running-jobs#bigquery_create_job-java
193
+ *
194
+ * @return
195
+ */
196
+ @Config("throw_bigquery_job_includes_error")
197
+ @ConfigDefault("false")
198
+ public boolean getThrowBigqueryJobIncludesError();
199
+ public void setThrowBigqueryJobIncludesError(boolean toThrow);
188
200
  }
189
201
 
190
202
  @Override
@@ -4,6 +4,7 @@ import java.io.*;
4
4
  import java.math.BigInteger;
5
5
  import java.nio.file.FileSystems;
6
6
  import java.nio.file.Path;
7
+ import java.util.Collections;
7
8
  import java.util.Date;
8
9
  import java.util.List;
9
10
  import java.util.UUID;
@@ -136,7 +137,7 @@ public class BigqueryExportUtils
136
137
 
137
138
  log.info("query to Table jobId : {} : waiting for job end...",jobId);
138
139
 
139
- Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout());
140
+ Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout(), task.getThrowBigqueryJobIncludesError());
140
141
 
141
142
  log.debug("waiting for job end....... {}", lastJob.toPrettyString());
142
143
  }
@@ -335,14 +336,14 @@ public class BigqueryExportUtils
335
336
  log.info("extract jobId : {}",jobId);
336
337
  log.debug("waiting for job end....... ");
337
338
 
338
- Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout());
339
+ Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout(), task.getThrowBigqueryJobIncludesError());
339
340
 
340
341
  log.info("table extract result : {}",lastJob.toPrettyString());
341
342
 
342
343
  return embulkSchema;
343
344
  }
344
345
 
345
- public static Job waitForJob(Bigquery bigquery, String project, String jobId, String location, int bigqueryJobWaitingSecond, boolean exceptionWhenTimeout) throws IOException, InterruptedException{
346
+ public static Job waitForJob(Bigquery bigquery, String project, String jobId, String location, int bigqueryJobWaitingSecond, boolean exceptionWhenTimeout, boolean exceptionWhenErrorResult) throws IOException, InterruptedException{
346
347
  int maxAttempts = bigqueryJobWaitingSecond;
347
348
  int initialRetryDelay = 1000; // ms
348
349
  Job pollingJob = null;
@@ -352,9 +353,16 @@ public class BigqueryExportUtils
352
353
  pollingJob = bigquery.jobs().get(project, jobId).setLocation(location).execute();
353
354
  String state = pollingJob.getStatus().getState();
354
355
  log.debug("Job Status {} : {}",jobId, state);
355
-
356
+
357
+ // 2020-11-18: "DONE" does not mean "no error", so we must handle errors explicitly
358
+ if(exceptionWhenErrorResult){
359
+ if(pollingJob.getStatus().getErrorResult() != null){
360
+ throw new IOException(pollingJob.getStatus().getErrorResult().getMessage());
361
+ }
362
+ }
363
+
356
364
  if (pollingJob.getStatus().getState().equals("DONE")) {
357
- break;
365
+ break;
358
366
  }
359
367
  log.info("waiting {} ... {} ", tryCnt,state);
360
368
  Thread.sleep(initialRetryDelay);
@@ -28,6 +28,21 @@ public class TestGoogleCloudAccessData extends UnitTestInitializer
28
28
  }
29
29
 
30
30
 
31
+ @Test(expected=Exception.class)
32
+ public void testJobDoneButError() throws FileNotFoundException, IOException
33
+ {
34
+ BigqueryExportGcsFileInputPlugin.PluginTask task = config.loadConfig(BigqueryExportGcsFileInputPlugin.PluginTask.class );
35
+ task.setThrowBigqueryJobWaitTimeout(true);
36
+ task.setThrowBigqueryJobIncludesError(true);
37
+ task.setQuery(Optional.of("select a from b"));
38
+ plugin.executeBigqueryApi(task);
39
+
40
+ InputStream ins = BigqueryExportUtils.openInputStream(task, task.getFiles().get(0));
41
+ log.info("file size : {}",org.apache.commons.compress.utils.IOUtils.toByteArray(ins).length);
42
+
43
+ }
44
+
45
+
31
46
  @Test(expected=Exception.class)
32
47
  public void testJobWaitTimeout() throws FileNotFoundException, IOException
33
48
  {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-bigquery_extract_files
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.13
4
+ version: 0.0.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - jo8937
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-16 00:00:00.000000000 Z
11
+ date: 2020-11-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -65,7 +65,7 @@ files:
65
65
  - src/test/java/org/embulk/input/bigquery_export_gcs/UnitTestInitializer.java
66
66
  - classpath/animal-sniffer-annotations-1.14.jar
67
67
  - classpath/checker-compat-qual-2.5.2.jar
68
- - classpath/embulk-input-bigquery_extract_files-0.0.13.jar
68
+ - classpath/embulk-input-bigquery_extract_files-0.0.14.jar
69
69
  - classpath/error_prone_annotations-2.1.3.jar
70
70
  - classpath/google-api-client-1.25.0.jar
71
71
  - classpath/google-api-services-bigquery-v2-rev429-1.25.0.jar