embulk-input-bigquery_extract_files 0.0.13 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bdb3cb2064c1b19c0748adb981d2593fd30950af
4
- data.tar.gz: 4c54a7075d3b4877e0ad66d2e91125fbd2b1af61
3
+ metadata.gz: be7d0d070196d522edcb8bf289e5d5156acfef52
4
+ data.tar.gz: da58c00653e0a96db45475fc2c8f91012d620c2b
5
5
  SHA512:
6
- metadata.gz: 8e4c7646e18867ef61a6488151eed43bda4001b1935bcffc1d5cb5ef545eaf731f3aa94f3f663a07d8d9e363f60b407072dcfea4c41722c5cd8c4dcb807e48cf
7
- data.tar.gz: c5f6d6843afb3848714be3bb0ae9dc249ea68f40cf92db75911222c7d72adf0bd55f4b65c8cb1971a8eb357e8678425d0a06befc6d9fb7e0a6668c93da0c04a3
6
+ metadata.gz: 1076f9ac8e7fca9c6ec6e4558310700591c31249d9ea6bd76022ad2787a877f908338df9c64c77f23edb005cfb1f651a449f580affc09b49081acb8bb05b4053
7
+ data.tar.gz: 5510bc7e0b676b152dcd6ab31bc56ee54a2a5fc8009e93fb1b09c928fd08c3ef1242ce85bfa7a6fb1423cf64998414d1a295def64776a6b43f39f1dff8a2c36b
data/README.md CHANGED
@@ -4,9 +4,9 @@ embulk file input plugin.
4
4
 
5
5
  - embulk : http://www.embulk.org/docs/
6
6
 
7
- - embulk plugins : http://www.embulk.org/plugins/
7
+ - embulk plugins : https://plugins.embulk.org/
8
8
 
9
- Read files stored in Google Cloud Storage that extracted from Google Cloud Bigquery's table or query result.
9
+ Reads files stored on Google Cloud Storage that were extracted from a BigQuery table or query result.
10
10
 
11
11
  ## Overview
12
12
 
@@ -16,9 +16,9 @@ Read files stored in Google Cloud Storage that extracted from Google Cloud Bigqu
16
16
 
17
17
  ### Detail
18
18
 
19
- Read files stored in Google Cloud Storage, that exported from Google Cloud Bigquery's table or query result.
19
+ Reads files stored on Google Cloud Storage that were extracted from a BigQuery table or query result.
20
20
 
21
- Maybe solution for very big data in bigquery.
21
+ This may be a solution for downloading very large data sets from BigQuery.
22
22
 
23
23
  If you set **table** config without **query** config,
24
24
  then just extract table to Google Cloud Storage.
@@ -26,6 +26,7 @@ then just extract table to Google Cloud Storage.
26
26
  If you set **query** config,
27
27
  then query result save to temp table and then extracted that temp table to Google Cloud Storage uri.
28
28
  see : https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract
29
+
29
30
 
30
31
  ## Usage
31
32
 
@@ -35,6 +36,12 @@ see : https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuratio
35
36
  embulk gem install embulk-input-bigquery_extract_files
36
37
  ```
37
38
 
39
+ ### Update plugin (latest version : 0.0.14)
40
+
41
+ ```bash
42
+ embulk gem update embulk-input-bigquery_extract_files
43
+ ```
44
+
38
45
  * rubygem url : https://rubygems.org/profiles/jo8937
39
46
 
40
47
 
@@ -64,6 +71,8 @@ embulk gem install embulk-input-bigquery_extract_files
64
71
 
65
72
  - **bigquery_job_wait_second**: bigquery job waiting second. (Optional) (string, default: `600`)
66
73
 
74
+ - **throw_bigquery_job_wait_timeout**: throw an exception when the BigQuery job does not finish within **bigquery_job_wait_second**. (Optional) (string, default: `false`)
75
+
67
76
  - **cleanup_gcs_before_executing**: delete all file in gcs temp path before process start (Optional) (string, default: `true`)
68
77
 
69
78
  - **cleanup_gcs_files**: delete all file in gcs temp path after process end (Optional) (string, default: `false`)
@@ -164,6 +173,20 @@ out:
164
173
  $ ./gradlew gem # -t to watch change of files and rebuild continuously
165
174
  ```
166
175
 
176
+
177
+ ## Plugin maintenance
178
+
179
+ For users of older versions:
180
+
181
+ ### Remove plugin specific version
182
+
183
+ ```bash
184
+ embulk gem uninstall embulk-input-bigquery_extract_files --version 0.0.13
185
+ ```
186
+
187
+ * rubygem url : https://rubygems.org/profiles/jo8937
188
+
189
+
167
190
  # Another choice
168
191
 
169
192
  This plugin useful for file-input type. but maybe so complicated to use.
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.0.13"
16
+ version = "0.0.14"
17
17
 
18
18
  sourceCompatibility = 1.7
19
19
  targetCompatibility = 1.7
@@ -70,7 +70,8 @@ public class BigqueryExportGcsFileInputPlugin implements FileInputPlugin
70
70
  @Config("query")
71
71
  @ConfigDefault("null")
72
72
  public Optional<String> getQuery();
73
-
73
+ public void setQuery(Optional<String> tempDataset);
74
+
74
75
  @Config("file_format")
75
76
  @ConfigDefault("\"CSV\"")
76
77
  public Optional<String> getFileFormat();
@@ -185,6 +186,17 @@ public class BigqueryExportGcsFileInputPlugin implements FileInputPlugin
185
186
  public boolean getThrowBigqueryJobWaitTimeout();
186
187
  public void setThrowBigqueryJobWaitTimeout(boolean toThrow);
187
188
 
189
+ /**
190
+ * 2020-11-18: a BigQuery job sometimes returns the state "DONE" even though it includes errors.
191
+ * "DONE" does not mean that the job succeeded.
192
+ * https://cloud.google.com/bigquery/docs/running-jobs#bigquery_create_job-java
193
+ *
194
+ * @return
195
+ */
196
+ @Config("throw_bigquery_job_includes_error")
197
+ @ConfigDefault("false")
198
+ public boolean getThrowBigqueryJobIncludesError();
199
+ public void setThrowBigqueryJobIncludesError(boolean toThrow);
188
200
  }
189
201
 
190
202
  @Override
@@ -4,6 +4,7 @@ import java.io.*;
4
4
  import java.math.BigInteger;
5
5
  import java.nio.file.FileSystems;
6
6
  import java.nio.file.Path;
7
+ import java.util.Collections;
7
8
  import java.util.Date;
8
9
  import java.util.List;
9
10
  import java.util.UUID;
@@ -136,7 +137,7 @@ public class BigqueryExportUtils
136
137
 
137
138
  log.info("query to Table jobId : {} : waiting for job end...",jobId);
138
139
 
139
- Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout());
140
+ Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout(), task.getThrowBigqueryJobIncludesError());
140
141
 
141
142
  log.debug("waiting for job end....... {}", lastJob.toPrettyString());
142
143
  }
@@ -335,14 +336,14 @@ public class BigqueryExportUtils
335
336
  log.info("extract jobId : {}",jobId);
336
337
  log.debug("waiting for job end....... ");
337
338
 
338
- Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout());
339
+ Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout(), task.getThrowBigqueryJobIncludesError());
339
340
 
340
341
  log.info("table extract result : {}",lastJob.toPrettyString());
341
342
 
342
343
  return embulkSchema;
343
344
  }
344
345
 
345
- public static Job waitForJob(Bigquery bigquery, String project, String jobId, String location, int bigqueryJobWaitingSecond, boolean exceptionWhenTimeout) throws IOException, InterruptedException{
346
+ public static Job waitForJob(Bigquery bigquery, String project, String jobId, String location, int bigqueryJobWaitingSecond, boolean exceptionWhenTimeout, boolean exceptionWhenErrorResult) throws IOException, InterruptedException{
346
347
  int maxAttempts = bigqueryJobWaitingSecond;
347
348
  int initialRetryDelay = 1000; // ms
348
349
  Job pollingJob = null;
@@ -352,9 +353,16 @@ public class BigqueryExportUtils
352
353
  pollingJob = bigquery.jobs().get(project, jobId).setLocation(location).execute();
353
354
  String state = pollingJob.getStatus().getState();
354
355
  log.debug("Job Status {} : {}",jobId, state);
355
-
356
+
357
+ // 2020-11-18: "DONE" does not mean "no error", so we must check for an error result explicitly
358
+ if(exceptionWhenErrorResult){
359
+ if(pollingJob.getStatus().getErrorResult() != null){
360
+ throw new IOException(pollingJob.getStatus().getErrorResult().getMessage());
361
+ }
362
+ }
363
+
356
364
  if (pollingJob.getStatus().getState().equals("DONE")) {
357
- break;
365
+ break;
358
366
  }
359
367
  log.info("waiting {} ... {} ", tryCnt,state);
360
368
  Thread.sleep(initialRetryDelay);
@@ -28,6 +28,21 @@ public class TestGoogleCloudAccessData extends UnitTestInitializer
28
28
  }
29
29
 
30
30
 
31
+ @Test(expected=Exception.class)
32
+ public void testJobDoneButError() throws FileNotFoundException, IOException
33
+ {
34
+ BigqueryExportGcsFileInputPlugin.PluginTask task = config.loadConfig(BigqueryExportGcsFileInputPlugin.PluginTask.class );
35
+ task.setThrowBigqueryJobWaitTimeout(true);
36
+ task.setThrowBigqueryJobIncludesError(true);
37
+ task.setQuery(Optional.of("select a from b"));
38
+ plugin.executeBigqueryApi(task);
39
+
40
+ InputStream ins = BigqueryExportUtils.openInputStream(task, task.getFiles().get(0));
41
+ log.info("file size : {}",org.apache.commons.compress.utils.IOUtils.toByteArray(ins).length);
42
+
43
+ }
44
+
45
+
31
46
  @Test(expected=Exception.class)
32
47
  public void testJobWaitTimeout() throws FileNotFoundException, IOException
33
48
  {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-bigquery_extract_files
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.13
4
+ version: 0.0.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - jo8937
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-16 00:00:00.000000000 Z
11
+ date: 2020-11-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -65,7 +65,7 @@ files:
65
65
  - src/test/java/org/embulk/input/bigquery_export_gcs/UnitTestInitializer.java
66
66
  - classpath/animal-sniffer-annotations-1.14.jar
67
67
  - classpath/checker-compat-qual-2.5.2.jar
68
- - classpath/embulk-input-bigquery_extract_files-0.0.13.jar
68
+ - classpath/embulk-input-bigquery_extract_files-0.0.14.jar
69
69
  - classpath/error_prone_annotations-2.1.3.jar
70
70
  - classpath/google-api-client-1.25.0.jar
71
71
  - classpath/google-api-services-bigquery-v2-rev429-1.25.0.jar