embulk-input-bigquery_extract_files 0.0.13 → 0.0.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +27 -4
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportGcsFileInputPlugin.java +13 -1
- data/src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportUtils.java +13 -5
- data/src/test/java/org/embulk/input/bigquery_export_gcs/TestGoogleCloudAccessData.java +15 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: be7d0d070196d522edcb8bf289e5d5156acfef52
|
4
|
+
data.tar.gz: da58c00653e0a96db45475fc2c8f91012d620c2b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1076f9ac8e7fca9c6ec6e4558310700591c31249d9ea6bd76022ad2787a877f908338df9c64c77f23edb005cfb1f651a449f580affc09b49081acb8bb05b4053
|
7
|
+
data.tar.gz: 5510bc7e0b676b152dcd6ab31bc56ee54a2a5fc8009e93fb1b09c928fd08c3ef1242ce85bfa7a6fb1423cf64998414d1a295def64776a6b43f39f1dff8a2c36b
|
data/README.md
CHANGED
@@ -4,9 +4,9 @@ embulk file input plugin.
|
|
4
4
|
|
5
5
|
- embulk : http://www.embulk.org/docs/
|
6
6
|
|
7
|
-
- embulk plugins :
|
7
|
+
- embulk plugins : https://plugins.embulk.org/
|
8
8
|
|
9
|
-
|
9
|
+
Reads files stored on Google Cloud Storage that were extracted from a BigQuery table or query result.
|
10
10
|
|
11
11
|
## Overview
|
12
12
|
|
@@ -16,9 +16,9 @@ Read files stored in Google Cloud Storage that extracted from Google Cloud Bigqu
|
|
16
16
|
|
17
17
|
### Detail
|
18
18
|
|
19
|
-
|
19
|
+
Reads files stored on Google Cloud Storage that were extracted from a BigQuery table or query result.
|
20
20
|
|
21
|
-
Maybe solution for very big data in bigquery.
|
21
|
+
A possible solution for downloading very large data sets from BigQuery.
|
22
22
|
|
23
23
|
If you set **table** config without **query** config,
|
24
24
|
then just extract table to Google Cloud Storage.
|
@@ -26,6 +26,7 @@ then just extract table to Google Cloud Storage.
|
|
26
26
|
If you set **query** config,
|
27
27
|
then the query result is saved to a temp table, and that temp table is then extracted to the Google Cloud Storage URI.
|
28
28
|
see : https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract
|
29
|
+
|
29
30
|
|
30
31
|
## Usage
|
31
32
|
|
@@ -35,6 +36,12 @@ see : https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuratio
|
|
35
36
|
embulk gem install embulk-input-bigquery_extract_files
|
36
37
|
```
|
37
38
|
|
39
|
+
### Update plugin (latest version : 0.0.13)
|
40
|
+
|
41
|
+
```bash
|
42
|
+
embulk gem update embulk-input-bigquery_extract_files
|
43
|
+
```
|
44
|
+
|
38
45
|
* rubygem url : https://rubygems.org/profiles/jo8937
|
39
46
|
|
40
47
|
|
@@ -64,6 +71,8 @@ embulk gem install embulk-input-bigquery_extract_files
|
|
64
71
|
|
65
72
|
- **bigquery_job_wait_second**: bigquery job waiting second. (Optional) (string, default: `600`)
|
66
73
|
|
74
|
+
- **throw_bigquery_job_wait_timeout**: throw an exception when waiting for the BigQuery job exceeds `bigquery_job_wait_second`. (Optional) (string, default: `false`)
|
75
|
+
|
67
76
|
- **cleanup_gcs_before_executing**: delete all file in gcs temp path before process start (Optional) (string, default: `true`)
|
68
77
|
|
69
78
|
- **cleanup_gcs_files**: delete all file in gcs temp path after process end (Optional) (string, default: `false`)
|
@@ -164,6 +173,20 @@ out:
|
|
164
173
|
$ ./gradlew gem # -t to watch change of files and rebuild continuously
|
165
174
|
```
|
166
175
|
|
176
|
+
|
177
|
+
## Plugin maintenance
|
178
|
+
|
179
|
+
For users of older versions:
|
180
|
+
|
181
|
+
### Remove a specific plugin version
|
182
|
+
|
183
|
+
```bash
|
184
|
+
embulk gem uninstall embulk-input-bigquery_extract_files --version 0.0.13
|
185
|
+
```
|
186
|
+
|
187
|
+
* rubygem url : https://rubygems.org/profiles/jo8937
|
188
|
+
|
189
|
+
|
167
190
|
# Another choice
|
168
191
|
|
169
192
|
This plugin is useful for file-input type, but it may be complicated to use.
|
data/build.gradle
CHANGED
data/src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportGcsFileInputPlugin.java
CHANGED
@@ -70,7 +70,8 @@ public class BigqueryExportGcsFileInputPlugin implements FileInputPlugin
|
|
70
70
|
@Config("query")
|
71
71
|
@ConfigDefault("null")
|
72
72
|
public Optional<String> getQuery();
|
73
|
-
|
73
|
+
public void setQuery(Optional<String> tempDataset);
|
74
|
+
|
74
75
|
@Config("file_format")
|
75
76
|
@ConfigDefault("\"CSV\"")
|
76
77
|
public Optional<String> getFileFormat();
|
@@ -185,6 +186,17 @@ public class BigqueryExportGcsFileInputPlugin implements FileInputPlugin
|
|
185
186
|
public boolean getThrowBigqueryJobWaitTimeout();
|
186
187
|
public void setThrowBigqueryJobWaitTimeout(boolean toThrow);
|
187
188
|
|
189
|
+
/**
|
190
|
+
* 2020.11.18: sometimes a BigQuery job returns "DONE" but includes errors.
|
191
|
+
* DONE does not mean job success.
|
192
|
+
* https://cloud.google.com/bigquery/docs/running-jobs#bigquery_create_job-java
|
193
|
+
*
|
194
|
+
* @return
|
195
|
+
*/
|
196
|
+
@Config("throw_bigquery_job_includes_error")
|
197
|
+
@ConfigDefault("false")
|
198
|
+
public boolean getThrowBigqueryJobIncludesError();
|
199
|
+
public void setThrowBigqueryJobIncludesError(boolean toThrow);
|
188
200
|
}
|
189
201
|
|
190
202
|
@Override
|
@@ -4,6 +4,7 @@ import java.io.*;
|
|
4
4
|
import java.math.BigInteger;
|
5
5
|
import java.nio.file.FileSystems;
|
6
6
|
import java.nio.file.Path;
|
7
|
+
import java.util.Collections;
|
7
8
|
import java.util.Date;
|
8
9
|
import java.util.List;
|
9
10
|
import java.util.UUID;
|
@@ -136,7 +137,7 @@ public class BigqueryExportUtils
|
|
136
137
|
|
137
138
|
log.info("query to Table jobId : {} : waiting for job end...",jobId);
|
138
139
|
|
139
|
-
Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout());
|
140
|
+
Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout(), task.getThrowBigqueryJobIncludesError());
|
140
141
|
|
141
142
|
log.debug("waiting for job end....... {}", lastJob.toPrettyString());
|
142
143
|
}
|
@@ -335,14 +336,14 @@ public class BigqueryExportUtils
|
|
335
336
|
log.info("extract jobId : {}",jobId);
|
336
337
|
log.debug("waiting for job end....... ");
|
337
338
|
|
338
|
-
Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout());
|
339
|
+
Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout(), task.getThrowBigqueryJobIncludesError());
|
339
340
|
|
340
341
|
log.info("table extract result : {}",lastJob.toPrettyString());
|
341
342
|
|
342
343
|
return embulkSchema;
|
343
344
|
}
|
344
345
|
|
345
|
-
public static Job waitForJob(Bigquery bigquery, String project, String jobId, String location, int bigqueryJobWaitingSecond, boolean exceptionWhenTimeout) throws IOException, InterruptedException{
|
346
|
+
public static Job waitForJob(Bigquery bigquery, String project, String jobId, String location, int bigqueryJobWaitingSecond, boolean exceptionWhenTimeout, boolean exceptionWhenErrorResult) throws IOException, InterruptedException{
|
346
347
|
int maxAttempts = bigqueryJobWaitingSecond;
|
347
348
|
int initialRetryDelay = 1000; // ms
|
348
349
|
Job pollingJob = null;
|
@@ -352,9 +353,16 @@ public class BigqueryExportUtils
|
|
352
353
|
pollingJob = bigquery.jobs().get(project, jobId).setLocation(location).execute();
|
353
354
|
String state = pollingJob.getStatus().getState();
|
354
355
|
log.debug("Job Status {} : {}",jobId, state);
|
355
|
-
|
356
|
+
|
357
|
+
// 2020-11-18: DONE does not mean "no error", so we must handle it explicitly
|
358
|
+
if(exceptionWhenErrorResult){
|
359
|
+
if(pollingJob.getStatus().getErrorResult() != null){
|
360
|
+
throw new IOException(pollingJob.getStatus().getErrorResult().getMessage());
|
361
|
+
}
|
362
|
+
}
|
363
|
+
|
356
364
|
if (pollingJob.getStatus().getState().equals("DONE")) {
|
357
|
-
|
365
|
+
break;
|
358
366
|
}
|
359
367
|
log.info("waiting {} ... {} ", tryCnt,state);
|
360
368
|
Thread.sleep(initialRetryDelay);
|
@@ -28,6 +28,21 @@ public class TestGoogleCloudAccessData extends UnitTestInitializer
|
|
28
28
|
}
|
29
29
|
|
30
30
|
|
31
|
+
@Test(expected=Exception.class)
|
32
|
+
public void testJobDoneButError() throws FileNotFoundException, IOException
|
33
|
+
{
|
34
|
+
BigqueryExportGcsFileInputPlugin.PluginTask task = config.loadConfig(BigqueryExportGcsFileInputPlugin.PluginTask.class );
|
35
|
+
task.setThrowBigqueryJobWaitTimeout(true);
|
36
|
+
task.setThrowBigqueryJobIncludesError(true);
|
37
|
+
task.setQuery(Optional.of("select a from b"));
|
38
|
+
plugin.executeBigqueryApi(task);
|
39
|
+
|
40
|
+
InputStream ins = BigqueryExportUtils.openInputStream(task, task.getFiles().get(0));
|
41
|
+
log.info("file size : {}",org.apache.commons.compress.utils.IOUtils.toByteArray(ins).length);
|
42
|
+
|
43
|
+
}
|
44
|
+
|
45
|
+
|
31
46
|
@Test(expected=Exception.class)
|
32
47
|
public void testJobWaitTimeout() throws FileNotFoundException, IOException
|
33
48
|
{
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-bigquery_extract_files
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jo8937
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-11-
|
11
|
+
date: 2020-11-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -65,7 +65,7 @@ files:
|
|
65
65
|
- src/test/java/org/embulk/input/bigquery_export_gcs/UnitTestInitializer.java
|
66
66
|
- classpath/animal-sniffer-annotations-1.14.jar
|
67
67
|
- classpath/checker-compat-qual-2.5.2.jar
|
68
|
-
- classpath/embulk-input-bigquery_extract_files-0.0.
|
68
|
+
- classpath/embulk-input-bigquery_extract_files-0.0.14.jar
|
69
69
|
- classpath/error_prone_annotations-2.1.3.jar
|
70
70
|
- classpath/google-api-client-1.25.0.jar
|
71
71
|
- classpath/google-api-services-bigquery-v2-rev429-1.25.0.jar
|