embulk-input-bigquery_extract_files 0.0.13 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +27 -4
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportGcsFileInputPlugin.java +13 -1
- data/src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportUtils.java +13 -5
- data/src/test/java/org/embulk/input/bigquery_export_gcs/TestGoogleCloudAccessData.java +15 -0
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: be7d0d070196d522edcb8bf289e5d5156acfef52
|
|
4
|
+
data.tar.gz: da58c00653e0a96db45475fc2c8f91012d620c2b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1076f9ac8e7fca9c6ec6e4558310700591c31249d9ea6bd76022ad2787a877f908338df9c64c77f23edb005cfb1f651a449f580affc09b49081acb8bb05b4053
|
|
7
|
+
data.tar.gz: 5510bc7e0b676b152dcd6ab31bc56ee54a2a5fc8009e93fb1b09c928fd08c3ef1242ce85bfa7a6fb1423cf64998414d1a295def64776a6b43f39f1dff8a2c36b
|
data/README.md
CHANGED
|
@@ -4,9 +4,9 @@ embulk file input plugin.
|
|
|
4
4
|
|
|
5
5
|
- embulk : http://www.embulk.org/docs/
|
|
6
6
|
|
|
7
|
-
- embulk plugins :
|
|
7
|
+
- embulk plugins : https://plugins.embulk.org/
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
Reads files stored on Google Cloud Storage that were extracted from a BigQuery table or query result.
|
|
10
10
|
|
|
11
11
|
## Overview
|
|
12
12
|
|
|
@@ -16,9 +16,9 @@ Read files stored in Google Cloud Storage that extracted from Google Cloud Bigqu
|
|
|
16
16
|
|
|
17
17
|
### Detail
|
|
18
18
|
|
|
19
|
-
|
|
19
|
+
Reads files stored on Google Cloud Storage that were extracted from a BigQuery table or query result.
|
|
20
20
|
|
|
21
|
-
Maybe solution for very big data in bigquery.
|
|
21
|
+
This may be a solution for downloading very large data sets from BigQuery.
|
|
22
22
|
|
|
23
23
|
If you set **table** config without **query** config,
|
|
24
24
|
then just extract table to Google Cloud Storage.
|
|
@@ -26,6 +26,7 @@ then just extract table to Google Cloud Storage.
|
|
|
26
26
|
If you set **query** config,
|
|
27
27
|
then the query result is saved to a temp table, and that temp table is then extracted to the Google Cloud Storage URI.
|
|
28
28
|
see : https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract
|
|
29
|
+
|
|
29
30
|
|
|
30
31
|
## Usage
|
|
31
32
|
|
|
@@ -35,6 +36,12 @@ see : https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuratio
|
|
|
35
36
|
embulk gem install embulk-input-bigquery_extract_files
|
|
36
37
|
```
|
|
37
38
|
|
|
39
|
+
### Update plugin (latest version : 0.0.14)
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
embulk gem update embulk-input-bigquery_extract_files
|
|
43
|
+
```
|
|
44
|
+
|
|
38
45
|
* rubygem url : https://rubygems.org/profiles/jo8937
|
|
39
46
|
|
|
40
47
|
|
|
@@ -64,6 +71,8 @@ embulk gem install embulk-input-bigquery_extract_files
|
|
|
64
71
|
|
|
65
72
|
- **bigquery_job_wait_second**: maximum number of seconds to wait for the BigQuery job to finish. (Optional) (string, default: `600`)
|
|
66
73
|
|
|
74
|
+
- **throw_bigquery_job_wait_timeout**: throw an exception when waiting for the BigQuery job times out. (Optional) (string, default: `false`)
|
|
75
|
+
|
|
67
76
|
- **cleanup_gcs_before_executing**: delete all files in the GCS temp path before the process starts (Optional) (string, default: `true`)
|
|
68
77
|
|
|
69
78
|
- **cleanup_gcs_files**: delete all files in the GCS temp path after the process ends (Optional) (string, default: `false`)
|
|
@@ -164,6 +173,20 @@ out:
|
|
|
164
173
|
$ ./gradlew gem # -t to watch change of files and rebuild continuously
|
|
165
174
|
```
|
|
166
175
|
|
|
176
|
+
|
|
177
|
+
## Plugin maintenance
|
|
178
|
+
|
|
179
|
+
For users of older versions.
|
|
180
|
+
|
|
181
|
+
### Remove a specific plugin version
|
|
182
|
+
|
|
183
|
+
```bash
|
|
184
|
+
embulk gem uninstall embulk-input-bigquery_extract_files --version 0.0.13
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
* rubygem url : https://rubygems.org/profiles/jo8937
|
|
188
|
+
|
|
189
|
+
|
|
167
190
|
# Another choice
|
|
168
191
|
|
|
169
192
|
This plugin is useful as a file-input type, but it may be complicated to use.
|
data/build.gradle
CHANGED
data/src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportGcsFileInputPlugin.java
CHANGED
|
@@ -70,7 +70,8 @@ public class BigqueryExportGcsFileInputPlugin implements FileInputPlugin
|
|
|
70
70
|
@Config("query")
|
|
71
71
|
@ConfigDefault("null")
|
|
72
72
|
public Optional<String> getQuery();
|
|
73
|
-
|
|
73
|
+
public void setQuery(Optional<String> tempDataset);
|
|
74
|
+
|
|
74
75
|
@Config("file_format")
|
|
75
76
|
@ConfigDefault("\"CSV\"")
|
|
76
77
|
public Optional<String> getFileFormat();
|
|
@@ -185,6 +186,17 @@ public class BigqueryExportGcsFileInputPlugin implements FileInputPlugin
|
|
|
185
186
|
public boolean getThrowBigqueryJobWaitTimeout();
|
|
186
187
|
public void setThrowBigqueryJobWaitTimeout(boolean toThrow);
|
|
187
188
|
|
|
189
|
+
/**
|
|
190
|
+
* 2020.11.18: sometimes a BigQuery job returns "DONE" but includes errors.
|
|
191
|
+
* DONE does not mean job success.
|
|
192
|
+
* https://cloud.google.com/bigquery/docs/running-jobs#bigquery_create_job-java
|
|
193
|
+
*
|
|
194
|
+
* @return
|
|
195
|
+
*/
|
|
196
|
+
@Config("throw_bigquery_job_includes_error")
|
|
197
|
+
@ConfigDefault("false")
|
|
198
|
+
public boolean getThrowBigqueryJobIncludesError();
|
|
199
|
+
public void setThrowBigqueryJobIncludesError(boolean toThrow);
|
|
188
200
|
}
|
|
189
201
|
|
|
190
202
|
@Override
|
|
@@ -4,6 +4,7 @@ import java.io.*;
|
|
|
4
4
|
import java.math.BigInteger;
|
|
5
5
|
import java.nio.file.FileSystems;
|
|
6
6
|
import java.nio.file.Path;
|
|
7
|
+
import java.util.Collections;
|
|
7
8
|
import java.util.Date;
|
|
8
9
|
import java.util.List;
|
|
9
10
|
import java.util.UUID;
|
|
@@ -136,7 +137,7 @@ public class BigqueryExportUtils
|
|
|
136
137
|
|
|
137
138
|
log.info("query to Table jobId : {} : waiting for job end...",jobId);
|
|
138
139
|
|
|
139
|
-
Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout());
|
|
140
|
+
Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout(), task.getThrowBigqueryJobIncludesError());
|
|
140
141
|
|
|
141
142
|
log.debug("waiting for job end....... {}", lastJob.toPrettyString());
|
|
142
143
|
}
|
|
@@ -335,14 +336,14 @@ public class BigqueryExportUtils
|
|
|
335
336
|
log.info("extract jobId : {}",jobId);
|
|
336
337
|
log.debug("waiting for job end....... ");
|
|
337
338
|
|
|
338
|
-
Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout());
|
|
339
|
+
Job lastJob = waitForJob(bigquery, task.getProject(), jobId, task.getLocation().get(), task.getBigqueryJobWaitingSecond().get(), task.getThrowBigqueryJobWaitTimeout(), task.getThrowBigqueryJobIncludesError());
|
|
339
340
|
|
|
340
341
|
log.info("table extract result : {}",lastJob.toPrettyString());
|
|
341
342
|
|
|
342
343
|
return embulkSchema;
|
|
343
344
|
}
|
|
344
345
|
|
|
345
|
-
public static Job waitForJob(Bigquery bigquery, String project, String jobId, String location, int bigqueryJobWaitingSecond, boolean exceptionWhenTimeout) throws IOException, InterruptedException{
|
|
346
|
+
public static Job waitForJob(Bigquery bigquery, String project, String jobId, String location, int bigqueryJobWaitingSecond, boolean exceptionWhenTimeout, boolean exceptionWhenErrorResult) throws IOException, InterruptedException{
|
|
346
347
|
int maxAttempts = bigqueryJobWaitingSecond;
|
|
347
348
|
int initialRetryDelay = 1000; // ms
|
|
348
349
|
Job pollingJob = null;
|
|
@@ -352,9 +353,16 @@ public class BigqueryExportUtils
|
|
|
352
353
|
pollingJob = bigquery.jobs().get(project, jobId).setLocation(location).execute();
|
|
353
354
|
String state = pollingJob.getStatus().getState();
|
|
354
355
|
log.debug("Job Status {} : {}",jobId, state);
|
|
355
|
-
|
|
356
|
+
|
|
357
|
+
// 2020-11-18: DONE does not mean "no error", so we must handle it explicitly
|
|
358
|
+
if(exceptionWhenErrorResult){
|
|
359
|
+
if(pollingJob.getStatus().getErrorResult() != null){
|
|
360
|
+
throw new IOException(pollingJob.getStatus().getErrorResult().getMessage());
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
|
|
356
364
|
if (pollingJob.getStatus().getState().equals("DONE")) {
|
|
357
|
-
|
|
365
|
+
break;
|
|
358
366
|
}
|
|
359
367
|
log.info("waiting {} ... {} ", tryCnt,state);
|
|
360
368
|
Thread.sleep(initialRetryDelay);
|
|
@@ -28,6 +28,21 @@ public class TestGoogleCloudAccessData extends UnitTestInitializer
|
|
|
28
28
|
}
|
|
29
29
|
|
|
30
30
|
|
|
31
|
+
@Test(expected=Exception.class)
|
|
32
|
+
public void testJobDoneButError() throws FileNotFoundException, IOException
|
|
33
|
+
{
|
|
34
|
+
BigqueryExportGcsFileInputPlugin.PluginTask task = config.loadConfig(BigqueryExportGcsFileInputPlugin.PluginTask.class );
|
|
35
|
+
task.setThrowBigqueryJobWaitTimeout(true);
|
|
36
|
+
task.setThrowBigqueryJobIncludesError(true);
|
|
37
|
+
task.setQuery(Optional.of("select a from b"));
|
|
38
|
+
plugin.executeBigqueryApi(task);
|
|
39
|
+
|
|
40
|
+
InputStream ins = BigqueryExportUtils.openInputStream(task, task.getFiles().get(0));
|
|
41
|
+
log.info("file size : {}",org.apache.commons.compress.utils.IOUtils.toByteArray(ins).length);
|
|
42
|
+
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
|
|
31
46
|
@Test(expected=Exception.class)
|
|
32
47
|
public void testJobWaitTimeout() throws FileNotFoundException, IOException
|
|
33
48
|
{
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: embulk-input-bigquery_extract_files
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.14
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- jo8937
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-11-
|
|
11
|
+
date: 2020-11-20 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -65,7 +65,7 @@ files:
|
|
|
65
65
|
- src/test/java/org/embulk/input/bigquery_export_gcs/UnitTestInitializer.java
|
|
66
66
|
- classpath/animal-sniffer-annotations-1.14.jar
|
|
67
67
|
- classpath/checker-compat-qual-2.5.2.jar
|
|
68
|
-
- classpath/embulk-input-bigquery_extract_files-0.0.
|
|
68
|
+
- classpath/embulk-input-bigquery_extract_files-0.0.14.jar
|
|
69
69
|
- classpath/error_prone_annotations-2.1.3.jar
|
|
70
70
|
- classpath/google-api-client-1.25.0.jar
|
|
71
71
|
- classpath/google-api-services-bigquery-v2-rev429-1.25.0.jar
|