embulk-output-bigquery 0.2.3 → 0.3.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -12
- data/CHANGELOG.md +18 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +20 -0
- data/README.md +165 -39
- data/Rakefile +11 -0
- data/embulk-output-bigquery.gemspec +20 -0
- data/example/config_client_options.yml +33 -0
- data/example/config_csv.yml +30 -0
- data/example/config_delete_in_advance.yml +29 -0
- data/example/config_expose_errors.yml +30 -0
- data/example/config_guess_from_embulk_schema.yml +29 -0
- data/example/config_guess_with_column_options.yml +40 -0
- data/example/config_gzip.yml +30 -0
- data/example/config_jsonl.yml +30 -0
- data/example/config_mode_append.yml +30 -0
- data/example/config_mode_append_direct.yml +30 -0
- data/example/config_payload_column.yml +20 -0
- data/example/config_payload_column_index.yml +20 -0
- data/example/config_prevent_duplicate_insert.yml +30 -0
- data/example/config_replace.yml +30 -0
- data/example/config_replace_backup.yml +32 -0
- data/example/config_skip_file_generation.yml +32 -0
- data/example/config_table_strftime.yml +30 -0
- data/example/config_template_table.yml +21 -0
- data/example/config_uncompressed.yml +30 -0
- data/example/config_with_rehearsal.yml +32 -0
- data/example/example.csv +17 -0
- data/example/example.jsonl +16 -0
- data/example/example.yml +30 -0
- data/example/json_key.json +12 -0
- data/example/nested_example.jsonl +16 -0
- data/example/schema.json +30 -0
- data/example/schema_expose_errors.json +30 -0
- data/lib/embulk/output/bigquery.rb +388 -3
- data/lib/embulk/output/bigquery/bigquery_client.rb +396 -0
- data/lib/embulk/output/bigquery/file_writer.rb +103 -0
- data/lib/embulk/output/bigquery/helper.rb +78 -0
- data/lib/embulk/output/bigquery/value_converter_factory.rb +292 -0
- data/test/helper.rb +13 -0
- data/test/test_bigquery_client.rb +166 -0
- data/test/test_configure.rb +254 -0
- data/test/test_example.rb +34 -0
- data/test/test_file_writer.rb +129 -0
- data/test/test_helper.rb +103 -0
- data/test/test_transaction.rb +129 -0
- data/test/test_value_converter_factory.rb +316 -0
- metadata +114 -45
- data/build.gradle +0 -80
- data/config/checkstyle/checkstyle.xml +0 -128
- data/config/checkstyle/default.xml +0 -108
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +0 -6
- data/gradlew +0 -164
- data/gradlew.bat +0 -90
- data/settings.gradle +0 -2
- data/src/main/java/org/embulk/output/BigqueryAuthentication.java +0 -117
- data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java +0 -508
- data/src/main/java/org/embulk/output/BigqueryWriter.java +0 -575
- data/src/test/java/org/embulk/output/TestBigqueryAuthentication.java +0 -5
- data/src/test/java/org/embulk/output/TestBigqueryOutputPlugin.java +0 -5
- data/src/test/java/org/embulk/output/TestBigqueryWriter.java +0 -5
data/gradlew.bat
DELETED
@@ -1,90 +0,0 @@
|
|
1
|
-
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem  Locates java.exe (JAVA_HOME first, then PATH) and runs the Gradle wrapper
@rem  jar found under gradle\wrapper next to this script, forwarding all
@rem  command-line arguments to it.
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=

@rem Directory and base name of this script; APP_HOME is where the wrapper jar is looked up.
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

@rem No JAVA_HOME: probe for a java.exe reachable through PATH.
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
@rem Strip any double quotes from JAVA_HOME before building the java.exe path.
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto init

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:init
@rem Get command-line arguments, handling Windows variants

if not "%OS%" == "Windows_NT" goto win9xME_args
if "%@eval[2+2]" == "4" goto 4NT_args

:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=

:win9xME_args_slurp
if "x%~1" == "x" goto execute

set CMD_LINE_ARGS=%*
goto execute

:4NT_args
@rem Get arguments from the 4NT Shell from JP Software
set CMD_LINE_ARGS=%$

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar

@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
rem Keep the real failure code so callers can tell failures apart; the java lookup
rem errors above can arrive here with ERRORLEVEL still 0, so force a non-zero code then.
set EXIT_CODE=%ERRORLEVEL%
if "%EXIT_CODE%" == "0" set EXIT_CODE=1
if not "" == "%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
exit /b %EXIT_CODE%

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega
|
data/settings.gradle
DELETED
data/src/main/java/org/embulk/output/BigqueryAuthentication.java
DELETED
@@ -1,117 +0,0 @@
|
|
1
|
-
package org.embulk.output;
|
2
|
-
|
3
|
-
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
|
4
|
-
import com.google.api.client.googleapis.compute.ComputeCredential;
|
5
|
-
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
|
6
|
-
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
|
7
|
-
import com.google.api.client.http.HttpRequestInitializer;
|
8
|
-
import com.google.api.client.http.HttpTransport;
|
9
|
-
import com.google.api.client.json.JsonFactory;
|
10
|
-
import com.google.api.client.json.jackson2.JacksonFactory;
|
11
|
-
import com.google.api.services.bigquery.Bigquery;
|
12
|
-
import com.google.api.services.bigquery.BigqueryScopes;
|
13
|
-
import com.google.api.services.bigquery.model.ProjectList;
|
14
|
-
import com.google.common.base.Optional;
|
15
|
-
import com.google.common.collect.ImmutableList;
|
16
|
-
import org.embulk.spi.Exec;
|
17
|
-
import org.slf4j.Logger;
|
18
|
-
|
19
|
-
import java.io.File;
|
20
|
-
import java.io.FileInputStream;
|
21
|
-
|
22
|
-
import java.io.IOException;
|
23
|
-
import java.security.GeneralSecurityException;
|
24
|
-
import java.util.Collections;
|
25
|
-
|
26
|
-
public class BigqueryAuthentication
|
27
|
-
{
|
28
|
-
private final Logger log = Exec.getLogger(BigqueryAuthentication.class);
|
29
|
-
private final Optional<String> serviceAccountEmail;
|
30
|
-
private final Optional<String> p12KeyFilePath;
|
31
|
-
private final Optional<String> jsonKeyFilePath;
|
32
|
-
private final String applicationName;
|
33
|
-
private final HttpTransport httpTransport;
|
34
|
-
private final JsonFactory jsonFactory;
|
35
|
-
private final HttpRequestInitializer credentials;
|
36
|
-
|
37
|
-
public BigqueryAuthentication(String authMethod, Optional<String> serviceAccountEmail,
|
38
|
-
Optional<String> p12KeyFilePath, Optional<String> jsonKeyFilePath, String applicationName)
|
39
|
-
throws IOException, GeneralSecurityException
|
40
|
-
{
|
41
|
-
this.serviceAccountEmail = serviceAccountEmail;
|
42
|
-
this.p12KeyFilePath = p12KeyFilePath;
|
43
|
-
this.jsonKeyFilePath = jsonKeyFilePath;
|
44
|
-
this.applicationName = applicationName;
|
45
|
-
|
46
|
-
this.httpTransport = GoogleNetHttpTransport.newTrustedTransport();
|
47
|
-
this.jsonFactory = new JacksonFactory();
|
48
|
-
|
49
|
-
if (authMethod.toLowerCase().equals("compute_engine")) {
|
50
|
-
this.credentials = getComputeCredential();
|
51
|
-
}
|
52
|
-
else if (authMethod.toLowerCase().equals("json_key")) {
|
53
|
-
this.credentials = getServiceAccountCredentialFromJsonFile();
|
54
|
-
}
|
55
|
-
else {
|
56
|
-
this.credentials = getServiceAccountCredential();
|
57
|
-
}
|
58
|
-
}
|
59
|
-
|
60
|
-
/**
|
61
|
-
* @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#authorizingrequests
|
62
|
-
*/
|
63
|
-
private GoogleCredential getServiceAccountCredential() throws IOException, GeneralSecurityException
|
64
|
-
{
|
65
|
-
// @see https://cloud.google.com/compute/docs/api/how-tos/authorization
|
66
|
-
// @see https://developers.google.com/resources/api-libraries/documentation/storage/v1/java/latest/com/google/api/services/storage/STORAGE_SCOPE.html
|
67
|
-
// @see https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/BigqueryScopes.html
|
68
|
-
return new GoogleCredential.Builder()
|
69
|
-
.setTransport(httpTransport)
|
70
|
-
.setJsonFactory(jsonFactory)
|
71
|
-
.setServiceAccountId(serviceAccountEmail.orNull())
|
72
|
-
.setServiceAccountScopes(
|
73
|
-
ImmutableList.of(
|
74
|
-
BigqueryScopes.BIGQUERY
|
75
|
-
)
|
76
|
-
)
|
77
|
-
.setServiceAccountPrivateKeyFromP12File(new File(p12KeyFilePath.orNull()))
|
78
|
-
.build();
|
79
|
-
}
|
80
|
-
|
81
|
-
private GoogleCredential getServiceAccountCredentialFromJsonFile() throws IOException
|
82
|
-
{
|
83
|
-
FileInputStream stream = new FileInputStream(jsonKeyFilePath.orNull());
|
84
|
-
|
85
|
-
return GoogleCredential.fromStream(stream, httpTransport, jsonFactory)
|
86
|
-
.createScoped(Collections.singleton(BigqueryScopes.BIGQUERY));
|
87
|
-
}
|
88
|
-
|
89
|
-
/**
|
90
|
-
* @see http://developers.guge.io/accounts/docs/OAuth2ServiceAccount#creatinganaccount
|
91
|
-
* @see https://developers.google.com/accounts/docs/OAuth2
|
92
|
-
*/
|
93
|
-
private ComputeCredential getComputeCredential() throws IOException
|
94
|
-
{
|
95
|
-
ComputeCredential credential = new ComputeCredential.Builder(httpTransport, jsonFactory)
|
96
|
-
.build();
|
97
|
-
credential.refreshToken();
|
98
|
-
|
99
|
-
//log.debug("access_token:" + credential.getAccessToken());
|
100
|
-
log.debug("access_token expired:" + credential.getExpiresInSeconds());
|
101
|
-
|
102
|
-
return credential;
|
103
|
-
}
|
104
|
-
|
105
|
-
public Bigquery getBigqueryClient() throws GoogleJsonResponseException, IOException
|
106
|
-
{
|
107
|
-
Bigquery client = new Bigquery.Builder(httpTransport, jsonFactory, credentials)
|
108
|
-
.setApplicationName(applicationName)
|
109
|
-
.build();
|
110
|
-
|
111
|
-
// For throw IOException when authentication is fail.
|
112
|
-
long maxResults = 1;
|
113
|
-
ProjectList projectList = client.projects().list().setMaxResults(maxResults).execute();
|
114
|
-
|
115
|
-
return client;
|
116
|
-
}
|
117
|
-
}
|
data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java
DELETED
@@ -1,508 +0,0 @@
|
|
1
|
-
package org.embulk.output;
|
2
|
-
|
3
|
-
import com.google.common.base.Function;
|
4
|
-
import com.google.common.base.Optional;
|
5
|
-
import com.google.common.base.Throwables;
|
6
|
-
import org.embulk.config.Config;
|
7
|
-
import org.embulk.config.ConfigDefault;
|
8
|
-
import org.embulk.config.ConfigDiff;
|
9
|
-
import org.embulk.config.ConfigException;
|
10
|
-
import org.embulk.config.ConfigSource;
|
11
|
-
import org.embulk.config.Task;
|
12
|
-
import org.embulk.config.TaskReport;
|
13
|
-
import org.embulk.config.TaskSource;
|
14
|
-
import org.embulk.spi.Buffer;
|
15
|
-
import org.embulk.spi.Exec;
|
16
|
-
import org.embulk.spi.FileOutputPlugin;
|
17
|
-
import org.embulk.spi.TransactionalFileOutput;
|
18
|
-
import org.embulk.spi.unit.LocalFile;
|
19
|
-
import org.jruby.embed.ScriptingContainer;
|
20
|
-
import org.slf4j.Logger;
|
21
|
-
|
22
|
-
import java.io.BufferedOutputStream;
|
23
|
-
import java.io.File;
|
24
|
-
import java.io.FileNotFoundException;
|
25
|
-
import java.io.FileOutputStream;
|
26
|
-
|
27
|
-
import java.io.IOException;
|
28
|
-
import java.nio.charset.Charset;
|
29
|
-
import java.security.GeneralSecurityException;
|
30
|
-
import java.security.NoSuchAlgorithmException;
|
31
|
-
import java.util.List;
|
32
|
-
import java.util.concurrent.TimeoutException;
|
33
|
-
|
34
|
-
public class BigqueryOutputPlugin
|
35
|
-
implements FileOutputPlugin
|
36
|
-
{
|
37
|
-
public interface PluginTask
|
38
|
-
extends Task
|
39
|
-
{
|
40
|
-
@Config("auth_method")
|
41
|
-
@ConfigDefault("\"private_key\"")
|
42
|
-
AuthMethod getAuthMethod();
|
43
|
-
|
44
|
-
@Config("service_account_email")
|
45
|
-
@ConfigDefault("null")
|
46
|
-
Optional<String> getServiceAccountEmail();
|
47
|
-
|
48
|
-
// kept for backward compatibility
|
49
|
-
@Config("p12_keyfile_path")
|
50
|
-
@ConfigDefault("null")
|
51
|
-
Optional<String> getP12KeyfilePath();
|
52
|
-
|
53
|
-
@Config("p12_keyfile")
|
54
|
-
@ConfigDefault("null")
|
55
|
-
Optional<LocalFile> getP12Keyfile();
|
56
|
-
void setP12Keyfile(Optional<LocalFile> p12Keyfile);
|
57
|
-
|
58
|
-
@Config("json_keyfile")
|
59
|
-
@ConfigDefault("null")
|
60
|
-
Optional<LocalFile> getJsonKeyfile();
|
61
|
-
|
62
|
-
@Config("application_name")
|
63
|
-
@ConfigDefault("\"Embulk BigQuery plugin\"")
|
64
|
-
String getApplicationName();
|
65
|
-
|
66
|
-
@Config("path_prefix")
|
67
|
-
String getPathPrefix();
|
68
|
-
|
69
|
-
@Config("sequence_format")
|
70
|
-
@ConfigDefault("\".%03d.%02d\"")
|
71
|
-
String getSequenceFormat();
|
72
|
-
|
73
|
-
@Config("file_ext")
|
74
|
-
String getFileNameExtension();
|
75
|
-
|
76
|
-
@Config("source_format")
|
77
|
-
@ConfigDefault("\"CSV\"")
|
78
|
-
SourceFormat getSourceFormat();
|
79
|
-
|
80
|
-
@Config("field_delimiter")
|
81
|
-
@ConfigDefault("\",\"")
|
82
|
-
char getFieldDelimiter();
|
83
|
-
|
84
|
-
@Config("max_bad_records")
|
85
|
-
@ConfigDefault("0")
|
86
|
-
int getMaxBadrecords();
|
87
|
-
|
88
|
-
@Config("encoding")
|
89
|
-
@ConfigDefault("\"UTF-8\"")
|
90
|
-
Charset getEncoding();
|
91
|
-
|
92
|
-
@Config("delete_from_local_when_job_end")
|
93
|
-
@ConfigDefault("false")
|
94
|
-
boolean getDeleteFromLocalWhenJobEnd();
|
95
|
-
|
96
|
-
@Config("project")
|
97
|
-
String getProject();
|
98
|
-
|
99
|
-
@Config("dataset")
|
100
|
-
String getDataset();
|
101
|
-
|
102
|
-
@Config("table")
|
103
|
-
String getTable();
|
104
|
-
|
105
|
-
@Config("auto_create_table")
|
106
|
-
@ConfigDefault("false")
|
107
|
-
boolean getAutoCreateTable();
|
108
|
-
|
109
|
-
// kept for backward compatibility
|
110
|
-
@Config("schema_path")
|
111
|
-
@ConfigDefault("null")
|
112
|
-
Optional<String> getSchemaPath();
|
113
|
-
|
114
|
-
@Config("schema_file")
|
115
|
-
@ConfigDefault("null")
|
116
|
-
Optional<LocalFile> getSchemaFile();
|
117
|
-
void setSchemaFile(Optional<LocalFile> schemaFile);
|
118
|
-
|
119
|
-
@Config("template_table")
|
120
|
-
@ConfigDefault("null")
|
121
|
-
Optional<String> getTemplateTable();
|
122
|
-
|
123
|
-
@Config("prevent_duplicate_insert")
|
124
|
-
@ConfigDefault("false")
|
125
|
-
boolean getPreventDuplicateInsert();
|
126
|
-
|
127
|
-
@Config("job_status_max_polling_time")
|
128
|
-
@ConfigDefault("3600")
|
129
|
-
int getJobStatusMaxPollingTime();
|
130
|
-
|
131
|
-
@Config("job_status_polling_interval")
|
132
|
-
@ConfigDefault("10")
|
133
|
-
int getJobStatusPollingInterval();
|
134
|
-
|
135
|
-
@Config("is_skip_job_result_check")
|
136
|
-
@ConfigDefault("false")
|
137
|
-
boolean getIsSkipJobResultCheck();
|
138
|
-
|
139
|
-
@Config("ignore_unknown_values")
|
140
|
-
@ConfigDefault("false")
|
141
|
-
boolean getIgnoreUnknownValues();
|
142
|
-
|
143
|
-
@Config("allow_quoted_newlines")
|
144
|
-
@ConfigDefault("false")
|
145
|
-
boolean getAllowQuotedNewlines();
|
146
|
-
|
147
|
-
@Config("mode")
|
148
|
-
@ConfigDefault("\"append\"")
|
149
|
-
Mode getMode();
|
150
|
-
}
|
151
|
-
|
152
|
-
private final Logger log = Exec.getLogger(BigqueryOutputPlugin.class);
|
153
|
-
private static final String temporaryTableSuffix = Long.toString(System.currentTimeMillis());
|
154
|
-
private static BigqueryWriter bigQueryWriter;
|
155
|
-
|
156
|
-
@Override
|
157
|
-
public ConfigDiff transaction(ConfigSource config, int taskCount,
|
158
|
-
FileOutputPlugin.Control control)
|
159
|
-
{
|
160
|
-
final PluginTask task = config.loadConfig(PluginTask.class);
|
161
|
-
|
162
|
-
if (task.getP12KeyfilePath().isPresent()) {
|
163
|
-
if (task.getP12Keyfile().isPresent()) {
|
164
|
-
throw new ConfigException("Setting both p12_keyfile_path and p12_keyfile is invalid");
|
165
|
-
}
|
166
|
-
try {
|
167
|
-
task.setP12Keyfile(Optional.of(LocalFile.of(task.getP12KeyfilePath().get())));
|
168
|
-
}
|
169
|
-
catch (IOException ex) {
|
170
|
-
throw Throwables.propagate(ex);
|
171
|
-
}
|
172
|
-
}
|
173
|
-
|
174
|
-
if (task.getSchemaPath().isPresent()) {
|
175
|
-
if (task.getSchemaFile().isPresent()) {
|
176
|
-
throw new ConfigException("Setting both p12_keyfile_path and p12_keyfile is invalid");
|
177
|
-
}
|
178
|
-
try {
|
179
|
-
task.setSchemaFile(Optional.of(LocalFile.of(task.getSchemaPath().get())));
|
180
|
-
}
|
181
|
-
catch (IOException ex) {
|
182
|
-
throw Throwables.propagate(ex);
|
183
|
-
}
|
184
|
-
}
|
185
|
-
|
186
|
-
if (task.getAuthMethod().getString().equals("json_key")) {
|
187
|
-
if (!task.getJsonKeyfile().isPresent()) {
|
188
|
-
throw new ConfigException("If auth_method is json_key, you have to set json_keyfile");
|
189
|
-
}
|
190
|
-
}
|
191
|
-
else if (task.getAuthMethod().getString().equals("private_key")) {
|
192
|
-
if (!task.getP12Keyfile().isPresent() || !task.getServiceAccountEmail().isPresent()) {
|
193
|
-
throw new ConfigException("If auth_method is private_key, you have to set both service_account_email and p12_keyfile");
|
194
|
-
}
|
195
|
-
}
|
196
|
-
|
197
|
-
if (task.getMode().isReplaceMode()) {
|
198
|
-
if (task.getIsSkipJobResultCheck()) {
|
199
|
-
throw new ConfigException("If mode is replace or replace_backup, is_skip_job_result_check must be false");
|
200
|
-
}
|
201
|
-
}
|
202
|
-
|
203
|
-
if (task.getMode().isDeleteInAdvance()) {
|
204
|
-
if (!task.getAutoCreateTable()) {
|
205
|
-
throw new ConfigException("If mode is delete_in_advance, auto_create_table must be true");
|
206
|
-
}
|
207
|
-
}
|
208
|
-
|
209
|
-
try {
|
210
|
-
bigQueryWriter = new BigqueryWriter.Builder(
|
211
|
-
task.getAuthMethod().getString(),
|
212
|
-
task.getServiceAccountEmail(),
|
213
|
-
task.getP12Keyfile().transform(localFileToPathString()),
|
214
|
-
task.getJsonKeyfile().transform(localFileToPathString()),
|
215
|
-
task.getApplicationName())
|
216
|
-
.setProject(task.getProject())
|
217
|
-
.setDataset(task.getDataset())
|
218
|
-
.setTable(task.getTable())
|
219
|
-
.setAutoCreateTable(task.getAutoCreateTable())
|
220
|
-
.setSchemaPath(task.getSchemaFile().transform(localFileToPathString()))
|
221
|
-
.setTemplateTable(task.getTemplateTable())
|
222
|
-
.setSourceFormat(task.getSourceFormat().getString())
|
223
|
-
.setFieldDelimiter(String.valueOf(task.getFieldDelimiter()))
|
224
|
-
.setMaxBadRecords(task.getMaxBadrecords())
|
225
|
-
.setEncoding(String.valueOf(task.getEncoding()))
|
226
|
-
.setPreventDuplicateInsert(task.getPreventDuplicateInsert())
|
227
|
-
.setJobStatusMaxPollingTime(task.getJobStatusMaxPollingTime())
|
228
|
-
.setJobStatusPollingInterval(task.getJobStatusPollingInterval())
|
229
|
-
.setIsSkipJobResultCheck(task.getIsSkipJobResultCheck())
|
230
|
-
.setIgnoreUnknownValues(task.getIgnoreUnknownValues())
|
231
|
-
.setAllowQuotedNewlines(task.getAllowQuotedNewlines())
|
232
|
-
.build();
|
233
|
-
}
|
234
|
-
catch (IOException | GeneralSecurityException ex) {
|
235
|
-
throw new ConfigException(ex);
|
236
|
-
}
|
237
|
-
// non-retryable (non-idempotent) output:
|
238
|
-
return resume(task.dump(), taskCount, control);
|
239
|
-
}
|
240
|
-
|
241
|
-
@Override
|
242
|
-
public ConfigDiff resume(TaskSource taskSource,
|
243
|
-
int taskCount,
|
244
|
-
FileOutputPlugin.Control control)
|
245
|
-
{
|
246
|
-
Mode mode = taskSource.get(Mode.class, "Mode");
|
247
|
-
String project = taskSource.get(String.class, "Project");
|
248
|
-
String dataset = taskSource.get(String.class, "Dataset");
|
249
|
-
String tableName = taskSource.get(String.class, "Table");
|
250
|
-
|
251
|
-
if (mode == Mode.delete_in_advance) {
|
252
|
-
try {
|
253
|
-
bigQueryWriter.deleteTable(project, dataset, generateTableName(tableName));
|
254
|
-
}
|
255
|
-
catch (IOException ex) {
|
256
|
-
log.warn(ex.getMessage());
|
257
|
-
}
|
258
|
-
}
|
259
|
-
|
260
|
-
control.run(taskSource);
|
261
|
-
|
262
|
-
if (mode.isReplaceMode()) {
|
263
|
-
try {
|
264
|
-
if (mode == Mode.replace_backup && bigQueryWriter.isExistTable(project, dataset, generateTableName(tableName))) {
|
265
|
-
bigQueryWriter.replaceTable(project, dataset, generateTableName(tableName) + "_old", generateTableName(tableName));
|
266
|
-
}
|
267
|
-
bigQueryWriter.replaceTable(project, dataset, generateTableName(tableName), generateTemporaryTableName(tableName));
|
268
|
-
}
|
269
|
-
catch (TimeoutException | BigqueryWriter.JobFailedException | IOException ex) {
|
270
|
-
log.error(ex.getMessage());
|
271
|
-
throw Throwables.propagate(ex);
|
272
|
-
}
|
273
|
-
finally {
|
274
|
-
try {
|
275
|
-
bigQueryWriter.deleteTable(project, dataset, generateTemporaryTableName(tableName));
|
276
|
-
}
|
277
|
-
catch (IOException ex) {
|
278
|
-
log.warn(ex.getMessage());
|
279
|
-
}
|
280
|
-
}
|
281
|
-
}
|
282
|
-
|
283
|
-
return Exec.newConfigDiff();
|
284
|
-
}
|
285
|
-
|
286
|
-
@Override
|
287
|
-
public void cleanup(TaskSource taskSource,
|
288
|
-
int taskCount,
|
289
|
-
List<TaskReport> successTaskReports)
|
290
|
-
{
|
291
|
-
}
|
292
|
-
|
293
|
-
private Function<LocalFile, String> localFileToPathString()
|
294
|
-
{
|
295
|
-
return new Function<LocalFile, String>()
|
296
|
-
{
|
297
|
-
public String apply(LocalFile file)
|
298
|
-
{
|
299
|
-
return file.getPath().toString();
|
300
|
-
}
|
301
|
-
};
|
302
|
-
}
|
303
|
-
|
304
|
-
@Override
|
305
|
-
public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
|
306
|
-
{
|
307
|
-
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
308
|
-
|
309
|
-
final String pathPrefix = task.getPathPrefix();
|
310
|
-
final String sequenceFormat = task.getSequenceFormat();
|
311
|
-
final String pathSuffix = task.getFileNameExtension();
|
312
|
-
|
313
|
-
return new TransactionalFileOutput() {
|
314
|
-
private final String project = task.getProject();
|
315
|
-
private final String dataset = task.getDataset();
|
316
|
-
private final String table = task.getMode().isReplaceMode() ?
|
317
|
-
generateTemporaryTableName(task.getTable()) : generateTableName(task.getTable());
|
318
|
-
private final boolean deleteFromLocalWhenJobEnd = task.getDeleteFromLocalWhenJobEnd();
|
319
|
-
|
320
|
-
private int fileIndex = 0;
|
321
|
-
private BufferedOutputStream output = null;
|
322
|
-
private File file;
|
323
|
-
private String filePath;
|
324
|
-
|
325
|
-
public void nextFile()
|
326
|
-
{
|
327
|
-
closeFile();
|
328
|
-
|
329
|
-
try {
|
330
|
-
String suffix = pathSuffix;
|
331
|
-
if (!suffix.startsWith(".")) {
|
332
|
-
suffix = "." + suffix;
|
333
|
-
}
|
334
|
-
filePath = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + suffix;
|
335
|
-
file = new File(filePath);
|
336
|
-
|
337
|
-
String parentPath = file.getParent();
|
338
|
-
File dir = new File(parentPath);
|
339
|
-
if (!dir.exists()) {
|
340
|
-
dir.mkdir();
|
341
|
-
}
|
342
|
-
log.info(String.format("Writing file [%s]", filePath));
|
343
|
-
output = new BufferedOutputStream(new FileOutputStream(filePath));
|
344
|
-
}
|
345
|
-
catch (FileNotFoundException ex) {
|
346
|
-
throw Throwables.propagate(ex);
|
347
|
-
}
|
348
|
-
fileIndex++;
|
349
|
-
}
|
350
|
-
|
351
|
-
private void closeFile()
|
352
|
-
{
|
353
|
-
if (output != null) {
|
354
|
-
try {
|
355
|
-
output.close();
|
356
|
-
}
|
357
|
-
catch (IOException ex) {
|
358
|
-
throw Throwables.propagate(ex);
|
359
|
-
}
|
360
|
-
}
|
361
|
-
}
|
362
|
-
|
363
|
-
public void add(Buffer buffer)
|
364
|
-
{
|
365
|
-
try {
|
366
|
-
output.write(buffer.array(), buffer.offset(), buffer.limit());
|
367
|
-
}
|
368
|
-
catch (IOException ex) {
|
369
|
-
throw Throwables.propagate(ex);
|
370
|
-
}
|
371
|
-
finally {
|
372
|
-
buffer.release();
|
373
|
-
}
|
374
|
-
}
|
375
|
-
|
376
|
-
public void finish()
|
377
|
-
{
|
378
|
-
closeFile();
|
379
|
-
if (filePath != null) {
|
380
|
-
try {
|
381
|
-
bigQueryWriter.executeLoad(project, dataset, table, filePath);
|
382
|
-
|
383
|
-
if (deleteFromLocalWhenJobEnd) {
|
384
|
-
log.info(String.format("Delete local file [%s]", filePath));
|
385
|
-
file.delete();
|
386
|
-
}
|
387
|
-
}
|
388
|
-
catch (NoSuchAlgorithmException | TimeoutException | BigqueryWriter.JobFailedException | IOException ex) {
|
389
|
-
log.error(ex.getMessage());
|
390
|
-
throw Throwables.propagate(ex);
|
391
|
-
}
|
392
|
-
}
|
393
|
-
}
|
394
|
-
|
395
|
-
public void close()
|
396
|
-
{
|
397
|
-
closeFile();
|
398
|
-
}
|
399
|
-
|
400
|
-
public void abort()
|
401
|
-
{
|
402
|
-
}
|
403
|
-
|
404
|
-
public TaskReport commit()
|
405
|
-
{
|
406
|
-
TaskReport report = Exec.newTaskReport();
|
407
|
-
return report;
|
408
|
-
}
|
409
|
-
};
|
410
|
-
}
|
411
|
-
|
412
|
-
// Parse like "table_%Y_%m"(include pattern or not) format using Java is difficult. So use jRuby.
|
413
|
-
public String generateTableName(String tableName)
|
414
|
-
{
|
415
|
-
ScriptingContainer jruby = new ScriptingContainer();
|
416
|
-
Object result = jruby.runScriptlet("Time.now.strftime('" + tableName + "')");
|
417
|
-
|
418
|
-
return result.toString();
|
419
|
-
}
|
420
|
-
|
421
|
-
public String generateTemporaryTableName(String tableName)
|
422
|
-
{
|
423
|
-
return generateTableName(tableName) + temporaryTableSuffix;
|
424
|
-
}
|
425
|
-
|
426
|
-
public enum SourceFormat
|
427
|
-
{
|
428
|
-
CSV("CSV"),
|
429
|
-
NEWLINE_DELIMITED_JSON("NEWLINE_DELIMITED_JSON");
|
430
|
-
|
431
|
-
private final String string;
|
432
|
-
|
433
|
-
SourceFormat(String string)
|
434
|
-
{
|
435
|
-
this.string = string;
|
436
|
-
}
|
437
|
-
|
438
|
-
public String getString()
|
439
|
-
{
|
440
|
-
return string;
|
441
|
-
}
|
442
|
-
}
|
443
|
-
|
444
|
-
public enum AuthMethod
|
445
|
-
{
|
446
|
-
private_key("private_key"),
|
447
|
-
compute_engine("compute_engine"),
|
448
|
-
json_key("json_key");
|
449
|
-
|
450
|
-
private final String string;
|
451
|
-
|
452
|
-
AuthMethod(String string)
|
453
|
-
{
|
454
|
-
this.string = string;
|
455
|
-
}
|
456
|
-
|
457
|
-
public String getString()
|
458
|
-
{
|
459
|
-
return string;
|
460
|
-
}
|
461
|
-
}
|
462
|
-
|
463
|
-
public enum Mode
|
464
|
-
{
|
465
|
-
append("append"),
|
466
|
-
delete_in_advance("delete_in_advance") {
|
467
|
-
@Override
|
468
|
-
public boolean isDeleteInAdvance()
|
469
|
-
{
|
470
|
-
return true;
|
471
|
-
}
|
472
|
-
},
|
473
|
-
replace("replace") {
|
474
|
-
@Override
|
475
|
-
public boolean isReplaceMode()
|
476
|
-
{
|
477
|
-
return true;
|
478
|
-
}
|
479
|
-
},
|
480
|
-
replace_backup("replace_backup") {
|
481
|
-
@Override
|
482
|
-
public boolean isReplaceMode()
|
483
|
-
{
|
484
|
-
return true;
|
485
|
-
}
|
486
|
-
};
|
487
|
-
|
488
|
-
private final String string;
|
489
|
-
|
490
|
-
Mode(String string)
|
491
|
-
{
|
492
|
-
this.string = string;
|
493
|
-
}
|
494
|
-
|
495
|
-
public String getString()
|
496
|
-
{
|
497
|
-
return string;
|
498
|
-
}
|
499
|
-
public boolean isReplaceMode()
|
500
|
-
{
|
501
|
-
return false;
|
502
|
-
}
|
503
|
-
public boolean isDeleteInAdvance()
|
504
|
-
{
|
505
|
-
return false;
|
506
|
-
}
|
507
|
-
}
|
508
|
-
}
|