embulk-input-bigquery_extract_files 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/LICENSE.txt +21 -0
- data/README.md +92 -0
- data/build.gradle +102 -0
- data/config.yml +30 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +5 -0
- data/gradlew +172 -0
- data/gradlew.bat +84 -0
- data/lib/embulk/input/bigquery_extract_files.rb +3 -0
- data/src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportGcsFileInputPlugin.java +340 -0
- data/src/main/java/org/embulk/input/bigquery_export_gcs/BigqueryExportUtils.java +444 -0
- data/src/test/java/org/embulk/input/bigquery_export_gcs/TestGoogleCloudAccessData.java +33 -0
- data/src/test/java/org/embulk/input/bigquery_export_gcs/TestPluginFunctions.java +56 -0
- data/src/test/java/org/embulk/input/bigquery_export_gcs/UnitTestInitializer.java +86 -0
- metadata +101 -0
@@ -0,0 +1,444 @@
|
|
1
|
+
package org.embulk.input.bigquery_export_gcs;
|
2
|
+
|
3
|
+
import java.io.File;
|
4
|
+
import java.io.FileInputStream;
|
5
|
+
import java.io.FileNotFoundException;
|
6
|
+
import java.io.FileOutputStream;
|
7
|
+
import java.io.IOException;
|
8
|
+
import java.io.InputStream;
|
9
|
+
import java.nio.file.FileSystems;
|
10
|
+
import java.nio.file.Path;
|
11
|
+
import java.util.Date;
|
12
|
+
import java.util.List;
|
13
|
+
import java.util.UUID;
|
14
|
+
import java.util.regex.Matcher;
|
15
|
+
import java.util.regex.Pattern;
|
16
|
+
|
17
|
+
import org.apache.commons.lang3.StringUtils;
|
18
|
+
import org.apache.commons.lang3.time.FastDateFormat;
|
19
|
+
import org.embulk.input.bigquery_export_gcs.BigqueryExportGcsFileInputPlugin.PluginTask;
|
20
|
+
import org.embulk.spi.Exec;
|
21
|
+
import org.embulk.spi.Schema;
|
22
|
+
import org.embulk.spi.type.Types;
|
23
|
+
import org.slf4j.Logger;
|
24
|
+
|
25
|
+
import com.fasterxml.jackson.core.JsonProcessingException;
|
26
|
+
import com.fasterxml.jackson.databind.ObjectMapper;
|
27
|
+
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
|
28
|
+
import com.google.api.client.http.HttpTransport;
|
29
|
+
import com.google.api.client.http.javanet.NetHttpTransport;
|
30
|
+
import com.google.api.client.json.JsonFactory;
|
31
|
+
import com.google.api.client.json.jackson2.JacksonFactory;
|
32
|
+
import com.google.api.client.repackaged.com.google.common.base.Strings;
|
33
|
+
import com.google.api.services.bigquery.Bigquery;
|
34
|
+
import com.google.api.services.bigquery.Bigquery.Jobs.Insert;
|
35
|
+
import com.google.api.services.bigquery.Bigquery.Tables.Delete;
|
36
|
+
import com.google.api.services.bigquery.BigqueryScopes;
|
37
|
+
import com.google.api.services.bigquery.model.Job;
|
38
|
+
import com.google.api.services.bigquery.model.JobConfiguration;
|
39
|
+
import com.google.api.services.bigquery.model.JobConfigurationExtract;
|
40
|
+
import com.google.api.services.bigquery.model.JobConfigurationQuery;
|
41
|
+
import com.google.api.services.bigquery.model.JobReference;
|
42
|
+
import com.google.api.services.bigquery.model.Table;
|
43
|
+
import com.google.api.services.bigquery.model.TableFieldSchema;
|
44
|
+
import com.google.api.services.bigquery.model.TableReference;
|
45
|
+
import com.google.api.services.bigquery.model.TableSchema;
|
46
|
+
import com.google.api.services.storage.Storage;
|
47
|
+
import com.google.api.services.storage.StorageScopes;
|
48
|
+
import com.google.api.services.storage.model.Bucket;
|
49
|
+
import com.google.api.services.storage.model.Objects;
|
50
|
+
import com.google.api.services.storage.model.StorageObject;
|
51
|
+
import com.google.common.base.Optional;
|
52
|
+
import com.google.common.collect.ImmutableList;
|
53
|
+
import com.google.common.collect.Lists;
|
54
|
+
|
55
|
+
/**
|
56
|
+
*
|
57
|
+
*
|
58
|
+
*
|
59
|
+
* #reference :
|
60
|
+
*
|
61
|
+
* # https://github.com/embulk/embulk
|
62
|
+
* # https://github.com/embulk/embulk-input-s3
|
63
|
+
* # https://github.com/embulk/embulk-input-gcs
|
64
|
+
* # https://github.com/embulk/embulk-input-jdbc
|
65
|
+
* # https://github.com/GoogleCloudPlatform/java-docs-samples/blob/master/storage/json-api/src/main/java/StorageSample.java
|
66
|
+
*
|
67
|
+
*
|
68
|
+
* @author george 2017. 11. 16.
|
69
|
+
*
|
70
|
+
*/
|
71
|
+
public class BigqueryExportUtils
|
72
|
+
{
|
73
|
+
private static final Logger log = Exec.getLogger(BigqueryExportUtils.class);
|
74
|
+
|
75
|
+
|
76
|
+
public static String parseQueryToBaseTableName(String query){
|
77
|
+
if( query == null){
|
78
|
+
return null;
|
79
|
+
}
|
80
|
+
|
81
|
+
Pattern p = Pattern.compile(" from [\\[]?([^ \\$\\[\\]]+)[\\]]?", Pattern.CASE_INSENSITIVE);
|
82
|
+
Matcher m = p.matcher(query);
|
83
|
+
if(m.find() && m.groupCount() > 0){
|
84
|
+
return Strings.nullToEmpty(m.group(1)).replaceAll(".*\\.","").replaceAll("[^\\w\\s]","");
|
85
|
+
}else{
|
86
|
+
return null;
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
public static String generateTempTableName(String query){
|
91
|
+
return generateTempTableName(query, null);
|
92
|
+
}
|
93
|
+
|
94
|
+
public static String generateTempTableName(String query, String tablename){
|
95
|
+
|
96
|
+
String tname = tablename;
|
97
|
+
|
98
|
+
if (tname == null){
|
99
|
+
tname = parseQueryToBaseTableName(query);
|
100
|
+
if(tname == null){
|
101
|
+
tname = "temp";
|
102
|
+
}
|
103
|
+
}
|
104
|
+
|
105
|
+
return "embulk_" + tname + "_" + FastDateFormat.getInstance("yyyyMMdd_HHmmss").format(new Date()) + "_" + UUID.randomUUID().toString().replaceAll("-", "");
|
106
|
+
}
|
107
|
+
|
108
|
+
|
109
|
+
public static void executeQueryToDestinationWorkTable(Bigquery bigquery, PluginTask task) throws IOException, InterruptedException {
|
110
|
+
|
111
|
+
log.info("extract query result {} => {}.{} ",task.getQuery().get(), task.getWorkDataset(), task.getWorkTable());
|
112
|
+
|
113
|
+
JobConfigurationQuery queryConfig = new JobConfigurationQuery();
|
114
|
+
queryConfig.setQuery(task.getQuery().get());
|
115
|
+
queryConfig.setDestinationTable(new TableReference()
|
116
|
+
.setProjectId(task.getProject())
|
117
|
+
.setDatasetId(task.getWorkDataset())
|
118
|
+
.setTableId(task.getWorkTable()));
|
119
|
+
queryConfig.setUseLegacySql(task.getUseLegacySql());
|
120
|
+
queryConfig.setCreateDisposition(task.getCreateDisposition());
|
121
|
+
queryConfig.setWriteDisposition(task.getWriteDisposition());
|
122
|
+
queryConfig.setUseQueryCache(task.getQueryCache());
|
123
|
+
queryConfig.setAllowLargeResults(true);
|
124
|
+
|
125
|
+
com.google.api.services.bigquery.Bigquery.Jobs.Insert insert = bigquery.jobs().insert(task.getProject(),
|
126
|
+
new Job().setConfiguration(new JobConfiguration().setQuery(queryConfig))
|
127
|
+
);
|
128
|
+
Job jobRes = insert.execute(); // ~~~~~~~~~~~~~~~~~~~~~ API CALL
|
129
|
+
|
130
|
+
JobReference jobRef = jobRes.getJobReference();
|
131
|
+
String jobId = jobRef.getJobId();
|
132
|
+
|
133
|
+
log.info("query to Table jobId : {}",jobId);
|
134
|
+
log.info("waiting for job end....... ");
|
135
|
+
|
136
|
+
Job lastJob = waitForJob(bigquery, task.getProject(), jobId);
|
137
|
+
|
138
|
+
log.debug("waiting for job end....... {}", lastJob.toPrettyString());
|
139
|
+
}
|
140
|
+
|
141
|
+
public static void parseGcsUri(PluginTask task){
|
142
|
+
|
143
|
+
if(StringUtils.isEmpty(task.getGcsUri()) || false == task.getGcsUri().matches("gs://[^/]+/.+") ){
|
144
|
+
throw new RuntimeException("gcs_uri not found : " + task.getGcsUri());
|
145
|
+
}
|
146
|
+
|
147
|
+
task.setGcsBucket(task.getGcsUri().replaceAll("gs://([^/]+)/.+", "$1"));
|
148
|
+
task.setGcsBlobNamePrefix(task.getGcsUri().replaceAll("gs://[^/]+/(.+)", "$1").replaceAll("[\\*]*$", ""));
|
149
|
+
|
150
|
+
}
|
151
|
+
|
152
|
+
|
153
|
+
/***
|
154
|
+
*
|
155
|
+
* google cloud sdk
|
156
|
+
*
|
157
|
+
* @param task
|
158
|
+
* @throws IOException
|
159
|
+
* @throws FileNotFoundException
|
160
|
+
*/
|
161
|
+
public static Bigquery newBigqueryClient(PluginTask task) throws FileNotFoundException, IOException{
|
162
|
+
log.debug("# Starting Google BigQuery API ... ");
|
163
|
+
GoogleCredentialSet set = googleCredential(task);
|
164
|
+
return new Bigquery.Builder(set.transport, set.jsonFactory, set.googleCredential).setApplicationName("embulk-input-bigquey-export-gcs").build();
|
165
|
+
}
|
166
|
+
|
167
|
+
public static Storage newGcsClient(PluginTask task) throws FileNotFoundException, IOException{
|
168
|
+
log.debug("# Starting Google Cloud Storage ... ");
|
169
|
+
GoogleCredentialSet set = googleCredential(task);
|
170
|
+
return new Storage.Builder(set.transport, set.jsonFactory, set.googleCredential).setApplicationName("embulk-input-bigquey-export-gcs").build();
|
171
|
+
}
|
172
|
+
|
173
|
+
|
174
|
+
/**
 * Holder for a Google credential plus the HTTP transport and JSON factory
 * used to build the BigQuery/GCS API clients.
 */
public static class GoogleCredentialSet {
    // credential loaded from the service-account JSON key file; null until googleCredential(task) fills it in
    public GoogleCredential googleCredential = null;
    public HttpTransport transport = new NetHttpTransport();
    public JsonFactory jsonFactory = new JacksonFactory();
}
|
179
|
+
|
180
|
+
public static GoogleCredentialSet googleCredential(PluginTask task) throws IOException {
|
181
|
+
GoogleCredentialSet ret = new GoogleCredentialSet();
|
182
|
+
|
183
|
+
log.debug("### init googleCredentialFile : {} ",task.getJsonKeyfile());
|
184
|
+
|
185
|
+
ret.transport = new NetHttpTransport();
|
186
|
+
ret.jsonFactory = new JacksonFactory();
|
187
|
+
|
188
|
+
GoogleCredential credential = GoogleCredential.fromStream(new FileInputStream( task.getJsonKeyfile() ), ret.transport, ret.jsonFactory);
|
189
|
+
if (credential.createScopedRequired()) {
|
190
|
+
credential = credential.createScoped(BigqueryScopes.all()).createScoped(StorageScopes.all());
|
191
|
+
}
|
192
|
+
ret.googleCredential = credential;
|
193
|
+
return ret;
|
194
|
+
}
|
195
|
+
|
196
|
+
|
197
|
+
public static List<String> getFileListFromGcs(PluginTask task) throws FileNotFoundException, IOException{
|
198
|
+
Storage gcs = newGcsClient(task);
|
199
|
+
return getFileListFromGcs(gcs, task.getGcsBucket(), task.getGcsBlobNamePrefix());
|
200
|
+
}
|
201
|
+
|
202
|
+
public static List<String> getFileListFromGcs(Storage gcs, String bucket, String blobName) throws IOException{
|
203
|
+
ImmutableList.Builder<String> builder = ImmutableList.builder();
|
204
|
+
Storage.Objects.List listRequest = gcs.objects().list(bucket).setPrefix(blobName);
|
205
|
+
Objects objects;
|
206
|
+
|
207
|
+
do {
|
208
|
+
objects = listRequest.execute();
|
209
|
+
for(StorageObject obj : objects.getItems()){
|
210
|
+
builder.add(obj.getName());
|
211
|
+
}
|
212
|
+
listRequest.setPageToken(objects.getNextPageToken());
|
213
|
+
} while (null != objects.getNextPageToken());
|
214
|
+
|
215
|
+
return builder.build().asList();
|
216
|
+
}
|
217
|
+
|
218
|
+
public static final String TYPE_INTEGER = "INTEGER";
|
219
|
+
public static final String TYPE_STRING = "STRING";
|
220
|
+
public static final String TYPE_FLOAT = "FLOAT";
|
221
|
+
public static final String TYPE_TIMESTAMP = "TIMESTAMP";
|
222
|
+
|
223
|
+
public static Schema convertTableSchemaToEmbulkSchema(Table table){
|
224
|
+
Schema.Builder builder = Schema.builder();
|
225
|
+
TableSchema ts = table.getSchema();
|
226
|
+
for( TableFieldSchema field : ts.getFields() ){
|
227
|
+
String name = field.getName();
|
228
|
+
org.embulk.spi.type.Type type = Types.JSON;
|
229
|
+
switch(field.getType()){
|
230
|
+
case "INTEGER":
|
231
|
+
builder.add(name, Types.LONG);
|
232
|
+
break;
|
233
|
+
case "FLOAT":
|
234
|
+
builder.add(name, Types.DOUBLE);
|
235
|
+
break;
|
236
|
+
case "TIMESTAMP":
|
237
|
+
builder.add(name, Types.TIMESTAMP);
|
238
|
+
break;
|
239
|
+
default:
|
240
|
+
builder.add(name, Types.STRING);
|
241
|
+
break;
|
242
|
+
}
|
243
|
+
}
|
244
|
+
return builder.build();
|
245
|
+
}
|
246
|
+
|
247
|
+
public static void initWorkTableWithExecuteQuery(Bigquery bigquery, PluginTask task) throws FileNotFoundException, IOException, InterruptedException{
|
248
|
+
|
249
|
+
if(task.getQuery().isPresent()){
|
250
|
+
task.setWorkId(generateTempTableName(task.getQuery().get()));
|
251
|
+
|
252
|
+
if(task.getTempTable().isPresent() == false){
|
253
|
+
task.setTempTable(Optional.of(task.getWorkId()));
|
254
|
+
}
|
255
|
+
if(task.getTempDataset().isPresent() == false && task.getDataset().isPresent()){
|
256
|
+
task.setTempDataset(Optional.of(task.getDataset().get()));
|
257
|
+
}
|
258
|
+
|
259
|
+
// actual target table setting
|
260
|
+
task.setWorkDataset(task.getTempDataset().get());
|
261
|
+
task.setWorkTable(task.getTempTable().get());
|
262
|
+
|
263
|
+
// call google api
|
264
|
+
executeQueryToDestinationWorkTable(bigquery, task);
|
265
|
+
|
266
|
+
}else if(task.getTable().isPresent() && task.getDataset().isPresent()){
|
267
|
+
task.setWorkId(generateTempTableName(null, task.getTable().get()));
|
268
|
+
// actual target table setting
|
269
|
+
task.setWorkDataset(task.getDataset().get());
|
270
|
+
task.setWorkTable(task.getTable().get());
|
271
|
+
}else{
|
272
|
+
throw new IOException("please set config file [dataset]+[table] or [query]");
|
273
|
+
}
|
274
|
+
}
|
275
|
+
|
276
|
+
/**
 * Exports the work table to GCS via a BigQuery extract job and returns the
 * table's schema converted to an Embulk schema.
 *
 * Sequence: fetch table metadata, convert schema, submit an extract job
 * (format/compression from task config, destination = task.gcsUri), then
 * block until the job ends.
 *
 * @throws IOException          on BigQuery API errors
 * @throws InterruptedException if interrupted while polling the extract job
 */
public static Schema extractWorkTable(Bigquery bigquery, PluginTask task) throws FileNotFoundException, IOException, InterruptedException{

    // metadata fetch — also validates that the work table exists
    Table table = bigquery.tables().get(task.getProject(), task.getWorkDataset(), task.getWorkTable()).execute();

    Schema embulkSchema = convertTableSchemaToEmbulkSchema(table);
    //task.setSchame(embulkSchema);
    log.debug("Table Schema : {}", table.getSchema());

    //Tabledata. req = bigquery.tabledata().list(projectId, dataset, table);

    log.info("start table extract [{}.{}] to {} ...", task.getWorkDataset(), task.getWorkTable(), task.getGcsUri());

    // build the extract job: work table -> gcs_uri (may contain a '*' shard wildcard)
    Job jobReq = new Job();
    JobConfigurationExtract extract = new JobConfigurationExtract();
    extract.setDestinationFormat(task.getFileFormat().get());
    extract.setCompression(task.getCompression().get());
    extract.setDestinationUris(Lists.newArrayList(task.getGcsUri()));
    extract.setSourceTable(table.getTableReference());
    jobReq.setConfiguration(new JobConfiguration().setExtract(extract));

    Insert jobInsert = bigquery.jobs().insert(task.getProject(), jobReq);
    Job res = jobInsert.execute();

    JobReference jobRef = res.getJobReference();
    String jobId = jobRef.getJobId();
    log.info("extract jobId : {}",jobId);
    log.debug("waiting for job end....... ");

    // NOTE(review): the job's final status is not inspected here — a failed
    // extract is only visible later when no files appear in GCS; confirm.
    waitForJob(bigquery, task.getProject(), jobId);
    return embulkSchema;
}
|
307
|
+
|
308
|
+
public static Job waitForJob(Bigquery bigquery, String project, String jobId) throws IOException, InterruptedException{
|
309
|
+
int maxAttempts = 20;
|
310
|
+
int initialRetryDelay = 1000; // ms
|
311
|
+
Job pollingJob = null;
|
312
|
+
for (int i=0; i < maxAttempts; i++){
|
313
|
+
pollingJob = bigquery.jobs().get(project, jobId).execute();
|
314
|
+
String state = pollingJob.getStatus().getState();
|
315
|
+
log.debug("Job Status {} : {}",jobId, state);
|
316
|
+
if (pollingJob.getStatus().getState().equals("DONE")) {
|
317
|
+
break;
|
318
|
+
}
|
319
|
+
log.debug("wait 1 second and waiting for end ...");
|
320
|
+
Thread.sleep(initialRetryDelay);
|
321
|
+
|
322
|
+
}
|
323
|
+
return pollingJob;
|
324
|
+
}
|
325
|
+
|
326
|
+
/**
 * Placeholder schema predictor: always returns a single LONG column with an
 * empty name. Real schema inference happens in convertTableSchemaToEmbulkSchema;
 * the bigquery argument is currently unused.
 */
public static Schema predictSchema(Bigquery bigquery){
    Schema schema = Schema.builder().add("", org.embulk.spi.type.Types.LONG).build();
    return schema;
}
|
330
|
+
|
331
|
+
/**
|
332
|
+
*
|
333
|
+
* https://github.com/google/google-api-java-client-samples/blob/master/storage-cmdline-sample/src/main/java/com/google/api/services/samples/storage/examples/ObjectsDownloadExample.java
|
334
|
+
*
|
335
|
+
*/
|
336
|
+
/**
 * Downloads gs://{bucket}/{file} into the task's temp-local directory and
 * returns a FileInputStream over the downloaded copy.
 *
 * Contract: returns null (instead of throwing) when the GCS read or local
 * write fails — callers must handle a null stream. The temp file is not
 * deleted here.
 *
 * https://github.com/google/google-api-java-client-samples/blob/master/storage-cmdline-sample/src/main/java/com/google/api/services/samples/storage/examples/ObjectsDownloadExample.java
 */
public static InputStream openInputStream(PluginTask task, String file)
{
    try {

        Storage gcs = newGcsClient(task);

        // local destination: {tempLocalPath}/{basename of the gcs object}
        Path fullLocalFilePath = getFullPath(task, file);

        log.info("Start download : gs://{}/{} ...to ... {} ",task.getGcsBucket(), file, task.getTempLocalPath());

        Storage.Objects.Get getObject = gcs.objects().get(task.getGcsBucket(), file);
        // direct download: single request, no resumable-media chunking
        getObject.getMediaHttpDownloader().setDirectDownloadEnabled(true);

        // return getObject.executeMediaAsInputStream() // direct InputStream ?? I Think this is faster then temp file. but ...

        // download fully to disk first, then hand back a stream over the local copy
        try(FileOutputStream s = new FileOutputStream(fullLocalFilePath.toFile())){
            getObject.executeMediaAndDownloadTo(s);
        }
        return new FileInputStream(fullLocalFilePath.toFile());

    } catch (FileNotFoundException e) {
        log.error("gcs file not found error",e);
        return null;
    } catch(IOException e){
        log.error("gcs file read error",e);
        return null;
    }
}
|
366
|
+
|
367
|
+
|
368
|
+
public static Path getFullPath(PluginTask task, String file){
|
369
|
+
String baseName = file.replaceFirst(".*/", "");
|
370
|
+
Path fullLocalFilePath = FileSystems.getDefault().getPath(task.getTempLocalPath(), baseName);
|
371
|
+
return fullLocalFilePath ;
|
372
|
+
}
|
373
|
+
|
374
|
+
/**
 * Output format for serialized schema files. Only EMBULK serialization is
 * implemented; AVRO is accepted but currently unused — see generateSchemaJson.
 */
public enum SCHEMA_TYPE{
    EMBULK,
    AVRO
}
|
378
|
+
|
379
|
+
public static Schema decnodeSchemaJson(String json) {
|
380
|
+
ObjectMapper mapper = new ObjectMapper();
|
381
|
+
try {
|
382
|
+
Schema schema = mapper.readValue(json, Schema.class);
|
383
|
+
return schema;
|
384
|
+
} catch (Exception e) {
|
385
|
+
log.error("error when parse schema object : " + json,e);
|
386
|
+
return null;
|
387
|
+
}
|
388
|
+
}
|
389
|
+
|
390
|
+
public static void writeSchemaFile(Schema schema, String schemaType, File file) {
|
391
|
+
ObjectMapper mapper = new ObjectMapper();
|
392
|
+
try {
|
393
|
+
mapper.writeValue(file, schema);
|
394
|
+
} catch (Exception e) {
|
395
|
+
log.error("error when create schema json {}",file);
|
396
|
+
throw new RuntimeException(e);
|
397
|
+
}
|
398
|
+
}
|
399
|
+
|
400
|
+
public static String generateSchemaJson(Schema schema, String schemaType) {
|
401
|
+
SCHEMA_TYPE tp = SCHEMA_TYPE.EMBULK;
|
402
|
+
if(schemaType != null) {
|
403
|
+
tp.valueOf(schemaType);
|
404
|
+
}
|
405
|
+
|
406
|
+
ObjectMapper mapper = new ObjectMapper();
|
407
|
+
try {
|
408
|
+
String jsonString = mapper.writeValueAsString(schema);
|
409
|
+
return jsonString;
|
410
|
+
} catch (JsonProcessingException e) {
|
411
|
+
log.error("error when create schema json",e);
|
412
|
+
return null;
|
413
|
+
}
|
414
|
+
|
415
|
+
//for(Column col : schema.getColumns()) {
|
416
|
+
|
417
|
+
}
|
418
|
+
|
419
|
+
/**
|
420
|
+
*
|
421
|
+
* @param task
|
422
|
+
*/
|
423
|
+
/**
 * Best-effort deletion of the temp work table created in query mode.
 * Any failure is logged and swallowed so cleanup never aborts the run.
 *
 * @param task expected to have tempDataset/tempTable present (query mode);
 *        if absent, Optional.get() throws and is caught/logged below
 */
public static void removeTempTable(PluginTask task){
    try {
        log.info("Remove temp table {}.{}",task.getTempDataset().get(), task.getTempTable().get());
        Bigquery bigquery = newBigqueryClient(task);
        Delete del = bigquery.tables().delete(task.getProject(), task.getTempDataset().get(), task.getTempTable().get());
        del.execute();
    } catch (Exception e) {
        // deliberate catch-all: cleanup is best-effort
        log.error("# Remove temp table FAIL : " + task.getTempDataset().orNull() + "." + task.getTempTable().orNull(),e);
    }
}
|
433
|
+
|
434
|
+
public static void removeTempGcsFiles(PluginTask task, String file){
|
435
|
+
try {
|
436
|
+
Storage gcs = BigqueryExportUtils.newGcsClient(task);
|
437
|
+
Bucket bucket = gcs.buckets().get(task.getGcsBucket()).execute();
|
438
|
+
List<String> fileList = task.getFiles();
|
439
|
+
|
440
|
+
} catch (Exception e) {
|
441
|
+
log.error("# Remove temp table FAIL : " + task.getTempDataset().orNull() + "." + task.getTempTable().orNull(),e);
|
442
|
+
}
|
443
|
+
}
|
444
|
+
}
|
@@ -0,0 +1,33 @@
|
|
1
|
+
package org.embulk.input.bigquery_export_gcs;
|
2
|
+
|
3
|
+
import java.io.FileNotFoundException;
|
4
|
+
import java.io.IOException;
|
5
|
+
import java.io.InputStream;
|
6
|
+
|
7
|
+
import org.junit.Test;
|
8
|
+
import org.slf4j.Logger;
|
9
|
+
import org.slf4j.LoggerFactory;
|
10
|
+
|
11
|
+
/**
 * Integration tests that hit real Google Cloud resources. They depend on the
 * GCP_PROJECT environment variable and the config/credentials prepared by
 * UnitTestInitializer, so they cannot run in an isolated environment.
 */
public class TestGoogleCloudAccessData extends UnitTestInitializer
{
    private static final Logger log = LoggerFactory.getLogger(TestGoogleCloudAccessData.class);

    // sanity check that the test environment variable is visible
    @Test
    public void envTest(){
        log.info("{}",System.getenv("GCP_PROJECT"));
    }

    // end-to-end: run the BigQuery export, then stream the first produced file from GCS
    @Test
    public void testGcsInputStreamOpen() throws FileNotFoundException, IOException
    {
        BigqueryExportGcsFileInputPlugin.PluginTask task = config.loadConfig(BigqueryExportGcsFileInputPlugin.PluginTask.class );

        plugin.executeBigqueryApi(task);

        InputStream ins = BigqueryExportUtils.openInputStream(task, task.getFiles().get(0));

        // NOTE(review): openInputStream may return null on failure; this would NPE — confirm intended
        log.info("file size : {}",org.apache.commons.compress.utils.IOUtils.toByteArray(ins).length);
    }

}
|