embulk-output-bigquery 0.1.11 → 0.2.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 9eed778576242d0ea0d44e445c5f126e93cc6333
- data.tar.gz: 96bc25e1ff2caccfbd7f2a4be1730b80dca185de
+ metadata.gz: 9e4b4a5467fb4589a3d88541af95ee5b002a5f9d
+ data.tar.gz: 2845c3d881b88183bdf3355647fbfecd6e455372
  SHA512:
- metadata.gz: 95d91febb961ede313d9aa66e32a177812e3a32a96203de2d82cc8d5682517be5f688997b16123d0cedd820d858762fc90755dce109ce89f32b6bb96496226e7
- data.tar.gz: 778932e6c37a30992ffb2593ee69cb6be463369d122b8339e794ec063083606dd486fbfe5b21f31a6468556e1166654a3caa22422bc0f78af75c2396291ddd08
+ metadata.gz: 61e5481dd18b750555f3462d680cb2b4d680ce768683780e4b2ce2cac6e66134c7eb38227adf8e48a4655447e27e166f14487c129fe9154a164131a4be06270b
+ data.tar.gz: 09036cb1d4b20556cb36e11839c7807b00ae453976b4192f9c6f489284aa5f1cbe48b6c42e5b9149b1ed3c3d04a92ee9546cce030b3d9e0c0929f70e365d3ae7
data/ChangeLog CHANGED
@@ -1,3 +1,7 @@
+ Release 0.2.0 - 2016-01-26
+
+ * Added a mode parameter and support for 4 modes (append, replace, replace_backup, delete_in_advance). Thanks, @joker1007!
+
  Release 0.1.11 - 2015-11-16

  * Change error result display for easy investigation.
data/README.md CHANGED
@@ -28,6 +28,7 @@ OAuth flow for installed applications.

  | name                   | type   | required?                                | default       | description                                   |
  |:-----------------------|:-------|:-----------------------------------------|:--------------|:----------------------------------------------|
+ | mode                   | string | optional                                 | "append"      | [See below](#mode)                            |
  | auth_method            | string | optional                                 | "private_key" | `private_key`, `json_key` or `compute_engine` |
  | service_account_email  | string | required when auth_method is private_key |               | Your Google service account email             |
  | p12_keyfile            | string | required when auth_method is private_key |               | Full path of private key in P12 (PKCS12) format |
@@ -80,6 +81,36 @@ out:
    - {type: gzip}
  ```

+ ### mode
+
+ Four modes are provided.
+
+ #### append
+
+ Default. In append mode, the plugin inserts data into the existing table.
+
+ #### replace
+
+ 1. Load to a temporary table.
+ 2. Copy the temporary table to the destination table. (WRITE_TRUNCATE)
+
+ ```is_skip_job_result_check``` must be false in replace mode.
+
+ #### replace_backup
+
+ 1. Load to a temporary table.
+ 2. Copy the destination table to a backup table. (table_name_old)
+ 3. Copy the temporary table to the destination table. (WRITE_TRUNCATE)
+
+ ```is_skip_job_result_check``` must be false in replace_backup mode.
+
+ #### delete_in_advance
+
+ 1. Delete the destination table, if it exists.
+ 2. Load to the destination table.
+
+ ```auto_create_table``` must be true in delete_in_advance mode.
+
  ### Authentication

  There are three methods supported to fetch access token for the service account.
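
The four modes documented in the README excerpt above boil down to a handful of BigQuery operations. As a reading aid, here is a simplified sketch of that flow; it is not the plugin's actual code, and `loadInto`, `copyTruncate`, and `deleteIfExists` are hypothetical placeholders for the load, copy (WRITE_TRUNCATE), and delete jobs issued through `BigqueryWriter` further down in this diff.

```java
// Simplified sketch of the documented mode behavior -- not the plugin's actual code.
enum Mode { APPEND, REPLACE, REPLACE_BACKUP, DELETE_IN_ADVANCE }

class ModeFlowSketch {
    void run(Mode mode, String table, String tempTable) {
        switch (mode) {
            case APPEND:
                loadInto(table);                      // insert into the existing table
                break;
            case DELETE_IN_ADVANCE:
                deleteIfExists(table);                // 1. delete the destination table if it exists
                loadInto(table);                      // 2. load into it (auto_create_table must be true)
                break;
            case REPLACE:
                loadInto(tempTable);                  // 1. load to a temporary table
                copyTruncate(tempTable, table);       // 2. overwrite the destination (WRITE_TRUNCATE)
                deleteIfExists(tempTable);            //    the temporary table is dropped afterwards
                break;
            case REPLACE_BACKUP:
                loadInto(tempTable);                  // 1. load to a temporary table
                copyTruncate(table, table + "_old");  // 2. back up the destination (skipped if it does not exist)
                copyTruncate(tempTable, table);       // 3. overwrite the destination (WRITE_TRUNCATE)
                deleteIfExists(tempTable);
                break;
        }
    }

    // Hypothetical placeholders; the real work is done by BigqueryWriter.
    void loadInto(String table) {}
    void copyTruncate(String from, String to) {}
    void deleteIfExists(String table) {}
}
```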
data/build.gradle CHANGED
@@ -15,7 +15,7 @@ configurations {
  sourceCompatibility = 1.7
  targetCompatibility = 1.7

- version = "0.1.11"
+ version = "0.2.0"

  dependencies {
      compile "org.embulk:embulk-core:0.7.1"
data/src/main/java/org/embulk/output/BigqueryOutputPlugin.java CHANGED
@@ -139,9 +139,14 @@ public class BigqueryOutputPlugin
  @Config("allow_quoted_newlines")
  @ConfigDefault("false")
  boolean getAllowQuotedNewlines();
+
+ @Config("mode")
+ @ConfigDefault("\"append\"")
+ Mode getMode();
  }

  private final Logger log = Exec.getLogger(BigqueryOutputPlugin.class);
+ private final static String temporaryTableSuffix = Long.toString(System.currentTimeMillis());
  private static BigqueryWriter bigQueryWriter;

  @Override
@@ -182,6 +187,18 @@ public class BigqueryOutputPlugin
  }
  }

+ if (task.getMode().isReplaceMode()) {
+     if (task.getIsSkipJobResultCheck()) {
+         throw new ConfigException("If mode is replace or replace_backup, is_skip_job_result_check must be false");
+     }
+ }
+
+ if (task.getMode().isDeleteInAdvance()) {
+     if (!task.getAutoCreateTable()) {
+         throw new ConfigException("If mode is delete_in_advance, auto_create_table must be true");
+     }
+ }
+
  try {
  bigQueryWriter = new BigqueryWriter.Builder (
  task.getAuthMethod().getString(),
@@ -217,8 +234,39 @@ public class BigqueryOutputPlugin
  int taskCount,
  FileOutputPlugin.Control control)
  {
+ Mode mode = taskSource.get(Mode.class, "Mode");
+ String project = taskSource.get(String.class, "Project");
+ String dataset = taskSource.get(String.class, "Dataset");
+ String tableName = taskSource.get(String.class, "Table");
+
+ if (mode == Mode.delete_in_advance) {
+     try {
+         bigQueryWriter.deleteTable(project, dataset, generateTableName(tableName));
+     } catch (IOException ex) {
+         log.warn(ex.getMessage());
+     }
+ }
+
  control.run(taskSource);

+ if (mode.isReplaceMode()) {
+     try {
+         if (mode == Mode.replace_backup && bigQueryWriter.isExistTable(project, dataset, generateTableName(tableName))) {
+             bigQueryWriter.replaceTable(project, dataset, generateTableName(tableName) + "_old", generateTableName(tableName));
+         }
+         bigQueryWriter.replaceTable(project, dataset, generateTableName(tableName), generateTemporaryTableName(tableName));
+     } catch (TimeoutException | BigqueryWriter.JobFailedException | IOException ex) {
+         log.error(ex.getMessage());
+         throw Throwables.propagate(ex);
+     } finally {
+         try {
+             bigQueryWriter.deleteTable(project, dataset, generateTemporaryTableName(tableName));
+         } catch (IOException ex) {
+             log.warn(ex.getMessage());
+         }
+     }
+ }
+
  return Exec.newConfigDiff();
  }

@@ -252,7 +300,8 @@ public class BigqueryOutputPlugin
  return new TransactionalFileOutput() {
  private final String project = task.getProject();
  private final String dataset = task.getDataset();
- private final String table = generateTableName(task.getTable());
+ private final String table = task.getMode().isReplaceMode() ?
+     generateTemporaryTableName(task.getTable()) : generateTableName(task.getTable());
  private final boolean deleteFromLocalWhenJobEnd = task.getDeleteFromLocalWhenJobEnd();

  private int fileIndex = 0;
@@ -351,6 +400,11 @@ public class BigqueryOutputPlugin
  return result.toString();
  }

+ public String generateTemporaryTableName(String tableName)
+ {
+     return generateTableName(tableName) + temporaryTableSuffix;
+ }
+
  public enum SourceFormat
  {
  CSV("CSV"),
@@ -387,4 +441,32 @@ public class BigqueryOutputPlugin
  return string;
  }
  }
+
+ public enum Mode
+ {
+     append("append"),
+     delete_in_advance("delete_in_advance") {
+         @Override
+         public boolean isDeleteInAdvance() { return true; }
+     },
+     replace("replace") {
+         @Override
+         public boolean isReplaceMode() { return true; }
+     },
+     replace_backup("replace_backup") {
+         @Override
+         public boolean isReplaceMode() { return true; }
+     };
+
+     private final String string;
+
+     Mode(String string)
+     {
+         this.string = string;
+     }
+
+     public String getString() { return string; }
+     public boolean isReplaceMode() { return false; }
+     public boolean isDeleteInAdvance() { return false; }
+ }
  }
data/src/main/java/org/embulk/output/BigqueryWriter.java CHANGED
@@ -11,6 +11,8 @@ import java.security.MessageDigest;
  import java.security.NoSuchAlgorithmException;
  import java.util.List;
  import java.util.concurrent.TimeoutException;
+
+ import com.google.api.services.bigquery.model.*;
  import com.google.common.base.Optional;
  import java.security.GeneralSecurityException;
  import com.fasterxml.jackson.databind.ObjectMapper;
@@ -24,16 +26,6 @@ import org.slf4j.Logger;
  import com.google.api.services.bigquery.Bigquery;
  import com.google.api.services.bigquery.Bigquery.Tables;
  import com.google.api.services.bigquery.Bigquery.Jobs.Insert;
- import com.google.api.services.bigquery.model.Job;
- import com.google.api.services.bigquery.model.JobConfiguration;
- import com.google.api.services.bigquery.model.JobConfigurationLoad;
- import com.google.api.services.bigquery.model.JobStatistics;
- import com.google.api.services.bigquery.model.JobReference;
- import com.google.api.services.bigquery.model.Table;
- import com.google.api.services.bigquery.model.TableSchema;
- import com.google.api.services.bigquery.model.TableReference;
- import com.google.api.services.bigquery.model.TableFieldSchema;
- import com.google.api.services.bigquery.model.ErrorProto;
  import com.google.api.client.googleapis.json.GoogleJsonResponseException;
  import com.google.api.client.googleapis.media.MediaHttpUploader;
  import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener;
@@ -190,6 +182,44 @@ public class BigqueryWriter
  }
  }

+ public void replaceTable(String project, String dataset, String oldTable, String newTable)
+         throws TimeoutException, JobFailedException, IOException
+ {
+     copyTable(project, dataset, newTable, oldTable, false);
+ }
+
+ public void copyTable(String project, String dataset, String fromTable, String toTable, boolean append)
+         throws TimeoutException, JobFailedException, IOException
+ {
+     log.info(String.format("Copy Job preparing... project:%s dataset:%s from:%s to:%s", project, dataset, fromTable, toTable));
+
+     Job job = new Job();
+     JobReference jobRef = null;
+     JobConfiguration jobConfig = new JobConfiguration().setCopy(setCopyConfig(project, dataset, fromTable, toTable, append));
+     job.setConfiguration(jobConfig);
+     Insert insert = bigQueryClient.jobs().insert(project, job);
+     insert.setProjectId(project);
+     insert.setDisableGZipContent(true);
+
+     try {
+         jobRef = insert.execute().getJobReference();
+     } catch (IllegalStateException ex) {
+         throw new JobFailedException(ex.getMessage());
+     }
+     log.info(String.format("Job executed. job id:[%s]", jobRef.getJobId()));
+     getJobStatusUntilDone(project, jobRef);
+ }
+
+ public void deleteTable(String project, String dataset, String table) throws IOException {
+     try {
+         Tables.Delete delete = bigQueryClient.tables().delete(project, dataset, table);
+         delete.execute();
+         log.info(String.format("Table deleted. project:%s dataset:%s table:%s", delete.getProjectId(), delete.getDatasetId(), delete.getTableId()));
+     } catch (GoogleJsonResponseException ex) {
+         log.warn(ex.getMessage());
+     }
+ }
+
  private JobConfigurationLoad setLoadConfig(String project, String dataset, String table)
  {
  JobConfigurationLoad config = new JobConfigurationLoad();
@@ -214,6 +244,21 @@ public class BigqueryWriter
  return config;
  }

+ private JobConfigurationTableCopy setCopyConfig(String project, String dataset, String fromTable, String toTable, boolean append)
+ {
+     JobConfigurationTableCopy config = new JobConfigurationTableCopy();
+     config.setSourceTable(createTableReference(project, dataset, fromTable))
+           .setDestinationTable(createTableReference(project, dataset, toTable));
+
+     if (append) {
+         config.setWriteDisposition("WRITE_APPEND");
+     } else {
+         config.setWriteDisposition("WRITE_TRUNCATE");
+     }
+
+     return config;
+ }
+
  private String createJobId(ImmutableList<String> elements) throws NoSuchAlgorithmException, IOException
  {
  StringBuilder sb = new StringBuilder();
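
For orientation, here is a minimal sketch of how the plugin's `resume()` (shown earlier in this diff) drives these new writer methods during a replace run. It assumes an already-built `BigqueryWriter` instance; the `org.embulk.output` package is taken from the gem's file list, and error handling is reduced to the essentials.

```java
import java.io.IOException;
import java.util.concurrent.TimeoutException;

import org.embulk.output.BigqueryWriter;

class ReplaceRunSketch {
    static void replace(BigqueryWriter writer, String project, String dataset,
                        String table, String tempTable)
            throws TimeoutException, BigqueryWriter.JobFailedException, IOException {
        try {
            // Copy the freshly loaded temporary table over the destination (WRITE_TRUNCATE).
            writer.replaceTable(project, dataset, table, tempTable);
        } finally {
            // Drop the temporary table even if the copy job failed.
            writer.deleteTable(project, dataset, tempTable);
        }
    }
}
```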
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: embulk-output-bigquery
  version: !ruby/object:Gem::Version
- version: 0.1.11
+ version: 0.2.0
  platform: ruby
  authors:
  - Satoshi Akama
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-11-16 00:00:00.000000000 Z
+ date: 2016-01-26 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    requirement: !ruby/object:Gem::Requirement
@@ -63,7 +63,7 @@ files:
  - src/test/java/org/embulk/output/TestBigqueryWriter.java
  - classpath/commons-codec-1.3.jar
  - classpath/commons-logging-1.1.1.jar
- - classpath/embulk-output-bigquery-0.1.11.jar
+ - classpath/embulk-output-bigquery-0.2.0.jar
  - classpath/google-api-client-1.20.0.jar
  - classpath/google-api-services-bigquery-v2-rev205-1.20.0.jar
  - classpath/google-http-client-1.20.0.jar