embulk-output-bigquery 0.1.11 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9eed778576242d0ea0d44e445c5f126e93cc6333
4
- data.tar.gz: 96bc25e1ff2caccfbd7f2a4be1730b80dca185de
3
+ metadata.gz: 9e4b4a5467fb4589a3d88541af95ee5b002a5f9d
4
+ data.tar.gz: 2845c3d881b88183bdf3355647fbfecd6e455372
5
5
  SHA512:
6
- metadata.gz: 95d91febb961ede313d9aa66e32a177812e3a32a96203de2d82cc8d5682517be5f688997b16123d0cedd820d858762fc90755dce109ce89f32b6bb96496226e7
7
- data.tar.gz: 778932e6c37a30992ffb2593ee69cb6be463369d122b8339e794ec063083606dd486fbfe5b21f31a6468556e1166654a3caa22422bc0f78af75c2396291ddd08
6
+ metadata.gz: 61e5481dd18b750555f3462d680cb2b4d680ce768683780e4b2ce2cac6e66134c7eb38227adf8e48a4655447e27e166f14487c129fe9154a164131a4be06270b
7
+ data.tar.gz: 09036cb1d4b20556cb36e11839c7807b00ae453976b4192f9c6f489284aa5f1cbe48b6c42e5b9149b1ed3c3d04a92ee9546cce030b3d9e0c0929f70e365d3ae7
data/ChangeLog CHANGED
@@ -1,3 +1,7 @@
1
+ Release 0.2.0 - 2016-01-26
2
+
3
+ * Added the `mode` parameter with support for 4 modes (append, replace, replace_backup, delete_in_advance). @joker1007 thanks!
4
+
1
5
  Release 0.1.11 - 2015-11-16
2
6
 
3
7
  * Change error result display for easy investigation.
data/README.md CHANGED
@@ -28,6 +28,7 @@ OAuth flow for installed applications.
28
28
 
29
29
  | name | type | required? | default | description |
30
30
  |:--------------------------|:------------|:-----------|:-------------|:-----------------------|
31
+ | mode | string | optional | append | [See below](#mode) |
31
32
  | auth_method | string | optional | "private_key" | `private_key` , `json_key` or `compute_engine`
32
33
  | service_account_email | string | required when auth_method is private_key | | Your Google service account email
33
34
  | p12_keyfile | string | required when auth_method is private_key | | Fullpath of private key in P12(PKCS12) format |
@@ -80,6 +81,36 @@ out:
80
81
  - {type: gzip}
81
82
  ```
82
83
 
84
+ ### mode
85
+
86
+ 4 modes are provided.
87
+
88
+ #### append
89
+
90
+ Default. In append mode, the plugin inserts data into the existing table.
91
+
92
+ #### replace
93
+
94
+ 1. Load to temporary table.
95
+ 2. Copy temporary table to destination table. (WRITE_TRUNCATE)
96
+
97
+ ```is_skip_job_result_check``` must be false in replace mode.
98
+
99
+ #### replace_backup
100
+
101
+ 1. Load to temporary table.
102
+ 2. Copy destination table to backup table. (table_name_old)
103
+ 3. Copy temporary table to destination table. (WRITE_TRUNCATE)
104
+
105
+ ```is_skip_job_result_check``` must be false in replace_backup mode.
106
+
107
+ #### delete_in_advance
108
+
109
+ 1. Delete destination table, if it exists.
110
+ 2. Load to destination table.
111
+
112
+ ```auto_create_table``` must be true in delete_in_advance mode.
113
+
83
114
  ### Authentication
84
115
 
85
116
  There are three methods supported to fetch access token for the service account.
data/build.gradle CHANGED
@@ -15,7 +15,7 @@ configurations {
15
15
  sourceCompatibility = 1.7
16
16
  targetCompatibility = 1.7
17
17
 
18
- version = "0.1.11"
18
+ version = "0.2.0"
19
19
 
20
20
  dependencies {
21
21
  compile "org.embulk:embulk-core:0.7.1"
@@ -139,9 +139,14 @@ public class BigqueryOutputPlugin
139
139
  @Config("allow_quoted_newlines")
140
140
  @ConfigDefault("false")
141
141
  boolean getAllowQuotedNewlines();
142
+
143
+ @Config("mode")
144
+ @ConfigDefault("\"append\"")
145
+ Mode getMode();
142
146
  }
143
147
 
144
148
  private final Logger log = Exec.getLogger(BigqueryOutputPlugin.class);
149
+ private final static String temporaryTableSuffix = Long.toString(System.currentTimeMillis());
145
150
  private static BigqueryWriter bigQueryWriter;
146
151
 
147
152
  @Override
@@ -182,6 +187,18 @@ public class BigqueryOutputPlugin
182
187
  }
183
188
  }
184
189
 
190
+ if (task.getMode().isReplaceMode()) {
191
+ if (task.getIsSkipJobResultCheck()) {
192
+ throw new ConfigException("If mode is replace or replace_backup, is_skip_job_result_check must be false");
193
+ }
194
+ }
195
+
196
+ if (task.getMode().isDeleteInAdvance()) {
197
+ if (!task.getAutoCreateTable()) {
198
+ throw new ConfigException("If mode is delete_in_advance, auto_create_table must be true");
199
+ }
200
+ }
201
+
185
202
  try {
186
203
  bigQueryWriter = new BigqueryWriter.Builder (
187
204
  task.getAuthMethod().getString(),
@@ -217,8 +234,39 @@ public class BigqueryOutputPlugin
217
234
  int taskCount,
218
235
  FileOutputPlugin.Control control)
219
236
  {
237
+ Mode mode = taskSource.get(Mode.class, "Mode");
238
+ String project = taskSource.get(String.class, "Project");
239
+ String dataset = taskSource.get(String.class, "Dataset");
240
+ String tableName = taskSource.get(String.class, "Table");
241
+
242
+ if (mode == Mode.delete_in_advance) {
243
+ try {
244
+ bigQueryWriter.deleteTable(project, dataset, generateTableName(tableName));
245
+ } catch (IOException ex) {
246
+ log.warn(ex.getMessage());
247
+ }
248
+ }
249
+
220
250
  control.run(taskSource);
221
251
 
252
+ if (mode.isReplaceMode()) {
253
+ try {
254
+ if (mode == Mode.replace_backup && bigQueryWriter.isExistTable(project, dataset, generateTableName(tableName))) {
255
+ bigQueryWriter.replaceTable(project, dataset, generateTableName(tableName) + "_old", generateTableName(tableName));
256
+ }
257
+ bigQueryWriter.replaceTable(project, dataset, generateTableName(tableName), generateTemporaryTableName(tableName));
258
+ } catch (TimeoutException | BigqueryWriter.JobFailedException | IOException ex) {
259
+ log.error(ex.getMessage());
260
+ throw Throwables.propagate(ex);
261
+ } finally {
262
+ try {
263
+ bigQueryWriter.deleteTable(project, dataset, generateTemporaryTableName(tableName));
264
+ } catch (IOException ex) {
265
+ log.warn(ex.getMessage());
266
+ }
267
+ }
268
+ }
269
+
222
270
  return Exec.newConfigDiff();
223
271
  }
224
272
 
@@ -252,7 +300,8 @@ public class BigqueryOutputPlugin
252
300
  return new TransactionalFileOutput() {
253
301
  private final String project = task.getProject();
254
302
  private final String dataset = task.getDataset();
255
- private final String table = generateTableName(task.getTable());
303
+ private final String table = task.getMode().isReplaceMode() ?
304
+ generateTemporaryTableName(task.getTable()) : generateTableName(task.getTable());
256
305
  private final boolean deleteFromLocalWhenJobEnd = task.getDeleteFromLocalWhenJobEnd();
257
306
 
258
307
  private int fileIndex = 0;
@@ -351,6 +400,11 @@ public class BigqueryOutputPlugin
351
400
  return result.toString();
352
401
  }
353
402
 
403
+ public String generateTemporaryTableName(String tableName)
404
+ {
405
+ return generateTableName(tableName) + temporaryTableSuffix;
406
+ }
407
+
354
408
  public enum SourceFormat
355
409
  {
356
410
  CSV("CSV"),
@@ -387,4 +441,32 @@ public class BigqueryOutputPlugin
387
441
  return string;
388
442
  }
389
443
  }
444
+
445
/**
 * Output mode selected by the {@code mode} configuration parameter.
 *
 * Each constant carries its configuration string and answers two questions
 * used by the plugin: whether the mode loads through a temporary table and
 * then replaces the destination ({@link #isReplaceMode()}), and whether the
 * destination table is deleted before loading ({@link #isDeleteInAdvance()}).
 * Both predicates default to {@code false}; a constant overrides only the
 * one that applies to it.
 */
public enum Mode
{
    append("append"),
    delete_in_advance("delete_in_advance") {
        @Override
        public boolean isDeleteInAdvance() { return true; }
    },
    replace("replace") {
        @Override
        public boolean isReplaceMode() { return true; }
    },
    replace_backup("replace_backup") {
        @Override
        public boolean isReplaceMode() { return true; }
    };

    private final String string;

    Mode(String string)
    {
        this.string = string;
    }

    /** @return the configuration string of this mode */
    public String getString() { return string; }

    /** @return {@code true} only for {@code replace} and {@code replace_backup} */
    public boolean isReplaceMode() { return false; }

    /**
     * The default must be {@code false}: otherwise every mode would report
     * delete-in-advance and trigger the {@code auto_create_table} config check.
     *
     * @return {@code true} only for {@code delete_in_advance}
     */
    public boolean isDeleteInAdvance() { return false; }
}
390
472
  }
@@ -11,6 +11,8 @@ import java.security.MessageDigest;
11
11
  import java.security.NoSuchAlgorithmException;
12
12
  import java.util.List;
13
13
  import java.util.concurrent.TimeoutException;
14
+
15
+ import com.google.api.services.bigquery.model.*;
14
16
  import com.google.common.base.Optional;
15
17
  import java.security.GeneralSecurityException;
16
18
  import com.fasterxml.jackson.databind.ObjectMapper;
@@ -24,16 +26,6 @@ import org.slf4j.Logger;
24
26
  import com.google.api.services.bigquery.Bigquery;
25
27
  import com.google.api.services.bigquery.Bigquery.Tables;
26
28
  import com.google.api.services.bigquery.Bigquery.Jobs.Insert;
27
- import com.google.api.services.bigquery.model.Job;
28
- import com.google.api.services.bigquery.model.JobConfiguration;
29
- import com.google.api.services.bigquery.model.JobConfigurationLoad;
30
- import com.google.api.services.bigquery.model.JobStatistics;
31
- import com.google.api.services.bigquery.model.JobReference;
32
- import com.google.api.services.bigquery.model.Table;
33
- import com.google.api.services.bigquery.model.TableSchema;
34
- import com.google.api.services.bigquery.model.TableReference;
35
- import com.google.api.services.bigquery.model.TableFieldSchema;
36
- import com.google.api.services.bigquery.model.ErrorProto;
37
29
  import com.google.api.client.googleapis.json.GoogleJsonResponseException;
38
30
  import com.google.api.client.googleapis.media.MediaHttpUploader;
39
31
  import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener;
@@ -190,6 +182,44 @@ public class BigqueryWriter
190
182
  }
191
183
  }
192
184
 
185
+ public void replaceTable(String project, String dataset, String oldTable, String newTable)
186
+ throws TimeoutException, JobFailedException, IOException
187
+ {
188
+ copyTable(project, dataset, newTable, oldTable, false);
189
+ }
190
+
191
+ public void copyTable(String project, String dataset, String fromTable, String toTable, boolean append)
192
+ throws TimeoutException, JobFailedException, IOException
193
+ {
194
+ log.info(String.format("Copy Job preparing... project:%s dataset:%s from:%s to:%s", project, dataset, fromTable, toTable));
195
+
196
+ Job job = new Job();
197
+ JobReference jobRef = null;
198
+ JobConfiguration jobConfig = new JobConfiguration().setCopy(setCopyConfig(project, dataset, fromTable, toTable, append));
199
+ job.setConfiguration(jobConfig);
200
+ Insert insert = bigQueryClient.jobs().insert(project, job);
201
+ insert.setProjectId(project);
202
+ insert.setDisableGZipContent(true);
203
+
204
+ try {
205
+ jobRef = insert.execute().getJobReference();
206
+ } catch (IllegalStateException ex) {
207
+ throw new JobFailedException(ex.getMessage());
208
+ }
209
+ log.info(String.format("Job executed. job id:[%s]", jobRef.getJobId()));
210
+ getJobStatusUntilDone(project, jobRef);
211
+ }
212
+
213
+ public void deleteTable(String project, String dataset, String table) throws IOException {
214
+ try {
215
+ Tables.Delete delete = bigQueryClient.tables().delete(project, dataset, table);
216
+ delete.execute();
217
+ log.info(String.format("Table deleted. project:%s dataset:%s table:%s", delete.getProjectId(), delete.getDatasetId(), delete.getTableId()));
218
+ } catch (GoogleJsonResponseException ex) {
219
+ log.warn(ex.getMessage());
220
+ }
221
+ }
222
+
193
223
  private JobConfigurationLoad setLoadConfig(String project, String dataset, String table)
194
224
  {
195
225
  JobConfigurationLoad config = new JobConfigurationLoad();
@@ -214,6 +244,21 @@ public class BigqueryWriter
214
244
  return config;
215
245
  }
216
246
 
247
+ private JobConfigurationTableCopy setCopyConfig(String project, String dataset, String fromTable, String toTable, boolean append)
248
+ {
249
+ JobConfigurationTableCopy config = new JobConfigurationTableCopy();
250
+ config.setSourceTable(createTableReference(project, dataset, fromTable))
251
+ .setDestinationTable(createTableReference(project, dataset, toTable));
252
+
253
+ if (append) {
254
+ config.setWriteDisposition("WRITE_APPEND");
255
+ } else {
256
+ config.setWriteDisposition("WRITE_TRUNCATE");
257
+ }
258
+
259
+ return config;
260
+ }
261
+
217
262
  private String createJobId(ImmutableList<String> elements) throws NoSuchAlgorithmException, IOException
218
263
  {
219
264
  StringBuilder sb = new StringBuilder();
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.11
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-16 00:00:00.000000000 Z
11
+ date: 2016-01-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -63,7 +63,7 @@ files:
63
63
  - src/test/java/org/embulk/output/TestBigqueryWriter.java
64
64
  - classpath/commons-codec-1.3.jar
65
65
  - classpath/commons-logging-1.1.1.jar
66
- - classpath/embulk-output-bigquery-0.1.11.jar
66
+ - classpath/embulk-output-bigquery-0.2.0.jar
67
67
  - classpath/google-api-client-1.20.0.jar
68
68
  - classpath/google-api-services-bigquery-v2-rev205-1.20.0.jar
69
69
  - classpath/google-http-client-1.20.0.jar