embulk-output-td 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +7 -0
  3. data/CHANGELOG.md +4 -0
  4. data/README.md +1 -0
  5. data/build.gradle +5 -1
  6. data/config/checkstyle/checkstyle.xml +117 -0
  7. data/embulk-output-td.gemspec +1 -1
  8. data/gradle/check.gradle +34 -0
  9. data/src/main/java/com/treasuredata/api/TdApiClient.java +47 -23
  10. data/src/main/java/com/treasuredata/api/TdApiClientConfig.java +3 -3
  11. data/src/main/java/com/treasuredata/api/TdApiConstants.java +6 -2
  12. data/src/main/java/com/treasuredata/api/TdApiExecutionInterruptedException.java +2 -1
  13. data/src/main/java/com/treasuredata/api/TdApiExecutionTimeoutException.java +2 -1
  14. data/src/main/java/com/treasuredata/api/model/TDArrayColumnType.java +1 -1
  15. data/src/main/java/com/treasuredata/api/model/TDBulkImportSession.java +6 -4
  16. data/src/main/java/com/treasuredata/api/model/TDColumn.java +4 -2
  17. data/src/main/java/com/treasuredata/api/model/TDColumnTypeDeserializer.java +26 -13
  18. data/src/main/java/com/treasuredata/api/model/TDDatabase.java +2 -1
  19. data/src/main/java/com/treasuredata/api/model/TDMapColumnType.java +1 -1
  20. data/src/main/java/com/treasuredata/api/model/TDTablePermission.java +4 -2
  21. data/src/main/java/com/treasuredata/api/model/TDTableType.java +2 -1
  22. data/src/main/java/org/embulk/output/td/FinalizableExecutorService.java +35 -17
  23. data/src/main/java/org/embulk/output/td/MsgpackGZFileBuilder.java +13 -7
  24. data/src/main/java/org/embulk/output/td/RecordWriter.java +21 -382
  25. data/src/main/java/org/embulk/output/td/TdOutputPlugin.java +175 -40
  26. data/src/main/java/org/embulk/output/td/writer/BooleanFieldWriter.java +23 -0
  27. data/src/main/java/org/embulk/output/td/writer/DoubleFieldWriter.java +23 -0
  28. data/src/main/java/org/embulk/output/td/writer/FieldWriter.java +38 -0
  29. data/src/main/java/org/embulk/output/td/writer/FieldWriterSet.java +206 -0
  30. data/src/main/java/org/embulk/output/td/writer/LongFieldWriter.java +23 -0
  31. data/src/main/java/org/embulk/output/td/writer/StringFieldWriter.java +23 -0
  32. data/src/main/java/org/embulk/output/td/writer/TimestampFieldLongDuplicator.java +28 -0
  33. data/src/main/java/org/embulk/output/td/writer/TimestampLongFieldWriter.java +23 -0
  34. data/src/main/java/org/embulk/output/td/writer/TimestampStringFieldWriter.java +27 -0
  35. data/src/main/java/org/embulk/output/td/writer/UnixTimestampFieldDuplicator.java +27 -0
  36. data/src/main/java/org/embulk/output/td/writer/UnixTimestampLongFieldWriter.java +26 -0
  37. data/src/test/java/com/treasuredata/api/TestTdApiClient.java +1 -1
  38. data/src/test/java/org/embulk/output/td/TestRecordWriter.java +198 -0
  39. data/src/test/java/org/embulk/output/td/TestTdOutputPlugin.java +529 -0
  40. data/src/test/java/org/embulk/output/td/writer/TestFieldWriterSet.java +146 -0
  41. metadata +29 -14
  42. data/src/test/java/org/embulk/output/td/TestFieldWriter.java +0 -105
@@ -6,6 +6,7 @@ import java.util.Map;
6
6
  import javax.validation.constraints.Min;
7
7
  import javax.validation.constraints.Max;
8
8
 
9
+ import com.google.common.annotations.VisibleForTesting;
9
10
  import com.google.common.base.Optional;
10
11
  import com.google.common.base.Throwables;
11
12
  import com.fasterxml.jackson.annotation.JsonCreator;
@@ -26,7 +27,7 @@ import org.embulk.config.ConfigSource;
26
27
  import org.embulk.config.ConfigException;
27
28
  import org.embulk.config.Task;
28
29
  import org.embulk.config.TaskSource;
29
- import org.embulk.output.td.RecordWriter.FieldWriterSet;
30
+ import org.embulk.output.td.writer.FieldWriterSet;
30
31
  import org.embulk.spi.Exec;
31
32
  import org.embulk.spi.ExecSession;
32
33
  import org.embulk.spi.OutputPlugin;
@@ -34,7 +35,6 @@ import org.embulk.spi.Schema;
34
35
  import org.embulk.spi.TransactionalPageOutput;
35
36
  import org.embulk.spi.time.Timestamp;
36
37
  import org.embulk.spi.time.TimestampFormatter;
37
- import org.joda.time.DateTimeZone;
38
38
  import org.joda.time.format.DateTimeFormat;
39
39
  import org.slf4j.Logger;
40
40
 
@@ -61,7 +61,9 @@ public class TdOutputPlugin
61
61
 
62
62
  // TODO connect_timeout, read_timeout, send_timeout
63
63
 
64
- // TODO mode[append, replace]
64
+ @Config("mode")
65
+ @ConfigDefault("\"append\"")
66
+ public Mode getMode();
65
67
 
66
68
  @Config("auto_create_table")
67
69
  @ConfigDefault("true")
@@ -73,6 +75,9 @@ public class TdOutputPlugin
73
75
  @Config("table")
74
76
  public String getTable();
75
77
 
78
+ public void setLoadTargetTableName(String name);
79
+ public String getLoadTargetTableName();
80
+
76
81
  @Config("session")
77
82
  @ConfigDefault("null")
78
83
  public Optional<String> getSession();
@@ -130,6 +135,37 @@ public class TdOutputPlugin
130
135
  extends Task, TimestampFormatter.TimestampColumnOption
131
136
  {}
132
137
 
138
+ public enum Mode
139
+ {
140
+ APPEND, REPLACE;
141
+
142
+ @JsonCreator
143
+ public static Mode fromConfig(String value)
144
+ {
145
+ switch(value) {
146
+ case "append":
147
+ return APPEND;
148
+ case "replace":
149
+ return REPLACE;
150
+ default:
151
+ throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are [append, replace]", value));
152
+ }
153
+ }
154
+
155
+ @JsonValue
156
+ public String toString()
157
+ {
158
+ switch(this) {
159
+ case APPEND:
160
+ return "append";
161
+ case REPLACE:
162
+ return "replace";
163
+ default:
164
+ throw new IllegalStateException();
165
+ }
166
+ }
167
+ }
168
+
133
169
  public interface HttpProxyTask
134
170
  extends Task
135
171
  {
@@ -173,7 +209,7 @@ public class TdOutputPlugin
173
209
  case "nano": return NANO;
174
210
  default:
175
211
  throw new ConfigException(
176
- String.format("Unknown unix_timestamp_unit '%s'. Supported units are sec, milli, micro, and nano"));
212
+ String.format("Unknown unix_timestamp_unit '%s'. Supported units are sec, milli, micro, and nano", s));
177
213
  }
178
214
  }
179
215
 
@@ -197,12 +233,8 @@ public class TdOutputPlugin
197
233
  {
198
234
  final PluginTask task = config.loadConfig(PluginTask.class);
199
235
 
200
- // TODO mode check
201
-
202
236
  // check column_options is valid or not
203
- for (String columnName : task.getColumnOptions().keySet()) {
204
- schema.lookupColumn(columnName); // throws SchemaConfigException
205
- }
237
+ checkColumnOptions(schema, task.getColumnOptions());
206
238
 
207
239
  // generate session name
208
240
  task.setSessionName(buildBulkImportSessionName(task, Exec.session()));
@@ -210,11 +242,24 @@ public class TdOutputPlugin
210
242
  try (TdApiClient client = newTdApiClient(task)) {
211
243
  String databaseName = task.getDatabase();
212
244
  String tableName = task.getTable();
213
- if (task.getAutoCreateTable()) {
214
- createTableIfNotExists(client, databaseName, tableName);
215
- } else {
216
- // check if the database and/or table exist or not
217
- validateTableExists(client, databaseName, tableName);
245
+
246
+ switch (task.getMode()) {
247
+ case APPEND:
248
+ if (task.getAutoCreateTable()) {
249
+ // auto_create_table is valid only with append mode (replace mode always creates a new table)
250
+ createTableIfNotExists(client, databaseName, tableName);
251
+ }
252
+ else {
253
+ // check if the database and/or table exist or not
254
+ validateTableExists(client, databaseName, tableName);
255
+ }
256
+ task.setLoadTargetTableName(tableName);
257
+ break;
258
+
259
+ case REPLACE:
260
+ task.setLoadTargetTableName(
261
+ createTemporaryTableWithPrefix(client, databaseName, makeTablePrefix(task)));
262
+ break;
218
263
  }
219
264
 
220
265
  // validate FieldWriterSet configuration before transaction is started
@@ -226,20 +271,32 @@ public class TdOutputPlugin
226
271
 
227
272
  public ConfigDiff resume(TaskSource taskSource,
228
273
  Schema schema, int processorCount,
229
- OutputPlugin.Control control) {
274
+ OutputPlugin.Control control)
275
+ {
230
276
  PluginTask task = taskSource.loadTask(PluginTask.class);
231
277
  try (TdApiClient client = newTdApiClient(task)) {
232
278
  return doRun(client, task, control);
233
279
  }
234
280
  }
235
281
 
236
- private ConfigDiff doRun(TdApiClient client, PluginTask task, OutputPlugin.Control control)
282
+ @VisibleForTesting
283
+ ConfigDiff doRun(TdApiClient client, PluginTask task, OutputPlugin.Control control)
237
284
  {
238
- boolean doUpload = startBulkImportSession(client, task.getSessionName(), task.getDatabase(), task.getTable());
285
+ boolean doUpload = startBulkImportSession(client, task.getSessionName(), task.getDatabase(), task.getLoadTargetTableName());
239
286
  task.setDoUpload(doUpload);
240
287
  control.run(task.dump());
241
288
  completeBulkImportSession(client, task.getSessionName(), 0); // TODO perform job priority
242
289
 
290
+ // commit
291
+ switch (task.getMode()) {
292
+ case APPEND:
293
+ // already done
294
+ break;
295
+ case REPLACE:
296
+ // rename table
297
+ renameTable(client, task.getDatabase(), task.getLoadTargetTableName(), task.getTable());
298
+ }
299
+
243
300
  ConfigDiff configDiff = Exec.newConfigDiff();
244
301
  configDiff.set("last_session", task.getSessionName());
245
302
  return configDiff;
@@ -257,14 +314,29 @@ public class TdOutputPlugin
257
314
  }
258
315
  }
259
316
 
260
- private TdApiClient newTdApiClient(final PluginTask task)
317
+ private String makeTablePrefix(PluginTask task)
318
+ {
319
+ return task.getTable() + "_" + task.getSessionName();
320
+ }
321
+
322
+ @VisibleForTesting
323
+ void checkColumnOptions(Schema schema, Map<String, TimestampColumnOption> columnOptions)
324
+ {
325
+ for (String columnName : columnOptions.keySet()) {
326
+ schema.lookupColumn(columnName); // throws SchemaConfigException
327
+ }
328
+ }
329
+
330
+ @VisibleForTesting
331
+ TdApiClient newTdApiClient(final PluginTask task)
261
332
  {
262
333
  Optional<HttpProxyConfig> httpProxyConfig = newHttpProxyConfig(task.getHttpProxy());
263
334
  TdApiClientConfig config = new TdApiClientConfig(task.getEndpoint(), task.getUseSsl(), httpProxyConfig);
264
335
  TdApiClient client = new TdApiClient(task.getApiKey(), config);
265
336
  try {
266
337
  client.start();
267
- } catch (IOException e) {
338
+ }
339
+ catch (IOException e) {
268
340
  throw Throwables.propagate(e);
269
341
  }
270
342
  return client;
@@ -276,37 +348,63 @@ public class TdOutputPlugin
276
348
  if (task.isPresent()) {
277
349
  HttpProxyTask pt = task.get();
278
350
  httpProxyConfig = Optional.of(new HttpProxyConfig(pt.getHost(), pt.getPort(), pt.getUseSsl()));
279
- } else {
351
+ }
352
+ else {
280
353
  httpProxyConfig = Optional.absent();
281
354
  }
282
355
  return httpProxyConfig;
283
356
  }
284
357
 
285
- private void createTableIfNotExists(TdApiClient client, String databaseName, String tableName)
358
+ @VisibleForTesting
359
+ void createTableIfNotExists(TdApiClient client, String databaseName, String tableName)
286
360
  {
287
361
  log.debug("Creating table \"{}\".\"{}\" if not exists", databaseName, tableName);
288
362
  try {
289
363
  client.createTable(databaseName, tableName);
290
364
  log.debug("Created table \"{}\".\"{}\"", databaseName, tableName);
291
- } catch (TdApiNotFoundException e) {
365
+ }
366
+ catch (TdApiNotFoundException e) {
292
367
  try {
293
368
  client.createDatabase(databaseName);
294
369
  log.debug("Created database \"{}\"", databaseName);
295
- } catch (TdApiConflictException ex) {
370
+ }
371
+ catch (TdApiConflictException ex) {
296
372
  // ignorable error
297
373
  }
298
374
  try {
299
375
  client.createTable(databaseName, tableName);
300
376
  log.debug("Created table \"{}\".\"{}\"", databaseName, tableName);
301
- } catch (TdApiConflictException exe) {
377
+ }
378
+ catch (TdApiConflictException exe) {
302
379
  // ignorable error
303
380
  }
304
- } catch (TdApiConflictException e) {
381
+ }
382
+ catch (TdApiConflictException e) {
305
383
  // ignorable error
306
384
  }
307
385
  }
308
386
 
309
- private void validateTableExists(TdApiClient client, String databaseName, String tableName)
387
+ @VisibleForTesting
388
+ String createTemporaryTableWithPrefix(TdApiClient client, String databaseName, String tablePrefix)
389
+ throws TdApiConflictException
390
+ {
391
+ String tableName = tablePrefix;
392
+ while (true) {
393
+ log.debug("Creating temporal table \"{}\".\"{}\"", databaseName, tableName);
394
+ try {
395
+ client.createTable(databaseName, tableName);
396
+ log.debug("Created temporal table \"{}\".\"{}\"", databaseName, tableName);
397
+ return tableName;
398
+ }
399
+ catch (TdApiConflictException e) {
400
+ log.debug("\"{}\".\"{}\" table already exists. Renaming temporal table.", databaseName, tableName);
401
+ tableName += "_";
402
+ }
403
+ }
404
+ }
405
+
406
+ @VisibleForTesting
407
+ void validateTableExists(TdApiClient client, String databaseName, String tableName)
310
408
  {
311
409
  try {
312
410
  for (TDTable table : client.getTables(databaseName)) {
@@ -315,16 +413,19 @@ public class TdOutputPlugin
315
413
  }
316
414
  }
317
415
  throw new ConfigException(String.format("Table \"%s\".\"%s\" doesn't exist", databaseName, tableName));
318
- } catch (TdApiNotFoundException ex) {
416
+ }
417
+ catch (TdApiNotFoundException ex) {
319
418
  throw new ConfigException(String.format("Database \"%s\" doesn't exist", databaseName), ex);
320
419
  }
321
420
  }
322
421
 
323
- private String buildBulkImportSessionName(PluginTask task, ExecSession exec)
422
+ @VisibleForTesting
423
+ String buildBulkImportSessionName(PluginTask task, ExecSession exec)
324
424
  {
325
425
  if (task.getSession().isPresent()) {
326
426
  return task.getSession().get();
327
- } else {
427
+ }
428
+ else {
328
429
  Timestamp time = exec.getTransactionTime(); // TODO implement Exec.getTransactionUniqueName()
329
430
  return String.format("embulk_%s_%09d",
330
431
  DateTimeFormat.forPattern("yyyyMMdd_HHmmss").withZoneUTC().print(time.getEpochSecond() * 1000),
@@ -333,14 +434,16 @@ public class TdOutputPlugin
333
434
  }
334
435
 
335
436
  // return false if all files are already uploaded
336
- private boolean startBulkImportSession(TdApiClient client,
437
+ @VisibleForTesting
438
+ boolean startBulkImportSession(TdApiClient client,
337
439
  String sessionName, String databaseName, String tableName)
338
440
  {
339
441
  log.info("Create bulk_import session {}", sessionName);
340
442
  TDBulkImportSession session;
341
443
  try {
342
444
  client.createBulkImportSession(sessionName, databaseName, tableName);
343
- } catch (TdApiConflictException ex) {
445
+ }
446
+ catch (TdApiConflictException ex) {
344
447
  // ignorable error
345
448
  }
346
449
  session = client.getBulkImportSession(sessionName);
@@ -366,7 +469,8 @@ public class TdOutputPlugin
366
469
  }
367
470
  }
368
471
 
369
- private void completeBulkImportSession(TdApiClient client, String sessionName, int priority)
472
+ @VisibleForTesting
473
+ void completeBulkImportSession(TdApiClient client, String sessionName, int priority)
370
474
  {
371
475
  TDBulkImportSession session = client.getBulkImportSession(sessionName);
372
476
 
@@ -376,7 +480,8 @@ public class TdOutputPlugin
376
480
  // freeze
377
481
  try {
378
482
  client.freezeBulkImportSession(sessionName);
379
- } catch (TdApiConflictException e) {
483
+ }
484
+ catch (TdApiConflictException e) {
380
485
  // ignorable error
381
486
  }
382
487
  }
@@ -417,7 +522,8 @@ public class TdOutputPlugin
417
522
  }
418
523
  }
419
524
 
420
- private TDBulkImportSession waitForStatusChange(TdApiClient client, String sessionName,
525
+ @VisibleForTesting
526
+ TDBulkImportSession waitForStatusChange(TdApiClient client, String sessionName,
421
527
  ImportStatus current, ImportStatus expecting, String operation)
422
528
  {
423
529
  TDBulkImportSession importSession;
@@ -427,21 +533,47 @@ public class TdOutputPlugin
427
533
  if (importSession.is(expecting)) {
428
534
  return importSession;
429
535
 
430
- } else if (importSession.is(current)) {
536
+ }
537
+ else if (importSession.is(current)) {
431
538
  // in progress
432
539
 
433
- } else {
540
+ }
541
+ else {
434
542
  throw new RuntimeException(String.format("Failed to %s bulk import session '%s'",
435
543
  operation, sessionName));
436
544
  }
437
545
 
438
546
  try {
439
547
  Thread.sleep(3000);
440
- } catch (InterruptedException e) {
548
+ }
549
+ catch (InterruptedException e) {
441
550
  }
442
551
  }
443
552
  }
444
553
 
554
+ @VisibleForTesting
555
+ void renameTable(TdApiClient client, String databaseName, String oldName, String newName)
556
+ {
557
+ log.debug("Renaming table \"{}\".\"{}\" to \"{}\"", databaseName, oldName, newName);
558
+ try {
559
+ client.renameTable(databaseName, oldName, newName);
560
+ }
561
+ catch (TdApiConflictException e) {
562
+ try {
563
+ client.deleteTable(databaseName, newName);
564
+ log.debug("Deleted original table \"{}\".\"{}\"", databaseName, newName);
565
+ }
566
+ catch (TdApiNotFoundException ex) {
567
+ // ignoreable error
568
+ }
569
+ catch (IOException ex) {
570
+ throw Throwables.propagate(ex);
571
+ }
572
+
573
+ client.renameTable(databaseName, oldName, newName);
574
+ }
575
+ }
576
+
445
577
  @Override
446
578
  public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
447
579
  {
@@ -450,14 +582,17 @@ public class TdOutputPlugin
450
582
  RecordWriter closeLater = null;
451
583
  try {
452
584
  FieldWriterSet fieldWriters = new FieldWriterSet(log, task, schema);
453
- RecordWriter recordWriter = closeLater = new RecordWriter(task, taskIndex, newTdApiClient(task), fieldWriters);
585
+ closeLater = new RecordWriter(task, taskIndex, newTdApiClient(task), fieldWriters);
586
+ RecordWriter recordWriter = closeLater;
454
587
  recordWriter.open(schema);
455
588
  closeLater = null;
456
589
  return recordWriter;
457
590
 
458
- } catch (IOException e) {
591
+ }
592
+ catch (IOException e) {
459
593
  throw Throwables.propagate(e);
460
- } finally {
594
+ }
595
+ finally {
461
596
  if (closeLater != null) {
462
597
  closeLater.close();
463
598
  }
@@ -0,0 +1,23 @@
1
+ package org.embulk.output.td.writer;
2
+
3
+ import org.embulk.output.td.MsgpackGZFileBuilder;
4
+ import org.embulk.spi.Column;
5
+ import org.embulk.spi.PageReader;
6
+
7
+ import java.io.IOException;
8
+
9
+ public class BooleanFieldWriter
10
+ extends FieldWriter
11
+ {
12
+ public BooleanFieldWriter(String keyName)
13
+ {
14
+ super(keyName);
15
+ }
16
+
17
+ @Override
18
+ public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
19
+ throws IOException
20
+ {
21
+ builder.writeBoolean(reader.getBoolean(column));
22
+ }
23
+ }
@@ -0,0 +1,23 @@
1
+ package org.embulk.output.td.writer;
2
+
3
+ import org.embulk.output.td.MsgpackGZFileBuilder;
4
+ import org.embulk.spi.Column;
5
+ import org.embulk.spi.PageReader;
6
+
7
+ import java.io.IOException;
8
+
9
+ public class DoubleFieldWriter
10
+ extends FieldWriter
11
+ {
12
+ public DoubleFieldWriter(String keyName)
13
+ {
14
+ super(keyName);
15
+ }
16
+
17
+ @Override
18
+ public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
19
+ throws IOException
20
+ {
21
+ builder.writeDouble(reader.getDouble(column));
22
+ }
23
+ }