embulk-output-td 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +7 -0
  3. data/CHANGELOG.md +4 -0
  4. data/README.md +1 -0
  5. data/build.gradle +5 -1
  6. data/config/checkstyle/checkstyle.xml +117 -0
  7. data/embulk-output-td.gemspec +1 -1
  8. data/gradle/check.gradle +34 -0
  9. data/src/main/java/com/treasuredata/api/TdApiClient.java +47 -23
  10. data/src/main/java/com/treasuredata/api/TdApiClientConfig.java +3 -3
  11. data/src/main/java/com/treasuredata/api/TdApiConstants.java +6 -2
  12. data/src/main/java/com/treasuredata/api/TdApiExecutionInterruptedException.java +2 -1
  13. data/src/main/java/com/treasuredata/api/TdApiExecutionTimeoutException.java +2 -1
  14. data/src/main/java/com/treasuredata/api/model/TDArrayColumnType.java +1 -1
  15. data/src/main/java/com/treasuredata/api/model/TDBulkImportSession.java +6 -4
  16. data/src/main/java/com/treasuredata/api/model/TDColumn.java +4 -2
  17. data/src/main/java/com/treasuredata/api/model/TDColumnTypeDeserializer.java +26 -13
  18. data/src/main/java/com/treasuredata/api/model/TDDatabase.java +2 -1
  19. data/src/main/java/com/treasuredata/api/model/TDMapColumnType.java +1 -1
  20. data/src/main/java/com/treasuredata/api/model/TDTablePermission.java +4 -2
  21. data/src/main/java/com/treasuredata/api/model/TDTableType.java +2 -1
  22. data/src/main/java/org/embulk/output/td/FinalizableExecutorService.java +35 -17
  23. data/src/main/java/org/embulk/output/td/MsgpackGZFileBuilder.java +13 -7
  24. data/src/main/java/org/embulk/output/td/RecordWriter.java +21 -382
  25. data/src/main/java/org/embulk/output/td/TdOutputPlugin.java +175 -40
  26. data/src/main/java/org/embulk/output/td/writer/BooleanFieldWriter.java +23 -0
  27. data/src/main/java/org/embulk/output/td/writer/DoubleFieldWriter.java +23 -0
  28. data/src/main/java/org/embulk/output/td/writer/FieldWriter.java +38 -0
  29. data/src/main/java/org/embulk/output/td/writer/FieldWriterSet.java +206 -0
  30. data/src/main/java/org/embulk/output/td/writer/LongFieldWriter.java +23 -0
  31. data/src/main/java/org/embulk/output/td/writer/StringFieldWriter.java +23 -0
  32. data/src/main/java/org/embulk/output/td/writer/TimestampFieldLongDuplicator.java +28 -0
  33. data/src/main/java/org/embulk/output/td/writer/TimestampLongFieldWriter.java +23 -0
  34. data/src/main/java/org/embulk/output/td/writer/TimestampStringFieldWriter.java +27 -0
  35. data/src/main/java/org/embulk/output/td/writer/UnixTimestampFieldDuplicator.java +27 -0
  36. data/src/main/java/org/embulk/output/td/writer/UnixTimestampLongFieldWriter.java +26 -0
  37. data/src/test/java/com/treasuredata/api/TestTdApiClient.java +1 -1
  38. data/src/test/java/org/embulk/output/td/TestRecordWriter.java +198 -0
  39. data/src/test/java/org/embulk/output/td/TestTdOutputPlugin.java +529 -0
  40. data/src/test/java/org/embulk/output/td/writer/TestFieldWriterSet.java +146 -0
  41. metadata +29 -14
  42. data/src/test/java/org/embulk/output/td/TestFieldWriter.java +0 -105
@@ -6,6 +6,7 @@ import java.util.Map;
6
6
  import javax.validation.constraints.Min;
7
7
  import javax.validation.constraints.Max;
8
8
 
9
+ import com.google.common.annotations.VisibleForTesting;
9
10
  import com.google.common.base.Optional;
10
11
  import com.google.common.base.Throwables;
11
12
  import com.fasterxml.jackson.annotation.JsonCreator;
@@ -26,7 +27,7 @@ import org.embulk.config.ConfigSource;
26
27
  import org.embulk.config.ConfigException;
27
28
  import org.embulk.config.Task;
28
29
  import org.embulk.config.TaskSource;
29
- import org.embulk.output.td.RecordWriter.FieldWriterSet;
30
+ import org.embulk.output.td.writer.FieldWriterSet;
30
31
  import org.embulk.spi.Exec;
31
32
  import org.embulk.spi.ExecSession;
32
33
  import org.embulk.spi.OutputPlugin;
@@ -34,7 +35,6 @@ import org.embulk.spi.Schema;
34
35
  import org.embulk.spi.TransactionalPageOutput;
35
36
  import org.embulk.spi.time.Timestamp;
36
37
  import org.embulk.spi.time.TimestampFormatter;
37
- import org.joda.time.DateTimeZone;
38
38
  import org.joda.time.format.DateTimeFormat;
39
39
  import org.slf4j.Logger;
40
40
 
@@ -61,7 +61,9 @@ public class TdOutputPlugin
61
61
 
62
62
  // TODO connect_timeout, read_timeout, send_timeout
63
63
 
64
- // TODO mode[append, replace]
64
+ @Config("mode")
65
+ @ConfigDefault("\"append\"")
66
+ public Mode getMode();
65
67
 
66
68
  @Config("auto_create_table")
67
69
  @ConfigDefault("true")
@@ -73,6 +75,9 @@ public class TdOutputPlugin
73
75
  @Config("table")
74
76
  public String getTable();
75
77
 
78
+ public void setLoadTargetTableName(String name);
79
+ public String getLoadTargetTableName();
80
+
76
81
  @Config("session")
77
82
  @ConfigDefault("null")
78
83
  public Optional<String> getSession();
@@ -130,6 +135,37 @@ public class TdOutputPlugin
130
135
  extends Task, TimestampFormatter.TimestampColumnOption
131
136
  {}
132
137
 
138
+ public enum Mode
139
+ {
140
+ APPEND, REPLACE;
141
+
142
+ @JsonCreator
143
+ public static Mode fromConfig(String value)
144
+ {
145
+ switch(value) {
146
+ case "append":
147
+ return APPEND;
148
+ case "replace":
149
+ return REPLACE;
150
+ default:
151
+ throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are [append, replace]", value));
152
+ }
153
+ }
154
+
155
+ @JsonValue
156
+ public String toString()
157
+ {
158
+ switch(this) {
159
+ case APPEND:
160
+ return "append";
161
+ case REPLACE:
162
+ return "replace";
163
+ default:
164
+ throw new IllegalStateException();
165
+ }
166
+ }
167
+ }
168
+
133
169
  public interface HttpProxyTask
134
170
  extends Task
135
171
  {
@@ -173,7 +209,7 @@ public class TdOutputPlugin
173
209
  case "nano": return NANO;
174
210
  default:
175
211
  throw new ConfigException(
176
- String.format("Unknown unix_timestamp_unit '%s'. Supported units are sec, milli, micro, and nano"));
212
+ String.format("Unknown unix_timestamp_unit '%s'. Supported units are sec, milli, micro, and nano", s));
177
213
  }
178
214
  }
179
215
 
@@ -197,12 +233,8 @@ public class TdOutputPlugin
197
233
  {
198
234
  final PluginTask task = config.loadConfig(PluginTask.class);
199
235
 
200
- // TODO mode check
201
-
202
236
  // check column_options is valid or not
203
- for (String columnName : task.getColumnOptions().keySet()) {
204
- schema.lookupColumn(columnName); // throws SchemaConfigException
205
- }
237
+ checkColumnOptions(schema, task.getColumnOptions());
206
238
 
207
239
  // generate session name
208
240
  task.setSessionName(buildBulkImportSessionName(task, Exec.session()));
@@ -210,11 +242,24 @@ public class TdOutputPlugin
210
242
  try (TdApiClient client = newTdApiClient(task)) {
211
243
  String databaseName = task.getDatabase();
212
244
  String tableName = task.getTable();
213
- if (task.getAutoCreateTable()) {
214
- createTableIfNotExists(client, databaseName, tableName);
215
- } else {
216
- // check if the database and/or table exist or not
217
- validateTableExists(client, databaseName, tableName);
245
+
246
+ switch (task.getMode()) {
247
+ case APPEND:
248
+ if (task.getAutoCreateTable()) {
249
+ // auto_create_table is valid only with append mode (replace mode always creates a new table)
250
+ createTableIfNotExists(client, databaseName, tableName);
251
+ }
252
+ else {
253
+ // check if the database and/or table exist or not
254
+ validateTableExists(client, databaseName, tableName);
255
+ }
256
+ task.setLoadTargetTableName(tableName);
257
+ break;
258
+
259
+ case REPLACE:
260
+ task.setLoadTargetTableName(
261
+ createTemporaryTableWithPrefix(client, databaseName, makeTablePrefix(task)));
262
+ break;
218
263
  }
219
264
 
220
265
  // validate FieldWriterSet configuration before transaction is started
@@ -226,20 +271,32 @@ public class TdOutputPlugin
226
271
 
227
272
  public ConfigDiff resume(TaskSource taskSource,
228
273
  Schema schema, int processorCount,
229
- OutputPlugin.Control control) {
274
+ OutputPlugin.Control control)
275
+ {
230
276
  PluginTask task = taskSource.loadTask(PluginTask.class);
231
277
  try (TdApiClient client = newTdApiClient(task)) {
232
278
  return doRun(client, task, control);
233
279
  }
234
280
  }
235
281
 
236
- private ConfigDiff doRun(TdApiClient client, PluginTask task, OutputPlugin.Control control)
282
+ @VisibleForTesting
283
+ ConfigDiff doRun(TdApiClient client, PluginTask task, OutputPlugin.Control control)
237
284
  {
238
- boolean doUpload = startBulkImportSession(client, task.getSessionName(), task.getDatabase(), task.getTable());
285
+ boolean doUpload = startBulkImportSession(client, task.getSessionName(), task.getDatabase(), task.getLoadTargetTableName());
239
286
  task.setDoUpload(doUpload);
240
287
  control.run(task.dump());
241
288
  completeBulkImportSession(client, task.getSessionName(), 0); // TODO perform job priority
242
289
 
290
+ // commit
291
+ switch (task.getMode()) {
292
+ case APPEND:
293
+ // already done
294
+ break;
295
+ case REPLACE:
296
+ // rename table
297
+ renameTable(client, task.getDatabase(), task.getLoadTargetTableName(), task.getTable());
298
+ }
299
+
243
300
  ConfigDiff configDiff = Exec.newConfigDiff();
244
301
  configDiff.set("last_session", task.getSessionName());
245
302
  return configDiff;
@@ -257,14 +314,29 @@ public class TdOutputPlugin
257
314
  }
258
315
  }
259
316
 
260
- private TdApiClient newTdApiClient(final PluginTask task)
317
+ private String makeTablePrefix(PluginTask task)
318
+ {
319
+ return task.getTable() + "_" + task.getSessionName();
320
+ }
321
+
322
+ @VisibleForTesting
323
+ void checkColumnOptions(Schema schema, Map<String, TimestampColumnOption> columnOptions)
324
+ {
325
+ for (String columnName : columnOptions.keySet()) {
326
+ schema.lookupColumn(columnName); // throws SchemaConfigException
327
+ }
328
+ }
329
+
330
+ @VisibleForTesting
331
+ TdApiClient newTdApiClient(final PluginTask task)
261
332
  {
262
333
  Optional<HttpProxyConfig> httpProxyConfig = newHttpProxyConfig(task.getHttpProxy());
263
334
  TdApiClientConfig config = new TdApiClientConfig(task.getEndpoint(), task.getUseSsl(), httpProxyConfig);
264
335
  TdApiClient client = new TdApiClient(task.getApiKey(), config);
265
336
  try {
266
337
  client.start();
267
- } catch (IOException e) {
338
+ }
339
+ catch (IOException e) {
268
340
  throw Throwables.propagate(e);
269
341
  }
270
342
  return client;
@@ -276,37 +348,63 @@ public class TdOutputPlugin
276
348
  if (task.isPresent()) {
277
349
  HttpProxyTask pt = task.get();
278
350
  httpProxyConfig = Optional.of(new HttpProxyConfig(pt.getHost(), pt.getPort(), pt.getUseSsl()));
279
- } else {
351
+ }
352
+ else {
280
353
  httpProxyConfig = Optional.absent();
281
354
  }
282
355
  return httpProxyConfig;
283
356
  }
284
357
 
285
- private void createTableIfNotExists(TdApiClient client, String databaseName, String tableName)
358
+ @VisibleForTesting
359
+ void createTableIfNotExists(TdApiClient client, String databaseName, String tableName)
286
360
  {
287
361
  log.debug("Creating table \"{}\".\"{}\" if not exists", databaseName, tableName);
288
362
  try {
289
363
  client.createTable(databaseName, tableName);
290
364
  log.debug("Created table \"{}\".\"{}\"", databaseName, tableName);
291
- } catch (TdApiNotFoundException e) {
365
+ }
366
+ catch (TdApiNotFoundException e) {
292
367
  try {
293
368
  client.createDatabase(databaseName);
294
369
  log.debug("Created database \"{}\"", databaseName);
295
- } catch (TdApiConflictException ex) {
370
+ }
371
+ catch (TdApiConflictException ex) {
296
372
  // ignorable error
297
373
  }
298
374
  try {
299
375
  client.createTable(databaseName, tableName);
300
376
  log.debug("Created table \"{}\".\"{}\"", databaseName, tableName);
301
- } catch (TdApiConflictException exe) {
377
+ }
378
+ catch (TdApiConflictException exe) {
302
379
  // ignorable error
303
380
  }
304
- } catch (TdApiConflictException e) {
381
+ }
382
+ catch (TdApiConflictException e) {
305
383
  // ignorable error
306
384
  }
307
385
  }
308
386
 
309
- private void validateTableExists(TdApiClient client, String databaseName, String tableName)
387
+ @VisibleForTesting
388
+ String createTemporaryTableWithPrefix(TdApiClient client, String databaseName, String tablePrefix)
389
+ throws TdApiConflictException
390
+ {
391
+ String tableName = tablePrefix;
392
+ while (true) {
393
+ log.debug("Creating temporal table \"{}\".\"{}\"", databaseName, tableName);
394
+ try {
395
+ client.createTable(databaseName, tableName);
396
+ log.debug("Created temporal table \"{}\".\"{}\"", databaseName, tableName);
397
+ return tableName;
398
+ }
399
+ catch (TdApiConflictException e) {
400
+ log.debug("\"{}\".\"{}\" table already exists. Renaming temporal table.", databaseName, tableName);
401
+ tableName += "_";
402
+ }
403
+ }
404
+ }
405
+
406
+ @VisibleForTesting
407
+ void validateTableExists(TdApiClient client, String databaseName, String tableName)
310
408
  {
311
409
  try {
312
410
  for (TDTable table : client.getTables(databaseName)) {
@@ -315,16 +413,19 @@ public class TdOutputPlugin
315
413
  }
316
414
  }
317
415
  throw new ConfigException(String.format("Table \"%s\".\"%s\" doesn't exist", databaseName, tableName));
318
- } catch (TdApiNotFoundException ex) {
416
+ }
417
+ catch (TdApiNotFoundException ex) {
319
418
  throw new ConfigException(String.format("Database \"%s\" doesn't exist", databaseName), ex);
320
419
  }
321
420
  }
322
421
 
323
- private String buildBulkImportSessionName(PluginTask task, ExecSession exec)
422
+ @VisibleForTesting
423
+ String buildBulkImportSessionName(PluginTask task, ExecSession exec)
324
424
  {
325
425
  if (task.getSession().isPresent()) {
326
426
  return task.getSession().get();
327
- } else {
427
+ }
428
+ else {
328
429
  Timestamp time = exec.getTransactionTime(); // TODO implement Exec.getTransactionUniqueName()
329
430
  return String.format("embulk_%s_%09d",
330
431
  DateTimeFormat.forPattern("yyyyMMdd_HHmmss").withZoneUTC().print(time.getEpochSecond() * 1000),
@@ -333,14 +434,16 @@ public class TdOutputPlugin
333
434
  }
334
435
 
335
436
  // return false if all files are already uploaded
336
- private boolean startBulkImportSession(TdApiClient client,
437
+ @VisibleForTesting
438
+ boolean startBulkImportSession(TdApiClient client,
337
439
  String sessionName, String databaseName, String tableName)
338
440
  {
339
441
  log.info("Create bulk_import session {}", sessionName);
340
442
  TDBulkImportSession session;
341
443
  try {
342
444
  client.createBulkImportSession(sessionName, databaseName, tableName);
343
- } catch (TdApiConflictException ex) {
445
+ }
446
+ catch (TdApiConflictException ex) {
344
447
  // ignorable error
345
448
  }
346
449
  session = client.getBulkImportSession(sessionName);
@@ -366,7 +469,8 @@ public class TdOutputPlugin
366
469
  }
367
470
  }
368
471
 
369
- private void completeBulkImportSession(TdApiClient client, String sessionName, int priority)
472
+ @VisibleForTesting
473
+ void completeBulkImportSession(TdApiClient client, String sessionName, int priority)
370
474
  {
371
475
  TDBulkImportSession session = client.getBulkImportSession(sessionName);
372
476
 
@@ -376,7 +480,8 @@ public class TdOutputPlugin
376
480
  // freeze
377
481
  try {
378
482
  client.freezeBulkImportSession(sessionName);
379
- } catch (TdApiConflictException e) {
483
+ }
484
+ catch (TdApiConflictException e) {
380
485
  // ignorable error
381
486
  }
382
487
  }
@@ -417,7 +522,8 @@ public class TdOutputPlugin
417
522
  }
418
523
  }
419
524
 
420
- private TDBulkImportSession waitForStatusChange(TdApiClient client, String sessionName,
525
+ @VisibleForTesting
526
+ TDBulkImportSession waitForStatusChange(TdApiClient client, String sessionName,
421
527
  ImportStatus current, ImportStatus expecting, String operation)
422
528
  {
423
529
  TDBulkImportSession importSession;
@@ -427,21 +533,47 @@ public class TdOutputPlugin
427
533
  if (importSession.is(expecting)) {
428
534
  return importSession;
429
535
 
430
- } else if (importSession.is(current)) {
536
+ }
537
+ else if (importSession.is(current)) {
431
538
  // in progress
432
539
 
433
- } else {
540
+ }
541
+ else {
434
542
  throw new RuntimeException(String.format("Failed to %s bulk import session '%s'",
435
543
  operation, sessionName));
436
544
  }
437
545
 
438
546
  try {
439
547
  Thread.sleep(3000);
440
- } catch (InterruptedException e) {
548
+ }
549
+ catch (InterruptedException e) {
441
550
  }
442
551
  }
443
552
  }
444
553
 
554
+ @VisibleForTesting
555
+ void renameTable(TdApiClient client, String databaseName, String oldName, String newName)
556
+ {
557
+ log.debug("Renaming table \"{}\".\"{}\" to \"{}\"", databaseName, oldName, newName);
558
+ try {
559
+ client.renameTable(databaseName, oldName, newName);
560
+ }
561
+ catch (TdApiConflictException e) {
562
+ try {
563
+ client.deleteTable(databaseName, newName);
564
+ log.debug("Deleted original table \"{}\".\"{}\"", databaseName, newName);
565
+ }
566
+ catch (TdApiNotFoundException ex) {
567
+ // ignoreable error
568
+ }
569
+ catch (IOException ex) {
570
+ throw Throwables.propagate(ex);
571
+ }
572
+
573
+ client.renameTable(databaseName, oldName, newName);
574
+ }
575
+ }
576
+
445
577
  @Override
446
578
  public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
447
579
  {
@@ -450,14 +582,17 @@ public class TdOutputPlugin
450
582
  RecordWriter closeLater = null;
451
583
  try {
452
584
  FieldWriterSet fieldWriters = new FieldWriterSet(log, task, schema);
453
- RecordWriter recordWriter = closeLater = new RecordWriter(task, taskIndex, newTdApiClient(task), fieldWriters);
585
+ closeLater = new RecordWriter(task, taskIndex, newTdApiClient(task), fieldWriters);
586
+ RecordWriter recordWriter = closeLater;
454
587
  recordWriter.open(schema);
455
588
  closeLater = null;
456
589
  return recordWriter;
457
590
 
458
- } catch (IOException e) {
591
+ }
592
+ catch (IOException e) {
459
593
  throw Throwables.propagate(e);
460
- } finally {
594
+ }
595
+ finally {
461
596
  if (closeLater != null) {
462
597
  closeLater.close();
463
598
  }
@@ -0,0 +1,23 @@
1
+ package org.embulk.output.td.writer;
2
+
3
+ import org.embulk.output.td.MsgpackGZFileBuilder;
4
+ import org.embulk.spi.Column;
5
+ import org.embulk.spi.PageReader;
6
+
7
+ import java.io.IOException;
8
+
9
+ public class BooleanFieldWriter
10
+ extends FieldWriter
11
+ {
12
+ public BooleanFieldWriter(String keyName)
13
+ {
14
+ super(keyName);
15
+ }
16
+
17
+ @Override
18
+ public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
19
+ throws IOException
20
+ {
21
+ builder.writeBoolean(reader.getBoolean(column));
22
+ }
23
+ }
@@ -0,0 +1,23 @@
1
+ package org.embulk.output.td.writer;
2
+
3
+ import org.embulk.output.td.MsgpackGZFileBuilder;
4
+ import org.embulk.spi.Column;
5
+ import org.embulk.spi.PageReader;
6
+
7
+ import java.io.IOException;
8
+
9
+ public class DoubleFieldWriter
10
+ extends FieldWriter
11
+ {
12
+ public DoubleFieldWriter(String keyName)
13
+ {
14
+ super(keyName);
15
+ }
16
+
17
+ @Override
18
+ public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
19
+ throws IOException
20
+ {
21
+ builder.writeDouble(reader.getDouble(column));
22
+ }
23
+ }