embulk-output-td 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 58e133b33691a9a323d67be520799e68a8ec7e83
4
- data.tar.gz: bd184003c46843eba457d648e3b557d8d2566373
3
+ metadata.gz: 0e96fa2666efc130272325571d56cfd9e77b5086
4
+ data.tar.gz: b4ed6f4e05cf72ab403427a452b87a6b0da1d575
5
5
  SHA512:
6
- metadata.gz: b6e9317a6a47f6de4388f8d78f64d180396a82444cdb3405abcb723caaf4f0c7fab7c2e33993a9f8bca13eedd2fe514f6311869cd907f0eca0bea9cdf5d63cb5
7
- data.tar.gz: 5daaad4fd6463747b9d5f83d2caff817187187ce7397067bc3627bdacad6e7d39fbcb8a340c430d3399cc8637814a4871d8354af3d6353896e4012adb6e4bd89
6
+ metadata.gz: 25a373bb171c0280913f452e5a220b2ab12e350a4ddd4e476606e29541c79a12dc16ebd172243ae48b5962ff7a7e531c74ce4347b94705a85c3768c77529a3ff
7
+ data.tar.gz: b5910c054b1e9bdc74f2fc269789901bc8652bdb9a56d3b92686e38ebc5d4a1d27a4e86d47c3f0bb560e99c27c3d9bbbfdf216054b378368c3efd8f3c4d44fa9
data/CHANGELOG.md ADDED
@@ -0,0 +1,9 @@
1
+ ## 0.1.1 - 2015-07-14
2
+
3
+ * [maintenance] Make part name unique and idempotent [#9](https://github.com/treasure-data/embulk-output-td/pull/9)
4
+ * [maintenance] Delete temp files after uploading [#7](https://github.com/treasure-data/embulk-output-td/pull/7)
5
+ * [new feature] Add unix_timestamp_unit option [#6](https://github.com/treasure-data/embulk-output-td/pull/6)
6
+
7
+ ## 0.1.0 - 2015-06-23
8
+
9
+ The first release!!
data/README.md CHANGED
@@ -19,6 +19,7 @@ TODO: Write short description here
19
19
  - **table**: table name (string, required)
20
20
  - **session**: bulk_import session name (string, optional)
21
21
  - **time_column**: user-defined time column (string, optional)
22
+ - **unix_timestamp_unit**: if type of "time" or **time_column** is long, it's considered a unix timestamp. This option specifies its unit in sec, milli, micro or nano (enum, default: `sec`)
22
23
  - **tmpdir**: temporal directory
23
24
  - **upload_concurrency**: upload concurrency (int, default=2). max concurrency is 8.
24
25
  - **file_split_size**: split size (long, default=16384 (16MB)).
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.0"
16
+ version = "0.1.1"
17
17
 
18
18
  compileJava.options.encoding = 'UTF-8' // source encoding
19
19
  sourceCompatibility = 1.7
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-output-td"
4
- spec.version = "0.1.0"
4
+ spec.version = "0.1.1"
5
5
  spec.authors = ["Muga Nishizawa"]
6
6
  spec.summary = %[TreasureData output plugin for Embulk]
7
7
  spec.description = %[TreasureData output plugin is an Embulk plugin that loads records to TreasureData read by any input plugins. Search the input plugins by 'embulk-output' keyword.]
@@ -171,6 +171,7 @@ public class TdApiClient
171
171
  return session;
172
172
  }
173
173
 
174
+ @Deprecated
174
175
  public void uploadBulkImport(String sessionName, File path)
175
176
  throws IOException
176
177
  {
@@ -181,6 +182,15 @@ public class TdApiClient
181
182
  ContentResponse response = executeExchange(request);
182
183
  }
183
184
 
185
+ public void uploadBulkImportPart(String sessionName, String uniquePartName, File path)
186
+ throws IOException
187
+ {
188
+ Request request = prepareExchange(HttpMethod.PUT,
189
+ buildUrl("/v3/bulk_import/upload_part", sessionName, uniquePartName));
190
+ request.file(path.toPath());
191
+ ContentResponse response = executeExchange(request);
192
+ }
193
+
184
194
  public void freezeBulkImportSession(String sessionName)
185
195
  {
186
196
  Request request = prepareExchange(HttpMethod.POST,
@@ -1,5 +1,6 @@
1
1
  package org.embulk.output;
2
2
 
3
+ import org.embulk.spi.Exec;
3
4
  import org.msgpack.MessagePack;
4
5
  import org.msgpack.packer.Packer;
5
6
 
@@ -83,6 +84,11 @@ public class MsgpackGZFileBuilder
83
84
  return file;
84
85
  }
85
86
 
87
+ public boolean delete()
88
+ {
89
+ return file.delete();
90
+ }
91
+
86
92
  public void finish()
87
93
  throws IOException
88
94
  {
@@ -1,6 +1,7 @@
1
1
  package org.embulk.output;
2
2
 
3
3
  import com.google.common.base.Optional;
4
+ import com.google.common.base.Stopwatch;
4
5
  import com.google.common.base.Throwables;
5
6
  import com.treasuredata.api.TdApiClient;
6
7
  import org.embulk.config.CommitReport;
@@ -26,11 +27,15 @@ import org.msgpack.MessagePack;
26
27
  import org.slf4j.Logger;
27
28
 
28
29
  import java.io.File;
30
+ import java.io.Closeable;
29
31
  import java.io.IOException;
32
+ import java.util.Locale;
30
33
  import java.text.NumberFormat;
31
34
  import java.util.concurrent.Callable;
35
+ import java.util.concurrent.TimeUnit;
32
36
 
33
37
  import static com.google.common.base.Preconditions.checkNotNull;
38
+ import org.embulk.output.TdOutputPlugin.UnixTimestampUnit;
34
39
 
35
40
  public class RecordWriter
36
41
  implements TransactionalPageOutput
@@ -38,12 +43,13 @@ public class RecordWriter
38
43
  private final Logger log;
39
44
  private final TdApiClient client;
40
45
  private final String sessionName;
46
+ private final int taskIndex;
41
47
 
42
48
  private final MessagePack msgpack;
43
49
  private final FieldWriterSet fieldWriters;
44
50
  private final File tempDir;
45
51
 
46
- private int seqid = 0;
52
+ private int partSeqId = 0;
47
53
  private PageReader pageReader;
48
54
  private MsgpackGZFileBuilder builder;
49
55
 
@@ -51,11 +57,12 @@ public class RecordWriter
51
57
  private final int uploadConcurrency;
52
58
  private final long fileSplitSize; // unit: kb
53
59
 
54
- public RecordWriter(PluginTask task, TdApiClient client, FieldWriterSet fieldWriters)
60
+ public RecordWriter(PluginTask task, int taskIndex, TdApiClient client, FieldWriterSet fieldWriters)
55
61
  {
56
62
  this.log = Exec.getLogger(getClass());
57
63
  this.client = checkNotNull(client);
58
64
  this.sessionName = task.getSessionName();
65
+ this.taskIndex = taskIndex;
59
66
 
60
67
  this.msgpack = new MessagePack();
61
68
  this.fieldWriters = fieldWriters;
@@ -80,7 +87,7 @@ public class RecordWriter
80
87
  private void prepareNextBuilder()
81
88
  throws IOException
82
89
  {
83
- String prefix = String.format("%s-%d-", sessionName, seqid);
90
+ String prefix = String.format("%s-", sessionName);
84
91
  File tempFile = File.createTempFile(prefix, ".msgpack.gz", tempDir);
85
92
  this.builder = new MsgpackGZFileBuilder(msgpack, tempFile);
86
93
  }
@@ -140,6 +147,7 @@ public class RecordWriter
140
147
 
141
148
  if (builder.getWrittenSize() > fileSplitSize) {
142
149
  flush();
150
+ prepareNextBuilder();
143
151
  }
144
152
  }
145
153
 
@@ -150,31 +158,48 @@ public class RecordWriter
150
158
 
151
159
  public void flush() throws IOException
152
160
  {
153
- builder.finish();
154
-
155
161
  if (builder.getRecordCount() > 0) {
162
+ builder.finish();
163
+
156
164
  log.info("{uploading: {rows: {}, size: {} bytes (compressed)}}",
157
165
  builder.getRecordCount(),
158
166
  NumberFormat.getNumberInstance().format(builder.getWrittenSize()));
159
- upload(builder);
167
+ upload(builder, String.format(Locale.ENGLISH, "task-%d_%d", taskIndex, partSeqId));
168
+ partSeqId++;
160
169
  builder = null;
161
170
  }
162
-
163
- prepareNextBuilder();
164
171
  }
165
172
 
166
- private void upload(final MsgpackGZFileBuilder builder)
173
+ private void upload(final MsgpackGZFileBuilder builder, final String uniquePartName)
167
174
  throws IOException
168
175
  {
169
176
  executor.joinPartial(uploadConcurrency - 1);
170
177
  executor.submit(new Callable<Void>() {
171
178
  @Override
172
- public Void call() throws Exception {
173
- client.uploadBulkImport(sessionName, builder.getFile());
179
+ public Void call() throws Exception
180
+ {
181
+ File file = builder.getFile();
182
+
183
+ log.debug("{uploading: {file: {}}}", file.getAbsolutePath());
184
+ Stopwatch stopwatch = Stopwatch.createStarted();
185
+
186
+ client.uploadBulkImportPart(sessionName, uniquePartName, builder.getFile());
187
+
188
+ stopwatch.stop();
189
+ stopwatch.elapsed(TimeUnit.MILLISECONDS);
190
+ log.debug("{uploaded: {file: {}, time: {}}}", file.getAbsolutePath(), stopwatch);
174
191
  return null;
175
192
  }
176
- }, builder);
177
- seqid++;
193
+ },
194
+ new Closeable() {
195
+ public void close() throws IOException
196
+ {
197
+ builder.close();
198
+ if (!builder.delete()) {
199
+ log.warn("Failed to delete local temporary file {}. Ignoring.", builder.getFile());
200
+ }
201
+ }
202
+ });
178
203
  }
179
204
 
180
205
  @Override
@@ -199,6 +224,7 @@ public class RecordWriter
199
224
  } finally {
200
225
  if (builder != null) {
201
226
  builder.close();
227
+ builder.delete();
202
228
  builder = null;
203
229
  }
204
230
 
@@ -285,7 +311,10 @@ public class RecordWriter
285
311
  case PRIMARY_KEY:
286
312
  log.info("Using {}:{} column as the data partitioning key", columnName, columnType);
287
313
  if (columnType instanceof LongType) {
288
- writer = new LongFieldWriter(columnName);
314
+ if (task.getUnixTimestampUnit() != UnixTimestampUnit.SEC) {
315
+ log.warn("time column is converted from {} to seconds", task.getUnixTimestampUnit());
316
+ }
317
+ writer = new UnixTimestampLongFieldWriter(columnName, task.getUnixTimestampUnit().getFractionUnit());
289
318
  hasPkWriter = true;
290
319
  } else if (columnType instanceof TimestampType) {
291
320
  writer = new TimestampStringFieldWriter(task.getJRuby(), columnName);
@@ -344,13 +373,14 @@ public class RecordWriter
344
373
  String columnName = schema.getColumnName(duplicatePrimaryKeySourceIndex);
345
374
  Type columnType = schema.getColumnType(duplicatePrimaryKeySourceIndex);
346
375
 
347
- log.info("Duplicating {}:{} column to 'time' column for the data partitioning",
348
- columnName, columnType);
349
-
350
376
  FieldWriter writer;
351
377
  if (columnType instanceof LongType) {
352
- writer = new LongFieldDuplicator(columnName, "time");
378
+ log.info("Duplicating {}:{} column (unix timestamp {}) to 'time' column as seconds for the data partitioning",
379
+ columnName, columnType, task.getUnixTimestampUnit());
380
+ writer = new UnixTimestampFieldDuplicator(columnName, "time", task.getUnixTimestampUnit().getFractionUnit());
353
381
  } else if (columnType instanceof TimestampType) {
382
+ log.info("Duplicating {}:{} column to 'time' column as seconds for the data partitioning",
383
+ columnName, columnType);
354
384
  writer = new TimestampFieldLongDuplicator(task.getJRuby(), columnName, "time");
355
385
  } else {
356
386
  throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
@@ -473,6 +503,25 @@ public class RecordWriter
473
503
  }
474
504
  }
475
505
 
506
+ static class UnixTimestampLongFieldWriter
507
+ extends FieldWriter
508
+ {
509
+ private final int fractionUnit;
510
+
511
+ UnixTimestampLongFieldWriter(String keyName, int fractionUnit)
512
+ {
513
+ super(keyName);
514
+ this.fractionUnit = fractionUnit;
515
+ }
516
+
517
+ @Override
518
+ public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
519
+ throws IOException
520
+ {
521
+ builder.writeLong(reader.getLong(column) / fractionUnit);
522
+ }
523
+ }
524
+
476
525
  static class StringFieldWriter
477
526
  extends FieldWriter
478
527
  {
@@ -525,15 +574,15 @@ public class RecordWriter
525
574
  }
526
575
  }
527
576
 
528
- static class LongFieldDuplicator
577
+ static class UnixTimestampFieldDuplicator
529
578
  extends LongFieldWriter
530
579
  {
531
- private final LongFieldWriter timeFieldWriter;
580
+ private final UnixTimestampLongFieldWriter timeFieldWriter;
532
581
 
533
- public LongFieldDuplicator(String keyName, String duplicateKeyName)
582
+ public UnixTimestampFieldDuplicator(String keyName, String duplicateKeyName, int fractionUnit)
534
583
  {
535
584
  super(keyName);
536
- timeFieldWriter = new LongFieldWriter(duplicateKeyName);
585
+ timeFieldWriter = new UnixTimestampLongFieldWriter(duplicateKeyName, fractionUnit);
537
586
  }
538
587
 
539
588
  @Override
@@ -7,6 +7,8 @@ import javax.validation.constraints.Max;
7
7
 
8
8
  import com.google.common.base.Optional;
9
9
  import com.google.common.base.Throwables;
10
+ import com.fasterxml.jackson.annotation.JsonCreator;
11
+ import com.fasterxml.jackson.annotation.JsonValue;
10
12
  import com.treasuredata.api.TdApiClient;
11
13
  import com.treasuredata.api.TdApiClientConfig;
12
14
  import com.treasuredata.api.TdApiClientConfig.HttpProxyConfig;
@@ -78,6 +80,10 @@ public class TdOutputPlugin
78
80
  @ConfigDefault("null")
79
81
  public Optional<String> getTimeColumn();
80
82
 
83
+ @Config("unix_timestamp_unit")
84
+ @ConfigDefault("\"sec\"")
85
+ public UnixTimestampUnit getUnixTimestampUnit();
86
+
81
87
  @Config("tmpdir")
82
88
  @ConfigDefault("\"/tmp\"")
83
89
  public String getTempDir();
@@ -116,6 +122,47 @@ public class TdOutputPlugin
116
122
  public boolean getUseSsl();
117
123
  }
118
124
 
125
+ public static enum UnixTimestampUnit
126
+ {
127
+ SEC(1),
128
+ MILLI(1000),
129
+ MICRO(1000000),
130
+ NANO(1000000000);
131
+
132
+ private final int unit;
133
+
134
+ private UnixTimestampUnit(int unit)
135
+ {
136
+ this.unit = unit;
137
+ }
138
+
139
+ public int getFractionUnit()
140
+ {
141
+ return unit;
142
+ }
143
+
144
+ @JsonCreator
145
+ public static UnixTimestampUnit of(String s)
146
+ {
147
+ switch (s) {
148
+ case "sec": return SEC;
149
+ case "milli": return MILLI;
150
+ case "micro": return MICRO;
151
+ case "nano": return NANO;
152
+ default:
153
+ throw new ConfigException(
154
+ String.format("Unknown unix_timestamp_unit '%s'. Supported units are sec, milli, micro, and nano"));
155
+ }
156
+ }
157
+
158
+ @JsonValue
159
+ @Override
160
+ public String toString()
161
+ {
162
+ return name().toLowerCase();
163
+ }
164
+ }
165
+
119
166
  private final Logger log;
120
167
 
121
168
  public TdOutputPlugin()
@@ -367,14 +414,14 @@ public class TdOutputPlugin
367
414
  }
368
415
 
369
416
  @Override
370
- public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int processorIndex)
417
+ public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
371
418
  {
372
419
  final PluginTask task = taskSource.loadTask(PluginTask.class);
373
420
 
374
421
  RecordWriter closeLater = null;
375
422
  try {
376
423
  FieldWriterSet fieldWriters = new FieldWriterSet(log, task, schema);
377
- RecordWriter recordWriter = closeLater = new RecordWriter(task, newTdApiClient(task), fieldWriters);
424
+ RecordWriter recordWriter = closeLater = new RecordWriter(task, taskIndex, newTdApiClient(task), fieldWriters);
378
425
  recordWriter.open(schema);
379
426
  closeLater = null;
380
427
  return recordWriter;
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-td
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Muga Nishizawa
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-22 00:00:00.000000000 Z
11
+ date: 2015-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -46,6 +46,7 @@ extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
48
  - .gitignore
49
+ - CHANGELOG.md
49
50
  - README.md
50
51
  - build.gradle
51
52
  - embulk-output-td.gemspec
@@ -84,7 +85,7 @@ files:
84
85
  - src/main/java/org/embulk/output/RecordWriter.java
85
86
  - src/main/java/org/embulk/output/TdOutputPlugin.java
86
87
  - src/test/java/org/embulk/output/TestTdOutputPlugin.java
87
- - classpath/embulk-output-td-0.1.0.jar
88
+ - classpath/embulk-output-td-0.1.1.jar
88
89
  - classpath/javassist-3.18.1-GA.jar
89
90
  - classpath/jetty-client-9.2.2.v20140723.jar
90
91
  - classpath/jetty-http-9.2.2.v20140723.jar