embulk-output-td 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 58e133b33691a9a323d67be520799e68a8ec7e83
4
- data.tar.gz: bd184003c46843eba457d648e3b557d8d2566373
3
+ metadata.gz: 0e96fa2666efc130272325571d56cfd9e77b5086
4
+ data.tar.gz: b4ed6f4e05cf72ab403427a452b87a6b0da1d575
5
5
  SHA512:
6
- metadata.gz: b6e9317a6a47f6de4388f8d78f64d180396a82444cdb3405abcb723caaf4f0c7fab7c2e33993a9f8bca13eedd2fe514f6311869cd907f0eca0bea9cdf5d63cb5
7
- data.tar.gz: 5daaad4fd6463747b9d5f83d2caff817187187ce7397067bc3627bdacad6e7d39fbcb8a340c430d3399cc8637814a4871d8354af3d6353896e4012adb6e4bd89
6
+ metadata.gz: 25a373bb171c0280913f452e5a220b2ab12e350a4ddd4e476606e29541c79a12dc16ebd172243ae48b5962ff7a7e531c74ce4347b94705a85c3768c77529a3ff
7
+ data.tar.gz: b5910c054b1e9bdc74f2fc269789901bc8652bdb9a56d3b92686e38ebc5d4a1d27a4e86d47c3f0bb560e99c27c3d9bbbfdf216054b378368c3efd8f3c4d44fa9
data/CHANGELOG.md ADDED
@@ -0,0 +1,9 @@
1
+ ## 0.1.1 - 2015-07-14
2
+
3
+ * [maintenance] Make part name unique and idempotent [#9](https://github.com/treasure-data/embulk-output-td/pull/9)
4
+ * [maintenance] Delete temp files after uploading [#7](https://github.com/treasure-data/embulk-output-td/pull/7)
5
+ * [new feature] Add unix_timestamp_unit option [#6](https://github.com/treasure-data/embulk-output-td/pull/6)
6
+
7
+ ## 0.1.0 - 2015-06-23
8
+
9
+ The first release!!
data/README.md CHANGED
@@ -19,6 +19,7 @@ TODO: Write short description here
19
19
  - **table**: table name (string, required)
20
20
  - **session**: bulk_import session name (string, optional)
21
21
  - **time_column**: user-defined time column (string, optional)
22
+ - **unix_timestamp_unit**: if the type of "time" or **time_column** is long, it's considered a unix timestamp. This option specifies its unit as sec, milli, micro or nano (enum, default: `sec`)
22
23
  - **tmpdir**: temporary directory
23
24
  - **upload_concurrency**: upload concurrency (int, default=2). max concurrency is 8.
24
25
  - **file_split_size**: split size (long, default=16384 (16MB)).
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.0"
16
+ version = "0.1.1"
17
17
 
18
18
  compileJava.options.encoding = 'UTF-8' // source encoding
19
19
  sourceCompatibility = 1.7
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-output-td"
4
- spec.version = "0.1.0"
4
+ spec.version = "0.1.1"
5
5
  spec.authors = ["Muga Nishizawa"]
6
6
  spec.summary = %[TreasureData output plugin for Embulk]
7
7
  spec.description = %[TreasureData output plugin is an Embulk plugin that loads records to TreasureData read by any input plugins. Search the input plugins by 'embulk-output' keyword.]
@@ -171,6 +171,7 @@ public class TdApiClient
171
171
  return session;
172
172
  }
173
173
 
174
+ @Deprecated
174
175
  public void uploadBulkImport(String sessionName, File path)
175
176
  throws IOException
176
177
  {
@@ -181,6 +182,15 @@ public class TdApiClient
181
182
  ContentResponse response = executeExchange(request);
182
183
  }
183
184
 
185
+ public void uploadBulkImportPart(String sessionName, String uniquePartName, File path)
186
+ throws IOException
187
+ {
188
+ Request request = prepareExchange(HttpMethod.PUT,
189
+ buildUrl("/v3/bulk_import/upload_part", sessionName, uniquePartName));
190
+ request.file(path.toPath());
191
+ ContentResponse response = executeExchange(request);
192
+ }
193
+
184
194
  public void freezeBulkImportSession(String sessionName)
185
195
  {
186
196
  Request request = prepareExchange(HttpMethod.POST,
@@ -1,5 +1,6 @@
1
1
  package org.embulk.output;
2
2
 
3
+ import org.embulk.spi.Exec;
3
4
  import org.msgpack.MessagePack;
4
5
  import org.msgpack.packer.Packer;
5
6
 
@@ -83,6 +84,11 @@ public class MsgpackGZFileBuilder
83
84
  return file;
84
85
  }
85
86
 
87
+ public boolean delete()
88
+ {
89
+ return file.delete();
90
+ }
91
+
86
92
  public void finish()
87
93
  throws IOException
88
94
  {
@@ -1,6 +1,7 @@
1
1
  package org.embulk.output;
2
2
 
3
3
  import com.google.common.base.Optional;
4
+ import com.google.common.base.Stopwatch;
4
5
  import com.google.common.base.Throwables;
5
6
  import com.treasuredata.api.TdApiClient;
6
7
  import org.embulk.config.CommitReport;
@@ -26,11 +27,15 @@ import org.msgpack.MessagePack;
26
27
  import org.slf4j.Logger;
27
28
 
28
29
  import java.io.File;
30
+ import java.io.Closeable;
29
31
  import java.io.IOException;
32
+ import java.util.Locale;
30
33
  import java.text.NumberFormat;
31
34
  import java.util.concurrent.Callable;
35
+ import java.util.concurrent.TimeUnit;
32
36
 
33
37
  import static com.google.common.base.Preconditions.checkNotNull;
38
+ import org.embulk.output.TdOutputPlugin.UnixTimestampUnit;
34
39
 
35
40
  public class RecordWriter
36
41
  implements TransactionalPageOutput
@@ -38,12 +43,13 @@ public class RecordWriter
38
43
  private final Logger log;
39
44
  private final TdApiClient client;
40
45
  private final String sessionName;
46
+ private final int taskIndex;
41
47
 
42
48
  private final MessagePack msgpack;
43
49
  private final FieldWriterSet fieldWriters;
44
50
  private final File tempDir;
45
51
 
46
- private int seqid = 0;
52
+ private int partSeqId = 0;
47
53
  private PageReader pageReader;
48
54
  private MsgpackGZFileBuilder builder;
49
55
 
@@ -51,11 +57,12 @@ public class RecordWriter
51
57
  private final int uploadConcurrency;
52
58
  private final long fileSplitSize; // unit: kb
53
59
 
54
- public RecordWriter(PluginTask task, TdApiClient client, FieldWriterSet fieldWriters)
60
+ public RecordWriter(PluginTask task, int taskIndex, TdApiClient client, FieldWriterSet fieldWriters)
55
61
  {
56
62
  this.log = Exec.getLogger(getClass());
57
63
  this.client = checkNotNull(client);
58
64
  this.sessionName = task.getSessionName();
65
+ this.taskIndex = taskIndex;
59
66
 
60
67
  this.msgpack = new MessagePack();
61
68
  this.fieldWriters = fieldWriters;
@@ -80,7 +87,7 @@ public class RecordWriter
80
87
  private void prepareNextBuilder()
81
88
  throws IOException
82
89
  {
83
- String prefix = String.format("%s-%d-", sessionName, seqid);
90
+ String prefix = String.format("%s-", sessionName);
84
91
  File tempFile = File.createTempFile(prefix, ".msgpack.gz", tempDir);
85
92
  this.builder = new MsgpackGZFileBuilder(msgpack, tempFile);
86
93
  }
@@ -140,6 +147,7 @@ public class RecordWriter
140
147
 
141
148
  if (builder.getWrittenSize() > fileSplitSize) {
142
149
  flush();
150
+ prepareNextBuilder();
143
151
  }
144
152
  }
145
153
 
@@ -150,31 +158,48 @@ public class RecordWriter
150
158
 
151
159
  public void flush() throws IOException
152
160
  {
153
- builder.finish();
154
-
155
161
  if (builder.getRecordCount() > 0) {
162
+ builder.finish();
163
+
156
164
  log.info("{uploading: {rows: {}, size: {} bytes (compressed)}}",
157
165
  builder.getRecordCount(),
158
166
  NumberFormat.getNumberInstance().format(builder.getWrittenSize()));
159
- upload(builder);
167
+ upload(builder, String.format(Locale.ENGLISH, "task-%d_%d", taskIndex, partSeqId));
168
+ partSeqId++;
160
169
  builder = null;
161
170
  }
162
-
163
- prepareNextBuilder();
164
171
  }
165
172
 
166
- private void upload(final MsgpackGZFileBuilder builder)
173
+ private void upload(final MsgpackGZFileBuilder builder, final String uniquePartName)
167
174
  throws IOException
168
175
  {
169
176
  executor.joinPartial(uploadConcurrency - 1);
170
177
  executor.submit(new Callable<Void>() {
171
178
  @Override
172
- public Void call() throws Exception {
173
- client.uploadBulkImport(sessionName, builder.getFile());
179
+ public Void call() throws Exception
180
+ {
181
+ File file = builder.getFile();
182
+
183
+ log.debug("{uploading: {file: {}}}", file.getAbsolutePath());
184
+ Stopwatch stopwatch = Stopwatch.createStarted();
185
+
186
+ client.uploadBulkImportPart(sessionName, uniquePartName, builder.getFile());
187
+
188
+ stopwatch.stop();
189
+ stopwatch.elapsed(TimeUnit.MILLISECONDS);
190
+ log.debug("{uploaded: {file: {}, time: {}}}", file.getAbsolutePath(), stopwatch);
174
191
  return null;
175
192
  }
176
- }, builder);
177
- seqid++;
193
+ },
194
+ new Closeable() {
195
+ public void close() throws IOException
196
+ {
197
+ builder.close();
198
+ if (!builder.delete()) {
199
+ log.warn("Failed to delete local temporary file {}. Ignoring.", builder.getFile());
200
+ }
201
+ }
202
+ });
178
203
  }
179
204
 
180
205
  @Override
@@ -199,6 +224,7 @@ public class RecordWriter
199
224
  } finally {
200
225
  if (builder != null) {
201
226
  builder.close();
227
+ builder.delete();
202
228
  builder = null;
203
229
  }
204
230
 
@@ -285,7 +311,10 @@ public class RecordWriter
285
311
  case PRIMARY_KEY:
286
312
  log.info("Using {}:{} column as the data partitioning key", columnName, columnType);
287
313
  if (columnType instanceof LongType) {
288
- writer = new LongFieldWriter(columnName);
314
+ if (task.getUnixTimestampUnit() != UnixTimestampUnit.SEC) {
315
+ log.warn("time column is converted from {} to seconds", task.getUnixTimestampUnit());
316
+ }
317
+ writer = new UnixTimestampLongFieldWriter(columnName, task.getUnixTimestampUnit().getFractionUnit());
289
318
  hasPkWriter = true;
290
319
  } else if (columnType instanceof TimestampType) {
291
320
  writer = new TimestampStringFieldWriter(task.getJRuby(), columnName);
@@ -344,13 +373,14 @@ public class RecordWriter
344
373
  String columnName = schema.getColumnName(duplicatePrimaryKeySourceIndex);
345
374
  Type columnType = schema.getColumnType(duplicatePrimaryKeySourceIndex);
346
375
 
347
- log.info("Duplicating {}:{} column to 'time' column for the data partitioning",
348
- columnName, columnType);
349
-
350
376
  FieldWriter writer;
351
377
  if (columnType instanceof LongType) {
352
- writer = new LongFieldDuplicator(columnName, "time");
378
+ log.info("Duplicating {}:{} column (unix timestamp {}) to 'time' column as seconds for the data partitioning",
379
+ columnName, columnType, task.getUnixTimestampUnit());
380
+ writer = new UnixTimestampFieldDuplicator(columnName, "time", task.getUnixTimestampUnit().getFractionUnit());
353
381
  } else if (columnType instanceof TimestampType) {
382
+ log.info("Duplicating {}:{} column to 'time' column as seconds for the data partitioning",
383
+ columnName, columnType);
354
384
  writer = new TimestampFieldLongDuplicator(task.getJRuby(), columnName, "time");
355
385
  } else {
356
386
  throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
@@ -473,6 +503,25 @@ public class RecordWriter
473
503
  }
474
504
  }
475
505
 
506
+ static class UnixTimestampLongFieldWriter
507
+ extends FieldWriter
508
+ {
509
+ private final int fractionUnit;
510
+
511
+ UnixTimestampLongFieldWriter(String keyName, int fractionUnit)
512
+ {
513
+ super(keyName);
514
+ this.fractionUnit = fractionUnit;
515
+ }
516
+
517
+ @Override
518
+ public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
519
+ throws IOException
520
+ {
521
+ builder.writeLong(reader.getLong(column) / fractionUnit);
522
+ }
523
+ }
524
+
476
525
  static class StringFieldWriter
477
526
  extends FieldWriter
478
527
  {
@@ -525,15 +574,15 @@ public class RecordWriter
525
574
  }
526
575
  }
527
576
 
528
- static class LongFieldDuplicator
577
+ static class UnixTimestampFieldDuplicator
529
578
  extends LongFieldWriter
530
579
  {
531
- private final LongFieldWriter timeFieldWriter;
580
+ private final UnixTimestampLongFieldWriter timeFieldWriter;
532
581
 
533
- public LongFieldDuplicator(String keyName, String duplicateKeyName)
582
+ public UnixTimestampFieldDuplicator(String keyName, String duplicateKeyName, int fractionUnit)
534
583
  {
535
584
  super(keyName);
536
- timeFieldWriter = new LongFieldWriter(duplicateKeyName);
585
+ timeFieldWriter = new UnixTimestampLongFieldWriter(duplicateKeyName, fractionUnit);
537
586
  }
538
587
 
539
588
  @Override
@@ -7,6 +7,8 @@ import javax.validation.constraints.Max;
7
7
 
8
8
  import com.google.common.base.Optional;
9
9
  import com.google.common.base.Throwables;
10
+ import com.fasterxml.jackson.annotation.JsonCreator;
11
+ import com.fasterxml.jackson.annotation.JsonValue;
10
12
  import com.treasuredata.api.TdApiClient;
11
13
  import com.treasuredata.api.TdApiClientConfig;
12
14
  import com.treasuredata.api.TdApiClientConfig.HttpProxyConfig;
@@ -78,6 +80,10 @@ public class TdOutputPlugin
78
80
  @ConfigDefault("null")
79
81
  public Optional<String> getTimeColumn();
80
82
 
83
+ @Config("unix_timestamp_unit")
84
+ @ConfigDefault("\"sec\"")
85
+ public UnixTimestampUnit getUnixTimestampUnit();
86
+
81
87
  @Config("tmpdir")
82
88
  @ConfigDefault("\"/tmp\"")
83
89
  public String getTempDir();
@@ -116,6 +122,47 @@ public class TdOutputPlugin
116
122
  public boolean getUseSsl();
117
123
  }
118
124
 
125
+ public static enum UnixTimestampUnit
126
+ {
127
+ SEC(1),
128
+ MILLI(1000),
129
+ MICRO(1000000),
130
+ NANO(1000000000);
131
+
132
+ private final int unit;
133
+
134
+ private UnixTimestampUnit(int unit)
135
+ {
136
+ this.unit = unit;
137
+ }
138
+
139
+ public int getFractionUnit()
140
+ {
141
+ return unit;
142
+ }
143
+
144
+ @JsonCreator
145
+ public static UnixTimestampUnit of(String s)
146
+ {
147
+ switch (s) {
148
+ case "sec": return SEC;
149
+ case "milli": return MILLI;
150
+ case "micro": return MICRO;
151
+ case "nano": return NANO;
152
+ default:
153
+ throw new ConfigException(
154
+ String.format("Unknown unix_timestamp_unit '%s'. Supported units are sec, milli, micro, and nano"));
155
+ }
156
+ }
157
+
158
+ @JsonValue
159
+ @Override
160
+ public String toString()
161
+ {
162
+ return name().toLowerCase();
163
+ }
164
+ }
165
+
119
166
  private final Logger log;
120
167
 
121
168
  public TdOutputPlugin()
@@ -367,14 +414,14 @@ public class TdOutputPlugin
367
414
  }
368
415
 
369
416
  @Override
370
- public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int processorIndex)
417
+ public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
371
418
  {
372
419
  final PluginTask task = taskSource.loadTask(PluginTask.class);
373
420
 
374
421
  RecordWriter closeLater = null;
375
422
  try {
376
423
  FieldWriterSet fieldWriters = new FieldWriterSet(log, task, schema);
377
- RecordWriter recordWriter = closeLater = new RecordWriter(task, newTdApiClient(task), fieldWriters);
424
+ RecordWriter recordWriter = closeLater = new RecordWriter(task, taskIndex, newTdApiClient(task), fieldWriters);
378
425
  recordWriter.open(schema);
379
426
  closeLater = null;
380
427
  return recordWriter;
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-td
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Muga Nishizawa
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-22 00:00:00.000000000 Z
11
+ date: 2015-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -46,6 +46,7 @@ extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
48
  - .gitignore
49
+ - CHANGELOG.md
49
50
  - README.md
50
51
  - build.gradle
51
52
  - embulk-output-td.gemspec
@@ -84,7 +85,7 @@ files:
84
85
  - src/main/java/org/embulk/output/RecordWriter.java
85
86
  - src/main/java/org/embulk/output/TdOutputPlugin.java
86
87
  - src/test/java/org/embulk/output/TestTdOutputPlugin.java
87
- - classpath/embulk-output-td-0.1.0.jar
88
+ - classpath/embulk-output-td-0.1.1.jar
88
89
  - classpath/javassist-3.18.1-GA.jar
89
90
  - classpath/jetty-client-9.2.2.v20140723.jar
90
91
  - classpath/jetty-http-9.2.2.v20140723.jar