embulk-output-td 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +1 -0
- data/build.gradle +1 -1
- data/embulk-output-td.gemspec +1 -1
- data/src/main/java/com/treasuredata/api/TdApiClient.java +10 -0
- data/src/main/java/org/embulk/output/MsgpackGZFileBuilder.java +6 -0
- data/src/main/java/org/embulk/output/RecordWriter.java +71 -22
- data/src/main/java/org/embulk/output/TdOutputPlugin.java +49 -2
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0e96fa2666efc130272325571d56cfd9e77b5086
|
|
4
|
+
data.tar.gz: b4ed6f4e05cf72ab403427a452b87a6b0da1d575
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 25a373bb171c0280913f452e5a220b2ab12e350a4ddd4e476606e29541c79a12dc16ebd172243ae48b5962ff7a7e531c74ce4347b94705a85c3768c77529a3ff
|
|
7
|
+
data.tar.gz: b5910c054b1e9bdc74f2fc269789901bc8652bdb9a56d3b92686e38ebc5d4a1d27a4e86d47c3f0bb560e99c27c3d9bbbfdf216054b378368c3efd8f3c4d44fa9
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
## 0.1.1 - 2015-07-14
|
|
2
|
+
|
|
3
|
+
* [maintenance] Make part name unique and idempotent [#9](https://github.com/treasure-data/embulk-output-td/pull/9)
|
|
4
|
+
* [maintenance] Delete temp files after uploading [#7](https://github.com/treasure-data/embulk-output-td/pull/7)
|
|
5
|
+
* [new feature] Add unix_timestamp_unit option [#6](https://github.com/treasure-data/embulk-output-td/pull/6)
|
|
6
|
+
|
|
7
|
+
## 0.1.0 - 2015-06-23
|
|
8
|
+
|
|
9
|
+
The first release!!
|
data/README.md
CHANGED
|
@@ -19,6 +19,7 @@ TODO: Write short description here
|
|
|
19
19
|
- **table**: table name (string, required)
|
|
20
20
|
- **session**: bulk_import session name (string, optional)
|
|
21
21
|
- **time_column**: user-defined time column (string, optional)
|
|
22
|
+
- **unix_timestamp_unit**: if the type of the "time" column or of **time_column** is long, its value is treated as a unix timestamp. This option specifies the unit of that value: sec, milli, micro or nano (enum, default: `sec`)
|
|
22
23
|
- **tmpdir**: temporary directory (string, default: `/tmp`)
|
|
23
24
|
- **upload_concurrency**: upload concurrency (int, default=2). max concurrency is 8.
|
|
24
25
|
- **file_split_size**: split size in KB (long, default=16384 (16MB)).
|
data/build.gradle
CHANGED
data/embulk-output-td.gemspec
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
Gem::Specification.new do |spec|
|
|
3
3
|
spec.name = "embulk-output-td"
|
|
4
|
-
spec.version = "0.1.
|
|
4
|
+
spec.version = "0.1.1"
|
|
5
5
|
spec.authors = ["Muga Nishizawa"]
|
|
6
6
|
spec.summary = %[TreasureData output plugin for Embulk]
|
|
7
7
|
spec.description = %[TreasureData output plugin is an Embulk plugin that loads records to TreasureData read by any input plugins. Search the input plugins by 'embulk-output' keyword.]
|
|
@@ -171,6 +171,7 @@ public class TdApiClient
|
|
|
171
171
|
return session;
|
|
172
172
|
}
|
|
173
173
|
|
|
174
|
+
@Deprecated
|
|
174
175
|
public void uploadBulkImport(String sessionName, File path)
|
|
175
176
|
throws IOException
|
|
176
177
|
{
|
|
@@ -181,6 +182,15 @@ public class TdApiClient
|
|
|
181
182
|
ContentResponse response = executeExchange(request);
|
|
182
183
|
}
|
|
183
184
|
|
|
185
|
+
public void uploadBulkImportPart(String sessionName, String uniquePartName, File path)
|
|
186
|
+
throws IOException
|
|
187
|
+
{
|
|
188
|
+
Request request = prepareExchange(HttpMethod.PUT,
|
|
189
|
+
buildUrl("/v3/bulk_import/upload_part", sessionName, uniquePartName));
|
|
190
|
+
request.file(path.toPath());
|
|
191
|
+
ContentResponse response = executeExchange(request);
|
|
192
|
+
}
|
|
193
|
+
|
|
184
194
|
public void freezeBulkImportSession(String sessionName)
|
|
185
195
|
{
|
|
186
196
|
Request request = prepareExchange(HttpMethod.POST,
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
package org.embulk.output;
|
|
2
2
|
|
|
3
|
+
import org.embulk.spi.Exec;
|
|
3
4
|
import org.msgpack.MessagePack;
|
|
4
5
|
import org.msgpack.packer.Packer;
|
|
5
6
|
|
|
@@ -83,6 +84,11 @@ public class MsgpackGZFileBuilder
|
|
|
83
84
|
return file;
|
|
84
85
|
}
|
|
85
86
|
|
|
87
|
+
public boolean delete()
|
|
88
|
+
{
|
|
89
|
+
return file.delete();
|
|
90
|
+
}
|
|
91
|
+
|
|
86
92
|
public void finish()
|
|
87
93
|
throws IOException
|
|
88
94
|
{
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
package org.embulk.output;
|
|
2
2
|
|
|
3
3
|
import com.google.common.base.Optional;
|
|
4
|
+
import com.google.common.base.Stopwatch;
|
|
4
5
|
import com.google.common.base.Throwables;
|
|
5
6
|
import com.treasuredata.api.TdApiClient;
|
|
6
7
|
import org.embulk.config.CommitReport;
|
|
@@ -26,11 +27,15 @@ import org.msgpack.MessagePack;
|
|
|
26
27
|
import org.slf4j.Logger;
|
|
27
28
|
|
|
28
29
|
import java.io.File;
|
|
30
|
+
import java.io.Closeable;
|
|
29
31
|
import java.io.IOException;
|
|
32
|
+
import java.util.Locale;
|
|
30
33
|
import java.text.NumberFormat;
|
|
31
34
|
import java.util.concurrent.Callable;
|
|
35
|
+
import java.util.concurrent.TimeUnit;
|
|
32
36
|
|
|
33
37
|
import static com.google.common.base.Preconditions.checkNotNull;
|
|
38
|
+
import org.embulk.output.TdOutputPlugin.UnixTimestampUnit;
|
|
34
39
|
|
|
35
40
|
public class RecordWriter
|
|
36
41
|
implements TransactionalPageOutput
|
|
@@ -38,12 +43,13 @@ public class RecordWriter
|
|
|
38
43
|
private final Logger log;
|
|
39
44
|
private final TdApiClient client;
|
|
40
45
|
private final String sessionName;
|
|
46
|
+
private final int taskIndex;
|
|
41
47
|
|
|
42
48
|
private final MessagePack msgpack;
|
|
43
49
|
private final FieldWriterSet fieldWriters;
|
|
44
50
|
private final File tempDir;
|
|
45
51
|
|
|
46
|
-
private int
|
|
52
|
+
private int partSeqId = 0;
|
|
47
53
|
private PageReader pageReader;
|
|
48
54
|
private MsgpackGZFileBuilder builder;
|
|
49
55
|
|
|
@@ -51,11 +57,12 @@ public class RecordWriter
|
|
|
51
57
|
private final int uploadConcurrency;
|
|
52
58
|
private final long fileSplitSize; // unit: kb
|
|
53
59
|
|
|
54
|
-
public RecordWriter(PluginTask task, TdApiClient client, FieldWriterSet fieldWriters)
|
|
60
|
+
public RecordWriter(PluginTask task, int taskIndex, TdApiClient client, FieldWriterSet fieldWriters)
|
|
55
61
|
{
|
|
56
62
|
this.log = Exec.getLogger(getClass());
|
|
57
63
|
this.client = checkNotNull(client);
|
|
58
64
|
this.sessionName = task.getSessionName();
|
|
65
|
+
this.taskIndex = taskIndex;
|
|
59
66
|
|
|
60
67
|
this.msgpack = new MessagePack();
|
|
61
68
|
this.fieldWriters = fieldWriters;
|
|
@@ -80,7 +87,7 @@ public class RecordWriter
|
|
|
80
87
|
private void prepareNextBuilder()
|
|
81
88
|
throws IOException
|
|
82
89
|
{
|
|
83
|
-
String prefix = String.format("%s
|
|
90
|
+
String prefix = String.format("%s-", sessionName);
|
|
84
91
|
File tempFile = File.createTempFile(prefix, ".msgpack.gz", tempDir);
|
|
85
92
|
this.builder = new MsgpackGZFileBuilder(msgpack, tempFile);
|
|
86
93
|
}
|
|
@@ -140,6 +147,7 @@ public class RecordWriter
|
|
|
140
147
|
|
|
141
148
|
if (builder.getWrittenSize() > fileSplitSize) {
|
|
142
149
|
flush();
|
|
150
|
+
prepareNextBuilder();
|
|
143
151
|
}
|
|
144
152
|
}
|
|
145
153
|
|
|
@@ -150,31 +158,48 @@ public class RecordWriter
|
|
|
150
158
|
|
|
151
159
|
public void flush() throws IOException
|
|
152
160
|
{
|
|
153
|
-
builder.finish();
|
|
154
|
-
|
|
155
161
|
if (builder.getRecordCount() > 0) {
|
|
162
|
+
builder.finish();
|
|
163
|
+
|
|
156
164
|
log.info("{uploading: {rows: {}, size: {} bytes (compressed)}}",
|
|
157
165
|
builder.getRecordCount(),
|
|
158
166
|
NumberFormat.getNumberInstance().format(builder.getWrittenSize()));
|
|
159
|
-
upload(builder);
|
|
167
|
+
upload(builder, String.format(Locale.ENGLISH, "task-%d_%d", taskIndex, partSeqId));
|
|
168
|
+
partSeqId++;
|
|
160
169
|
builder = null;
|
|
161
170
|
}
|
|
162
|
-
|
|
163
|
-
prepareNextBuilder();
|
|
164
171
|
}
|
|
165
172
|
|
|
166
|
-
private void upload(final MsgpackGZFileBuilder builder)
|
|
173
|
+
private void upload(final MsgpackGZFileBuilder builder, final String uniquePartName)
|
|
167
174
|
throws IOException
|
|
168
175
|
{
|
|
169
176
|
executor.joinPartial(uploadConcurrency - 1);
|
|
170
177
|
executor.submit(new Callable<Void>() {
|
|
171
178
|
@Override
|
|
172
|
-
public Void call() throws Exception
|
|
173
|
-
|
|
179
|
+
public Void call() throws Exception
|
|
180
|
+
{
|
|
181
|
+
File file = builder.getFile();
|
|
182
|
+
|
|
183
|
+
log.debug("{uploading: {file: {}}}", file.getAbsolutePath());
|
|
184
|
+
Stopwatch stopwatch = Stopwatch.createStarted();
|
|
185
|
+
|
|
186
|
+
client.uploadBulkImportPart(sessionName, uniquePartName, builder.getFile());
|
|
187
|
+
|
|
188
|
+
stopwatch.stop();
|
|
189
|
+
stopwatch.elapsed(TimeUnit.MILLISECONDS);
|
|
190
|
+
log.debug("{uploaded: {file: {}, time: {}}}", file.getAbsolutePath(), stopwatch);
|
|
174
191
|
return null;
|
|
175
192
|
}
|
|
176
|
-
},
|
|
177
|
-
|
|
193
|
+
},
|
|
194
|
+
new Closeable() {
|
|
195
|
+
public void close() throws IOException
|
|
196
|
+
{
|
|
197
|
+
builder.close();
|
|
198
|
+
if (!builder.delete()) {
|
|
199
|
+
log.warn("Failed to delete local temporary file {}. Ignoring.", builder.getFile());
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
});
|
|
178
203
|
}
|
|
179
204
|
|
|
180
205
|
@Override
|
|
@@ -199,6 +224,7 @@ public class RecordWriter
|
|
|
199
224
|
} finally {
|
|
200
225
|
if (builder != null) {
|
|
201
226
|
builder.close();
|
|
227
|
+
builder.delete();
|
|
202
228
|
builder = null;
|
|
203
229
|
}
|
|
204
230
|
|
|
@@ -285,7 +311,10 @@ public class RecordWriter
|
|
|
285
311
|
case PRIMARY_KEY:
|
|
286
312
|
log.info("Using {}:{} column as the data partitioning key", columnName, columnType);
|
|
287
313
|
if (columnType instanceof LongType) {
|
|
288
|
-
|
|
314
|
+
if (task.getUnixTimestampUnit() != UnixTimestampUnit.SEC) {
|
|
315
|
+
log.warn("time column is converted from {} to seconds", task.getUnixTimestampUnit());
|
|
316
|
+
}
|
|
317
|
+
writer = new UnixTimestampLongFieldWriter(columnName, task.getUnixTimestampUnit().getFractionUnit());
|
|
289
318
|
hasPkWriter = true;
|
|
290
319
|
} else if (columnType instanceof TimestampType) {
|
|
291
320
|
writer = new TimestampStringFieldWriter(task.getJRuby(), columnName);
|
|
@@ -344,13 +373,14 @@ public class RecordWriter
|
|
|
344
373
|
String columnName = schema.getColumnName(duplicatePrimaryKeySourceIndex);
|
|
345
374
|
Type columnType = schema.getColumnType(duplicatePrimaryKeySourceIndex);
|
|
346
375
|
|
|
347
|
-
log.info("Duplicating {}:{} column to 'time' column for the data partitioning",
|
|
348
|
-
columnName, columnType);
|
|
349
|
-
|
|
350
376
|
FieldWriter writer;
|
|
351
377
|
if (columnType instanceof LongType) {
|
|
352
|
-
|
|
378
|
+
log.info("Duplicating {}:{} column (unix timestamp {}) to 'time' column as seconds for the data partitioning",
|
|
379
|
+
columnName, columnType, task.getUnixTimestampUnit());
|
|
380
|
+
writer = new UnixTimestampFieldDuplicator(columnName, "time", task.getUnixTimestampUnit().getFractionUnit());
|
|
353
381
|
} else if (columnType instanceof TimestampType) {
|
|
382
|
+
log.info("Duplicating {}:{} column to 'time' column as seconds for the data partitioning",
|
|
383
|
+
columnName, columnType);
|
|
354
384
|
writer = new TimestampFieldLongDuplicator(task.getJRuby(), columnName, "time");
|
|
355
385
|
} else {
|
|
356
386
|
throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
|
|
@@ -473,6 +503,25 @@ public class RecordWriter
|
|
|
473
503
|
}
|
|
474
504
|
}
|
|
475
505
|
|
|
506
|
+
static class UnixTimestampLongFieldWriter
|
|
507
|
+
extends FieldWriter
|
|
508
|
+
{
|
|
509
|
+
private final int fractionUnit;
|
|
510
|
+
|
|
511
|
+
UnixTimestampLongFieldWriter(String keyName, int fractionUnit)
|
|
512
|
+
{
|
|
513
|
+
super(keyName);
|
|
514
|
+
this.fractionUnit = fractionUnit;
|
|
515
|
+
}
|
|
516
|
+
|
|
517
|
+
@Override
|
|
518
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
|
519
|
+
throws IOException
|
|
520
|
+
{
|
|
521
|
+
builder.writeLong(reader.getLong(column) / fractionUnit);
|
|
522
|
+
}
|
|
523
|
+
}
|
|
524
|
+
|
|
476
525
|
static class StringFieldWriter
|
|
477
526
|
extends FieldWriter
|
|
478
527
|
{
|
|
@@ -525,15 +574,15 @@ public class RecordWriter
|
|
|
525
574
|
}
|
|
526
575
|
}
|
|
527
576
|
|
|
528
|
-
static class
|
|
577
|
+
static class UnixTimestampFieldDuplicator
|
|
529
578
|
extends LongFieldWriter
|
|
530
579
|
{
|
|
531
|
-
private final
|
|
580
|
+
private final UnixTimestampLongFieldWriter timeFieldWriter;
|
|
532
581
|
|
|
533
|
-
public
|
|
582
|
+
public UnixTimestampFieldDuplicator(String keyName, String duplicateKeyName, int fractionUnit)
|
|
534
583
|
{
|
|
535
584
|
super(keyName);
|
|
536
|
-
timeFieldWriter = new
|
|
585
|
+
timeFieldWriter = new UnixTimestampLongFieldWriter(duplicateKeyName, fractionUnit);
|
|
537
586
|
}
|
|
538
587
|
|
|
539
588
|
@Override
|
|
@@ -7,6 +7,8 @@ import javax.validation.constraints.Max;
|
|
|
7
7
|
|
|
8
8
|
import com.google.common.base.Optional;
|
|
9
9
|
import com.google.common.base.Throwables;
|
|
10
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
|
11
|
+
import com.fasterxml.jackson.annotation.JsonValue;
|
|
10
12
|
import com.treasuredata.api.TdApiClient;
|
|
11
13
|
import com.treasuredata.api.TdApiClientConfig;
|
|
12
14
|
import com.treasuredata.api.TdApiClientConfig.HttpProxyConfig;
|
|
@@ -78,6 +80,10 @@ public class TdOutputPlugin
|
|
|
78
80
|
@ConfigDefault("null")
|
|
79
81
|
public Optional<String> getTimeColumn();
|
|
80
82
|
|
|
83
|
+
@Config("unix_timestamp_unit")
|
|
84
|
+
@ConfigDefault("\"sec\"")
|
|
85
|
+
public UnixTimestampUnit getUnixTimestampUnit();
|
|
86
|
+
|
|
81
87
|
@Config("tmpdir")
|
|
82
88
|
@ConfigDefault("\"/tmp\"")
|
|
83
89
|
public String getTempDir();
|
|
@@ -116,6 +122,47 @@ public class TdOutputPlugin
|
|
|
116
122
|
public boolean getUseSsl();
|
|
117
123
|
}
|
|
118
124
|
|
|
125
|
+
public static enum UnixTimestampUnit
|
|
126
|
+
{
|
|
127
|
+
SEC(1),
|
|
128
|
+
MILLI(1000),
|
|
129
|
+
MICRO(1000000),
|
|
130
|
+
NANO(1000000000);
|
|
131
|
+
|
|
132
|
+
private final int unit;
|
|
133
|
+
|
|
134
|
+
private UnixTimestampUnit(int unit)
|
|
135
|
+
{
|
|
136
|
+
this.unit = unit;
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
public int getFractionUnit()
|
|
140
|
+
{
|
|
141
|
+
return unit;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
@JsonCreator
|
|
145
|
+
public static UnixTimestampUnit of(String s)
|
|
146
|
+
{
|
|
147
|
+
switch (s) {
|
|
148
|
+
case "sec": return SEC;
|
|
149
|
+
case "milli": return MILLI;
|
|
150
|
+
case "micro": return MICRO;
|
|
151
|
+
case "nano": return NANO;
|
|
152
|
+
default:
|
|
153
|
+
throw new ConfigException(
|
|
154
|
+
String.format("Unknown unix_timestamp_unit '%s'. Supported units are sec, milli, micro, and nano"));
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
@JsonValue
|
|
159
|
+
@Override
|
|
160
|
+
public String toString()
|
|
161
|
+
{
|
|
162
|
+
return name().toLowerCase();
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
|
|
119
166
|
private final Logger log;
|
|
120
167
|
|
|
121
168
|
public TdOutputPlugin()
|
|
@@ -367,14 +414,14 @@ public class TdOutputPlugin
|
|
|
367
414
|
}
|
|
368
415
|
|
|
369
416
|
@Override
|
|
370
|
-
public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int
|
|
417
|
+
public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
|
|
371
418
|
{
|
|
372
419
|
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
|
373
420
|
|
|
374
421
|
RecordWriter closeLater = null;
|
|
375
422
|
try {
|
|
376
423
|
FieldWriterSet fieldWriters = new FieldWriterSet(log, task, schema);
|
|
377
|
-
RecordWriter recordWriter = closeLater = new RecordWriter(task, newTdApiClient(task), fieldWriters);
|
|
424
|
+
RecordWriter recordWriter = closeLater = new RecordWriter(task, taskIndex, newTdApiClient(task), fieldWriters);
|
|
378
425
|
recordWriter.open(schema);
|
|
379
426
|
closeLater = null;
|
|
380
427
|
return recordWriter;
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: embulk-output-td
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Muga Nishizawa
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-
|
|
11
|
+
date: 2015-07-14 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -46,6 +46,7 @@ extensions: []
|
|
|
46
46
|
extra_rdoc_files: []
|
|
47
47
|
files:
|
|
48
48
|
- .gitignore
|
|
49
|
+
- CHANGELOG.md
|
|
49
50
|
- README.md
|
|
50
51
|
- build.gradle
|
|
51
52
|
- embulk-output-td.gemspec
|
|
@@ -84,7 +85,7 @@ files:
|
|
|
84
85
|
- src/main/java/org/embulk/output/RecordWriter.java
|
|
85
86
|
- src/main/java/org/embulk/output/TdOutputPlugin.java
|
|
86
87
|
- src/test/java/org/embulk/output/TestTdOutputPlugin.java
|
|
87
|
-
- classpath/embulk-output-td-0.1.
|
|
88
|
+
- classpath/embulk-output-td-0.1.1.jar
|
|
88
89
|
- classpath/javassist-3.18.1-GA.jar
|
|
89
90
|
- classpath/jetty-client-9.2.2.v20140723.jar
|
|
90
91
|
- classpath/jetty-http-9.2.2.v20140723.jar
|