embulk-output-td 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +1 -0
- data/build.gradle +1 -1
- data/embulk-output-td.gemspec +1 -1
- data/src/main/java/com/treasuredata/api/TdApiClient.java +10 -0
- data/src/main/java/org/embulk/output/MsgpackGZFileBuilder.java +6 -0
- data/src/main/java/org/embulk/output/RecordWriter.java +71 -22
- data/src/main/java/org/embulk/output/TdOutputPlugin.java +49 -2
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0e96fa2666efc130272325571d56cfd9e77b5086
|
4
|
+
data.tar.gz: b4ed6f4e05cf72ab403427a452b87a6b0da1d575
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 25a373bb171c0280913f452e5a220b2ab12e350a4ddd4e476606e29541c79a12dc16ebd172243ae48b5962ff7a7e531c74ce4347b94705a85c3768c77529a3ff
|
7
|
+
data.tar.gz: b5910c054b1e9bdc74f2fc269789901bc8652bdb9a56d3b92686e38ebc5d4a1d27a4e86d47c3f0bb560e99c27c3d9bbbfdf216054b378368c3efd8f3c4d44fa9
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
## 0.1.1 - 2015-07-14
|
2
|
+
|
3
|
+
* [maintenance] Make part name unique and idempotent [#9](https://github.com/treasure-data/embulk-output-td/pull/9)
|
4
|
+
* [maintenance] Delete temp files after uploading [#7](https://github.com/treasure-data/embulk-output-td/pull/7)
|
5
|
+
* [new feature] Add unix_timestamp_unit option [#6](https://github.com/treasure-data/embulk-output-td/pull/6)
|
6
|
+
|
7
|
+
## 0.1.0 - 2015-06-23
|
8
|
+
|
9
|
+
The first release!!
|
data/README.md
CHANGED
@@ -19,6 +19,7 @@ TODO: Write short description here
|
|
19
19
|
- **table**: table name (string, required)
|
20
20
|
- **session**: bulk_import session name (string, optional)
|
21
21
|
- **time_column**: user-defined time column (string, optional)
|
22
|
+
- **unix_timestamp_unit**: if the type of the "time" column or **time_column** is long, its values are treated as unix timestamps. This option specifies their unit: sec, milli, micro or nano (enum, default: `sec`)
|
22
23
|
- **tmpdir**: temporary directory
|
23
24
|
- **upload_concurrency**: upload concurrency (int, default=2). max concurrency is 8.
|
24
25
|
- **file_split_size**: split size in KB (long, default=16384 (16MB)).
|
data/build.gradle
CHANGED
data/embulk-output-td.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-output-td"
|
4
|
-
spec.version = "0.1.0"
|
4
|
+
spec.version = "0.1.1"
|
5
5
|
spec.authors = ["Muga Nishizawa"]
|
6
6
|
spec.summary = %[TreasureData output plugin for Embulk]
|
7
7
|
spec.description = %[TreasureData output plugin is an Embulk plugin that loads records to TreasureData read by any input plugins. Search the input plugins by 'embulk-output' keyword.]
|
@@ -171,6 +171,7 @@ public class TdApiClient
|
|
171
171
|
return session;
|
172
172
|
}
|
173
173
|
|
174
|
+
@Deprecated
|
174
175
|
public void uploadBulkImport(String sessionName, File path)
|
175
176
|
throws IOException
|
176
177
|
{
|
@@ -181,6 +182,15 @@ public class TdApiClient
|
|
181
182
|
ContentResponse response = executeExchange(request);
|
182
183
|
}
|
183
184
|
|
185
|
+
public void uploadBulkImportPart(String sessionName, String uniquePartName, File path)
|
186
|
+
throws IOException
|
187
|
+
{
|
188
|
+
Request request = prepareExchange(HttpMethod.PUT,
|
189
|
+
buildUrl("/v3/bulk_import/upload_part", sessionName, uniquePartName));
|
190
|
+
request.file(path.toPath());
|
191
|
+
ContentResponse response = executeExchange(request);
|
192
|
+
}
|
193
|
+
|
184
194
|
public void freezeBulkImportSession(String sessionName)
|
185
195
|
{
|
186
196
|
Request request = prepareExchange(HttpMethod.POST,
|
@@ -1,5 +1,6 @@
|
|
1
1
|
package org.embulk.output;
|
2
2
|
|
3
|
+
import org.embulk.spi.Exec;
|
3
4
|
import org.msgpack.MessagePack;
|
4
5
|
import org.msgpack.packer.Packer;
|
5
6
|
|
@@ -83,6 +84,11 @@ public class MsgpackGZFileBuilder
|
|
83
84
|
return file;
|
84
85
|
}
|
85
86
|
|
87
|
+
public boolean delete()
|
88
|
+
{
|
89
|
+
return file.delete();
|
90
|
+
}
|
91
|
+
|
86
92
|
public void finish()
|
87
93
|
throws IOException
|
88
94
|
{
|
@@ -1,6 +1,7 @@
|
|
1
1
|
package org.embulk.output;
|
2
2
|
|
3
3
|
import com.google.common.base.Optional;
|
4
|
+
import com.google.common.base.Stopwatch;
|
4
5
|
import com.google.common.base.Throwables;
|
5
6
|
import com.treasuredata.api.TdApiClient;
|
6
7
|
import org.embulk.config.CommitReport;
|
@@ -26,11 +27,15 @@ import org.msgpack.MessagePack;
|
|
26
27
|
import org.slf4j.Logger;
|
27
28
|
|
28
29
|
import java.io.File;
|
30
|
+
import java.io.Closeable;
|
29
31
|
import java.io.IOException;
|
32
|
+
import java.util.Locale;
|
30
33
|
import java.text.NumberFormat;
|
31
34
|
import java.util.concurrent.Callable;
|
35
|
+
import java.util.concurrent.TimeUnit;
|
32
36
|
|
33
37
|
import static com.google.common.base.Preconditions.checkNotNull;
|
38
|
+
import org.embulk.output.TdOutputPlugin.UnixTimestampUnit;
|
34
39
|
|
35
40
|
public class RecordWriter
|
36
41
|
implements TransactionalPageOutput
|
@@ -38,12 +43,13 @@ public class RecordWriter
|
|
38
43
|
private final Logger log;
|
39
44
|
private final TdApiClient client;
|
40
45
|
private final String sessionName;
|
46
|
+
private final int taskIndex;
|
41
47
|
|
42
48
|
private final MessagePack msgpack;
|
43
49
|
private final FieldWriterSet fieldWriters;
|
44
50
|
private final File tempDir;
|
45
51
|
|
46
|
-
private int
|
52
|
+
private int partSeqId = 0;
|
47
53
|
private PageReader pageReader;
|
48
54
|
private MsgpackGZFileBuilder builder;
|
49
55
|
|
@@ -51,11 +57,12 @@ public class RecordWriter
|
|
51
57
|
private final int uploadConcurrency;
|
52
58
|
private final long fileSplitSize; // unit: kb
|
53
59
|
|
54
|
-
public RecordWriter(PluginTask task, TdApiClient client, FieldWriterSet fieldWriters)
|
60
|
+
public RecordWriter(PluginTask task, int taskIndex, TdApiClient client, FieldWriterSet fieldWriters)
|
55
61
|
{
|
56
62
|
this.log = Exec.getLogger(getClass());
|
57
63
|
this.client = checkNotNull(client);
|
58
64
|
this.sessionName = task.getSessionName();
|
65
|
+
this.taskIndex = taskIndex;
|
59
66
|
|
60
67
|
this.msgpack = new MessagePack();
|
61
68
|
this.fieldWriters = fieldWriters;
|
@@ -80,7 +87,7 @@ public class RecordWriter
|
|
80
87
|
private void prepareNextBuilder()
|
81
88
|
throws IOException
|
82
89
|
{
|
83
|
-
String prefix = String.format("%s
|
90
|
+
String prefix = String.format("%s-", sessionName);
|
84
91
|
File tempFile = File.createTempFile(prefix, ".msgpack.gz", tempDir);
|
85
92
|
this.builder = new MsgpackGZFileBuilder(msgpack, tempFile);
|
86
93
|
}
|
@@ -140,6 +147,7 @@ public class RecordWriter
|
|
140
147
|
|
141
148
|
if (builder.getWrittenSize() > fileSplitSize) {
|
142
149
|
flush();
|
150
|
+
prepareNextBuilder();
|
143
151
|
}
|
144
152
|
}
|
145
153
|
|
@@ -150,31 +158,48 @@ public class RecordWriter
|
|
150
158
|
|
151
159
|
public void flush() throws IOException
|
152
160
|
{
|
153
|
-
builder.finish();
|
154
|
-
|
155
161
|
if (builder.getRecordCount() > 0) {
|
162
|
+
builder.finish();
|
163
|
+
|
156
164
|
log.info("{uploading: {rows: {}, size: {} bytes (compressed)}}",
|
157
165
|
builder.getRecordCount(),
|
158
166
|
NumberFormat.getNumberInstance().format(builder.getWrittenSize()));
|
159
|
-
upload(builder);
|
167
|
+
upload(builder, String.format(Locale.ENGLISH, "task-%d_%d", taskIndex, partSeqId));
|
168
|
+
partSeqId++;
|
160
169
|
builder = null;
|
161
170
|
}
|
162
|
-
|
163
|
-
prepareNextBuilder();
|
164
171
|
}
|
165
172
|
|
166
|
-
private void upload(final MsgpackGZFileBuilder builder)
|
173
|
+
private void upload(final MsgpackGZFileBuilder builder, final String uniquePartName)
|
167
174
|
throws IOException
|
168
175
|
{
|
169
176
|
executor.joinPartial(uploadConcurrency - 1);
|
170
177
|
executor.submit(new Callable<Void>() {
|
171
178
|
@Override
|
172
|
-
public Void call() throws Exception
|
173
|
-
|
179
|
+
public Void call() throws Exception
|
180
|
+
{
|
181
|
+
File file = builder.getFile();
|
182
|
+
|
183
|
+
log.debug("{uploading: {file: {}}}", file.getAbsolutePath());
|
184
|
+
Stopwatch stopwatch = Stopwatch.createStarted();
|
185
|
+
|
186
|
+
client.uploadBulkImportPart(sessionName, uniquePartName, builder.getFile());
|
187
|
+
|
188
|
+
stopwatch.stop();
|
189
|
+
stopwatch.elapsed(TimeUnit.MILLISECONDS);
|
190
|
+
log.debug("{uploaded: {file: {}, time: {}}}", file.getAbsolutePath(), stopwatch);
|
174
191
|
return null;
|
175
192
|
}
|
176
|
-
},
|
177
|
-
|
193
|
+
},
|
194
|
+
new Closeable() {
|
195
|
+
public void close() throws IOException
|
196
|
+
{
|
197
|
+
builder.close();
|
198
|
+
if (!builder.delete()) {
|
199
|
+
log.warn("Failed to delete local temporary file {}. Ignoring.", builder.getFile());
|
200
|
+
}
|
201
|
+
}
|
202
|
+
});
|
178
203
|
}
|
179
204
|
|
180
205
|
@Override
|
@@ -199,6 +224,7 @@ public class RecordWriter
|
|
199
224
|
} finally {
|
200
225
|
if (builder != null) {
|
201
226
|
builder.close();
|
227
|
+
builder.delete();
|
202
228
|
builder = null;
|
203
229
|
}
|
204
230
|
|
@@ -285,7 +311,10 @@ public class RecordWriter
|
|
285
311
|
case PRIMARY_KEY:
|
286
312
|
log.info("Using {}:{} column as the data partitioning key", columnName, columnType);
|
287
313
|
if (columnType instanceof LongType) {
|
288
|
-
|
314
|
+
if (task.getUnixTimestampUnit() != UnixTimestampUnit.SEC) {
|
315
|
+
log.warn("time column is converted from {} to seconds", task.getUnixTimestampUnit());
|
316
|
+
}
|
317
|
+
writer = new UnixTimestampLongFieldWriter(columnName, task.getUnixTimestampUnit().getFractionUnit());
|
289
318
|
hasPkWriter = true;
|
290
319
|
} else if (columnType instanceof TimestampType) {
|
291
320
|
writer = new TimestampStringFieldWriter(task.getJRuby(), columnName);
|
@@ -344,13 +373,14 @@ public class RecordWriter
|
|
344
373
|
String columnName = schema.getColumnName(duplicatePrimaryKeySourceIndex);
|
345
374
|
Type columnType = schema.getColumnType(duplicatePrimaryKeySourceIndex);
|
346
375
|
|
347
|
-
log.info("Duplicating {}:{} column to 'time' column for the data partitioning",
|
348
|
-
columnName, columnType);
|
349
|
-
|
350
376
|
FieldWriter writer;
|
351
377
|
if (columnType instanceof LongType) {
|
352
|
-
|
378
|
+
log.info("Duplicating {}:{} column (unix timestamp {}) to 'time' column as seconds for the data partitioning",
|
379
|
+
columnName, columnType, task.getUnixTimestampUnit());
|
380
|
+
writer = new UnixTimestampFieldDuplicator(columnName, "time", task.getUnixTimestampUnit().getFractionUnit());
|
353
381
|
} else if (columnType instanceof TimestampType) {
|
382
|
+
log.info("Duplicating {}:{} column to 'time' column as seconds for the data partitioning",
|
383
|
+
columnName, columnType);
|
354
384
|
writer = new TimestampFieldLongDuplicator(task.getJRuby(), columnName, "time");
|
355
385
|
} else {
|
356
386
|
throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
|
@@ -473,6 +503,25 @@ public class RecordWriter
|
|
473
503
|
}
|
474
504
|
}
|
475
505
|
|
506
|
+
static class UnixTimestampLongFieldWriter
|
507
|
+
extends FieldWriter
|
508
|
+
{
|
509
|
+
private final int fractionUnit;
|
510
|
+
|
511
|
+
UnixTimestampLongFieldWriter(String keyName, int fractionUnit)
|
512
|
+
{
|
513
|
+
super(keyName);
|
514
|
+
this.fractionUnit = fractionUnit;
|
515
|
+
}
|
516
|
+
|
517
|
+
@Override
|
518
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
519
|
+
throws IOException
|
520
|
+
{
|
521
|
+
builder.writeLong(reader.getLong(column) / fractionUnit);
|
522
|
+
}
|
523
|
+
}
|
524
|
+
|
476
525
|
static class StringFieldWriter
|
477
526
|
extends FieldWriter
|
478
527
|
{
|
@@ -525,15 +574,15 @@ public class RecordWriter
|
|
525
574
|
}
|
526
575
|
}
|
527
576
|
|
528
|
-
static class
|
577
|
+
static class UnixTimestampFieldDuplicator
|
529
578
|
extends LongFieldWriter
|
530
579
|
{
|
531
|
-
private final
|
580
|
+
private final UnixTimestampLongFieldWriter timeFieldWriter;
|
532
581
|
|
533
|
-
public
|
582
|
+
public UnixTimestampFieldDuplicator(String keyName, String duplicateKeyName, int fractionUnit)
|
534
583
|
{
|
535
584
|
super(keyName);
|
536
|
-
timeFieldWriter = new
|
585
|
+
timeFieldWriter = new UnixTimestampLongFieldWriter(duplicateKeyName, fractionUnit);
|
537
586
|
}
|
538
587
|
|
539
588
|
@Override
|
@@ -7,6 +7,8 @@ import javax.validation.constraints.Max;
|
|
7
7
|
|
8
8
|
import com.google.common.base.Optional;
|
9
9
|
import com.google.common.base.Throwables;
|
10
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
11
|
+
import com.fasterxml.jackson.annotation.JsonValue;
|
10
12
|
import com.treasuredata.api.TdApiClient;
|
11
13
|
import com.treasuredata.api.TdApiClientConfig;
|
12
14
|
import com.treasuredata.api.TdApiClientConfig.HttpProxyConfig;
|
@@ -78,6 +80,10 @@ public class TdOutputPlugin
|
|
78
80
|
@ConfigDefault("null")
|
79
81
|
public Optional<String> getTimeColumn();
|
80
82
|
|
83
|
+
@Config("unix_timestamp_unit")
|
84
|
+
@ConfigDefault("\"sec\"")
|
85
|
+
public UnixTimestampUnit getUnixTimestampUnit();
|
86
|
+
|
81
87
|
@Config("tmpdir")
|
82
88
|
@ConfigDefault("\"/tmp\"")
|
83
89
|
public String getTempDir();
|
@@ -116,6 +122,47 @@ public class TdOutputPlugin
|
|
116
122
|
public boolean getUseSsl();
|
117
123
|
}
|
118
124
|
|
125
|
+
public static enum UnixTimestampUnit
|
126
|
+
{
|
127
|
+
SEC(1),
|
128
|
+
MILLI(1000),
|
129
|
+
MICRO(1000000),
|
130
|
+
NANO(1000000000);
|
131
|
+
|
132
|
+
private final int unit;
|
133
|
+
|
134
|
+
private UnixTimestampUnit(int unit)
|
135
|
+
{
|
136
|
+
this.unit = unit;
|
137
|
+
}
|
138
|
+
|
139
|
+
public int getFractionUnit()
|
140
|
+
{
|
141
|
+
return unit;
|
142
|
+
}
|
143
|
+
|
144
|
+
@JsonCreator
|
145
|
+
public static UnixTimestampUnit of(String s)
|
146
|
+
{
|
147
|
+
switch (s) {
|
148
|
+
case "sec": return SEC;
|
149
|
+
case "milli": return MILLI;
|
150
|
+
case "micro": return MICRO;
|
151
|
+
case "nano": return NANO;
|
152
|
+
default:
|
153
|
+
throw new ConfigException(
|
154
|
+
String.format("Unknown unix_timestamp_unit '%s'. Supported units are sec, milli, micro, and nano"));
|
155
|
+
}
|
156
|
+
}
|
157
|
+
|
158
|
+
@JsonValue
|
159
|
+
@Override
|
160
|
+
public String toString()
|
161
|
+
{
|
162
|
+
return name().toLowerCase();
|
163
|
+
}
|
164
|
+
}
|
165
|
+
|
119
166
|
private final Logger log;
|
120
167
|
|
121
168
|
public TdOutputPlugin()
|
@@ -367,14 +414,14 @@ public class TdOutputPlugin
|
|
367
414
|
}
|
368
415
|
|
369
416
|
@Override
|
370
|
-
public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int
|
417
|
+
public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
|
371
418
|
{
|
372
419
|
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
373
420
|
|
374
421
|
RecordWriter closeLater = null;
|
375
422
|
try {
|
376
423
|
FieldWriterSet fieldWriters = new FieldWriterSet(log, task, schema);
|
377
|
-
RecordWriter recordWriter = closeLater = new RecordWriter(task, newTdApiClient(task), fieldWriters);
|
424
|
+
RecordWriter recordWriter = closeLater = new RecordWriter(task, taskIndex, newTdApiClient(task), fieldWriters);
|
378
425
|
recordWriter.open(schema);
|
379
426
|
closeLater = null;
|
380
427
|
return recordWriter;
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-td
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Muga Nishizawa
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-07-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -46,6 +46,7 @@ extensions: []
|
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
48
|
- .gitignore
|
49
|
+
- CHANGELOG.md
|
49
50
|
- README.md
|
50
51
|
- build.gradle
|
51
52
|
- embulk-output-td.gemspec
|
@@ -84,7 +85,7 @@ files:
|
|
84
85
|
- src/main/java/org/embulk/output/RecordWriter.java
|
85
86
|
- src/main/java/org/embulk/output/TdOutputPlugin.java
|
86
87
|
- src/test/java/org/embulk/output/TestTdOutputPlugin.java
|
87
|
-
- classpath/embulk-output-td-0.1.0.jar
|
88
|
+
- classpath/embulk-output-td-0.1.1.jar
|
88
89
|
- classpath/javassist-3.18.1-GA.jar
|
89
90
|
- classpath/jetty-client-9.2.2.v20140723.jar
|
90
91
|
- classpath/jetty-http-9.2.2.v20140723.jar
|