embulk-output-td 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +7 -0
- data/CHANGELOG.md +4 -0
- data/README.md +1 -0
- data/build.gradle +5 -1
- data/config/checkstyle/checkstyle.xml +117 -0
- data/embulk-output-td.gemspec +1 -1
- data/gradle/check.gradle +34 -0
- data/src/main/java/com/treasuredata/api/TdApiClient.java +47 -23
- data/src/main/java/com/treasuredata/api/TdApiClientConfig.java +3 -3
- data/src/main/java/com/treasuredata/api/TdApiConstants.java +6 -2
- data/src/main/java/com/treasuredata/api/TdApiExecutionInterruptedException.java +2 -1
- data/src/main/java/com/treasuredata/api/TdApiExecutionTimeoutException.java +2 -1
- data/src/main/java/com/treasuredata/api/model/TDArrayColumnType.java +1 -1
- data/src/main/java/com/treasuredata/api/model/TDBulkImportSession.java +6 -4
- data/src/main/java/com/treasuredata/api/model/TDColumn.java +4 -2
- data/src/main/java/com/treasuredata/api/model/TDColumnTypeDeserializer.java +26 -13
- data/src/main/java/com/treasuredata/api/model/TDDatabase.java +2 -1
- data/src/main/java/com/treasuredata/api/model/TDMapColumnType.java +1 -1
- data/src/main/java/com/treasuredata/api/model/TDTablePermission.java +4 -2
- data/src/main/java/com/treasuredata/api/model/TDTableType.java +2 -1
- data/src/main/java/org/embulk/output/td/FinalizableExecutorService.java +35 -17
- data/src/main/java/org/embulk/output/td/MsgpackGZFileBuilder.java +13 -7
- data/src/main/java/org/embulk/output/td/RecordWriter.java +21 -382
- data/src/main/java/org/embulk/output/td/TdOutputPlugin.java +175 -40
- data/src/main/java/org/embulk/output/td/writer/BooleanFieldWriter.java +23 -0
- data/src/main/java/org/embulk/output/td/writer/DoubleFieldWriter.java +23 -0
- data/src/main/java/org/embulk/output/td/writer/FieldWriter.java +38 -0
- data/src/main/java/org/embulk/output/td/writer/FieldWriterSet.java +206 -0
- data/src/main/java/org/embulk/output/td/writer/LongFieldWriter.java +23 -0
- data/src/main/java/org/embulk/output/td/writer/StringFieldWriter.java +23 -0
- data/src/main/java/org/embulk/output/td/writer/TimestampFieldLongDuplicator.java +28 -0
- data/src/main/java/org/embulk/output/td/writer/TimestampLongFieldWriter.java +23 -0
- data/src/main/java/org/embulk/output/td/writer/TimestampStringFieldWriter.java +27 -0
- data/src/main/java/org/embulk/output/td/writer/UnixTimestampFieldDuplicator.java +27 -0
- data/src/main/java/org/embulk/output/td/writer/UnixTimestampLongFieldWriter.java +26 -0
- data/src/test/java/com/treasuredata/api/TestTdApiClient.java +1 -1
- data/src/test/java/org/embulk/output/td/TestRecordWriter.java +198 -0
- data/src/test/java/org/embulk/output/td/TestTdOutputPlugin.java +529 -0
- data/src/test/java/org/embulk/output/td/writer/TestFieldWriterSet.java +146 -0
- metadata +29 -14
- data/src/test/java/org/embulk/output/td/TestFieldWriter.java +0 -105
@@ -143,13 +143,15 @@ public class TDBulkImportSession
|
|
143
143
|
|
144
144
|
public String getErrorMessage()
|
145
145
|
{
|
146
|
-
if (validRecords == 0)
|
146
|
+
if (validRecords == 0) {
|
147
147
|
return "No record processed";
|
148
|
-
|
148
|
+
}
|
149
|
+
if (errorRecords > 0) {
|
149
150
|
return String.format("%d invalid parts", errorParts);
|
150
|
-
|
151
|
+
}
|
152
|
+
if (errorRecords > 0) {
|
151
153
|
return String.format("%d invalid records", errorRecords);
|
152
|
-
|
154
|
+
}
|
153
155
|
return null;
|
154
156
|
}
|
155
157
|
}
|
@@ -43,13 +43,15 @@ public class TDColumn
|
|
43
43
|
TDColumnTypeDeserializer.parseColumnType(tuple[1]),
|
44
44
|
tuple[0].getBytes());
|
45
45
|
|
46
|
-
}
|
46
|
+
}
|
47
|
+
else if (tuple != null && tuple.length == 3) {
|
47
48
|
return new TDColumn(
|
48
49
|
tuple[0],
|
49
50
|
TDColumnTypeDeserializer.parseColumnType(tuple[1]),
|
50
51
|
tuple[2].getBytes());
|
51
52
|
|
52
|
-
}
|
53
|
+
}
|
54
|
+
else {
|
53
55
|
throw new RuntimeJsonMappingException("Unexpected string tuple to deserialize TDColumn");
|
54
56
|
}
|
55
57
|
}
|
@@ -39,19 +39,24 @@ public class TDColumnTypeDeserializer
|
|
39
39
|
if (p.scan("string")) {
|
40
40
|
return TDPrimitiveColumnType.STRING;
|
41
41
|
|
42
|
-
}
|
42
|
+
}
|
43
|
+
else if (p.scan("int")) {
|
43
44
|
return TDPrimitiveColumnType.INT;
|
44
45
|
|
45
|
-
}
|
46
|
+
}
|
47
|
+
else if (p.scan("long")) {
|
46
48
|
return TDPrimitiveColumnType.LONG;
|
47
49
|
|
48
|
-
}
|
50
|
+
}
|
51
|
+
else if (p.scan("double")) {
|
49
52
|
return TDPrimitiveColumnType.DOUBLE;
|
50
53
|
|
51
|
-
}
|
54
|
+
}
|
55
|
+
else if (p.scan("float")) {
|
52
56
|
return TDPrimitiveColumnType.FLOAT;
|
53
57
|
|
54
|
-
}
|
58
|
+
}
|
59
|
+
else if (p.scan("array")) {
|
55
60
|
if (!p.scan("<")) {
|
56
61
|
throw new IllegalArgumentException("Cannot parse type: expected '<' for array type: " + p.getString());
|
57
62
|
}
|
@@ -61,7 +66,8 @@ public class TDColumnTypeDeserializer
|
|
61
66
|
}
|
62
67
|
return new TDArrayColumnType(elementType);
|
63
68
|
|
64
|
-
}
|
69
|
+
}
|
70
|
+
else if (p.scan("map")) {
|
65
71
|
if (!p.scan("<")) {
|
66
72
|
throw new IllegalArgumentException("Cannot parse type: expected '<' for map type: " + p.getString());
|
67
73
|
}
|
@@ -75,24 +81,29 @@ public class TDColumnTypeDeserializer
|
|
75
81
|
}
|
76
82
|
return new TDMapColumnType(keyType, valueType);
|
77
83
|
|
78
|
-
}
|
84
|
+
}
|
85
|
+
else {
|
79
86
|
throw new IllegalArgumentException("Cannot parse type: " + p.getString());
|
80
87
|
}
|
81
88
|
}
|
82
89
|
|
83
|
-
private static class Parser
|
90
|
+
private static class Parser
|
91
|
+
{
|
84
92
|
private final String string;
|
85
93
|
private int offset;
|
86
94
|
|
87
|
-
public Parser(String string)
|
95
|
+
public Parser(String string)
|
96
|
+
{
|
88
97
|
this.string = string;
|
89
98
|
}
|
90
99
|
|
91
|
-
public String getString()
|
100
|
+
public String getString()
|
101
|
+
{
|
92
102
|
return string;
|
93
103
|
}
|
94
104
|
|
95
|
-
public boolean scan(String s)
|
105
|
+
public boolean scan(String s)
|
106
|
+
{
|
96
107
|
skipSpaces();
|
97
108
|
if (string.startsWith(s, offset)) {
|
98
109
|
offset += s.length();
|
@@ -101,12 +112,14 @@ public class TDColumnTypeDeserializer
|
|
101
112
|
return false;
|
102
113
|
}
|
103
114
|
|
104
|
-
public boolean eof()
|
115
|
+
public boolean eof()
|
116
|
+
{
|
105
117
|
skipSpaces();
|
106
118
|
return string.length() <= offset;
|
107
119
|
}
|
108
120
|
|
109
|
-
private void skipSpaces()
|
121
|
+
private void skipSpaces()
|
122
|
+
{
|
110
123
|
while (string.startsWith(" ", offset)) {
|
111
124
|
offset++;
|
112
125
|
}
|
@@ -17,12 +17,14 @@ public class TDTablePermission
|
|
17
17
|
}
|
18
18
|
|
19
19
|
@JsonProperty("importable")
|
20
|
-
public boolean isImportable()
|
20
|
+
public boolean isImportable()
|
21
|
+
{
|
21
22
|
return importable;
|
22
23
|
}
|
23
24
|
|
24
25
|
@JsonProperty("queryable")
|
25
|
-
public boolean isQueryable()
|
26
|
+
public boolean isQueryable()
|
27
|
+
{
|
26
28
|
return queryable;
|
27
29
|
}
|
28
30
|
|
@@ -21,7 +21,8 @@ public enum TDTableType
|
|
21
21
|
{
|
22
22
|
if ("log".equals(name)) {
|
23
23
|
return LOG;
|
24
|
-
}
|
24
|
+
}
|
25
|
+
else if ("item".equals(name)) {
|
25
26
|
return ITEM;
|
26
27
|
}
|
27
28
|
throw new RuntimeJsonMappingException("Unexpected string tuple to deserialize TDTableType");
|
@@ -17,7 +17,8 @@ public class FinalizableExecutorService
|
|
17
17
|
{
|
18
18
|
@Override
|
19
19
|
public void close()
|
20
|
-
throws IOException
|
20
|
+
throws IOException
|
21
|
+
{
|
21
22
|
// ignore
|
22
23
|
}
|
23
24
|
}
|
@@ -25,58 +26,75 @@ public class FinalizableExecutorService
|
|
25
26
|
protected ExecutorService threads;
|
26
27
|
protected Queue<RunningTask> runningTasks;
|
27
28
|
|
28
|
-
public FinalizableExecutorService()
|
29
|
+
public FinalizableExecutorService()
|
30
|
+
{
|
29
31
|
this.threads = Executors.newCachedThreadPool();
|
30
32
|
this.runningTasks = new LinkedList<>();
|
31
33
|
}
|
32
34
|
|
33
|
-
private static class RunningTask
|
35
|
+
private static class RunningTask
|
36
|
+
{
|
34
37
|
private Future<Void> future;
|
35
38
|
private Closeable finalizer;
|
36
39
|
|
37
|
-
RunningTask(Future<Void> future, Closeable finalizer)
|
40
|
+
RunningTask(Future<Void> future, Closeable finalizer)
|
41
|
+
{
|
38
42
|
this.future = future;
|
39
43
|
this.finalizer = finalizer;
|
40
44
|
}
|
41
45
|
|
42
|
-
public void join()
|
46
|
+
public void join()
|
47
|
+
throws IOException
|
48
|
+
{
|
43
49
|
try {
|
44
50
|
future.get();
|
45
|
-
}
|
46
|
-
|
47
|
-
|
48
|
-
|
51
|
+
}
|
52
|
+
catch (InterruptedException e) {
|
53
|
+
throw new IOException(e);
|
54
|
+
}
|
55
|
+
catch (ExecutionException e) {
|
56
|
+
throw new IOException(e.getCause());
|
49
57
|
}
|
50
58
|
finalizer.close();
|
51
59
|
}
|
52
60
|
|
53
|
-
public void abort()
|
61
|
+
public void abort()
|
62
|
+
throws IOException
|
63
|
+
{
|
54
64
|
finalizer.close();
|
55
65
|
}
|
56
66
|
}
|
57
67
|
|
58
|
-
public void submit(Callable<Void> task, Closeable finalizer)
|
68
|
+
public void submit(Callable<Void> task, Closeable finalizer)
|
69
|
+
{
|
59
70
|
Future<Void> future = threads.submit(task);
|
60
71
|
runningTasks.add(new RunningTask(future, finalizer));
|
61
72
|
}
|
62
73
|
|
63
|
-
public void joinPartial(long upto)
|
64
|
-
|
74
|
+
public void joinPartial(long upto)
|
75
|
+
throws IOException
|
76
|
+
{
|
77
|
+
while (runningTasks.size() > upto) {
|
65
78
|
runningTasks.peek().join();
|
66
79
|
runningTasks.remove();
|
67
80
|
}
|
68
81
|
}
|
69
82
|
|
70
|
-
public void joinAll()
|
83
|
+
public void joinAll()
|
84
|
+
throws IOException
|
85
|
+
{
|
71
86
|
joinPartial(0);
|
72
87
|
}
|
73
88
|
|
74
|
-
public void shutdown()
|
89
|
+
public void shutdown()
|
90
|
+
throws IOException
|
91
|
+
{
|
75
92
|
try {
|
76
93
|
joinAll();
|
77
|
-
}
|
94
|
+
}
|
95
|
+
finally {
|
78
96
|
threads.shutdown();
|
79
|
-
for(RunningTask task : runningTasks) {
|
97
|
+
for (RunningTask task : runningTasks) {
|
80
98
|
task.abort();
|
81
99
|
}
|
82
100
|
}
|
@@ -1,6 +1,5 @@
|
|
1
1
|
package org.embulk.output.td;
|
2
2
|
|
3
|
-
import org.embulk.spi.Exec;
|
4
3
|
import org.msgpack.MessagePack;
|
5
4
|
import org.msgpack.packer.Packer;
|
6
5
|
|
@@ -18,29 +17,35 @@ import static com.google.common.base.Preconditions.checkNotNull;
|
|
18
17
|
public class MsgpackGZFileBuilder
|
19
18
|
implements Closeable
|
20
19
|
{
|
21
|
-
static class DataSizeFilter
|
20
|
+
static class DataSizeFilter
|
21
|
+
extends FilterOutputStream
|
22
|
+
{
|
22
23
|
private long size = 0;
|
23
24
|
|
24
|
-
public DataSizeFilter(OutputStream out)
|
25
|
+
public DataSizeFilter(OutputStream out)
|
26
|
+
{
|
25
27
|
super(out);
|
26
28
|
}
|
27
29
|
|
28
30
|
@Override
|
29
|
-
public void write(int b)
|
31
|
+
public void write(int b)
|
32
|
+
throws IOException
|
30
33
|
{
|
31
34
|
size += 1;
|
32
35
|
super.write(b);
|
33
36
|
}
|
34
37
|
|
35
38
|
@Override
|
36
|
-
public void write(byte[] b, int off, int len)
|
39
|
+
public void write(byte[] b, int off, int len)
|
40
|
+
throws IOException
|
37
41
|
{
|
38
42
|
size += len;
|
39
43
|
super.write(b, off, len);
|
40
44
|
}
|
41
45
|
|
42
46
|
@Override
|
43
|
-
public void close()
|
47
|
+
public void close()
|
48
|
+
throws IOException
|
44
49
|
{
|
45
50
|
super.close();
|
46
51
|
}
|
@@ -94,7 +99,8 @@ public class MsgpackGZFileBuilder
|
|
94
99
|
{
|
95
100
|
try {
|
96
101
|
packer.flush();
|
97
|
-
}
|
102
|
+
}
|
103
|
+
finally {
|
98
104
|
close();
|
99
105
|
}
|
100
106
|
}
|
@@ -1,11 +1,12 @@
|
|
1
1
|
package org.embulk.output.td;
|
2
2
|
|
3
|
-
import com.google.common.base.Optional;
|
3
|
+
import com.google.common.annotations.VisibleForTesting;
|
4
4
|
import com.google.common.base.Stopwatch;
|
5
5
|
import com.google.common.base.Throwables;
|
6
6
|
import com.treasuredata.api.TdApiClient;
|
7
7
|
import org.embulk.config.CommitReport;
|
8
|
-
import org.embulk.config.ConfigException;
|
8
|
+
import org.embulk.output.td.writer.FieldWriter;
|
9
|
+
import org.embulk.output.td.writer.FieldWriterSet;
|
9
10
|
import org.embulk.spi.Column;
|
10
11
|
import org.embulk.spi.ColumnVisitor;
|
11
12
|
import org.embulk.spi.Exec;
|
@@ -13,16 +14,6 @@ import org.embulk.spi.Page;
|
|
13
14
|
import org.embulk.spi.PageReader;
|
14
15
|
import org.embulk.spi.Schema;
|
15
16
|
import org.embulk.spi.TransactionalPageOutput;
|
16
|
-
import org.embulk.spi.time.TimestampFormatter;
|
17
|
-
import org.embulk.spi.type.BooleanType;
|
18
|
-
import org.embulk.spi.type.DoubleType;
|
19
|
-
import org.embulk.spi.type.LongType;
|
20
|
-
import org.embulk.spi.type.StringType;
|
21
|
-
import org.embulk.spi.type.TimestampType;
|
22
|
-
import org.embulk.spi.type.Type;
|
23
|
-
import org.embulk.spi.util.Timestamps;
|
24
|
-
import org.joda.time.DateTimeZone;
|
25
|
-
import org.jruby.embed.ScriptingContainer;
|
26
17
|
import org.msgpack.MessagePack;
|
27
18
|
import org.slf4j.Logger;
|
28
19
|
|
@@ -91,6 +82,12 @@ public class RecordWriter
|
|
91
82
|
this.builder = new MsgpackGZFileBuilder(msgpack, tempFile);
|
92
83
|
}
|
93
84
|
|
85
|
+
@VisibleForTesting
|
86
|
+
MsgpackGZFileBuilder getBuilder()
|
87
|
+
{
|
88
|
+
return builder;
|
89
|
+
}
|
90
|
+
|
94
91
|
@Override
|
95
92
|
public void add(final Page page)
|
96
93
|
{
|
@@ -136,7 +133,8 @@ public class RecordWriter
|
|
136
133
|
FieldWriter fieldWriter = fieldWriters.getFieldWriter(column.getIndex());
|
137
134
|
try {
|
138
135
|
fieldWriter.writeKeyValue(builder, pageReader, column);
|
139
|
-
}
|
136
|
+
}
|
137
|
+
catch (IOException e) {
|
140
138
|
throw Throwables.propagate(e);
|
141
139
|
}
|
142
140
|
}
|
@@ -150,7 +148,8 @@ public class RecordWriter
|
|
150
148
|
}
|
151
149
|
}
|
152
150
|
|
153
|
-
}
|
151
|
+
}
|
152
|
+
catch (IOException e) {
|
154
153
|
throw Throwables.propagate(e);
|
155
154
|
}
|
156
155
|
}
|
@@ -206,9 +205,11 @@ public class RecordWriter
|
|
206
205
|
{
|
207
206
|
try {
|
208
207
|
flush();
|
209
|
-
}
|
208
|
+
}
|
209
|
+
catch (IOException e) {
|
210
210
|
throw Throwables.propagate(e);
|
211
|
-
}
|
211
|
+
}
|
212
|
+
finally {
|
212
213
|
close();
|
213
214
|
}
|
214
215
|
}
|
@@ -220,7 +221,8 @@ public class RecordWriter
|
|
220
221
|
try {
|
221
222
|
executor.joinAll();
|
222
223
|
executor.shutdown(); // shutdown calls joinAll
|
223
|
-
}
|
224
|
+
}
|
225
|
+
finally {
|
224
226
|
if (builder != null) {
|
225
227
|
builder.close();
|
226
228
|
builder.delete();
|
@@ -231,7 +233,8 @@ public class RecordWriter
|
|
231
233
|
client.close();
|
232
234
|
}
|
233
235
|
}
|
234
|
-
}
|
236
|
+
}
|
237
|
+
catch (IOException e) {
|
235
238
|
throw Throwables.propagate(e);
|
236
239
|
}
|
237
240
|
}
|
@@ -249,368 +252,4 @@ public class RecordWriter
|
|
249
252
|
// TODO
|
250
253
|
return report;
|
251
254
|
}
|
252
|
-
|
253
|
-
static class FieldWriterSet
|
254
|
-
{
|
255
|
-
private enum ColumnWriterMode
|
256
|
-
{
|
257
|
-
PRIMARY_KEY,
|
258
|
-
SIMPLE_VALUE,
|
259
|
-
DUPLICATE_PRIMARY_KEY;
|
260
|
-
}
|
261
|
-
|
262
|
-
private final int fieldCount;
|
263
|
-
private final FieldWriter[] fieldWriters;
|
264
|
-
|
265
|
-
public FieldWriterSet(Logger log, TdOutputPlugin.PluginTask task, Schema schema)
|
266
|
-
{
|
267
|
-
Optional<String> userDefinedPrimaryKeySourceColumnName = task.getTimeColumn();
|
268
|
-
boolean hasPkWriter = false;
|
269
|
-
int duplicatePrimaryKeySourceIndex = -1;
|
270
|
-
int firstTimestampColumnIndex = -1;
|
271
|
-
|
272
|
-
int fc = 0;
|
273
|
-
fieldWriters = new FieldWriter[schema.size()];
|
274
|
-
TimestampFormatter[] timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
|
275
|
-
|
276
|
-
for (int i = 0; i < schema.size(); i++) {
|
277
|
-
String columnName = schema.getColumnName(i);
|
278
|
-
Type columnType = schema.getColumnType(i);
|
279
|
-
|
280
|
-
// choose the mode
|
281
|
-
final ColumnWriterMode mode;
|
282
|
-
|
283
|
-
if (userDefinedPrimaryKeySourceColumnName.isPresent() &&
|
284
|
-
columnName.equals(userDefinedPrimaryKeySourceColumnName.get())) {
|
285
|
-
// found time_column
|
286
|
-
if ("time".equals(userDefinedPrimaryKeySourceColumnName.get())) {
|
287
|
-
mode = ColumnWriterMode.PRIMARY_KEY;
|
288
|
-
} else {
|
289
|
-
mode = ColumnWriterMode.DUPLICATE_PRIMARY_KEY;
|
290
|
-
}
|
291
|
-
|
292
|
-
} else if ("time".equals(columnName)) {
|
293
|
-
// the column name is same with the primary key name.
|
294
|
-
if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
|
295
|
-
columnName = newColumnUniqueName(columnName, schema);
|
296
|
-
mode = ColumnWriterMode.SIMPLE_VALUE;
|
297
|
-
log.warn("time_column '{}' is set but 'time' column also exists. The existent 'time' column is renamed to {}",
|
298
|
-
userDefinedPrimaryKeySourceColumnName.get(), "time", "time", columnName);
|
299
|
-
} else {
|
300
|
-
mode = ColumnWriterMode.PRIMARY_KEY;
|
301
|
-
}
|
302
|
-
|
303
|
-
} else {
|
304
|
-
mode = ColumnWriterMode.SIMPLE_VALUE;
|
305
|
-
}
|
306
|
-
|
307
|
-
// create the fieldWriters writer depending on the mode
|
308
|
-
final FieldWriter writer;
|
309
|
-
|
310
|
-
switch (mode) {
|
311
|
-
case PRIMARY_KEY:
|
312
|
-
log.info("Using {}:{} column as the data partitioning key", columnName, columnType);
|
313
|
-
if (columnType instanceof LongType) {
|
314
|
-
if (task.getUnixTimestampUnit() != TdOutputPlugin.UnixTimestampUnit.SEC) {
|
315
|
-
log.warn("time column is converted from {} to seconds", task.getUnixTimestampUnit());
|
316
|
-
}
|
317
|
-
writer = new UnixTimestampLongFieldWriter(columnName, task.getUnixTimestampUnit().getFractionUnit());
|
318
|
-
hasPkWriter = true;
|
319
|
-
} else if (columnType instanceof TimestampType) {
|
320
|
-
writer = new TimestampLongFieldWriter(columnName);
|
321
|
-
|
322
|
-
hasPkWriter = true;
|
323
|
-
} else {
|
324
|
-
throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
|
325
|
-
columnName, columnType));
|
326
|
-
}
|
327
|
-
break;
|
328
|
-
|
329
|
-
case SIMPLE_VALUE:
|
330
|
-
if (columnType instanceof BooleanType) {
|
331
|
-
writer = new BooleanFieldWriter(columnName);
|
332
|
-
} else if (columnType instanceof LongType) {
|
333
|
-
writer = new LongFieldWriter(columnName);
|
334
|
-
} else if (columnType instanceof DoubleType) {
|
335
|
-
writer = new DoubleFieldWriter(columnName);
|
336
|
-
} else if (columnType instanceof StringType) {
|
337
|
-
writer = new StringFieldWriter(columnName);
|
338
|
-
} else if (columnType instanceof TimestampType) {
|
339
|
-
writer = new TimestampStringFieldWriter(timestampFormatters[i], columnName);
|
340
|
-
if (firstTimestampColumnIndex < 0) {
|
341
|
-
firstTimestampColumnIndex = i;
|
342
|
-
}
|
343
|
-
} else {
|
344
|
-
throw new ConfigException("Unsupported type: " + columnType);
|
345
|
-
}
|
346
|
-
break;
|
347
|
-
|
348
|
-
case DUPLICATE_PRIMARY_KEY:
|
349
|
-
duplicatePrimaryKeySourceIndex = i;
|
350
|
-
writer = null; // handle later
|
351
|
-
break;
|
352
|
-
|
353
|
-
default:
|
354
|
-
throw new AssertionError();
|
355
|
-
}
|
356
|
-
|
357
|
-
fieldWriters[i] = writer;
|
358
|
-
fc += 1;
|
359
|
-
}
|
360
|
-
|
361
|
-
if (!hasPkWriter) {
|
362
|
-
// PRIMARY_KEY was not found.
|
363
|
-
if (duplicatePrimaryKeySourceIndex < 0) {
|
364
|
-
if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
|
365
|
-
throw new ConfigException(String.format("time_column '%s' does not exist", userDefinedPrimaryKeySourceColumnName.get()));
|
366
|
-
} else if (firstTimestampColumnIndex >= 0) {
|
367
|
-
// if time is not found, use the first timestamp column
|
368
|
-
duplicatePrimaryKeySourceIndex = firstTimestampColumnIndex;
|
369
|
-
} else {
|
370
|
-
throw new ConfigException(String.format("TD output plugin requires at least one timestamp column, or a long column named 'time'"));
|
371
|
-
}
|
372
|
-
}
|
373
|
-
|
374
|
-
String columnName = schema.getColumnName(duplicatePrimaryKeySourceIndex);
|
375
|
-
Type columnType = schema.getColumnType(duplicatePrimaryKeySourceIndex);
|
376
|
-
|
377
|
-
FieldWriter writer;
|
378
|
-
if (columnType instanceof LongType) {
|
379
|
-
log.info("Duplicating {}:{} column (unix timestamp {}) to 'time' column as seconds for the data partitioning",
|
380
|
-
columnName, columnType, task.getUnixTimestampUnit());
|
381
|
-
writer = new UnixTimestampFieldDuplicator(columnName, "time", task.getUnixTimestampUnit().getFractionUnit());
|
382
|
-
} else if (columnType instanceof TimestampType) {
|
383
|
-
log.info("Duplicating {}:{} column to 'time' column as seconds for the data partitioning",
|
384
|
-
columnName, columnType);
|
385
|
-
writer = new TimestampFieldLongDuplicator(timestampFormatters[duplicatePrimaryKeySourceIndex], columnName, "time");
|
386
|
-
} else {
|
387
|
-
throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
|
388
|
-
columnName, columnType));
|
389
|
-
}
|
390
|
-
|
391
|
-
// replace existint writer
|
392
|
-
fieldWriters[duplicatePrimaryKeySourceIndex] = writer;
|
393
|
-
fc += 1;
|
394
|
-
}
|
395
|
-
|
396
|
-
fieldCount = fc;
|
397
|
-
}
|
398
|
-
|
399
|
-
private static String newColumnUniqueName(String originalName, Schema schema)
|
400
|
-
{
|
401
|
-
String name = originalName;
|
402
|
-
do {
|
403
|
-
name += "_";
|
404
|
-
} while (containsColumnName(schema, name));
|
405
|
-
return name;
|
406
|
-
}
|
407
|
-
|
408
|
-
private static boolean containsColumnName(Schema schema, String name)
|
409
|
-
{
|
410
|
-
for (Column c : schema.getColumns()) {
|
411
|
-
if (c.getName().equals(name)) {
|
412
|
-
return true;
|
413
|
-
}
|
414
|
-
}
|
415
|
-
return false;
|
416
|
-
}
|
417
|
-
|
418
|
-
public FieldWriter getFieldWriter(int index)
|
419
|
-
{
|
420
|
-
return fieldWriters[index];
|
421
|
-
}
|
422
|
-
|
423
|
-
public int getFieldCount()
|
424
|
-
{
|
425
|
-
return fieldCount;
|
426
|
-
}
|
427
|
-
}
|
428
|
-
|
429
|
-
static abstract class FieldWriter
|
430
|
-
{
|
431
|
-
private final String keyName;
|
432
|
-
|
433
|
-
protected FieldWriter(String keyName)
|
434
|
-
{
|
435
|
-
this.keyName = keyName;
|
436
|
-
}
|
437
|
-
|
438
|
-
public void writeKeyValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
439
|
-
throws IOException
|
440
|
-
{
|
441
|
-
writeKey(builder);
|
442
|
-
if (reader.isNull(column)) {
|
443
|
-
builder.writeNil();
|
444
|
-
} else {
|
445
|
-
writeValue(builder, reader, column);
|
446
|
-
}
|
447
|
-
}
|
448
|
-
|
449
|
-
private void writeKey(MsgpackGZFileBuilder builder)
|
450
|
-
throws IOException
|
451
|
-
{
|
452
|
-
builder.writeString(keyName);
|
453
|
-
}
|
454
|
-
|
455
|
-
protected abstract void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
456
|
-
throws IOException;
|
457
|
-
}
|
458
|
-
|
459
|
-
static class DoubleFieldWriter
|
460
|
-
extends FieldWriter
|
461
|
-
{
|
462
|
-
public DoubleFieldWriter(String keyName)
|
463
|
-
{
|
464
|
-
super(keyName);
|
465
|
-
}
|
466
|
-
|
467
|
-
@Override
|
468
|
-
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
469
|
-
throws IOException
|
470
|
-
{
|
471
|
-
builder.writeDouble(reader.getDouble(column));
|
472
|
-
}
|
473
|
-
}
|
474
|
-
|
475
|
-
static class BooleanFieldWriter
|
476
|
-
extends FieldWriter
|
477
|
-
{
|
478
|
-
public BooleanFieldWriter(String keyName)
|
479
|
-
{
|
480
|
-
super(keyName);
|
481
|
-
}
|
482
|
-
|
483
|
-
@Override
|
484
|
-
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
485
|
-
throws IOException
|
486
|
-
{
|
487
|
-
builder.writeBoolean(reader.getBoolean(column));
|
488
|
-
}
|
489
|
-
}
|
490
|
-
|
491
|
-
static class LongFieldWriter
|
492
|
-
extends FieldWriter
|
493
|
-
{
|
494
|
-
LongFieldWriter(String keyName)
|
495
|
-
{
|
496
|
-
super(keyName);
|
497
|
-
}
|
498
|
-
|
499
|
-
@Override
|
500
|
-
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
501
|
-
throws IOException
|
502
|
-
{
|
503
|
-
builder.writeLong(reader.getLong(column));
|
504
|
-
}
|
505
|
-
}
|
506
|
-
|
507
|
-
static class UnixTimestampLongFieldWriter
|
508
|
-
extends FieldWriter
|
509
|
-
{
|
510
|
-
private final int fractionUnit;
|
511
|
-
|
512
|
-
UnixTimestampLongFieldWriter(String keyName, int fractionUnit)
|
513
|
-
{
|
514
|
-
super(keyName);
|
515
|
-
this.fractionUnit = fractionUnit;
|
516
|
-
}
|
517
|
-
|
518
|
-
@Override
|
519
|
-
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
520
|
-
throws IOException
|
521
|
-
{
|
522
|
-
builder.writeLong(reader.getLong(column) / fractionUnit);
|
523
|
-
}
|
524
|
-
}
|
525
|
-
|
526
|
-
static class StringFieldWriter
|
527
|
-
extends FieldWriter
|
528
|
-
{
|
529
|
-
public StringFieldWriter(String keyName)
|
530
|
-
{
|
531
|
-
super(keyName);
|
532
|
-
}
|
533
|
-
|
534
|
-
@Override
|
535
|
-
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
536
|
-
throws IOException
|
537
|
-
{
|
538
|
-
builder.writeString(reader.getString(column));
|
539
|
-
}
|
540
|
-
}
|
541
|
-
|
542
|
-
static class TimestampStringFieldWriter
|
543
|
-
extends FieldWriter
|
544
|
-
{
|
545
|
-
private final TimestampFormatter formatter;
|
546
|
-
|
547
|
-
public TimestampStringFieldWriter(TimestampFormatter formatter, String keyName)
|
548
|
-
{
|
549
|
-
super(keyName);
|
550
|
-
this.formatter = formatter;
|
551
|
-
}
|
552
|
-
|
553
|
-
@Override
|
554
|
-
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
555
|
-
throws IOException
|
556
|
-
{
|
557
|
-
builder.writeString(formatter.format(reader.getTimestamp(column)));
|
558
|
-
}
|
559
|
-
}
|
560
|
-
|
561
|
-
static class TimestampLongFieldWriter
|
562
|
-
extends FieldWriter
|
563
|
-
{
|
564
|
-
public TimestampLongFieldWriter(String keyName)
|
565
|
-
{
|
566
|
-
super(keyName);
|
567
|
-
}
|
568
|
-
|
569
|
-
@Override
|
570
|
-
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
571
|
-
throws IOException
|
572
|
-
{
|
573
|
-
builder.writeLong(reader.getTimestamp(column).getEpochSecond());
|
574
|
-
}
|
575
|
-
}
|
576
|
-
|
577
|
-
static class UnixTimestampFieldDuplicator
|
578
|
-
extends LongFieldWriter
|
579
|
-
{
|
580
|
-
private final UnixTimestampLongFieldWriter timeFieldWriter;
|
581
|
-
|
582
|
-
public UnixTimestampFieldDuplicator(String keyName, String duplicateKeyName, int fractionUnit)
|
583
|
-
{
|
584
|
-
super(keyName);
|
585
|
-
timeFieldWriter = new UnixTimestampLongFieldWriter(duplicateKeyName, fractionUnit);
|
586
|
-
}
|
587
|
-
|
588
|
-
@Override
|
589
|
-
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
590
|
-
throws IOException
|
591
|
-
{
|
592
|
-
super.writeValue(builder, reader, column);
|
593
|
-
timeFieldWriter.writeKeyValue(builder, reader, column);
|
594
|
-
}
|
595
|
-
}
|
596
|
-
|
597
|
-
static class TimestampFieldLongDuplicator
|
598
|
-
extends TimestampStringFieldWriter
|
599
|
-
{
|
600
|
-
private final TimestampLongFieldWriter timeFieldWriter;
|
601
|
-
|
602
|
-
public TimestampFieldLongDuplicator(TimestampFormatter formatter, String keyName, String longDuplicateKeyName)
|
603
|
-
{
|
604
|
-
super(formatter, keyName);
|
605
|
-
timeFieldWriter = new TimestampLongFieldWriter(longDuplicateKeyName);
|
606
|
-
}
|
607
|
-
|
608
|
-
@Override
|
609
|
-
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
610
|
-
throws IOException
|
611
|
-
{
|
612
|
-
super.writeValue(builder, reader, column);
|
613
|
-
timeFieldWriter.writeKeyValue(builder, reader, column);
|
614
|
-
}
|
615
|
-
}
|
616
255
|
}
|