embulk-output-td 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +7 -0
- data/README.md +63 -0
- data/build.gradle +79 -0
- data/embulk-output-td.gemspec +18 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/output/td.rb +3 -0
- data/settings.gradle +1 -0
- data/src/main/java/com/treasuredata/api/TdApiClient.java +436 -0
- data/src/main/java/com/treasuredata/api/TdApiClientConfig.java +79 -0
- data/src/main/java/com/treasuredata/api/TdApiConflictException.java +10 -0
- data/src/main/java/com/treasuredata/api/TdApiConstants.java +6 -0
- data/src/main/java/com/treasuredata/api/TdApiException.java +20 -0
- data/src/main/java/com/treasuredata/api/TdApiExecutionException.java +10 -0
- data/src/main/java/com/treasuredata/api/TdApiExecutionInterruptedException.java +15 -0
- data/src/main/java/com/treasuredata/api/TdApiExecutionTimeoutException.java +17 -0
- data/src/main/java/com/treasuredata/api/TdApiNotFoundException.java +10 -0
- data/src/main/java/com/treasuredata/api/TdApiResponseException.java +32 -0
- data/src/main/java/com/treasuredata/api/model/TDArrayColumnType.java +80 -0
- data/src/main/java/com/treasuredata/api/model/TDBulkImportSession.java +155 -0
- data/src/main/java/com/treasuredata/api/model/TDColumn.java +83 -0
- data/src/main/java/com/treasuredata/api/model/TDColumnType.java +23 -0
- data/src/main/java/com/treasuredata/api/model/TDColumnTypeDeserializer.java +115 -0
- data/src/main/java/com/treasuredata/api/model/TDDatabase.java +48 -0
- data/src/main/java/com/treasuredata/api/model/TDDatabaseList.java +24 -0
- data/src/main/java/com/treasuredata/api/model/TDMapColumnType.java +88 -0
- data/src/main/java/com/treasuredata/api/model/TDPrimitiveColumnType.java +61 -0
- data/src/main/java/com/treasuredata/api/model/TDTable.java +64 -0
- data/src/main/java/com/treasuredata/api/model/TDTableList.java +33 -0
- data/src/main/java/com/treasuredata/api/model/TDTablePermission.java +48 -0
- data/src/main/java/com/treasuredata/api/model/TDTableSchema.java +44 -0
- data/src/main/java/com/treasuredata/api/model/TDTableType.java +36 -0
- data/src/main/java/org/embulk/output/FinalizableExecutorService.java +84 -0
- data/src/main/java/org/embulk/output/MsgpackGZFileBuilder.java +148 -0
- data/src/main/java/org/embulk/output/RecordWriter.java +567 -0
- data/src/main/java/org/embulk/output/TdOutputPlugin.java +390 -0
- data/src/test/java/org/embulk/output/TestTdOutputPlugin.java +5 -0
- metadata +119 -0
@@ -0,0 +1,48 @@
|
|
1
|
+
package com.treasuredata.api.model;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.annotation.JsonProperty;
|
4
|
+
import com.google.common.base.Objects;
|
5
|
+
|
6
|
+
public class TDTablePermission
|
7
|
+
{
|
8
|
+
private boolean importable;
|
9
|
+
private boolean queryable;
|
10
|
+
|
11
|
+
public TDTablePermission(
|
12
|
+
@JsonProperty("importable") boolean importable,
|
13
|
+
@JsonProperty("queryable") boolean queryable)
|
14
|
+
{
|
15
|
+
this.importable = importable;
|
16
|
+
this.queryable = queryable;
|
17
|
+
}
|
18
|
+
|
19
|
+
@JsonProperty("importable")
|
20
|
+
public boolean isImportable() {
|
21
|
+
return importable;
|
22
|
+
}
|
23
|
+
|
24
|
+
@JsonProperty("queryable")
|
25
|
+
public boolean isQueryable() {
|
26
|
+
return queryable;
|
27
|
+
}
|
28
|
+
|
29
|
+
@Override
|
30
|
+
public boolean equals(Object obj)
|
31
|
+
{
|
32
|
+
if (this == obj) {
|
33
|
+
return true;
|
34
|
+
}
|
35
|
+
if (obj == null || getClass() != obj.getClass()) {
|
36
|
+
return false;
|
37
|
+
}
|
38
|
+
TDTablePermission other = (TDTablePermission) obj;
|
39
|
+
return Objects.equal(this.importable, other.importable) &&
|
40
|
+
Objects.equal(this.queryable, other.queryable);
|
41
|
+
}
|
42
|
+
|
43
|
+
@Override
|
44
|
+
public int hashCode()
|
45
|
+
{
|
46
|
+
return Objects.hashCode(importable, queryable);
|
47
|
+
}
|
48
|
+
}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
package com.treasuredata.api.model;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
4
|
+
import com.fasterxml.jackson.annotation.JsonProperty;
|
5
|
+
import com.google.common.base.Objects;
|
6
|
+
|
7
|
+
import java.util.List;
|
8
|
+
|
9
|
+
public class TDTableSchema
|
10
|
+
{
|
11
|
+
private List<TDColumn> columns;
|
12
|
+
|
13
|
+
@JsonCreator
|
14
|
+
public TDTableSchema(
|
15
|
+
@JsonProperty("columns") List<TDColumn> columns)
|
16
|
+
{
|
17
|
+
this.columns = columns;
|
18
|
+
}
|
19
|
+
|
20
|
+
@JsonProperty
|
21
|
+
public List<TDColumn> getColumns()
|
22
|
+
{
|
23
|
+
return columns;
|
24
|
+
}
|
25
|
+
|
26
|
+
@Override
|
27
|
+
public boolean equals(Object obj)
|
28
|
+
{
|
29
|
+
if (this == obj) {
|
30
|
+
return true;
|
31
|
+
}
|
32
|
+
if (obj == null || getClass() != obj.getClass()) {
|
33
|
+
return false;
|
34
|
+
}
|
35
|
+
TDTableSchema other = (TDTableSchema) obj;
|
36
|
+
return Objects.equal(this.columns, other.columns);
|
37
|
+
}
|
38
|
+
|
39
|
+
@Override
|
40
|
+
public int hashCode()
|
41
|
+
{
|
42
|
+
return Objects.hashCode(columns);
|
43
|
+
}
|
44
|
+
}
|
@@ -0,0 +1,36 @@
|
|
1
|
+
package com.treasuredata.api.model;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
4
|
+
import com.fasterxml.jackson.annotation.JsonValue;
|
5
|
+
import com.fasterxml.jackson.databind.RuntimeJsonMappingException;
|
6
|
+
|
7
|
+
public enum TDTableType
|
8
|
+
{
|
9
|
+
LOG("log"),
|
10
|
+
ITEM("item");
|
11
|
+
|
12
|
+
private String name;
|
13
|
+
|
14
|
+
private TDTableType(String name)
|
15
|
+
{
|
16
|
+
this.name = name;
|
17
|
+
}
|
18
|
+
|
19
|
+
@JsonCreator
|
20
|
+
public static TDTableType fromName(String name)
|
21
|
+
{
|
22
|
+
if ("log".equals(name)) {
|
23
|
+
return LOG;
|
24
|
+
} else if ("item".equals(name)) {
|
25
|
+
return ITEM;
|
26
|
+
}
|
27
|
+
throw new RuntimeJsonMappingException("Unexpected string tuple to deserialize TDTableType");
|
28
|
+
}
|
29
|
+
|
30
|
+
@JsonValue
|
31
|
+
@Override
|
32
|
+
public String toString()
|
33
|
+
{
|
34
|
+
return name;
|
35
|
+
}
|
36
|
+
}
|
@@ -0,0 +1,84 @@
|
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import java.io.Closeable;
|
4
|
+
import java.io.IOException;
|
5
|
+
import java.util.LinkedList;
|
6
|
+
import java.util.Queue;
|
7
|
+
import java.util.concurrent.Callable;
|
8
|
+
import java.util.concurrent.ExecutionException;
|
9
|
+
import java.util.concurrent.ExecutorService;
|
10
|
+
import java.util.concurrent.Executors;
|
11
|
+
import java.util.concurrent.Future;
|
12
|
+
|
13
|
+
/**
 * Runs tasks on a cached thread pool and pairs every task with a
 * {@link Closeable} "finalizer" that is closed once the task has been joined
 * successfully, or when the service is shut down with the task still queued.
 *
 * <p>Tasks are joined in submission order. The task queue is a plain
 * {@link LinkedList}; nothing in this class synchronizes access to it.
 */
public class FinalizableExecutorService
{
    /** A finalizer that does nothing, for tasks that own no resource. */
    public static class NotCloseable
            implements Closeable
    {
        @Override
        public void close()
                throws IOException
        {
            // ignore
        }
    }

    protected ExecutorService threads;
    protected Queue<RunningTask> runningTasks;

    public FinalizableExecutorService()
    {
        this.threads = Executors.newCachedThreadPool();
        this.runningTasks = new LinkedList<>();
    }

    /** A submitted task together with the resource to close after it. */
    private static class RunningTask
    {
        private Future<Void> future;
        private Closeable finalizer;

        RunningTask(Future<Void> future, Closeable finalizer)
        {
            this.future = future;
            this.finalizer = finalizer;
        }

        /**
         * Waits for the task and then closes its finalizer.
         *
         * @throws IOException if the wait is interrupted (the interrupt flag
         *         is restored), if the task failed (the task's exception is
         *         the cause), or if closing the finalizer fails. In the first
         *         two cases the finalizer is NOT closed here.
         */
        public void join() throws IOException
        {
            try {
                future.get();
            } catch (InterruptedException ex) {
                // Keep the interrupt visible to callers further up the stack.
                Thread.currentThread().interrupt();
                throw new IOException(ex);
            } catch (ExecutionException ex) {
                throw new IOException(ex.getCause());
            }
            finalizer.close();
        }

        /**
         * Closes the finalizer without waiting for the task.
         *
         * NOTE(review): the future is not cancelled, so the task may still be
         * running while its finalizer is closed — confirm this is intended.
         */
        public void abort() throws IOException
        {
            finalizer.close();
        }
    }

    /**
     * Submits a task for execution.
     *
     * @param task the work to run on the pool
     * @param finalizer closed after the task has been joined or aborted
     */
    public void submit(Callable<Void> task, Closeable finalizer)
    {
        Future<Void> future = threads.submit(task);
        runningTasks.add(new RunningTask(future, finalizer));
    }

    /**
     * Joins the oldest tasks until at most {@code upto} remain queued.
     *
     * @param upto maximum number of tasks left in the queue on return
     * @throws IOException if joining a task fails; that task stays at the
     *         head of the queue
     */
    public void joinPartial(long upto) throws IOException
    {
        while (runningTasks.size() > upto) {
            // Remove only after a successful join so that a failed task is
            // still queued for shutdown() to finalize.
            runningTasks.peek().join();
            runningTasks.remove();
        }
    }

    /**
     * Joins every queued task.
     *
     * @throws IOException if joining a task fails
     */
    public void joinAll() throws IOException
    {
        joinPartial(0);
    }

    /**
     * Joins all tasks, shuts the pool down and closes the finalizer of every
     * task that could not be joined.
     *
     * <p>Every remaining finalizer is closed even when one of them throws,
     * and the queue is drained so a repeated call does nothing.
     *
     * @throws IOException the join failure if there was one, otherwise the
     *         first finalizer failure; later failures are attached as
     *         suppressed exceptions
     */
    public void shutdown() throws IOException
    {
        IOException failure = null;
        try {
            joinAll();
        } catch (IOException ex) {
            failure = ex;
        } finally {
            threads.shutdown();
            RunningTask task;
            while ((task = runningTasks.poll()) != null) {
                try {
                    task.abort();
                } catch (IOException ex) {
                    if (failure == null) {
                        failure = ex;
                    } else {
                        failure.addSuppressed(ex);
                    }
                }
            }
        }
        if (failure != null) {
            throw failure;
        }
    }
}
|
@@ -0,0 +1,148 @@
|
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import org.msgpack.MessagePack;
|
4
|
+
import org.msgpack.packer.Packer;
|
5
|
+
|
6
|
+
import java.io.BufferedOutputStream;
|
7
|
+
import java.io.Closeable;
|
8
|
+
import java.io.File;
|
9
|
+
import java.io.FileOutputStream;
|
10
|
+
import java.io.FilterOutputStream;
|
11
|
+
import java.io.IOException;
|
12
|
+
import java.io.OutputStream;
|
13
|
+
import java.util.zip.GZIPOutputStream;
|
14
|
+
|
15
|
+
import static com.google.common.base.Preconditions.checkNotNull;
|
16
|
+
|
17
|
+
public class MsgpackGZFileBuilder
|
18
|
+
implements Closeable
|
19
|
+
{
|
20
|
+
static class DataSizeFilter extends FilterOutputStream {
|
21
|
+
private long size = 0;
|
22
|
+
|
23
|
+
public DataSizeFilter(OutputStream out) {
|
24
|
+
super(out);
|
25
|
+
}
|
26
|
+
|
27
|
+
@Override
|
28
|
+
public void write(int b) throws IOException
|
29
|
+
{
|
30
|
+
size += 1;
|
31
|
+
super.write(b);
|
32
|
+
}
|
33
|
+
|
34
|
+
@Override
|
35
|
+
public void write(byte[] b, int off, int len) throws IOException
|
36
|
+
{
|
37
|
+
size += len;
|
38
|
+
super.write(b, off, len);
|
39
|
+
}
|
40
|
+
|
41
|
+
@Override
|
42
|
+
public void close() throws IOException
|
43
|
+
{
|
44
|
+
super.close();
|
45
|
+
}
|
46
|
+
|
47
|
+
public long size()
|
48
|
+
{
|
49
|
+
return size;
|
50
|
+
}
|
51
|
+
}
|
52
|
+
|
53
|
+
private final File file;
|
54
|
+
private final DataSizeFilter out;
|
55
|
+
private final GZIPOutputStream gzout;
|
56
|
+
|
57
|
+
private Packer packer;
|
58
|
+
private long recordCount;
|
59
|
+
|
60
|
+
public MsgpackGZFileBuilder(MessagePack msgpack, File file)
|
61
|
+
throws IOException
|
62
|
+
{
|
63
|
+
this.file = checkNotNull(file);
|
64
|
+
this.out = new DataSizeFilter(new BufferedOutputStream(new FileOutputStream(file)));
|
65
|
+
this.gzout = new GZIPOutputStream(this.out);
|
66
|
+
this.packer = msgpack.createPacker(this.gzout);
|
67
|
+
|
68
|
+
this.recordCount = 0;
|
69
|
+
}
|
70
|
+
|
71
|
+
public long getRecordCount()
|
72
|
+
{
|
73
|
+
return recordCount;
|
74
|
+
}
|
75
|
+
|
76
|
+
public long getWrittenSize()
|
77
|
+
{
|
78
|
+
return out.size();
|
79
|
+
}
|
80
|
+
|
81
|
+
public File getFile()
|
82
|
+
{
|
83
|
+
return file;
|
84
|
+
}
|
85
|
+
|
86
|
+
public void finish()
|
87
|
+
throws IOException
|
88
|
+
{
|
89
|
+
try {
|
90
|
+
packer.flush();
|
91
|
+
} finally {
|
92
|
+
close();
|
93
|
+
}
|
94
|
+
}
|
95
|
+
|
96
|
+
@Override
|
97
|
+
public void close()
|
98
|
+
throws IOException
|
99
|
+
{
|
100
|
+
if (packer != null) {
|
101
|
+
packer.close();
|
102
|
+
packer = null;
|
103
|
+
}
|
104
|
+
}
|
105
|
+
|
106
|
+
public void writeNil()
|
107
|
+
throws IOException
|
108
|
+
{
|
109
|
+
packer.writeNil();
|
110
|
+
}
|
111
|
+
|
112
|
+
public void writeMapBegin(int size)
|
113
|
+
throws IOException
|
114
|
+
{
|
115
|
+
packer.writeMapBegin(size);
|
116
|
+
}
|
117
|
+
|
118
|
+
public void writeMapEnd()
|
119
|
+
throws IOException
|
120
|
+
{
|
121
|
+
packer.writeMapEnd();
|
122
|
+
recordCount++;
|
123
|
+
}
|
124
|
+
|
125
|
+
public void writeString(String v)
|
126
|
+
throws IOException
|
127
|
+
{
|
128
|
+
packer.write(v);
|
129
|
+
}
|
130
|
+
|
131
|
+
public void writeBoolean(boolean v)
|
132
|
+
throws IOException
|
133
|
+
{
|
134
|
+
packer.write(v);
|
135
|
+
}
|
136
|
+
|
137
|
+
public void writeLong(long v)
|
138
|
+
throws IOException
|
139
|
+
{
|
140
|
+
packer.write(v);
|
141
|
+
}
|
142
|
+
|
143
|
+
public void writeDouble(double v)
|
144
|
+
throws IOException
|
145
|
+
{
|
146
|
+
packer.write(v);
|
147
|
+
}
|
148
|
+
}
|
@@ -0,0 +1,567 @@
|
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import com.google.common.base.Throwables;
|
5
|
+
import com.treasuredata.api.TdApiClient;
|
6
|
+
import org.embulk.config.CommitReport;
|
7
|
+
import org.embulk.config.ConfigException;
|
8
|
+
import org.embulk.output.TdOutputPlugin.PluginTask;
|
9
|
+
import org.embulk.spi.Column;
|
10
|
+
import org.embulk.spi.ColumnVisitor;
|
11
|
+
import org.embulk.spi.Exec;
|
12
|
+
import org.embulk.spi.Page;
|
13
|
+
import org.embulk.spi.PageReader;
|
14
|
+
import org.embulk.spi.Schema;
|
15
|
+
import org.embulk.spi.TransactionalPageOutput;
|
16
|
+
import org.embulk.spi.time.TimestampFormatter;
|
17
|
+
import org.embulk.spi.type.BooleanType;
|
18
|
+
import org.embulk.spi.type.DoubleType;
|
19
|
+
import org.embulk.spi.type.LongType;
|
20
|
+
import org.embulk.spi.type.StringType;
|
21
|
+
import org.embulk.spi.type.TimestampType;
|
22
|
+
import org.embulk.spi.type.Type;
|
23
|
+
import org.joda.time.DateTimeZone;
|
24
|
+
import org.jruby.embed.ScriptingContainer;
|
25
|
+
import org.msgpack.MessagePack;
|
26
|
+
import org.slf4j.Logger;
|
27
|
+
|
28
|
+
import java.io.File;
|
29
|
+
import java.io.IOException;
|
30
|
+
import java.text.NumberFormat;
|
31
|
+
import java.util.concurrent.Callable;
|
32
|
+
|
33
|
+
import static com.google.common.base.Preconditions.checkNotNull;
|
34
|
+
|
35
|
+
public class RecordWriter
|
36
|
+
implements TransactionalPageOutput
|
37
|
+
{
|
38
|
+
private final Logger log;
|
39
|
+
private final TdApiClient client;
|
40
|
+
private final String sessionName;
|
41
|
+
|
42
|
+
private final MessagePack msgpack;
|
43
|
+
private final FieldWriterSet fieldWriters;
|
44
|
+
private final File tempDir;
|
45
|
+
|
46
|
+
private int seqid = 0;
|
47
|
+
private PageReader pageReader;
|
48
|
+
private MsgpackGZFileBuilder builder;
|
49
|
+
|
50
|
+
private final FinalizableExecutorService executor;
|
51
|
+
private final int uploadConcurrency;
|
52
|
+
private final long fileSplitSize; // unit: kb
|
53
|
+
|
54
|
+
public RecordWriter(PluginTask task, TdApiClient client, FieldWriterSet fieldWriters)
|
55
|
+
{
|
56
|
+
this.log = Exec.getLogger(getClass());
|
57
|
+
this.client = checkNotNull(client);
|
58
|
+
this.sessionName = task.getSessionName();
|
59
|
+
|
60
|
+
this.msgpack = new MessagePack();
|
61
|
+
this.fieldWriters = fieldWriters;
|
62
|
+
this.tempDir = new File(task.getTempDir());
|
63
|
+
this.executor = new FinalizableExecutorService();
|
64
|
+
this.uploadConcurrency = task.getUploadConcurrency();
|
65
|
+
this.fileSplitSize = task.getFileSplitSize() * 1024;
|
66
|
+
}
|
67
|
+
|
68
|
+
public static void validateSchema(Logger log, PluginTask task, Schema schema)
|
69
|
+
{
|
70
|
+
new FieldWriterSet(log, task, schema);
|
71
|
+
}
|
72
|
+
|
73
|
+
void open(final Schema schema)
|
74
|
+
throws IOException
|
75
|
+
{
|
76
|
+
this.pageReader = new PageReader(checkNotNull(schema));
|
77
|
+
prepareNextBuilder();
|
78
|
+
}
|
79
|
+
|
80
|
+
private void prepareNextBuilder()
|
81
|
+
throws IOException
|
82
|
+
{
|
83
|
+
String prefix = String.format("%s-%d-", sessionName, seqid);
|
84
|
+
File tempFile = File.createTempFile(prefix, ".msgpack.gz", tempDir);
|
85
|
+
this.builder = new MsgpackGZFileBuilder(msgpack, tempFile);
|
86
|
+
}
|
87
|
+
|
88
|
+
@Override
|
89
|
+
public void add(final Page page)
|
90
|
+
{
|
91
|
+
pageReader.setPage(checkNotNull(page));
|
92
|
+
|
93
|
+
try {
|
94
|
+
while (pageReader.nextRecord()) {
|
95
|
+
builder.writeMapBegin(fieldWriters.getFieldCount());
|
96
|
+
|
97
|
+
pageReader.getSchema().visitColumns(new ColumnVisitor() {
|
98
|
+
@Override
|
99
|
+
public void booleanColumn(Column column)
|
100
|
+
{
|
101
|
+
write(column);
|
102
|
+
}
|
103
|
+
|
104
|
+
@Override
|
105
|
+
public void longColumn(Column column)
|
106
|
+
{
|
107
|
+
write(column);
|
108
|
+
}
|
109
|
+
|
110
|
+
@Override
|
111
|
+
public void doubleColumn(Column column)
|
112
|
+
{
|
113
|
+
write(column);
|
114
|
+
}
|
115
|
+
|
116
|
+
@Override
|
117
|
+
public void stringColumn(Column column)
|
118
|
+
{
|
119
|
+
write(column);
|
120
|
+
}
|
121
|
+
|
122
|
+
@Override
|
123
|
+
public void timestampColumn(Column column)
|
124
|
+
{
|
125
|
+
write(column);
|
126
|
+
}
|
127
|
+
|
128
|
+
private void write(Column column)
|
129
|
+
{
|
130
|
+
FieldWriter fieldWriter = fieldWriters.getFieldWriter(column.getIndex());
|
131
|
+
try {
|
132
|
+
fieldWriter.writeKeyValue(builder, pageReader, column);
|
133
|
+
} catch (IOException e) {
|
134
|
+
throw Throwables.propagate(e);
|
135
|
+
}
|
136
|
+
}
|
137
|
+
});
|
138
|
+
|
139
|
+
builder.writeMapEnd();
|
140
|
+
|
141
|
+
if (builder.getWrittenSize() > fileSplitSize) {
|
142
|
+
flush();
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
} catch (IOException e) {
|
147
|
+
throw Throwables.propagate(e);
|
148
|
+
}
|
149
|
+
}
|
150
|
+
|
151
|
+
public void flush() throws IOException
|
152
|
+
{
|
153
|
+
builder.finish();
|
154
|
+
|
155
|
+
if (builder.getRecordCount() > 0) {
|
156
|
+
log.info("{uploading: {rows: {}, size: {} bytes (compressed)}}",
|
157
|
+
builder.getRecordCount(),
|
158
|
+
NumberFormat.getNumberInstance().format(builder.getWrittenSize()));
|
159
|
+
upload(builder);
|
160
|
+
builder = null;
|
161
|
+
}
|
162
|
+
|
163
|
+
prepareNextBuilder();
|
164
|
+
}
|
165
|
+
|
166
|
+
private void upload(final MsgpackGZFileBuilder builder)
|
167
|
+
throws IOException
|
168
|
+
{
|
169
|
+
executor.joinPartial(uploadConcurrency - 1);
|
170
|
+
executor.submit(new Callable<Void>() {
|
171
|
+
@Override
|
172
|
+
public Void call() throws Exception {
|
173
|
+
client.uploadBulkImport(sessionName, builder.getFile());
|
174
|
+
return null;
|
175
|
+
}
|
176
|
+
}, builder);
|
177
|
+
seqid++;
|
178
|
+
}
|
179
|
+
|
180
|
+
@Override
|
181
|
+
public void finish()
|
182
|
+
{
|
183
|
+
try {
|
184
|
+
flush();
|
185
|
+
} catch (IOException e) {
|
186
|
+
throw Throwables.propagate(e);
|
187
|
+
} finally {
|
188
|
+
close();
|
189
|
+
}
|
190
|
+
}
|
191
|
+
|
192
|
+
@Override
|
193
|
+
public void close()
|
194
|
+
{
|
195
|
+
try {
|
196
|
+
try {
|
197
|
+
executor.joinAll();
|
198
|
+
executor.shutdown(); // shutdown calls joinAll
|
199
|
+
} finally {
|
200
|
+
if (builder != null) {
|
201
|
+
builder.close();
|
202
|
+
builder = null;
|
203
|
+
}
|
204
|
+
|
205
|
+
if (client != null) {
|
206
|
+
client.close();
|
207
|
+
}
|
208
|
+
}
|
209
|
+
} catch (IOException e) {
|
210
|
+
throw Throwables.propagate(e);
|
211
|
+
}
|
212
|
+
}
|
213
|
+
|
214
|
+
@Override
|
215
|
+
public void abort()
|
216
|
+
{
|
217
|
+
// do nothing
|
218
|
+
}
|
219
|
+
|
220
|
+
@Override
|
221
|
+
public CommitReport commit()
|
222
|
+
{
|
223
|
+
CommitReport report = Exec.newCommitReport();
|
224
|
+
// TODO
|
225
|
+
return report;
|
226
|
+
}
|
227
|
+
|
228
|
+
static class FieldWriterSet
|
229
|
+
{
|
230
|
+
private enum ColumnWriterMode
|
231
|
+
{
|
232
|
+
PRIMARY_KEY,
|
233
|
+
SIMPLE_VALUE,
|
234
|
+
DUPLICATE_PRIMARY_KEY;
|
235
|
+
}
|
236
|
+
|
237
|
+
private final int fieldCount;
|
238
|
+
private final FieldWriter[] fieldWriters;
|
239
|
+
|
240
|
+
public FieldWriterSet(Logger log, PluginTask task, Schema schema)
|
241
|
+
{
|
242
|
+
Optional<String> userDefinedPrimaryKeySourceColumnName = task.getTimeColumn();
|
243
|
+
boolean hasPkWriter = false;
|
244
|
+
int duplicatePrimaryKeySourceIndex = -1;
|
245
|
+
int firstTimestampColumnIndex = -1;
|
246
|
+
|
247
|
+
int fc = 0;
|
248
|
+
fieldWriters = new FieldWriter[schema.size()];
|
249
|
+
|
250
|
+
for (int i = 0; i < schema.size(); i++) {
|
251
|
+
String columnName = schema.getColumnName(i);
|
252
|
+
Type columnType = schema.getColumnType(i);
|
253
|
+
|
254
|
+
// choose the mode
|
255
|
+
final ColumnWriterMode mode;
|
256
|
+
|
257
|
+
if (userDefinedPrimaryKeySourceColumnName.isPresent() &&
|
258
|
+
columnName.equals(userDefinedPrimaryKeySourceColumnName.get())) {
|
259
|
+
// found time_column
|
260
|
+
if ("time".equals(userDefinedPrimaryKeySourceColumnName.get())) {
|
261
|
+
mode = ColumnWriterMode.PRIMARY_KEY;
|
262
|
+
} else {
|
263
|
+
mode = ColumnWriterMode.DUPLICATE_PRIMARY_KEY;
|
264
|
+
}
|
265
|
+
|
266
|
+
} else if ("time".equals(columnName)) {
|
267
|
+
// the column name is same with the primary key name.
|
268
|
+
if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
|
269
|
+
columnName = newColumnUniqueName(columnName, schema);
|
270
|
+
mode = ColumnWriterMode.SIMPLE_VALUE;
|
271
|
+
log.warn("time_column '{}' is set but 'time' column also exists. The existent 'time' column is renamed to {}",
|
272
|
+
userDefinedPrimaryKeySourceColumnName.get(), "time", "time", columnName);
|
273
|
+
} else {
|
274
|
+
mode = ColumnWriterMode.PRIMARY_KEY;
|
275
|
+
}
|
276
|
+
|
277
|
+
} else {
|
278
|
+
mode = ColumnWriterMode.SIMPLE_VALUE;
|
279
|
+
}
|
280
|
+
|
281
|
+
// create the fieldWriters writer depending on the mode
|
282
|
+
final FieldWriter writer;
|
283
|
+
|
284
|
+
switch (mode) {
|
285
|
+
case PRIMARY_KEY:
|
286
|
+
log.info("Using {}:{} column as the data partitioning key", columnName, columnType);
|
287
|
+
if (columnType instanceof LongType) {
|
288
|
+
writer = new LongFieldWriter(columnName);
|
289
|
+
hasPkWriter = true;
|
290
|
+
} else if (columnType instanceof TimestampType) {
|
291
|
+
writer = new TimestampStringFieldWriter(task.getJRuby(), columnName);
|
292
|
+
hasPkWriter = true;
|
293
|
+
} else {
|
294
|
+
throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
|
295
|
+
columnName, columnType));
|
296
|
+
}
|
297
|
+
break;
|
298
|
+
|
299
|
+
case SIMPLE_VALUE:
|
300
|
+
if (columnType instanceof BooleanType) {
|
301
|
+
writer = new BooleanFieldWriter(columnName);
|
302
|
+
} else if (columnType instanceof LongType) {
|
303
|
+
writer = new LongFieldWriter(columnName);
|
304
|
+
} else if (columnType instanceof DoubleType) {
|
305
|
+
writer = new DoubleFieldWriter(columnName);
|
306
|
+
} else if (columnType instanceof StringType) {
|
307
|
+
writer = new StringFieldWriter(columnName);
|
308
|
+
} else if (columnType instanceof TimestampType) {
|
309
|
+
writer = new TimestampStringFieldWriter(task.getJRuby(), columnName);
|
310
|
+
if (firstTimestampColumnIndex < 0) {
|
311
|
+
firstTimestampColumnIndex = i;
|
312
|
+
}
|
313
|
+
} else {
|
314
|
+
throw new ConfigException("Unsupported type: " + columnType);
|
315
|
+
}
|
316
|
+
break;
|
317
|
+
|
318
|
+
case DUPLICATE_PRIMARY_KEY:
|
319
|
+
duplicatePrimaryKeySourceIndex = i;
|
320
|
+
writer = null; // handle later
|
321
|
+
break;
|
322
|
+
|
323
|
+
default:
|
324
|
+
throw new AssertionError();
|
325
|
+
}
|
326
|
+
|
327
|
+
fieldWriters[i] = writer;
|
328
|
+
fc += 1;
|
329
|
+
}
|
330
|
+
|
331
|
+
if (!hasPkWriter) {
|
332
|
+
// PRIMARY_KEY was not found.
|
333
|
+
if (duplicatePrimaryKeySourceIndex < 0) {
|
334
|
+
if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
|
335
|
+
throw new ConfigException(String.format("time_column '%s' does not exist", userDefinedPrimaryKeySourceColumnName.get()));
|
336
|
+
} else if (firstTimestampColumnIndex >= 0) {
|
337
|
+
// if time is not found, use the first timestamp column
|
338
|
+
duplicatePrimaryKeySourceIndex = firstTimestampColumnIndex;
|
339
|
+
} else {
|
340
|
+
throw new ConfigException(String.format("TD output plugin requires at least one timestamp column, or a long column named 'time'"));
|
341
|
+
}
|
342
|
+
}
|
343
|
+
|
344
|
+
String columnName = schema.getColumnName(duplicatePrimaryKeySourceIndex);
|
345
|
+
Type columnType = schema.getColumnType(duplicatePrimaryKeySourceIndex);
|
346
|
+
|
347
|
+
log.info("Duplicating {}:{} column to 'time' column for the data partitioning",
|
348
|
+
columnName, columnType);
|
349
|
+
|
350
|
+
FieldWriter writer;
|
351
|
+
if (columnType instanceof LongType) {
|
352
|
+
writer = new LongFieldDuplicator(columnName, "time");
|
353
|
+
} else if (columnType instanceof TimestampType) {
|
354
|
+
writer = new TimestampFieldLongDuplicator(task.getJRuby(), columnName, "time");
|
355
|
+
} else {
|
356
|
+
throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
|
357
|
+
columnName, columnType));
|
358
|
+
}
|
359
|
+
|
360
|
+
// replace existint writer
|
361
|
+
fieldWriters[duplicatePrimaryKeySourceIndex] = writer;
|
362
|
+
fc += 1;
|
363
|
+
}
|
364
|
+
|
365
|
+
fieldCount = fc;
|
366
|
+
}
|
367
|
+
|
368
|
+
private static String newColumnUniqueName(String originalName, Schema schema)
|
369
|
+
{
|
370
|
+
String name = originalName;
|
371
|
+
do {
|
372
|
+
name += "_";
|
373
|
+
} while (containsColumnName(schema, name));
|
374
|
+
return name;
|
375
|
+
}
|
376
|
+
|
377
|
+
private static boolean containsColumnName(Schema schema, String name)
|
378
|
+
{
|
379
|
+
for (Column c : schema.getColumns()) {
|
380
|
+
if (c.getName().equals(name)) {
|
381
|
+
return true;
|
382
|
+
}
|
383
|
+
}
|
384
|
+
return false;
|
385
|
+
}
|
386
|
+
|
387
|
+
public FieldWriter getFieldWriter(int index)
|
388
|
+
{
|
389
|
+
return fieldWriters[index];
|
390
|
+
}
|
391
|
+
|
392
|
+
public int getFieldCount()
|
393
|
+
{
|
394
|
+
return fieldCount;
|
395
|
+
}
|
396
|
+
}
|
397
|
+
|
398
|
+
static abstract class FieldWriter
|
399
|
+
{
|
400
|
+
private final String keyName;
|
401
|
+
|
402
|
+
protected FieldWriter(String keyName)
|
403
|
+
{
|
404
|
+
this.keyName = keyName;
|
405
|
+
}
|
406
|
+
|
407
|
+
public void writeKeyValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
408
|
+
throws IOException
|
409
|
+
{
|
410
|
+
writeKey(builder);
|
411
|
+
if (reader.isNull(column)) {
|
412
|
+
builder.writeNil();
|
413
|
+
} else {
|
414
|
+
writeValue(builder, reader, column);
|
415
|
+
}
|
416
|
+
}
|
417
|
+
|
418
|
+
private void writeKey(MsgpackGZFileBuilder builder)
|
419
|
+
throws IOException
|
420
|
+
{
|
421
|
+
builder.writeString(keyName);
|
422
|
+
}
|
423
|
+
|
424
|
+
protected abstract void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
425
|
+
throws IOException;
|
426
|
+
}
|
427
|
+
|
428
|
+
static class DoubleFieldWriter
|
429
|
+
extends FieldWriter
|
430
|
+
{
|
431
|
+
public DoubleFieldWriter(String keyName)
|
432
|
+
{
|
433
|
+
super(keyName);
|
434
|
+
}
|
435
|
+
|
436
|
+
@Override
|
437
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
438
|
+
throws IOException
|
439
|
+
{
|
440
|
+
builder.writeDouble(reader.getDouble(column));
|
441
|
+
}
|
442
|
+
}
|
443
|
+
|
444
|
+
static class BooleanFieldWriter
|
445
|
+
extends FieldWriter
|
446
|
+
{
|
447
|
+
public BooleanFieldWriter(String keyName)
|
448
|
+
{
|
449
|
+
super(keyName);
|
450
|
+
}
|
451
|
+
|
452
|
+
@Override
|
453
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
454
|
+
throws IOException
|
455
|
+
{
|
456
|
+
builder.writeBoolean(reader.getBoolean(column));
|
457
|
+
}
|
458
|
+
}
|
459
|
+
|
460
|
+
static class LongFieldWriter
|
461
|
+
extends FieldWriter
|
462
|
+
{
|
463
|
+
LongFieldWriter(String keyName)
|
464
|
+
{
|
465
|
+
super(keyName);
|
466
|
+
}
|
467
|
+
|
468
|
+
@Override
|
469
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
470
|
+
throws IOException
|
471
|
+
{
|
472
|
+
builder.writeLong(reader.getLong(column));
|
473
|
+
}
|
474
|
+
}
|
475
|
+
|
476
|
+
static class StringFieldWriter
|
477
|
+
extends FieldWriter
|
478
|
+
{
|
479
|
+
public StringFieldWriter(String keyName)
|
480
|
+
{
|
481
|
+
super(keyName);
|
482
|
+
}
|
483
|
+
|
484
|
+
@Override
|
485
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
486
|
+
throws IOException
|
487
|
+
{
|
488
|
+
builder.writeString(reader.getString(column));
|
489
|
+
}
|
490
|
+
}
|
491
|
+
|
492
|
+
static class TimestampStringFieldWriter
|
493
|
+
extends FieldWriter
|
494
|
+
{
|
495
|
+
// to format timestamp values to string by "%Y-%m-%d %H:%M:%S.%3N"
|
496
|
+
private final TimestampFormatter defaultFormatter;
|
497
|
+
|
498
|
+
public TimestampStringFieldWriter(ScriptingContainer jruby, String keyName)
|
499
|
+
{
|
500
|
+
super(keyName);
|
501
|
+
this.defaultFormatter = new TimestampFormatter(jruby, "%Y-%m-%d %H:%M:%S.%3N", DateTimeZone.UTC);
|
502
|
+
}
|
503
|
+
|
504
|
+
@Override
|
505
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
506
|
+
throws IOException
|
507
|
+
{
|
508
|
+
builder.writeString(defaultFormatter.format(reader.getTimestamp(column)));
|
509
|
+
}
|
510
|
+
}
|
511
|
+
|
512
|
+
static class TimestampLongFieldWriter
|
513
|
+
extends FieldWriter
|
514
|
+
{
|
515
|
+
public TimestampLongFieldWriter(String keyName)
|
516
|
+
{
|
517
|
+
super(keyName);
|
518
|
+
}
|
519
|
+
|
520
|
+
@Override
|
521
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
522
|
+
throws IOException
|
523
|
+
{
|
524
|
+
builder.writeLong(reader.getTimestamp(column).getEpochSecond());
|
525
|
+
}
|
526
|
+
}
|
527
|
+
|
528
|
+
static class LongFieldDuplicator
|
529
|
+
extends LongFieldWriter
|
530
|
+
{
|
531
|
+
private final LongFieldWriter timeFieldWriter;
|
532
|
+
|
533
|
+
public LongFieldDuplicator(String keyName, String duplicateKeyName)
|
534
|
+
{
|
535
|
+
super(keyName);
|
536
|
+
timeFieldWriter = new LongFieldWriter(duplicateKeyName);
|
537
|
+
}
|
538
|
+
|
539
|
+
@Override
|
540
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
541
|
+
throws IOException
|
542
|
+
{
|
543
|
+
super.writeValue(builder, reader, column);
|
544
|
+
timeFieldWriter.writeKeyValue(builder, reader, column);
|
545
|
+
}
|
546
|
+
}
|
547
|
+
|
548
|
+
static class TimestampFieldLongDuplicator
|
549
|
+
extends TimestampStringFieldWriter
|
550
|
+
{
|
551
|
+
private final TimestampLongFieldWriter timeFieldWriter;
|
552
|
+
|
553
|
+
public TimestampFieldLongDuplicator(ScriptingContainer jruby, String keyName, String longDuplicateKeyName)
|
554
|
+
{
|
555
|
+
super(jruby, keyName);
|
556
|
+
timeFieldWriter = new TimestampLongFieldWriter(longDuplicateKeyName);
|
557
|
+
}
|
558
|
+
|
559
|
+
@Override
|
560
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
561
|
+
throws IOException
|
562
|
+
{
|
563
|
+
super.writeValue(builder, reader, column);
|
564
|
+
timeFieldWriter.writeKeyValue(builder, reader, column);
|
565
|
+
}
|
566
|
+
}
|
567
|
+
}
|