embulk-output-td 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +7 -0
- data/README.md +63 -0
- data/build.gradle +79 -0
- data/embulk-output-td.gemspec +18 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/output/td.rb +3 -0
- data/settings.gradle +1 -0
- data/src/main/java/com/treasuredata/api/TdApiClient.java +436 -0
- data/src/main/java/com/treasuredata/api/TdApiClientConfig.java +79 -0
- data/src/main/java/com/treasuredata/api/TdApiConflictException.java +10 -0
- data/src/main/java/com/treasuredata/api/TdApiConstants.java +6 -0
- data/src/main/java/com/treasuredata/api/TdApiException.java +20 -0
- data/src/main/java/com/treasuredata/api/TdApiExecutionException.java +10 -0
- data/src/main/java/com/treasuredata/api/TdApiExecutionInterruptedException.java +15 -0
- data/src/main/java/com/treasuredata/api/TdApiExecutionTimeoutException.java +17 -0
- data/src/main/java/com/treasuredata/api/TdApiNotFoundException.java +10 -0
- data/src/main/java/com/treasuredata/api/TdApiResponseException.java +32 -0
- data/src/main/java/com/treasuredata/api/model/TDArrayColumnType.java +80 -0
- data/src/main/java/com/treasuredata/api/model/TDBulkImportSession.java +155 -0
- data/src/main/java/com/treasuredata/api/model/TDColumn.java +83 -0
- data/src/main/java/com/treasuredata/api/model/TDColumnType.java +23 -0
- data/src/main/java/com/treasuredata/api/model/TDColumnTypeDeserializer.java +115 -0
- data/src/main/java/com/treasuredata/api/model/TDDatabase.java +48 -0
- data/src/main/java/com/treasuredata/api/model/TDDatabaseList.java +24 -0
- data/src/main/java/com/treasuredata/api/model/TDMapColumnType.java +88 -0
- data/src/main/java/com/treasuredata/api/model/TDPrimitiveColumnType.java +61 -0
- data/src/main/java/com/treasuredata/api/model/TDTable.java +64 -0
- data/src/main/java/com/treasuredata/api/model/TDTableList.java +33 -0
- data/src/main/java/com/treasuredata/api/model/TDTablePermission.java +48 -0
- data/src/main/java/com/treasuredata/api/model/TDTableSchema.java +44 -0
- data/src/main/java/com/treasuredata/api/model/TDTableType.java +36 -0
- data/src/main/java/org/embulk/output/FinalizableExecutorService.java +84 -0
- data/src/main/java/org/embulk/output/MsgpackGZFileBuilder.java +148 -0
- data/src/main/java/org/embulk/output/RecordWriter.java +567 -0
- data/src/main/java/org/embulk/output/TdOutputPlugin.java +390 -0
- data/src/test/java/org/embulk/output/TestTdOutputPlugin.java +5 -0
- metadata +119 -0
@@ -0,0 +1,48 @@
|
|
1
|
+
package com.treasuredata.api.model;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.annotation.JsonProperty;
|
4
|
+
import com.google.common.base.Objects;
|
5
|
+
|
6
|
+
public class TDTablePermission
|
7
|
+
{
|
8
|
+
private boolean importable;
|
9
|
+
private boolean queryable;
|
10
|
+
|
11
|
+
public TDTablePermission(
|
12
|
+
@JsonProperty("importable") boolean importable,
|
13
|
+
@JsonProperty("queryable") boolean queryable)
|
14
|
+
{
|
15
|
+
this.importable = importable;
|
16
|
+
this.queryable = queryable;
|
17
|
+
}
|
18
|
+
|
19
|
+
@JsonProperty("importable")
|
20
|
+
public boolean isImportable() {
|
21
|
+
return importable;
|
22
|
+
}
|
23
|
+
|
24
|
+
@JsonProperty("queryable")
|
25
|
+
public boolean isQueryable() {
|
26
|
+
return queryable;
|
27
|
+
}
|
28
|
+
|
29
|
+
@Override
|
30
|
+
public boolean equals(Object obj)
|
31
|
+
{
|
32
|
+
if (this == obj) {
|
33
|
+
return true;
|
34
|
+
}
|
35
|
+
if (obj == null || getClass() != obj.getClass()) {
|
36
|
+
return false;
|
37
|
+
}
|
38
|
+
TDTablePermission other = (TDTablePermission) obj;
|
39
|
+
return Objects.equal(this.importable, other.importable) &&
|
40
|
+
Objects.equal(this.queryable, other.queryable);
|
41
|
+
}
|
42
|
+
|
43
|
+
@Override
|
44
|
+
public int hashCode()
|
45
|
+
{
|
46
|
+
return Objects.hashCode(importable, queryable);
|
47
|
+
}
|
48
|
+
}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
package com.treasuredata.api.model;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
4
|
+
import com.fasterxml.jackson.annotation.JsonProperty;
|
5
|
+
import com.google.common.base.Objects;
|
6
|
+
|
7
|
+
import java.util.List;
|
8
|
+
|
9
|
+
public class TDTableSchema
|
10
|
+
{
|
11
|
+
private List<TDColumn> columns;
|
12
|
+
|
13
|
+
@JsonCreator
|
14
|
+
public TDTableSchema(
|
15
|
+
@JsonProperty("columns") List<TDColumn> columns)
|
16
|
+
{
|
17
|
+
this.columns = columns;
|
18
|
+
}
|
19
|
+
|
20
|
+
@JsonProperty
|
21
|
+
public List<TDColumn> getColumns()
|
22
|
+
{
|
23
|
+
return columns;
|
24
|
+
}
|
25
|
+
|
26
|
+
@Override
|
27
|
+
public boolean equals(Object obj)
|
28
|
+
{
|
29
|
+
if (this == obj) {
|
30
|
+
return true;
|
31
|
+
}
|
32
|
+
if (obj == null || getClass() != obj.getClass()) {
|
33
|
+
return false;
|
34
|
+
}
|
35
|
+
TDTableSchema other = (TDTableSchema) obj;
|
36
|
+
return Objects.equal(this.columns, other.columns);
|
37
|
+
}
|
38
|
+
|
39
|
+
@Override
|
40
|
+
public int hashCode()
|
41
|
+
{
|
42
|
+
return Objects.hashCode(columns);
|
43
|
+
}
|
44
|
+
}
|
@@ -0,0 +1,36 @@
|
|
1
|
+
package com.treasuredata.api.model;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
4
|
+
import com.fasterxml.jackson.annotation.JsonValue;
|
5
|
+
import com.fasterxml.jackson.databind.RuntimeJsonMappingException;
|
6
|
+
|
7
|
+
public enum TDTableType
|
8
|
+
{
|
9
|
+
LOG("log"),
|
10
|
+
ITEM("item");
|
11
|
+
|
12
|
+
private String name;
|
13
|
+
|
14
|
+
private TDTableType(String name)
|
15
|
+
{
|
16
|
+
this.name = name;
|
17
|
+
}
|
18
|
+
|
19
|
+
@JsonCreator
|
20
|
+
public static TDTableType fromName(String name)
|
21
|
+
{
|
22
|
+
if ("log".equals(name)) {
|
23
|
+
return LOG;
|
24
|
+
} else if ("item".equals(name)) {
|
25
|
+
return ITEM;
|
26
|
+
}
|
27
|
+
throw new RuntimeJsonMappingException("Unexpected string tuple to deserialize TDTableType");
|
28
|
+
}
|
29
|
+
|
30
|
+
@JsonValue
|
31
|
+
@Override
|
32
|
+
public String toString()
|
33
|
+
{
|
34
|
+
return name;
|
35
|
+
}
|
36
|
+
}
|
@@ -0,0 +1,84 @@
|
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import java.io.Closeable;
|
4
|
+
import java.io.IOException;
|
5
|
+
import java.util.LinkedList;
|
6
|
+
import java.util.Queue;
|
7
|
+
import java.util.concurrent.Callable;
|
8
|
+
import java.util.concurrent.ExecutionException;
|
9
|
+
import java.util.concurrent.ExecutorService;
|
10
|
+
import java.util.concurrent.Executors;
|
11
|
+
import java.util.concurrent.Future;
|
12
|
+
|
13
|
+
/**
 * An executor wrapper that pairs each submitted task with a {@link Closeable}
 * finalizer. The finalizer is closed after the task is joined, or when the
 * service aborts remaining tasks at shutdown, so resources such as temporary
 * files are always released.
 *
 * <p>Not thread-safe: submit/join are expected to be called from one thread.
 */
public class FinalizableExecutorService
{
    /** A no-op finalizer for tasks that have nothing to release. */
    public static class NotCloseable
            implements Closeable
    {
        @Override
        public void close()
                throws IOException {
            // ignore
        }
    }

    protected ExecutorService threads;
    protected Queue<RunningTask> runningTasks;

    public FinalizableExecutorService() {
        this.threads = Executors.newCachedThreadPool();
        this.runningTasks = new LinkedList<>();
    }

    /** A submitted task together with the finalizer to run after it. */
    private static class RunningTask {
        private Future<Void> future;
        private Closeable finalizer;

        RunningTask(Future<Void> future, Closeable finalizer) {
            this.future = future;
            this.finalizer = finalizer;
        }

        /**
         * Waits for the task to finish, then closes the finalizer.
         * On failure the finalizer is NOT closed here; shutdown() aborts
         * the task and closes it there.
         */
        public void join() throws IOException {
            try {
                future.get();
            } catch (InterruptedException ex) {
                // restore the interrupt status before translating the exception
                Thread.currentThread().interrupt();
                throw new IOException(ex);
            } catch (ExecutionException ex) {
                // unwrap: the cause is the exception thrown by the task itself
                throw new IOException(ex.getCause());
            }
            finalizer.close();
        }

        /** Releases the task's resources without waiting for completion. */
        public void abort() throws IOException {
            finalizer.close();
        }
    }

    /**
     * Submits a task whose finalizer is closed when the task is joined or
     * aborted at shutdown.
     */
    public void submit(Callable<Void> task, Closeable finalizer) {
        Future<Void> future = threads.submit(task);
        runningTasks.add(new RunningTask(future, finalizer));
    }

    /**
     * Joins the oldest running tasks until at most {@code upto} remain.
     * Used to bound the number of concurrent background tasks.
     */
    public void joinPartial(long upto) throws IOException {
        while (runningTasks.size() > upto) {
            runningTasks.peek().join();
            runningTasks.remove();
        }
    }

    /** Joins all running tasks. */
    public void joinAll() throws IOException {
        joinPartial(0);
    }

    /**
     * Joins all tasks, then stops the thread pool. Tasks still queued after
     * a failed join are aborted so their finalizers still run.
     */
    public void shutdown() throws IOException {
        try {
            joinAll();
        } finally {
            threads.shutdown();
            for (RunningTask task : runningTasks) {
                task.abort();
            }
        }
    }
}
|
@@ -0,0 +1,148 @@
|
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import org.msgpack.MessagePack;
|
4
|
+
import org.msgpack.packer.Packer;
|
5
|
+
|
6
|
+
import java.io.BufferedOutputStream;
|
7
|
+
import java.io.Closeable;
|
8
|
+
import java.io.File;
|
9
|
+
import java.io.FileOutputStream;
|
10
|
+
import java.io.FilterOutputStream;
|
11
|
+
import java.io.IOException;
|
12
|
+
import java.io.OutputStream;
|
13
|
+
import java.util.zip.GZIPOutputStream;
|
14
|
+
|
15
|
+
import static com.google.common.base.Preconditions.checkNotNull;
|
16
|
+
|
17
|
+
public class MsgpackGZFileBuilder
|
18
|
+
implements Closeable
|
19
|
+
{
|
20
|
+
static class DataSizeFilter extends FilterOutputStream {
|
21
|
+
private long size = 0;
|
22
|
+
|
23
|
+
public DataSizeFilter(OutputStream out) {
|
24
|
+
super(out);
|
25
|
+
}
|
26
|
+
|
27
|
+
@Override
|
28
|
+
public void write(int b) throws IOException
|
29
|
+
{
|
30
|
+
size += 1;
|
31
|
+
super.write(b);
|
32
|
+
}
|
33
|
+
|
34
|
+
@Override
|
35
|
+
public void write(byte[] b, int off, int len) throws IOException
|
36
|
+
{
|
37
|
+
size += len;
|
38
|
+
super.write(b, off, len);
|
39
|
+
}
|
40
|
+
|
41
|
+
@Override
|
42
|
+
public void close() throws IOException
|
43
|
+
{
|
44
|
+
super.close();
|
45
|
+
}
|
46
|
+
|
47
|
+
public long size()
|
48
|
+
{
|
49
|
+
return size;
|
50
|
+
}
|
51
|
+
}
|
52
|
+
|
53
|
+
private final File file;
|
54
|
+
private final DataSizeFilter out;
|
55
|
+
private final GZIPOutputStream gzout;
|
56
|
+
|
57
|
+
private Packer packer;
|
58
|
+
private long recordCount;
|
59
|
+
|
60
|
+
public MsgpackGZFileBuilder(MessagePack msgpack, File file)
|
61
|
+
throws IOException
|
62
|
+
{
|
63
|
+
this.file = checkNotNull(file);
|
64
|
+
this.out = new DataSizeFilter(new BufferedOutputStream(new FileOutputStream(file)));
|
65
|
+
this.gzout = new GZIPOutputStream(this.out);
|
66
|
+
this.packer = msgpack.createPacker(this.gzout);
|
67
|
+
|
68
|
+
this.recordCount = 0;
|
69
|
+
}
|
70
|
+
|
71
|
+
public long getRecordCount()
|
72
|
+
{
|
73
|
+
return recordCount;
|
74
|
+
}
|
75
|
+
|
76
|
+
public long getWrittenSize()
|
77
|
+
{
|
78
|
+
return out.size();
|
79
|
+
}
|
80
|
+
|
81
|
+
public File getFile()
|
82
|
+
{
|
83
|
+
return file;
|
84
|
+
}
|
85
|
+
|
86
|
+
public void finish()
|
87
|
+
throws IOException
|
88
|
+
{
|
89
|
+
try {
|
90
|
+
packer.flush();
|
91
|
+
} finally {
|
92
|
+
close();
|
93
|
+
}
|
94
|
+
}
|
95
|
+
|
96
|
+
@Override
|
97
|
+
public void close()
|
98
|
+
throws IOException
|
99
|
+
{
|
100
|
+
if (packer != null) {
|
101
|
+
packer.close();
|
102
|
+
packer = null;
|
103
|
+
}
|
104
|
+
}
|
105
|
+
|
106
|
+
public void writeNil()
|
107
|
+
throws IOException
|
108
|
+
{
|
109
|
+
packer.writeNil();
|
110
|
+
}
|
111
|
+
|
112
|
+
public void writeMapBegin(int size)
|
113
|
+
throws IOException
|
114
|
+
{
|
115
|
+
packer.writeMapBegin(size);
|
116
|
+
}
|
117
|
+
|
118
|
+
public void writeMapEnd()
|
119
|
+
throws IOException
|
120
|
+
{
|
121
|
+
packer.writeMapEnd();
|
122
|
+
recordCount++;
|
123
|
+
}
|
124
|
+
|
125
|
+
public void writeString(String v)
|
126
|
+
throws IOException
|
127
|
+
{
|
128
|
+
packer.write(v);
|
129
|
+
}
|
130
|
+
|
131
|
+
public void writeBoolean(boolean v)
|
132
|
+
throws IOException
|
133
|
+
{
|
134
|
+
packer.write(v);
|
135
|
+
}
|
136
|
+
|
137
|
+
public void writeLong(long v)
|
138
|
+
throws IOException
|
139
|
+
{
|
140
|
+
packer.write(v);
|
141
|
+
}
|
142
|
+
|
143
|
+
public void writeDouble(double v)
|
144
|
+
throws IOException
|
145
|
+
{
|
146
|
+
packer.write(v);
|
147
|
+
}
|
148
|
+
}
|
@@ -0,0 +1,567 @@
|
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import com.google.common.base.Throwables;
|
5
|
+
import com.treasuredata.api.TdApiClient;
|
6
|
+
import org.embulk.config.CommitReport;
|
7
|
+
import org.embulk.config.ConfigException;
|
8
|
+
import org.embulk.output.TdOutputPlugin.PluginTask;
|
9
|
+
import org.embulk.spi.Column;
|
10
|
+
import org.embulk.spi.ColumnVisitor;
|
11
|
+
import org.embulk.spi.Exec;
|
12
|
+
import org.embulk.spi.Page;
|
13
|
+
import org.embulk.spi.PageReader;
|
14
|
+
import org.embulk.spi.Schema;
|
15
|
+
import org.embulk.spi.TransactionalPageOutput;
|
16
|
+
import org.embulk.spi.time.TimestampFormatter;
|
17
|
+
import org.embulk.spi.type.BooleanType;
|
18
|
+
import org.embulk.spi.type.DoubleType;
|
19
|
+
import org.embulk.spi.type.LongType;
|
20
|
+
import org.embulk.spi.type.StringType;
|
21
|
+
import org.embulk.spi.type.TimestampType;
|
22
|
+
import org.embulk.spi.type.Type;
|
23
|
+
import org.joda.time.DateTimeZone;
|
24
|
+
import org.jruby.embed.ScriptingContainer;
|
25
|
+
import org.msgpack.MessagePack;
|
26
|
+
import org.slf4j.Logger;
|
27
|
+
|
28
|
+
import java.io.File;
|
29
|
+
import java.io.IOException;
|
30
|
+
import java.text.NumberFormat;
|
31
|
+
import java.util.concurrent.Callable;
|
32
|
+
|
33
|
+
import static com.google.common.base.Preconditions.checkNotNull;
|
34
|
+
|
35
|
+
public class RecordWriter
|
36
|
+
implements TransactionalPageOutput
|
37
|
+
{
|
38
|
+
private final Logger log;
|
39
|
+
private final TdApiClient client;
|
40
|
+
private final String sessionName;
|
41
|
+
|
42
|
+
private final MessagePack msgpack;
|
43
|
+
private final FieldWriterSet fieldWriters;
|
44
|
+
private final File tempDir;
|
45
|
+
|
46
|
+
private int seqid = 0;
|
47
|
+
private PageReader pageReader;
|
48
|
+
private MsgpackGZFileBuilder builder;
|
49
|
+
|
50
|
+
private final FinalizableExecutorService executor;
|
51
|
+
private final int uploadConcurrency;
|
52
|
+
private final long fileSplitSize; // unit: kb
|
53
|
+
|
54
|
+
public RecordWriter(PluginTask task, TdApiClient client, FieldWriterSet fieldWriters)
|
55
|
+
{
|
56
|
+
this.log = Exec.getLogger(getClass());
|
57
|
+
this.client = checkNotNull(client);
|
58
|
+
this.sessionName = task.getSessionName();
|
59
|
+
|
60
|
+
this.msgpack = new MessagePack();
|
61
|
+
this.fieldWriters = fieldWriters;
|
62
|
+
this.tempDir = new File(task.getTempDir());
|
63
|
+
this.executor = new FinalizableExecutorService();
|
64
|
+
this.uploadConcurrency = task.getUploadConcurrency();
|
65
|
+
this.fileSplitSize = task.getFileSplitSize() * 1024;
|
66
|
+
}
|
67
|
+
|
68
|
+
public static void validateSchema(Logger log, PluginTask task, Schema schema)
|
69
|
+
{
|
70
|
+
new FieldWriterSet(log, task, schema);
|
71
|
+
}
|
72
|
+
|
73
|
+
void open(final Schema schema)
|
74
|
+
throws IOException
|
75
|
+
{
|
76
|
+
this.pageReader = new PageReader(checkNotNull(schema));
|
77
|
+
prepareNextBuilder();
|
78
|
+
}
|
79
|
+
|
80
|
+
private void prepareNextBuilder()
|
81
|
+
throws IOException
|
82
|
+
{
|
83
|
+
String prefix = String.format("%s-%d-", sessionName, seqid);
|
84
|
+
File tempFile = File.createTempFile(prefix, ".msgpack.gz", tempDir);
|
85
|
+
this.builder = new MsgpackGZFileBuilder(msgpack, tempFile);
|
86
|
+
}
|
87
|
+
|
88
|
+
@Override
|
89
|
+
public void add(final Page page)
|
90
|
+
{
|
91
|
+
pageReader.setPage(checkNotNull(page));
|
92
|
+
|
93
|
+
try {
|
94
|
+
while (pageReader.nextRecord()) {
|
95
|
+
builder.writeMapBegin(fieldWriters.getFieldCount());
|
96
|
+
|
97
|
+
pageReader.getSchema().visitColumns(new ColumnVisitor() {
|
98
|
+
@Override
|
99
|
+
public void booleanColumn(Column column)
|
100
|
+
{
|
101
|
+
write(column);
|
102
|
+
}
|
103
|
+
|
104
|
+
@Override
|
105
|
+
public void longColumn(Column column)
|
106
|
+
{
|
107
|
+
write(column);
|
108
|
+
}
|
109
|
+
|
110
|
+
@Override
|
111
|
+
public void doubleColumn(Column column)
|
112
|
+
{
|
113
|
+
write(column);
|
114
|
+
}
|
115
|
+
|
116
|
+
@Override
|
117
|
+
public void stringColumn(Column column)
|
118
|
+
{
|
119
|
+
write(column);
|
120
|
+
}
|
121
|
+
|
122
|
+
@Override
|
123
|
+
public void timestampColumn(Column column)
|
124
|
+
{
|
125
|
+
write(column);
|
126
|
+
}
|
127
|
+
|
128
|
+
private void write(Column column)
|
129
|
+
{
|
130
|
+
FieldWriter fieldWriter = fieldWriters.getFieldWriter(column.getIndex());
|
131
|
+
try {
|
132
|
+
fieldWriter.writeKeyValue(builder, pageReader, column);
|
133
|
+
} catch (IOException e) {
|
134
|
+
throw Throwables.propagate(e);
|
135
|
+
}
|
136
|
+
}
|
137
|
+
});
|
138
|
+
|
139
|
+
builder.writeMapEnd();
|
140
|
+
|
141
|
+
if (builder.getWrittenSize() > fileSplitSize) {
|
142
|
+
flush();
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
} catch (IOException e) {
|
147
|
+
throw Throwables.propagate(e);
|
148
|
+
}
|
149
|
+
}
|
150
|
+
|
151
|
+
public void flush() throws IOException
|
152
|
+
{
|
153
|
+
builder.finish();
|
154
|
+
|
155
|
+
if (builder.getRecordCount() > 0) {
|
156
|
+
log.info("{uploading: {rows: {}, size: {} bytes (compressed)}}",
|
157
|
+
builder.getRecordCount(),
|
158
|
+
NumberFormat.getNumberInstance().format(builder.getWrittenSize()));
|
159
|
+
upload(builder);
|
160
|
+
builder = null;
|
161
|
+
}
|
162
|
+
|
163
|
+
prepareNextBuilder();
|
164
|
+
}
|
165
|
+
|
166
|
+
private void upload(final MsgpackGZFileBuilder builder)
|
167
|
+
throws IOException
|
168
|
+
{
|
169
|
+
executor.joinPartial(uploadConcurrency - 1);
|
170
|
+
executor.submit(new Callable<Void>() {
|
171
|
+
@Override
|
172
|
+
public Void call() throws Exception {
|
173
|
+
client.uploadBulkImport(sessionName, builder.getFile());
|
174
|
+
return null;
|
175
|
+
}
|
176
|
+
}, builder);
|
177
|
+
seqid++;
|
178
|
+
}
|
179
|
+
|
180
|
+
@Override
|
181
|
+
public void finish()
|
182
|
+
{
|
183
|
+
try {
|
184
|
+
flush();
|
185
|
+
} catch (IOException e) {
|
186
|
+
throw Throwables.propagate(e);
|
187
|
+
} finally {
|
188
|
+
close();
|
189
|
+
}
|
190
|
+
}
|
191
|
+
|
192
|
+
@Override
|
193
|
+
public void close()
|
194
|
+
{
|
195
|
+
try {
|
196
|
+
try {
|
197
|
+
executor.joinAll();
|
198
|
+
executor.shutdown(); // shutdown calls joinAll
|
199
|
+
} finally {
|
200
|
+
if (builder != null) {
|
201
|
+
builder.close();
|
202
|
+
builder = null;
|
203
|
+
}
|
204
|
+
|
205
|
+
if (client != null) {
|
206
|
+
client.close();
|
207
|
+
}
|
208
|
+
}
|
209
|
+
} catch (IOException e) {
|
210
|
+
throw Throwables.propagate(e);
|
211
|
+
}
|
212
|
+
}
|
213
|
+
|
214
|
+
@Override
|
215
|
+
public void abort()
|
216
|
+
{
|
217
|
+
// do nothing
|
218
|
+
}
|
219
|
+
|
220
|
+
@Override
|
221
|
+
public CommitReport commit()
|
222
|
+
{
|
223
|
+
CommitReport report = Exec.newCommitReport();
|
224
|
+
// TODO
|
225
|
+
return report;
|
226
|
+
}
|
227
|
+
|
228
|
+
static class FieldWriterSet
|
229
|
+
{
|
230
|
+
private enum ColumnWriterMode
|
231
|
+
{
|
232
|
+
PRIMARY_KEY,
|
233
|
+
SIMPLE_VALUE,
|
234
|
+
DUPLICATE_PRIMARY_KEY;
|
235
|
+
}
|
236
|
+
|
237
|
+
private final int fieldCount;
|
238
|
+
private final FieldWriter[] fieldWriters;
|
239
|
+
|
240
|
+
public FieldWriterSet(Logger log, PluginTask task, Schema schema)
|
241
|
+
{
|
242
|
+
Optional<String> userDefinedPrimaryKeySourceColumnName = task.getTimeColumn();
|
243
|
+
boolean hasPkWriter = false;
|
244
|
+
int duplicatePrimaryKeySourceIndex = -1;
|
245
|
+
int firstTimestampColumnIndex = -1;
|
246
|
+
|
247
|
+
int fc = 0;
|
248
|
+
fieldWriters = new FieldWriter[schema.size()];
|
249
|
+
|
250
|
+
for (int i = 0; i < schema.size(); i++) {
|
251
|
+
String columnName = schema.getColumnName(i);
|
252
|
+
Type columnType = schema.getColumnType(i);
|
253
|
+
|
254
|
+
// choose the mode
|
255
|
+
final ColumnWriterMode mode;
|
256
|
+
|
257
|
+
if (userDefinedPrimaryKeySourceColumnName.isPresent() &&
|
258
|
+
columnName.equals(userDefinedPrimaryKeySourceColumnName.get())) {
|
259
|
+
// found time_column
|
260
|
+
if ("time".equals(userDefinedPrimaryKeySourceColumnName.get())) {
|
261
|
+
mode = ColumnWriterMode.PRIMARY_KEY;
|
262
|
+
} else {
|
263
|
+
mode = ColumnWriterMode.DUPLICATE_PRIMARY_KEY;
|
264
|
+
}
|
265
|
+
|
266
|
+
} else if ("time".equals(columnName)) {
|
267
|
+
// the column name is same with the primary key name.
|
268
|
+
if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
|
269
|
+
columnName = newColumnUniqueName(columnName, schema);
|
270
|
+
mode = ColumnWriterMode.SIMPLE_VALUE;
|
271
|
+
log.warn("time_column '{}' is set but 'time' column also exists. The existent 'time' column is renamed to {}",
|
272
|
+
userDefinedPrimaryKeySourceColumnName.get(), "time", "time", columnName);
|
273
|
+
} else {
|
274
|
+
mode = ColumnWriterMode.PRIMARY_KEY;
|
275
|
+
}
|
276
|
+
|
277
|
+
} else {
|
278
|
+
mode = ColumnWriterMode.SIMPLE_VALUE;
|
279
|
+
}
|
280
|
+
|
281
|
+
// create the fieldWriters writer depending on the mode
|
282
|
+
final FieldWriter writer;
|
283
|
+
|
284
|
+
switch (mode) {
|
285
|
+
case PRIMARY_KEY:
|
286
|
+
log.info("Using {}:{} column as the data partitioning key", columnName, columnType);
|
287
|
+
if (columnType instanceof LongType) {
|
288
|
+
writer = new LongFieldWriter(columnName);
|
289
|
+
hasPkWriter = true;
|
290
|
+
} else if (columnType instanceof TimestampType) {
|
291
|
+
writer = new TimestampStringFieldWriter(task.getJRuby(), columnName);
|
292
|
+
hasPkWriter = true;
|
293
|
+
} else {
|
294
|
+
throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
|
295
|
+
columnName, columnType));
|
296
|
+
}
|
297
|
+
break;
|
298
|
+
|
299
|
+
case SIMPLE_VALUE:
|
300
|
+
if (columnType instanceof BooleanType) {
|
301
|
+
writer = new BooleanFieldWriter(columnName);
|
302
|
+
} else if (columnType instanceof LongType) {
|
303
|
+
writer = new LongFieldWriter(columnName);
|
304
|
+
} else if (columnType instanceof DoubleType) {
|
305
|
+
writer = new DoubleFieldWriter(columnName);
|
306
|
+
} else if (columnType instanceof StringType) {
|
307
|
+
writer = new StringFieldWriter(columnName);
|
308
|
+
} else if (columnType instanceof TimestampType) {
|
309
|
+
writer = new TimestampStringFieldWriter(task.getJRuby(), columnName);
|
310
|
+
if (firstTimestampColumnIndex < 0) {
|
311
|
+
firstTimestampColumnIndex = i;
|
312
|
+
}
|
313
|
+
} else {
|
314
|
+
throw new ConfigException("Unsupported type: " + columnType);
|
315
|
+
}
|
316
|
+
break;
|
317
|
+
|
318
|
+
case DUPLICATE_PRIMARY_KEY:
|
319
|
+
duplicatePrimaryKeySourceIndex = i;
|
320
|
+
writer = null; // handle later
|
321
|
+
break;
|
322
|
+
|
323
|
+
default:
|
324
|
+
throw new AssertionError();
|
325
|
+
}
|
326
|
+
|
327
|
+
fieldWriters[i] = writer;
|
328
|
+
fc += 1;
|
329
|
+
}
|
330
|
+
|
331
|
+
if (!hasPkWriter) {
|
332
|
+
// PRIMARY_KEY was not found.
|
333
|
+
if (duplicatePrimaryKeySourceIndex < 0) {
|
334
|
+
if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
|
335
|
+
throw new ConfigException(String.format("time_column '%s' does not exist", userDefinedPrimaryKeySourceColumnName.get()));
|
336
|
+
} else if (firstTimestampColumnIndex >= 0) {
|
337
|
+
// if time is not found, use the first timestamp column
|
338
|
+
duplicatePrimaryKeySourceIndex = firstTimestampColumnIndex;
|
339
|
+
} else {
|
340
|
+
throw new ConfigException(String.format("TD output plugin requires at least one timestamp column, or a long column named 'time'"));
|
341
|
+
}
|
342
|
+
}
|
343
|
+
|
344
|
+
String columnName = schema.getColumnName(duplicatePrimaryKeySourceIndex);
|
345
|
+
Type columnType = schema.getColumnType(duplicatePrimaryKeySourceIndex);
|
346
|
+
|
347
|
+
log.info("Duplicating {}:{} column to 'time' column for the data partitioning",
|
348
|
+
columnName, columnType);
|
349
|
+
|
350
|
+
FieldWriter writer;
|
351
|
+
if (columnType instanceof LongType) {
|
352
|
+
writer = new LongFieldDuplicator(columnName, "time");
|
353
|
+
} else if (columnType instanceof TimestampType) {
|
354
|
+
writer = new TimestampFieldLongDuplicator(task.getJRuby(), columnName, "time");
|
355
|
+
} else {
|
356
|
+
throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
|
357
|
+
columnName, columnType));
|
358
|
+
}
|
359
|
+
|
360
|
+
// replace existint writer
|
361
|
+
fieldWriters[duplicatePrimaryKeySourceIndex] = writer;
|
362
|
+
fc += 1;
|
363
|
+
}
|
364
|
+
|
365
|
+
fieldCount = fc;
|
366
|
+
}
|
367
|
+
|
368
|
+
private static String newColumnUniqueName(String originalName, Schema schema)
|
369
|
+
{
|
370
|
+
String name = originalName;
|
371
|
+
do {
|
372
|
+
name += "_";
|
373
|
+
} while (containsColumnName(schema, name));
|
374
|
+
return name;
|
375
|
+
}
|
376
|
+
|
377
|
+
private static boolean containsColumnName(Schema schema, String name)
|
378
|
+
{
|
379
|
+
for (Column c : schema.getColumns()) {
|
380
|
+
if (c.getName().equals(name)) {
|
381
|
+
return true;
|
382
|
+
}
|
383
|
+
}
|
384
|
+
return false;
|
385
|
+
}
|
386
|
+
|
387
|
+
public FieldWriter getFieldWriter(int index)
|
388
|
+
{
|
389
|
+
return fieldWriters[index];
|
390
|
+
}
|
391
|
+
|
392
|
+
public int getFieldCount()
|
393
|
+
{
|
394
|
+
return fieldCount;
|
395
|
+
}
|
396
|
+
}
|
397
|
+
|
398
|
+
static abstract class FieldWriter
|
399
|
+
{
|
400
|
+
private final String keyName;
|
401
|
+
|
402
|
+
protected FieldWriter(String keyName)
|
403
|
+
{
|
404
|
+
this.keyName = keyName;
|
405
|
+
}
|
406
|
+
|
407
|
+
public void writeKeyValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
408
|
+
throws IOException
|
409
|
+
{
|
410
|
+
writeKey(builder);
|
411
|
+
if (reader.isNull(column)) {
|
412
|
+
builder.writeNil();
|
413
|
+
} else {
|
414
|
+
writeValue(builder, reader, column);
|
415
|
+
}
|
416
|
+
}
|
417
|
+
|
418
|
+
private void writeKey(MsgpackGZFileBuilder builder)
|
419
|
+
throws IOException
|
420
|
+
{
|
421
|
+
builder.writeString(keyName);
|
422
|
+
}
|
423
|
+
|
424
|
+
protected abstract void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
425
|
+
throws IOException;
|
426
|
+
}
|
427
|
+
|
428
|
+
static class DoubleFieldWriter
|
429
|
+
extends FieldWriter
|
430
|
+
{
|
431
|
+
public DoubleFieldWriter(String keyName)
|
432
|
+
{
|
433
|
+
super(keyName);
|
434
|
+
}
|
435
|
+
|
436
|
+
@Override
|
437
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
438
|
+
throws IOException
|
439
|
+
{
|
440
|
+
builder.writeDouble(reader.getDouble(column));
|
441
|
+
}
|
442
|
+
}
|
443
|
+
|
444
|
+
static class BooleanFieldWriter
|
445
|
+
extends FieldWriter
|
446
|
+
{
|
447
|
+
public BooleanFieldWriter(String keyName)
|
448
|
+
{
|
449
|
+
super(keyName);
|
450
|
+
}
|
451
|
+
|
452
|
+
@Override
|
453
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
454
|
+
throws IOException
|
455
|
+
{
|
456
|
+
builder.writeBoolean(reader.getBoolean(column));
|
457
|
+
}
|
458
|
+
}
|
459
|
+
|
460
|
+
static class LongFieldWriter
|
461
|
+
extends FieldWriter
|
462
|
+
{
|
463
|
+
LongFieldWriter(String keyName)
|
464
|
+
{
|
465
|
+
super(keyName);
|
466
|
+
}
|
467
|
+
|
468
|
+
@Override
|
469
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
470
|
+
throws IOException
|
471
|
+
{
|
472
|
+
builder.writeLong(reader.getLong(column));
|
473
|
+
}
|
474
|
+
}
|
475
|
+
|
476
|
+
static class StringFieldWriter
|
477
|
+
extends FieldWriter
|
478
|
+
{
|
479
|
+
public StringFieldWriter(String keyName)
|
480
|
+
{
|
481
|
+
super(keyName);
|
482
|
+
}
|
483
|
+
|
484
|
+
@Override
|
485
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
486
|
+
throws IOException
|
487
|
+
{
|
488
|
+
builder.writeString(reader.getString(column));
|
489
|
+
}
|
490
|
+
}
|
491
|
+
|
492
|
+
static class TimestampStringFieldWriter
|
493
|
+
extends FieldWriter
|
494
|
+
{
|
495
|
+
// to format timestamp values to string by "%Y-%m-%d %H:%M:%S.%3N"
|
496
|
+
private final TimestampFormatter defaultFormatter;
|
497
|
+
|
498
|
+
public TimestampStringFieldWriter(ScriptingContainer jruby, String keyName)
|
499
|
+
{
|
500
|
+
super(keyName);
|
501
|
+
this.defaultFormatter = new TimestampFormatter(jruby, "%Y-%m-%d %H:%M:%S.%3N", DateTimeZone.UTC);
|
502
|
+
}
|
503
|
+
|
504
|
+
@Override
|
505
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
506
|
+
throws IOException
|
507
|
+
{
|
508
|
+
builder.writeString(defaultFormatter.format(reader.getTimestamp(column)));
|
509
|
+
}
|
510
|
+
}
|
511
|
+
|
512
|
+
static class TimestampLongFieldWriter
|
513
|
+
extends FieldWriter
|
514
|
+
{
|
515
|
+
public TimestampLongFieldWriter(String keyName)
|
516
|
+
{
|
517
|
+
super(keyName);
|
518
|
+
}
|
519
|
+
|
520
|
+
@Override
|
521
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
522
|
+
throws IOException
|
523
|
+
{
|
524
|
+
builder.writeLong(reader.getTimestamp(column).getEpochSecond());
|
525
|
+
}
|
526
|
+
}
|
527
|
+
|
528
|
+
static class LongFieldDuplicator
|
529
|
+
extends LongFieldWriter
|
530
|
+
{
|
531
|
+
private final LongFieldWriter timeFieldWriter;
|
532
|
+
|
533
|
+
public LongFieldDuplicator(String keyName, String duplicateKeyName)
|
534
|
+
{
|
535
|
+
super(keyName);
|
536
|
+
timeFieldWriter = new LongFieldWriter(duplicateKeyName);
|
537
|
+
}
|
538
|
+
|
539
|
+
@Override
|
540
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
541
|
+
throws IOException
|
542
|
+
{
|
543
|
+
super.writeValue(builder, reader, column);
|
544
|
+
timeFieldWriter.writeKeyValue(builder, reader, column);
|
545
|
+
}
|
546
|
+
}
|
547
|
+
|
548
|
+
static class TimestampFieldLongDuplicator
|
549
|
+
extends TimestampStringFieldWriter
|
550
|
+
{
|
551
|
+
private final TimestampLongFieldWriter timeFieldWriter;
|
552
|
+
|
553
|
+
public TimestampFieldLongDuplicator(ScriptingContainer jruby, String keyName, String longDuplicateKeyName)
|
554
|
+
{
|
555
|
+
super(jruby, keyName);
|
556
|
+
timeFieldWriter = new TimestampLongFieldWriter(longDuplicateKeyName);
|
557
|
+
}
|
558
|
+
|
559
|
+
@Override
|
560
|
+
public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
561
|
+
throws IOException
|
562
|
+
{
|
563
|
+
super.writeValue(builder, reader, column);
|
564
|
+
timeFieldWriter.writeKeyValue(builder, reader, column);
|
565
|
+
}
|
566
|
+
}
|
567
|
+
}
|