embulk-output-td 0.1.0

Files changed (41)
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/README.md +63 -0
  4. data/build.gradle +79 -0
  5. data/embulk-output-td.gemspec +18 -0
  6. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  7. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  8. data/gradlew +164 -0
  9. data/gradlew.bat +90 -0
  10. data/lib/embulk/output/td.rb +3 -0
  11. data/settings.gradle +1 -0
  12. data/src/main/java/com/treasuredata/api/TdApiClient.java +436 -0
  13. data/src/main/java/com/treasuredata/api/TdApiClientConfig.java +79 -0
  14. data/src/main/java/com/treasuredata/api/TdApiConflictException.java +10 -0
  15. data/src/main/java/com/treasuredata/api/TdApiConstants.java +6 -0
  16. data/src/main/java/com/treasuredata/api/TdApiException.java +20 -0
  17. data/src/main/java/com/treasuredata/api/TdApiExecutionException.java +10 -0
  18. data/src/main/java/com/treasuredata/api/TdApiExecutionInterruptedException.java +15 -0
  19. data/src/main/java/com/treasuredata/api/TdApiExecutionTimeoutException.java +17 -0
  20. data/src/main/java/com/treasuredata/api/TdApiNotFoundException.java +10 -0
  21. data/src/main/java/com/treasuredata/api/TdApiResponseException.java +32 -0
  22. data/src/main/java/com/treasuredata/api/model/TDArrayColumnType.java +80 -0
  23. data/src/main/java/com/treasuredata/api/model/TDBulkImportSession.java +155 -0
  24. data/src/main/java/com/treasuredata/api/model/TDColumn.java +83 -0
  25. data/src/main/java/com/treasuredata/api/model/TDColumnType.java +23 -0
  26. data/src/main/java/com/treasuredata/api/model/TDColumnTypeDeserializer.java +115 -0
  27. data/src/main/java/com/treasuredata/api/model/TDDatabase.java +48 -0
  28. data/src/main/java/com/treasuredata/api/model/TDDatabaseList.java +24 -0
  29. data/src/main/java/com/treasuredata/api/model/TDMapColumnType.java +88 -0
  30. data/src/main/java/com/treasuredata/api/model/TDPrimitiveColumnType.java +61 -0
  31. data/src/main/java/com/treasuredata/api/model/TDTable.java +64 -0
  32. data/src/main/java/com/treasuredata/api/model/TDTableList.java +33 -0
  33. data/src/main/java/com/treasuredata/api/model/TDTablePermission.java +48 -0
  34. data/src/main/java/com/treasuredata/api/model/TDTableSchema.java +44 -0
  35. data/src/main/java/com/treasuredata/api/model/TDTableType.java +36 -0
  36. data/src/main/java/org/embulk/output/FinalizableExecutorService.java +84 -0
  37. data/src/main/java/org/embulk/output/MsgpackGZFileBuilder.java +148 -0
  38. data/src/main/java/org/embulk/output/RecordWriter.java +567 -0
  39. data/src/main/java/org/embulk/output/TdOutputPlugin.java +390 -0
  40. data/src/test/java/org/embulk/output/TestTdOutputPlugin.java +5 -0
  41. metadata +119 -0
data/src/main/java/com/treasuredata/api/model/TDTablePermission.java
@@ -0,0 +1,48 @@
+ package com.treasuredata.api.model;
+
+ import com.fasterxml.jackson.annotation.JsonProperty;
+ import com.google.common.base.Objects;
+
+ public class TDTablePermission
+ {
+     private boolean importable;
+     private boolean queryable;
+
+     public TDTablePermission(
+             @JsonProperty("importable") boolean importable,
+             @JsonProperty("queryable") boolean queryable)
+     {
+         this.importable = importable;
+         this.queryable = queryable;
+     }
+
+     @JsonProperty("importable")
+     public boolean isImportable() {
+         return importable;
+     }
+
+     @JsonProperty("queryable")
+     public boolean isQueryable() {
+         return queryable;
+     }
+
+     @Override
+     public boolean equals(Object obj)
+     {
+         if (this == obj) {
+             return true;
+         }
+         if (obj == null || getClass() != obj.getClass()) {
+             return false;
+         }
+         TDTablePermission other = (TDTablePermission) obj;
+         return Objects.equal(this.importable, other.importable) &&
+                 Objects.equal(this.queryable, other.queryable);
+     }
+
+     @Override
+     public int hashCode()
+     {
+         return Objects.hashCode(importable, queryable);
+     }
+ }
data/src/main/java/com/treasuredata/api/model/TDTableSchema.java
@@ -0,0 +1,44 @@
+ package com.treasuredata.api.model;
+
+ import com.fasterxml.jackson.annotation.JsonCreator;
+ import com.fasterxml.jackson.annotation.JsonProperty;
+ import com.google.common.base.Objects;
+
+ import java.util.List;
+
+ public class TDTableSchema
+ {
+     private List<TDColumn> columns;
+
+     @JsonCreator
+     public TDTableSchema(
+             @JsonProperty("columns") List<TDColumn> columns)
+     {
+         this.columns = columns;
+     }
+
+     @JsonProperty
+     public List<TDColumn> getColumns()
+     {
+         return columns;
+     }
+
+     @Override
+     public boolean equals(Object obj)
+     {
+         if (this == obj) {
+             return true;
+         }
+         if (obj == null || getClass() != obj.getClass()) {
+             return false;
+         }
+         TDTableSchema other = (TDTableSchema) obj;
+         return Objects.equal(this.columns, other.columns);
+     }
+
+     @Override
+     public int hashCode()
+     {
+         return Objects.hashCode(columns);
+     }
+ }
data/src/main/java/com/treasuredata/api/model/TDTableType.java
@@ -0,0 +1,36 @@
+ package com.treasuredata.api.model;
+
+ import com.fasterxml.jackson.annotation.JsonCreator;
+ import com.fasterxml.jackson.annotation.JsonValue;
+ import com.fasterxml.jackson.databind.RuntimeJsonMappingException;
+
+ public enum TDTableType
+ {
+     LOG("log"),
+     ITEM("item");
+
+     private String name;
+
+     private TDTableType(String name)
+     {
+         this.name = name;
+     }
+
+     @JsonCreator
+     public static TDTableType fromName(String name)
+     {
+         if ("log".equals(name)) {
+             return LOG;
+         } else if ("item".equals(name)) {
+             return ITEM;
+         }
+         throw new RuntimeJsonMappingException("Unexpected string value to deserialize TDTableType: " + name);
+     }
+
+     @JsonValue
+     @Override
+     public String toString()
+     {
+         return name;
+     }
+ }
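For reference, a minimal sketch (not part of the gem) of how this enum round-trips through Jackson: @JsonValue emits the lowercase name on write, and @JsonCreator routes the string back through fromName() on read.

import com.fasterxml.jackson.databind.ObjectMapper;
import com.treasuredata.api.model.TDTableType;

public class TDTableTypeRoundTrip
{
    public static void main(String[] args) throws Exception
    {
        ObjectMapper mapper = new ObjectMapper();
        String json = mapper.writeValueAsString(TDTableType.LOG);     // the JSON string "log"
        TDTableType type = mapper.readValue(json, TDTableType.class); // back to TDTableType.LOG
        System.out.println(json + " -> " + type);
    }
}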
data/src/main/java/org/embulk/output/FinalizableExecutorService.java
@@ -0,0 +1,84 @@
+ package org.embulk.output;
+
+ import java.io.Closeable;
+ import java.io.IOException;
+ import java.util.LinkedList;
+ import java.util.Queue;
+ import java.util.concurrent.Callable;
+ import java.util.concurrent.ExecutionException;
+ import java.util.concurrent.ExecutorService;
+ import java.util.concurrent.Executors;
+ import java.util.concurrent.Future;
+
+ public class FinalizableExecutorService
+ {
+     public static class NotCloseable
+             implements Closeable
+     {
+         @Override
+         public void close()
+                 throws IOException {
+             // ignore
+         }
+     }
+
+     protected ExecutorService threads;
+     protected Queue<RunningTask> runningTasks;
+
+     public FinalizableExecutorService() {
+         this.threads = Executors.newCachedThreadPool();
+         this.runningTasks = new LinkedList<>();
+     }
+
+     private static class RunningTask {
+         private Future<Void> future;
+         private Closeable finalizer;
+
+         RunningTask(Future<Void> future, Closeable finalizer) {
+             this.future = future;
+             this.finalizer = finalizer;
+         }
+
+         public void join() throws IOException {
+             try {
+                 future.get();
+             } catch (InterruptedException ex) {
+                 throw new IOException(ex);
+             } catch (ExecutionException ex) {
+                 throw new IOException(ex.getCause());
+             }
+             finalizer.close();
+         }
+
+         public void abort() throws IOException {
+             finalizer.close();
+         }
+     }
+
+     public void submit(Callable<Void> task, Closeable finalizer) {
+         Future<Void> future = threads.submit(task);
+         runningTasks.add(new RunningTask(future, finalizer));
+     }
+
+     public void joinPartial(long upto) throws IOException {
+         while (runningTasks.size() > upto) {
+             runningTasks.peek().join();
+             runningTasks.remove();
+         }
+     }
+
+     public void joinAll() throws IOException {
+         joinPartial(0);
+     }
+
+     public void shutdown() throws IOException {
+         try {
+             joinAll();
+         } finally {
+             threads.shutdown();
+             for (RunningTask task : runningTasks) {
+                 task.abort();
+             }
+         }
+     }
+ }
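A minimal usage sketch (hypothetical, not from the plugin source) of how submit() pairs a background task with a Closeable finalizer that is closed after the task is joined:

import java.io.Closeable;
import java.io.File;
import java.util.concurrent.Callable;
import org.embulk.output.FinalizableExecutorService;

public class FinalizableExecutorServiceExample
{
    public static void main(String[] args) throws Exception
    {
        FinalizableExecutorService executor = new FinalizableExecutorService();
        final File part = File.createTempFile("example-", ".tmp");

        // The Callable does the work; the Closeable is closed once join() succeeds.
        executor.submit(new Callable<Void>() {
            @Override
            public Void call() throws Exception {
                // e.g. upload `part` somewhere
                return null;
            }
        }, new Closeable() {
            @Override
            public void close() {
                part.delete(); // clean up the temporary file after the task completes
            }
        });

        executor.joinAll();  // wait for every submitted task and run its finalizer
        executor.shutdown(); // joins again, then stops the underlying thread pool
    }
}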
data/src/main/java/org/embulk/output/MsgpackGZFileBuilder.java
@@ -0,0 +1,148 @@
+ package org.embulk.output;
+
+ import org.msgpack.MessagePack;
+ import org.msgpack.packer.Packer;
+
+ import java.io.BufferedOutputStream;
+ import java.io.Closeable;
+ import java.io.File;
+ import java.io.FileOutputStream;
+ import java.io.FilterOutputStream;
+ import java.io.IOException;
+ import java.io.OutputStream;
+ import java.util.zip.GZIPOutputStream;
+
+ import static com.google.common.base.Preconditions.checkNotNull;
+
+ public class MsgpackGZFileBuilder
+         implements Closeable
+ {
+     static class DataSizeFilter extends FilterOutputStream {
+         private long size = 0;
+
+         public DataSizeFilter(OutputStream out) {
+             super(out);
+         }
+
+         @Override
+         public void write(int b) throws IOException
+         {
+             size += 1;
+             super.write(b);
+         }
+
+         @Override
+         public void write(byte[] b, int off, int len) throws IOException
+         {
+             size += len;
+             super.write(b, off, len);
+         }
+
+         @Override
+         public void close() throws IOException
+         {
+             super.close();
+         }
+
+         public long size()
+         {
+             return size;
+         }
+     }
+
+     private final File file;
+     private final DataSizeFilter out;
+     private final GZIPOutputStream gzout;
+
+     private Packer packer;
+     private long recordCount;
+
+     public MsgpackGZFileBuilder(MessagePack msgpack, File file)
+             throws IOException
+     {
+         this.file = checkNotNull(file);
+         this.out = new DataSizeFilter(new BufferedOutputStream(new FileOutputStream(file)));
+         this.gzout = new GZIPOutputStream(this.out);
+         this.packer = msgpack.createPacker(this.gzout);
+
+         this.recordCount = 0;
+     }
+
+     public long getRecordCount()
+     {
+         return recordCount;
+     }
+
+     public long getWrittenSize()
+     {
+         return out.size();
+     }
+
+     public File getFile()
+     {
+         return file;
+     }
+
+     public void finish()
+             throws IOException
+     {
+         try {
+             packer.flush();
+         } finally {
+             close();
+         }
+     }
+
+     @Override
+     public void close()
+             throws IOException
+     {
+         if (packer != null) {
+             packer.close();
+             packer = null;
+         }
+     }
+
+     public void writeNil()
+             throws IOException
+     {
+         packer.writeNil();
+     }
+
+     public void writeMapBegin(int size)
+             throws IOException
+     {
+         packer.writeMapBegin(size);
+     }
+
+     public void writeMapEnd()
+             throws IOException
+     {
+         packer.writeMapEnd();
+         recordCount++;
+     }
+
+     public void writeString(String v)
+             throws IOException
+     {
+         packer.write(v);
+     }
+
+     public void writeBoolean(boolean v)
+             throws IOException
+     {
+         packer.write(v);
+     }
+
+     public void writeLong(long v)
+             throws IOException
+     {
+         packer.write(v);
+     }
+
+     public void writeDouble(double v)
+             throws IOException
+     {
+         packer.write(v);
+     }
+ }
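A short sketch (hypothetical values, not from the gem) of the builder's write cycle: one writeMapBegin/writeMapEnd pair per record, with finish() flushing the packer and gzip stream. Because the DataSizeFilter sits below the GZIPOutputStream, getWrittenSize() reports compressed bytes.

import java.io.File;
import org.embulk.output.MsgpackGZFileBuilder;
import org.msgpack.MessagePack;

public class MsgpackGZFileBuilderExample
{
    public static void main(String[] args) throws Exception
    {
        File tempFile = File.createTempFile("example-", ".msgpack.gz");
        MsgpackGZFileBuilder builder = new MsgpackGZFileBuilder(new MessagePack(), tempFile);

        builder.writeMapBegin(2);        // a record with two key/value pairs
        builder.writeString("time");
        builder.writeLong(1424451600L);
        builder.writeString("message");
        builder.writeString("hello");
        builder.writeMapEnd();           // increments the record count

        builder.finish();                // flush and close
        System.out.println(builder.getRecordCount() + " record(s), "
                + builder.getWrittenSize() + " compressed bytes written to " + builder.getFile());
    }
}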
data/src/main/java/org/embulk/output/RecordWriter.java
@@ -0,0 +1,567 @@
+ package org.embulk.output;
+
+ import com.google.common.base.Optional;
+ import com.google.common.base.Throwables;
+ import com.treasuredata.api.TdApiClient;
+ import org.embulk.config.CommitReport;
+ import org.embulk.config.ConfigException;
+ import org.embulk.output.TdOutputPlugin.PluginTask;
+ import org.embulk.spi.Column;
+ import org.embulk.spi.ColumnVisitor;
+ import org.embulk.spi.Exec;
+ import org.embulk.spi.Page;
+ import org.embulk.spi.PageReader;
+ import org.embulk.spi.Schema;
+ import org.embulk.spi.TransactionalPageOutput;
+ import org.embulk.spi.time.TimestampFormatter;
+ import org.embulk.spi.type.BooleanType;
+ import org.embulk.spi.type.DoubleType;
+ import org.embulk.spi.type.LongType;
+ import org.embulk.spi.type.StringType;
+ import org.embulk.spi.type.TimestampType;
+ import org.embulk.spi.type.Type;
+ import org.joda.time.DateTimeZone;
+ import org.jruby.embed.ScriptingContainer;
+ import org.msgpack.MessagePack;
+ import org.slf4j.Logger;
+
+ import java.io.File;
+ import java.io.IOException;
+ import java.text.NumberFormat;
+ import java.util.concurrent.Callable;
+
+ import static com.google.common.base.Preconditions.checkNotNull;
+
+ public class RecordWriter
+         implements TransactionalPageOutput
+ {
+     private final Logger log;
+     private final TdApiClient client;
+     private final String sessionName;
+
+     private final MessagePack msgpack;
+     private final FieldWriterSet fieldWriters;
+     private final File tempDir;
+
+     private int seqid = 0;
+     private PageReader pageReader;
+     private MsgpackGZFileBuilder builder;
+
+     private final FinalizableExecutorService executor;
+     private final int uploadConcurrency;
+     private final long fileSplitSize; // in bytes (the task value is given in KB)
+
+     public RecordWriter(PluginTask task, TdApiClient client, FieldWriterSet fieldWriters)
+     {
+         this.log = Exec.getLogger(getClass());
+         this.client = checkNotNull(client);
+         this.sessionName = task.getSessionName();
+
+         this.msgpack = new MessagePack();
+         this.fieldWriters = fieldWriters;
+         this.tempDir = new File(task.getTempDir());
+         this.executor = new FinalizableExecutorService();
+         this.uploadConcurrency = task.getUploadConcurrency();
+         this.fileSplitSize = task.getFileSplitSize() * 1024;
+     }
+
+     public static void validateSchema(Logger log, PluginTask task, Schema schema)
+     {
+         new FieldWriterSet(log, task, schema);
+     }
+
+     void open(final Schema schema)
+             throws IOException
+     {
+         this.pageReader = new PageReader(checkNotNull(schema));
+         prepareNextBuilder();
+     }
+
+     private void prepareNextBuilder()
+             throws IOException
+     {
+         String prefix = String.format("%s-%d-", sessionName, seqid);
+         File tempFile = File.createTempFile(prefix, ".msgpack.gz", tempDir);
+         this.builder = new MsgpackGZFileBuilder(msgpack, tempFile);
+     }
+
+     @Override
+     public void add(final Page page)
+     {
+         pageReader.setPage(checkNotNull(page));
+
+         try {
+             while (pageReader.nextRecord()) {
+                 builder.writeMapBegin(fieldWriters.getFieldCount());
+
+                 pageReader.getSchema().visitColumns(new ColumnVisitor() {
+                     @Override
+                     public void booleanColumn(Column column)
+                     {
+                         write(column);
+                     }
+
+                     @Override
+                     public void longColumn(Column column)
+                     {
+                         write(column);
+                     }
+
+                     @Override
+                     public void doubleColumn(Column column)
+                     {
+                         write(column);
+                     }
+
+                     @Override
+                     public void stringColumn(Column column)
+                     {
+                         write(column);
+                     }
+
+                     @Override
+                     public void timestampColumn(Column column)
+                     {
+                         write(column);
+                     }
+
+                     private void write(Column column)
+                     {
+                         FieldWriter fieldWriter = fieldWriters.getFieldWriter(column.getIndex());
+                         try {
+                             fieldWriter.writeKeyValue(builder, pageReader, column);
+                         } catch (IOException e) {
+                             throw Throwables.propagate(e);
+                         }
+                     }
+                 });
+
+                 builder.writeMapEnd();
+
+                 if (builder.getWrittenSize() > fileSplitSize) {
+                     flush();
+                 }
+             }
+
+         } catch (IOException e) {
+             throw Throwables.propagate(e);
+         }
+     }
+
+     public void flush() throws IOException
+     {
+         builder.finish();
+
+         if (builder.getRecordCount() > 0) {
+             log.info("{uploading: {rows: {}, size: {} bytes (compressed)}}",
+                     builder.getRecordCount(),
+                     NumberFormat.getNumberInstance().format(builder.getWrittenSize()));
+             upload(builder);
+             builder = null;
+         }
+
+         prepareNextBuilder();
+     }
+
+     private void upload(final MsgpackGZFileBuilder builder)
+             throws IOException
+     {
+         executor.joinPartial(uploadConcurrency - 1);
+         executor.submit(new Callable<Void>() {
+             @Override
+             public Void call() throws Exception {
+                 client.uploadBulkImport(sessionName, builder.getFile());
+                 return null;
+             }
+         }, builder);
+         seqid++;
+     }
+
+     @Override
+     public void finish()
+     {
+         try {
+             flush();
+         } catch (IOException e) {
+             throw Throwables.propagate(e);
+         } finally {
+             close();
+         }
+     }
+
+     @Override
+     public void close()
+     {
+         try {
+             try {
+                 executor.joinAll();
+                 executor.shutdown(); // shutdown calls joinAll
+             } finally {
+                 if (builder != null) {
+                     builder.close();
+                     builder = null;
+                 }
+
+                 if (client != null) {
+                     client.close();
+                 }
+             }
+         } catch (IOException e) {
+             throw Throwables.propagate(e);
+         }
+     }
+
+     @Override
+     public void abort()
+     {
+         // do nothing
+     }
+
+     @Override
+     public CommitReport commit()
+     {
+         CommitReport report = Exec.newCommitReport();
+         // TODO
+         return report;
+     }
+
+     static class FieldWriterSet
+     {
+         private enum ColumnWriterMode
+         {
+             PRIMARY_KEY,
+             SIMPLE_VALUE,
+             DUPLICATE_PRIMARY_KEY;
+         }
+
+         private final int fieldCount;
+         private final FieldWriter[] fieldWriters;
+
+         public FieldWriterSet(Logger log, PluginTask task, Schema schema)
+         {
+             Optional<String> userDefinedPrimaryKeySourceColumnName = task.getTimeColumn();
+             boolean hasPkWriter = false;
+             int duplicatePrimaryKeySourceIndex = -1;
+             int firstTimestampColumnIndex = -1;
+
+             int fc = 0;
+             fieldWriters = new FieldWriter[schema.size()];
+
+             for (int i = 0; i < schema.size(); i++) {
+                 String columnName = schema.getColumnName(i);
+                 Type columnType = schema.getColumnType(i);
+
+                 // choose the mode
+                 final ColumnWriterMode mode;
+
+                 if (userDefinedPrimaryKeySourceColumnName.isPresent() &&
+                         columnName.equals(userDefinedPrimaryKeySourceColumnName.get())) {
+                     // found time_column
+                     if ("time".equals(userDefinedPrimaryKeySourceColumnName.get())) {
+                         mode = ColumnWriterMode.PRIMARY_KEY;
+                     } else {
+                         mode = ColumnWriterMode.DUPLICATE_PRIMARY_KEY;
+                     }
+
+                 } else if ("time".equals(columnName)) {
+                     // the column name is the same as the primary key name.
+                     if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
+                         columnName = newColumnUniqueName(columnName, schema);
+                         mode = ColumnWriterMode.SIMPLE_VALUE;
+                         log.warn("time_column '{}' is set but a 'time' column also exists. The existing 'time' column is renamed to '{}'.",
+                                 userDefinedPrimaryKeySourceColumnName.get(), columnName);
+                     } else {
+                         mode = ColumnWriterMode.PRIMARY_KEY;
+                     }
+
+                 } else {
+                     mode = ColumnWriterMode.SIMPLE_VALUE;
+                 }
+
+                 // create the field writer depending on the mode
+                 final FieldWriter writer;
+
+                 switch (mode) {
+                 case PRIMARY_KEY:
+                     log.info("Using {}:{} column as the data partitioning key", columnName, columnType);
+                     if (columnType instanceof LongType) {
+                         writer = new LongFieldWriter(columnName);
+                         hasPkWriter = true;
+                     } else if (columnType instanceof TimestampType) {
+                         writer = new TimestampStringFieldWriter(task.getJRuby(), columnName);
+                         hasPkWriter = true;
+                     } else {
+                         throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
+                                 columnName, columnType));
+                     }
+                     break;
+
+                 case SIMPLE_VALUE:
+                     if (columnType instanceof BooleanType) {
+                         writer = new BooleanFieldWriter(columnName);
+                     } else if (columnType instanceof LongType) {
+                         writer = new LongFieldWriter(columnName);
+                     } else if (columnType instanceof DoubleType) {
+                         writer = new DoubleFieldWriter(columnName);
+                     } else if (columnType instanceof StringType) {
+                         writer = new StringFieldWriter(columnName);
+                     } else if (columnType instanceof TimestampType) {
+                         writer = new TimestampStringFieldWriter(task.getJRuby(), columnName);
+                         if (firstTimestampColumnIndex < 0) {
+                             firstTimestampColumnIndex = i;
+                         }
+                     } else {
+                         throw new ConfigException("Unsupported type: " + columnType);
+                     }
+                     break;
+
+                 case DUPLICATE_PRIMARY_KEY:
+                     duplicatePrimaryKeySourceIndex = i;
+                     writer = null; // handle later
+                     break;
+
+                 default:
+                     throw new AssertionError();
+                 }
+
+                 fieldWriters[i] = writer;
+                 fc += 1;
+             }
+
+             if (!hasPkWriter) {
+                 // PRIMARY_KEY was not found.
+                 if (duplicatePrimaryKeySourceIndex < 0) {
+                     if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
+                         throw new ConfigException(String.format("time_column '%s' does not exist", userDefinedPrimaryKeySourceColumnName.get()));
+                     } else if (firstTimestampColumnIndex >= 0) {
+                         // if 'time' is not found, use the first timestamp column
+                         duplicatePrimaryKeySourceIndex = firstTimestampColumnIndex;
+                     } else {
+                         throw new ConfigException(String.format("TD output plugin requires at least one timestamp column, or a long column named 'time'"));
+                     }
+                 }
+
+                 String columnName = schema.getColumnName(duplicatePrimaryKeySourceIndex);
+                 Type columnType = schema.getColumnType(duplicatePrimaryKeySourceIndex);
+
+                 log.info("Duplicating {}:{} column to 'time' column for the data partitioning",
+                         columnName, columnType);
+
+                 FieldWriter writer;
+                 if (columnType instanceof LongType) {
+                     writer = new LongFieldDuplicator(columnName, "time");
+                 } else if (columnType instanceof TimestampType) {
+                     writer = new TimestampFieldLongDuplicator(task.getJRuby(), columnName, "time");
+                 } else {
+                     throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
+                             columnName, columnType));
+                 }
+
+                 // replace the existing writer
+                 fieldWriters[duplicatePrimaryKeySourceIndex] = writer;
+                 fc += 1;
+             }
+
+             fieldCount = fc;
+         }
+
+         private static String newColumnUniqueName(String originalName, Schema schema)
+         {
+             String name = originalName;
+             do {
+                 name += "_";
+             } while (containsColumnName(schema, name));
+             return name;
+         }
+
+         private static boolean containsColumnName(Schema schema, String name)
+         {
+             for (Column c : schema.getColumns()) {
+                 if (c.getName().equals(name)) {
+                     return true;
+                 }
+             }
+             return false;
+         }
+
+         public FieldWriter getFieldWriter(int index)
+         {
+             return fieldWriters[index];
+         }
+
+         public int getFieldCount()
+         {
+             return fieldCount;
+         }
+     }
+
+     static abstract class FieldWriter
+     {
+         private final String keyName;
+
+         protected FieldWriter(String keyName)
+         {
+             this.keyName = keyName;
+         }
+
+         public void writeKeyValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
+                 throws IOException
+         {
+             writeKey(builder);
+             if (reader.isNull(column)) {
+                 builder.writeNil();
+             } else {
+                 writeValue(builder, reader, column);
+             }
+         }
+
+         private void writeKey(MsgpackGZFileBuilder builder)
+                 throws IOException
+         {
+             builder.writeString(keyName);
+         }
+
+         protected abstract void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
+                 throws IOException;
+     }
+
+     static class DoubleFieldWriter
+             extends FieldWriter
+     {
+         public DoubleFieldWriter(String keyName)
+         {
+             super(keyName);
+         }
+
+         @Override
+         public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
+                 throws IOException
+         {
+             builder.writeDouble(reader.getDouble(column));
+         }
+     }
+
+     static class BooleanFieldWriter
+             extends FieldWriter
+     {
+         public BooleanFieldWriter(String keyName)
+         {
+             super(keyName);
+         }
+
+         @Override
+         public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
+                 throws IOException
+         {
+             builder.writeBoolean(reader.getBoolean(column));
+         }
+     }
+
+     static class LongFieldWriter
+             extends FieldWriter
+     {
+         LongFieldWriter(String keyName)
+         {
+             super(keyName);
+         }
+
+         @Override
+         public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
+                 throws IOException
+         {
+             builder.writeLong(reader.getLong(column));
+         }
+     }
+
+     static class StringFieldWriter
+             extends FieldWriter
+     {
+         public StringFieldWriter(String keyName)
+         {
+             super(keyName);
+         }
+
+         @Override
+         public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
+                 throws IOException
+         {
+             builder.writeString(reader.getString(column));
+         }
+     }
+
+     static class TimestampStringFieldWriter
+             extends FieldWriter
+     {
+         // formats timestamp values to strings using "%Y-%m-%d %H:%M:%S.%3N"
+         private final TimestampFormatter defaultFormatter;
+
+         public TimestampStringFieldWriter(ScriptingContainer jruby, String keyName)
+         {
+             super(keyName);
+             this.defaultFormatter = new TimestampFormatter(jruby, "%Y-%m-%d %H:%M:%S.%3N", DateTimeZone.UTC);
+         }
+
+         @Override
+         public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
+                 throws IOException
+         {
+             builder.writeString(defaultFormatter.format(reader.getTimestamp(column)));
+         }
+     }
+
+     static class TimestampLongFieldWriter
+             extends FieldWriter
+     {
+         public TimestampLongFieldWriter(String keyName)
+         {
+             super(keyName);
+         }
+
+         @Override
+         public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
+                 throws IOException
+         {
+             builder.writeLong(reader.getTimestamp(column).getEpochSecond());
+         }
+     }
+
+     static class LongFieldDuplicator
+             extends LongFieldWriter
+     {
+         private final LongFieldWriter timeFieldWriter;
+
+         public LongFieldDuplicator(String keyName, String duplicateKeyName)
+         {
+             super(keyName);
+             timeFieldWriter = new LongFieldWriter(duplicateKeyName);
+         }
+
+         @Override
+         public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
+                 throws IOException
+         {
+             super.writeValue(builder, reader, column);
+             timeFieldWriter.writeKeyValue(builder, reader, column);
+         }
+     }
+
+     static class TimestampFieldLongDuplicator
+             extends TimestampStringFieldWriter
+     {
+         private final TimestampLongFieldWriter timeFieldWriter;
+
+         public TimestampFieldLongDuplicator(ScriptingContainer jruby, String keyName, String longDuplicateKeyName)
+         {
+             super(jruby, keyName);
+             timeFieldWriter = new TimestampLongFieldWriter(longDuplicateKeyName);
+         }
+
+         @Override
+         public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
+                 throws IOException
+         {
+             super.writeValue(builder, reader, column);
+             timeFieldWriter.writeKeyValue(builder, reader, column);
+         }
+     }
+ }
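To illustrate the duplicator writers above: when a timestamp column (here a hypothetical created_at) is chosen as the partitioning source, TimestampFieldLongDuplicator keeps the original value as a formatted string and appends a long 'time' key with the epoch seconds. A hand-built sketch of that record shape, using MsgpackGZFileBuilder directly:

import java.io.File;
import org.embulk.output.MsgpackGZFileBuilder;
import org.msgpack.MessagePack;

public class DuplicatedTimeKeySketch
{
    public static void main(String[] args) throws Exception
    {
        File f = File.createTempFile("dup-", ".msgpack.gz");
        MsgpackGZFileBuilder builder = new MsgpackGZFileBuilder(new MessagePack(), f);

        // Schema: created_at (timestamp, partitioning source) and message (string).
        // FieldWriterSet counts the appended 'time' key, so the map has 3 entries.
        builder.writeMapBegin(3);
        builder.writeString("created_at");
        builder.writeString("2015-02-20 17:00:00.000"); // TimestampStringFieldWriter: formatted string
        builder.writeString("time");
        builder.writeLong(1424451600L);                 // TimestampLongFieldWriter: epoch seconds
        builder.writeString("message");
        builder.writeString("hello");
        builder.writeMapEnd();

        builder.finish();
    }
}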