embulk-output-td 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/README.md +63 -0
  4. data/build.gradle +79 -0
  5. data/embulk-output-td.gemspec +18 -0
  6. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  7. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  8. data/gradlew +164 -0
  9. data/gradlew.bat +90 -0
  10. data/lib/embulk/output/td.rb +3 -0
  11. data/settings.gradle +1 -0
  12. data/src/main/java/com/treasuredata/api/TdApiClient.java +436 -0
  13. data/src/main/java/com/treasuredata/api/TdApiClientConfig.java +79 -0
  14. data/src/main/java/com/treasuredata/api/TdApiConflictException.java +10 -0
  15. data/src/main/java/com/treasuredata/api/TdApiConstants.java +6 -0
  16. data/src/main/java/com/treasuredata/api/TdApiException.java +20 -0
  17. data/src/main/java/com/treasuredata/api/TdApiExecutionException.java +10 -0
  18. data/src/main/java/com/treasuredata/api/TdApiExecutionInterruptedException.java +15 -0
  19. data/src/main/java/com/treasuredata/api/TdApiExecutionTimeoutException.java +17 -0
  20. data/src/main/java/com/treasuredata/api/TdApiNotFoundException.java +10 -0
  21. data/src/main/java/com/treasuredata/api/TdApiResponseException.java +32 -0
  22. data/src/main/java/com/treasuredata/api/model/TDArrayColumnType.java +80 -0
  23. data/src/main/java/com/treasuredata/api/model/TDBulkImportSession.java +155 -0
  24. data/src/main/java/com/treasuredata/api/model/TDColumn.java +83 -0
  25. data/src/main/java/com/treasuredata/api/model/TDColumnType.java +23 -0
  26. data/src/main/java/com/treasuredata/api/model/TDColumnTypeDeserializer.java +115 -0
  27. data/src/main/java/com/treasuredata/api/model/TDDatabase.java +48 -0
  28. data/src/main/java/com/treasuredata/api/model/TDDatabaseList.java +24 -0
  29. data/src/main/java/com/treasuredata/api/model/TDMapColumnType.java +88 -0
  30. data/src/main/java/com/treasuredata/api/model/TDPrimitiveColumnType.java +61 -0
  31. data/src/main/java/com/treasuredata/api/model/TDTable.java +64 -0
  32. data/src/main/java/com/treasuredata/api/model/TDTableList.java +33 -0
  33. data/src/main/java/com/treasuredata/api/model/TDTablePermission.java +48 -0
  34. data/src/main/java/com/treasuredata/api/model/TDTableSchema.java +44 -0
  35. data/src/main/java/com/treasuredata/api/model/TDTableType.java +36 -0
  36. data/src/main/java/org/embulk/output/FinalizableExecutorService.java +84 -0
  37. data/src/main/java/org/embulk/output/MsgpackGZFileBuilder.java +148 -0
  38. data/src/main/java/org/embulk/output/RecordWriter.java +567 -0
  39. data/src/main/java/org/embulk/output/TdOutputPlugin.java +390 -0
  40. data/src/test/java/org/embulk/output/TestTdOutputPlugin.java +5 -0
  41. metadata +119 -0
@@ -0,0 +1,48 @@
1
package com.treasuredata.api.model;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Objects;
6
+ public class TDTablePermission
7
+ {
8
+ private boolean importable;
9
+ private boolean queryable;
10
+
11
+ public TDTablePermission(
12
+ @JsonProperty("importable") boolean importable,
13
+ @JsonProperty("queryable") boolean queryable)
14
+ {
15
+ this.importable = importable;
16
+ this.queryable = queryable;
17
+ }
18
+
19
+ @JsonProperty("importable")
20
+ public boolean isImportable() {
21
+ return importable;
22
+ }
23
+
24
+ @JsonProperty("queryable")
25
+ public boolean isQueryable() {
26
+ return queryable;
27
+ }
28
+
29
+ @Override
30
+ public boolean equals(Object obj)
31
+ {
32
+ if (this == obj) {
33
+ return true;
34
+ }
35
+ if (obj == null || getClass() != obj.getClass()) {
36
+ return false;
37
+ }
38
+ TDTablePermission other = (TDTablePermission) obj;
39
+ return Objects.equal(this.importable, other.importable) &&
40
+ Objects.equal(this.queryable, other.queryable);
41
+ }
42
+
43
+ @Override
44
+ public int hashCode()
45
+ {
46
+ return Objects.hashCode(importable, queryable);
47
+ }
48
+ }
@@ -0,0 +1,44 @@
1
+ package com.treasuredata.api.model;
2
+
3
+ import com.fasterxml.jackson.annotation.JsonCreator;
4
+ import com.fasterxml.jackson.annotation.JsonProperty;
5
+ import com.google.common.base.Objects;
6
+
7
+ import java.util.List;
8
+
9
+ public class TDTableSchema
10
+ {
11
+ private List<TDColumn> columns;
12
+
13
+ @JsonCreator
14
+ public TDTableSchema(
15
+ @JsonProperty("columns") List<TDColumn> columns)
16
+ {
17
+ this.columns = columns;
18
+ }
19
+
20
+ @JsonProperty
21
+ public List<TDColumn> getColumns()
22
+ {
23
+ return columns;
24
+ }
25
+
26
+ @Override
27
+ public boolean equals(Object obj)
28
+ {
29
+ if (this == obj) {
30
+ return true;
31
+ }
32
+ if (obj == null || getClass() != obj.getClass()) {
33
+ return false;
34
+ }
35
+ TDTableSchema other = (TDTableSchema) obj;
36
+ return Objects.equal(this.columns, other.columns);
37
+ }
38
+
39
+ @Override
40
+ public int hashCode()
41
+ {
42
+ return Objects.hashCode(columns);
43
+ }
44
+ }
@@ -0,0 +1,36 @@
1
+ package com.treasuredata.api.model;
2
+
3
+ import com.fasterxml.jackson.annotation.JsonCreator;
4
+ import com.fasterxml.jackson.annotation.JsonValue;
5
+ import com.fasterxml.jackson.databind.RuntimeJsonMappingException;
6
+
7
+ public enum TDTableType
8
+ {
9
+ LOG("log"),
10
+ ITEM("item");
11
+
12
+ private String name;
13
+
14
+ private TDTableType(String name)
15
+ {
16
+ this.name = name;
17
+ }
18
+
19
+ @JsonCreator
20
+ public static TDTableType fromName(String name)
21
+ {
22
+ if ("log".equals(name)) {
23
+ return LOG;
24
+ } else if ("item".equals(name)) {
25
+ return ITEM;
26
+ }
27
+ throw new RuntimeJsonMappingException("Unexpected string tuple to deserialize TDTableType");
28
+ }
29
+
30
+ @JsonValue
31
+ @Override
32
+ public String toString()
33
+ {
34
+ return name;
35
+ }
36
+ }
@@ -0,0 +1,84 @@
1
+ package org.embulk.output;
2
+
3
+ import java.io.Closeable;
4
+ import java.io.IOException;
5
+ import java.util.LinkedList;
6
+ import java.util.Queue;
7
+ import java.util.concurrent.Callable;
8
+ import java.util.concurrent.ExecutionException;
9
+ import java.util.concurrent.ExecutorService;
10
+ import java.util.concurrent.Executors;
11
+ import java.util.concurrent.Future;
12
+
13
/**
 * Runs tasks on a cached thread pool, pairing each task with a
 * {@link Closeable} "finalizer" that is closed once the task completes
 * successfully, or when the task is aborted during shutdown.
 *
 * <p>NOTE(review): not thread-safe — {@code runningTasks} is a plain
 * LinkedList, so submit/join/shutdown are assumed to be called from a
 * single thread; confirm against callers.</p>
 */
public class FinalizableExecutorService
{
    /** A no-op finalizer for tasks that have nothing to clean up. */
    public static class NotCloseable
            implements Closeable
    {
        @Override
        public void close()
                throws IOException {
            // ignore
        }
    }

    protected ExecutorService threads;
    // FIFO queue of submitted tasks; joined oldest-first.
    protected Queue<RunningTask> runningTasks;

    public FinalizableExecutorService() {
        this.threads = Executors.newCachedThreadPool();
        this.runningTasks = new LinkedList<>();
    }

    private static class RunningTask {
        private Future<Void> future;
        private Closeable finalizer;

        RunningTask(Future<Void> future, Closeable finalizer) {
            this.future = future;
            this.finalizer = finalizer;
        }

        /**
         * Waits for the task to finish, then closes its finalizer.
         *
         * @throws IOException if the task failed, was interrupted, or the
         *         finalizer fails to close
         */
        public void join() throws IOException {
            try {
                future.get();
            } catch (InterruptedException ex) {
                // Restore the interrupt flag so callers up the stack can
                // still observe the interruption.
                Thread.currentThread().interrupt();
                throw new IOException(ex);
            } catch (ExecutionException ex) {
                throw new IOException(ex.getCause());
            }
            finalizer.close();
        }

        /** Cancels the task if still pending and closes its finalizer. */
        public void abort() throws IOException {
            // Cancel first so the task cannot keep using the resource that
            // the finalizer is about to release.
            future.cancel(true);
            finalizer.close();
        }
    }

    /** Submits a task with the finalizer to close once it completes. */
    public void submit(Callable<Void> task, Closeable finalizer) {
        Future<Void> future = threads.submit(task);
        runningTasks.add(new RunningTask(future, finalizer));
    }

    /** Joins the oldest tasks until at most {@code upto} remain outstanding. */
    public void joinPartial(long upto) throws IOException {
        while (runningTasks.size() > upto) {
            runningTasks.peek().join();
            runningTasks.remove();
        }
    }

    /** Joins every outstanding task. */
    public void joinAll() throws IOException {
        joinPartial(0);
    }

    /**
     * Joins all tasks and stops the pool. Any tasks still queued after a
     * failed join are aborted so their finalizers are not leaked.
     */
    public void shutdown() throws IOException {
        try {
            joinAll();
        } finally {
            threads.shutdown();
            for (RunningTask task : runningTasks) {
                task.abort();
            }
        }
    }
}
@@ -0,0 +1,148 @@
1
+ package org.embulk.output;
2
+
3
+ import org.msgpack.MessagePack;
4
+ import org.msgpack.packer.Packer;
5
+
6
+ import java.io.BufferedOutputStream;
7
+ import java.io.Closeable;
8
+ import java.io.File;
9
+ import java.io.FileOutputStream;
10
+ import java.io.FilterOutputStream;
11
+ import java.io.IOException;
12
+ import java.io.OutputStream;
13
+ import java.util.zip.GZIPOutputStream;
14
+
15
+ import static com.google.common.base.Preconditions.checkNotNull;
16
+
17
+ public class MsgpackGZFileBuilder
18
+ implements Closeable
19
+ {
20
+ static class DataSizeFilter extends FilterOutputStream {
21
+ private long size = 0;
22
+
23
+ public DataSizeFilter(OutputStream out) {
24
+ super(out);
25
+ }
26
+
27
+ @Override
28
+ public void write(int b) throws IOException
29
+ {
30
+ size += 1;
31
+ super.write(b);
32
+ }
33
+
34
+ @Override
35
+ public void write(byte[] b, int off, int len) throws IOException
36
+ {
37
+ size += len;
38
+ super.write(b, off, len);
39
+ }
40
+
41
+ @Override
42
+ public void close() throws IOException
43
+ {
44
+ super.close();
45
+ }
46
+
47
+ public long size()
48
+ {
49
+ return size;
50
+ }
51
+ }
52
+
53
+ private final File file;
54
+ private final DataSizeFilter out;
55
+ private final GZIPOutputStream gzout;
56
+
57
+ private Packer packer;
58
+ private long recordCount;
59
+
60
+ public MsgpackGZFileBuilder(MessagePack msgpack, File file)
61
+ throws IOException
62
+ {
63
+ this.file = checkNotNull(file);
64
+ this.out = new DataSizeFilter(new BufferedOutputStream(new FileOutputStream(file)));
65
+ this.gzout = new GZIPOutputStream(this.out);
66
+ this.packer = msgpack.createPacker(this.gzout);
67
+
68
+ this.recordCount = 0;
69
+ }
70
+
71
+ public long getRecordCount()
72
+ {
73
+ return recordCount;
74
+ }
75
+
76
+ public long getWrittenSize()
77
+ {
78
+ return out.size();
79
+ }
80
+
81
+ public File getFile()
82
+ {
83
+ return file;
84
+ }
85
+
86
+ public void finish()
87
+ throws IOException
88
+ {
89
+ try {
90
+ packer.flush();
91
+ } finally {
92
+ close();
93
+ }
94
+ }
95
+
96
+ @Override
97
+ public void close()
98
+ throws IOException
99
+ {
100
+ if (packer != null) {
101
+ packer.close();
102
+ packer = null;
103
+ }
104
+ }
105
+
106
+ public void writeNil()
107
+ throws IOException
108
+ {
109
+ packer.writeNil();
110
+ }
111
+
112
+ public void writeMapBegin(int size)
113
+ throws IOException
114
+ {
115
+ packer.writeMapBegin(size);
116
+ }
117
+
118
+ public void writeMapEnd()
119
+ throws IOException
120
+ {
121
+ packer.writeMapEnd();
122
+ recordCount++;
123
+ }
124
+
125
+ public void writeString(String v)
126
+ throws IOException
127
+ {
128
+ packer.write(v);
129
+ }
130
+
131
+ public void writeBoolean(boolean v)
132
+ throws IOException
133
+ {
134
+ packer.write(v);
135
+ }
136
+
137
+ public void writeLong(long v)
138
+ throws IOException
139
+ {
140
+ packer.write(v);
141
+ }
142
+
143
+ public void writeDouble(double v)
144
+ throws IOException
145
+ {
146
+ packer.write(v);
147
+ }
148
+ }
@@ -0,0 +1,567 @@
1
+ package org.embulk.output;
2
+
3
+ import com.google.common.base.Optional;
4
+ import com.google.common.base.Throwables;
5
+ import com.treasuredata.api.TdApiClient;
6
+ import org.embulk.config.CommitReport;
7
+ import org.embulk.config.ConfigException;
8
+ import org.embulk.output.TdOutputPlugin.PluginTask;
9
+ import org.embulk.spi.Column;
10
+ import org.embulk.spi.ColumnVisitor;
11
+ import org.embulk.spi.Exec;
12
+ import org.embulk.spi.Page;
13
+ import org.embulk.spi.PageReader;
14
+ import org.embulk.spi.Schema;
15
+ import org.embulk.spi.TransactionalPageOutput;
16
+ import org.embulk.spi.time.TimestampFormatter;
17
+ import org.embulk.spi.type.BooleanType;
18
+ import org.embulk.spi.type.DoubleType;
19
+ import org.embulk.spi.type.LongType;
20
+ import org.embulk.spi.type.StringType;
21
+ import org.embulk.spi.type.TimestampType;
22
+ import org.embulk.spi.type.Type;
23
+ import org.joda.time.DateTimeZone;
24
+ import org.jruby.embed.ScriptingContainer;
25
+ import org.msgpack.MessagePack;
26
+ import org.slf4j.Logger;
27
+
28
+ import java.io.File;
29
+ import java.io.IOException;
30
+ import java.text.NumberFormat;
31
+ import java.util.concurrent.Callable;
32
+
33
+ import static com.google.common.base.Preconditions.checkNotNull;
34
+
35
+ public class RecordWriter
36
+ implements TransactionalPageOutput
37
+ {
38
+ private final Logger log;
39
+ private final TdApiClient client;
40
+ private final String sessionName;
41
+
42
+ private final MessagePack msgpack;
43
+ private final FieldWriterSet fieldWriters;
44
+ private final File tempDir;
45
+
46
+ private int seqid = 0;
47
+ private PageReader pageReader;
48
+ private MsgpackGZFileBuilder builder;
49
+
50
+ private final FinalizableExecutorService executor;
51
+ private final int uploadConcurrency;
52
+ private final long fileSplitSize; // unit: kb
53
+
54
+ public RecordWriter(PluginTask task, TdApiClient client, FieldWriterSet fieldWriters)
55
+ {
56
+ this.log = Exec.getLogger(getClass());
57
+ this.client = checkNotNull(client);
58
+ this.sessionName = task.getSessionName();
59
+
60
+ this.msgpack = new MessagePack();
61
+ this.fieldWriters = fieldWriters;
62
+ this.tempDir = new File(task.getTempDir());
63
+ this.executor = new FinalizableExecutorService();
64
+ this.uploadConcurrency = task.getUploadConcurrency();
65
+ this.fileSplitSize = task.getFileSplitSize() * 1024;
66
+ }
67
+
68
+ public static void validateSchema(Logger log, PluginTask task, Schema schema)
69
+ {
70
+ new FieldWriterSet(log, task, schema);
71
+ }
72
+
73
+ void open(final Schema schema)
74
+ throws IOException
75
+ {
76
+ this.pageReader = new PageReader(checkNotNull(schema));
77
+ prepareNextBuilder();
78
+ }
79
+
80
+ private void prepareNextBuilder()
81
+ throws IOException
82
+ {
83
+ String prefix = String.format("%s-%d-", sessionName, seqid);
84
+ File tempFile = File.createTempFile(prefix, ".msgpack.gz", tempDir);
85
+ this.builder = new MsgpackGZFileBuilder(msgpack, tempFile);
86
+ }
87
+
88
+ @Override
89
+ public void add(final Page page)
90
+ {
91
+ pageReader.setPage(checkNotNull(page));
92
+
93
+ try {
94
+ while (pageReader.nextRecord()) {
95
+ builder.writeMapBegin(fieldWriters.getFieldCount());
96
+
97
+ pageReader.getSchema().visitColumns(new ColumnVisitor() {
98
+ @Override
99
+ public void booleanColumn(Column column)
100
+ {
101
+ write(column);
102
+ }
103
+
104
+ @Override
105
+ public void longColumn(Column column)
106
+ {
107
+ write(column);
108
+ }
109
+
110
+ @Override
111
+ public void doubleColumn(Column column)
112
+ {
113
+ write(column);
114
+ }
115
+
116
+ @Override
117
+ public void stringColumn(Column column)
118
+ {
119
+ write(column);
120
+ }
121
+
122
+ @Override
123
+ public void timestampColumn(Column column)
124
+ {
125
+ write(column);
126
+ }
127
+
128
+ private void write(Column column)
129
+ {
130
+ FieldWriter fieldWriter = fieldWriters.getFieldWriter(column.getIndex());
131
+ try {
132
+ fieldWriter.writeKeyValue(builder, pageReader, column);
133
+ } catch (IOException e) {
134
+ throw Throwables.propagate(e);
135
+ }
136
+ }
137
+ });
138
+
139
+ builder.writeMapEnd();
140
+
141
+ if (builder.getWrittenSize() > fileSplitSize) {
142
+ flush();
143
+ }
144
+ }
145
+
146
+ } catch (IOException e) {
147
+ throw Throwables.propagate(e);
148
+ }
149
+ }
150
+
151
+ public void flush() throws IOException
152
+ {
153
+ builder.finish();
154
+
155
+ if (builder.getRecordCount() > 0) {
156
+ log.info("{uploading: {rows: {}, size: {} bytes (compressed)}}",
157
+ builder.getRecordCount(),
158
+ NumberFormat.getNumberInstance().format(builder.getWrittenSize()));
159
+ upload(builder);
160
+ builder = null;
161
+ }
162
+
163
+ prepareNextBuilder();
164
+ }
165
+
166
+ private void upload(final MsgpackGZFileBuilder builder)
167
+ throws IOException
168
+ {
169
+ executor.joinPartial(uploadConcurrency - 1);
170
+ executor.submit(new Callable<Void>() {
171
+ @Override
172
+ public Void call() throws Exception {
173
+ client.uploadBulkImport(sessionName, builder.getFile());
174
+ return null;
175
+ }
176
+ }, builder);
177
+ seqid++;
178
+ }
179
+
180
+ @Override
181
+ public void finish()
182
+ {
183
+ try {
184
+ flush();
185
+ } catch (IOException e) {
186
+ throw Throwables.propagate(e);
187
+ } finally {
188
+ close();
189
+ }
190
+ }
191
+
192
+ @Override
193
+ public void close()
194
+ {
195
+ try {
196
+ try {
197
+ executor.joinAll();
198
+ executor.shutdown(); // shutdown calls joinAll
199
+ } finally {
200
+ if (builder != null) {
201
+ builder.close();
202
+ builder = null;
203
+ }
204
+
205
+ if (client != null) {
206
+ client.close();
207
+ }
208
+ }
209
+ } catch (IOException e) {
210
+ throw Throwables.propagate(e);
211
+ }
212
+ }
213
+
214
+ @Override
215
+ public void abort()
216
+ {
217
+ // do nothing
218
+ }
219
+
220
+ @Override
221
+ public CommitReport commit()
222
+ {
223
+ CommitReport report = Exec.newCommitReport();
224
+ // TODO
225
+ return report;
226
+ }
227
+
228
+ static class FieldWriterSet
229
+ {
230
+ private enum ColumnWriterMode
231
+ {
232
+ PRIMARY_KEY,
233
+ SIMPLE_VALUE,
234
+ DUPLICATE_PRIMARY_KEY;
235
+ }
236
+
237
+ private final int fieldCount;
238
+ private final FieldWriter[] fieldWriters;
239
+
240
+ public FieldWriterSet(Logger log, PluginTask task, Schema schema)
241
+ {
242
+ Optional<String> userDefinedPrimaryKeySourceColumnName = task.getTimeColumn();
243
+ boolean hasPkWriter = false;
244
+ int duplicatePrimaryKeySourceIndex = -1;
245
+ int firstTimestampColumnIndex = -1;
246
+
247
+ int fc = 0;
248
+ fieldWriters = new FieldWriter[schema.size()];
249
+
250
+ for (int i = 0; i < schema.size(); i++) {
251
+ String columnName = schema.getColumnName(i);
252
+ Type columnType = schema.getColumnType(i);
253
+
254
+ // choose the mode
255
+ final ColumnWriterMode mode;
256
+
257
+ if (userDefinedPrimaryKeySourceColumnName.isPresent() &&
258
+ columnName.equals(userDefinedPrimaryKeySourceColumnName.get())) {
259
+ // found time_column
260
+ if ("time".equals(userDefinedPrimaryKeySourceColumnName.get())) {
261
+ mode = ColumnWriterMode.PRIMARY_KEY;
262
+ } else {
263
+ mode = ColumnWriterMode.DUPLICATE_PRIMARY_KEY;
264
+ }
265
+
266
+ } else if ("time".equals(columnName)) {
267
+ // the column name is same with the primary key name.
268
+ if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
269
+ columnName = newColumnUniqueName(columnName, schema);
270
+ mode = ColumnWriterMode.SIMPLE_VALUE;
271
+ log.warn("time_column '{}' is set but 'time' column also exists. The existent 'time' column is renamed to {}",
272
+ userDefinedPrimaryKeySourceColumnName.get(), "time", "time", columnName);
273
+ } else {
274
+ mode = ColumnWriterMode.PRIMARY_KEY;
275
+ }
276
+
277
+ } else {
278
+ mode = ColumnWriterMode.SIMPLE_VALUE;
279
+ }
280
+
281
+ // create the fieldWriters writer depending on the mode
282
+ final FieldWriter writer;
283
+
284
+ switch (mode) {
285
+ case PRIMARY_KEY:
286
+ log.info("Using {}:{} column as the data partitioning key", columnName, columnType);
287
+ if (columnType instanceof LongType) {
288
+ writer = new LongFieldWriter(columnName);
289
+ hasPkWriter = true;
290
+ } else if (columnType instanceof TimestampType) {
291
+ writer = new TimestampStringFieldWriter(task.getJRuby(), columnName);
292
+ hasPkWriter = true;
293
+ } else {
294
+ throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
295
+ columnName, columnType));
296
+ }
297
+ break;
298
+
299
+ case SIMPLE_VALUE:
300
+ if (columnType instanceof BooleanType) {
301
+ writer = new BooleanFieldWriter(columnName);
302
+ } else if (columnType instanceof LongType) {
303
+ writer = new LongFieldWriter(columnName);
304
+ } else if (columnType instanceof DoubleType) {
305
+ writer = new DoubleFieldWriter(columnName);
306
+ } else if (columnType instanceof StringType) {
307
+ writer = new StringFieldWriter(columnName);
308
+ } else if (columnType instanceof TimestampType) {
309
+ writer = new TimestampStringFieldWriter(task.getJRuby(), columnName);
310
+ if (firstTimestampColumnIndex < 0) {
311
+ firstTimestampColumnIndex = i;
312
+ }
313
+ } else {
314
+ throw new ConfigException("Unsupported type: " + columnType);
315
+ }
316
+ break;
317
+
318
+ case DUPLICATE_PRIMARY_KEY:
319
+ duplicatePrimaryKeySourceIndex = i;
320
+ writer = null; // handle later
321
+ break;
322
+
323
+ default:
324
+ throw new AssertionError();
325
+ }
326
+
327
+ fieldWriters[i] = writer;
328
+ fc += 1;
329
+ }
330
+
331
+ if (!hasPkWriter) {
332
+ // PRIMARY_KEY was not found.
333
+ if (duplicatePrimaryKeySourceIndex < 0) {
334
+ if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
335
+ throw new ConfigException(String.format("time_column '%s' does not exist", userDefinedPrimaryKeySourceColumnName.get()));
336
+ } else if (firstTimestampColumnIndex >= 0) {
337
+ // if time is not found, use the first timestamp column
338
+ duplicatePrimaryKeySourceIndex = firstTimestampColumnIndex;
339
+ } else {
340
+ throw new ConfigException(String.format("TD output plugin requires at least one timestamp column, or a long column named 'time'"));
341
+ }
342
+ }
343
+
344
+ String columnName = schema.getColumnName(duplicatePrimaryKeySourceIndex);
345
+ Type columnType = schema.getColumnType(duplicatePrimaryKeySourceIndex);
346
+
347
+ log.info("Duplicating {}:{} column to 'time' column for the data partitioning",
348
+ columnName, columnType);
349
+
350
+ FieldWriter writer;
351
+ if (columnType instanceof LongType) {
352
+ writer = new LongFieldDuplicator(columnName, "time");
353
+ } else if (columnType instanceof TimestampType) {
354
+ writer = new TimestampFieldLongDuplicator(task.getJRuby(), columnName, "time");
355
+ } else {
356
+ throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
357
+ columnName, columnType));
358
+ }
359
+
360
+ // replace existint writer
361
+ fieldWriters[duplicatePrimaryKeySourceIndex] = writer;
362
+ fc += 1;
363
+ }
364
+
365
+ fieldCount = fc;
366
+ }
367
+
368
+ private static String newColumnUniqueName(String originalName, Schema schema)
369
+ {
370
+ String name = originalName;
371
+ do {
372
+ name += "_";
373
+ } while (containsColumnName(schema, name));
374
+ return name;
375
+ }
376
+
377
+ private static boolean containsColumnName(Schema schema, String name)
378
+ {
379
+ for (Column c : schema.getColumns()) {
380
+ if (c.getName().equals(name)) {
381
+ return true;
382
+ }
383
+ }
384
+ return false;
385
+ }
386
+
387
+ public FieldWriter getFieldWriter(int index)
388
+ {
389
+ return fieldWriters[index];
390
+ }
391
+
392
+ public int getFieldCount()
393
+ {
394
+ return fieldCount;
395
+ }
396
+ }
397
+
398
+ static abstract class FieldWriter
399
+ {
400
+ private final String keyName;
401
+
402
+ protected FieldWriter(String keyName)
403
+ {
404
+ this.keyName = keyName;
405
+ }
406
+
407
+ public void writeKeyValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
408
+ throws IOException
409
+ {
410
+ writeKey(builder);
411
+ if (reader.isNull(column)) {
412
+ builder.writeNil();
413
+ } else {
414
+ writeValue(builder, reader, column);
415
+ }
416
+ }
417
+
418
+ private void writeKey(MsgpackGZFileBuilder builder)
419
+ throws IOException
420
+ {
421
+ builder.writeString(keyName);
422
+ }
423
+
424
+ protected abstract void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
425
+ throws IOException;
426
+ }
427
+
428
+ static class DoubleFieldWriter
429
+ extends FieldWriter
430
+ {
431
+ public DoubleFieldWriter(String keyName)
432
+ {
433
+ super(keyName);
434
+ }
435
+
436
+ @Override
437
+ public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
438
+ throws IOException
439
+ {
440
+ builder.writeDouble(reader.getDouble(column));
441
+ }
442
+ }
443
+
444
+ static class BooleanFieldWriter
445
+ extends FieldWriter
446
+ {
447
+ public BooleanFieldWriter(String keyName)
448
+ {
449
+ super(keyName);
450
+ }
451
+
452
+ @Override
453
+ public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
454
+ throws IOException
455
+ {
456
+ builder.writeBoolean(reader.getBoolean(column));
457
+ }
458
+ }
459
+
460
+ static class LongFieldWriter
461
+ extends FieldWriter
462
+ {
463
+ LongFieldWriter(String keyName)
464
+ {
465
+ super(keyName);
466
+ }
467
+
468
+ @Override
469
+ public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
470
+ throws IOException
471
+ {
472
+ builder.writeLong(reader.getLong(column));
473
+ }
474
+ }
475
+
476
+ static class StringFieldWriter
477
+ extends FieldWriter
478
+ {
479
+ public StringFieldWriter(String keyName)
480
+ {
481
+ super(keyName);
482
+ }
483
+
484
+ @Override
485
+ public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
486
+ throws IOException
487
+ {
488
+ builder.writeString(reader.getString(column));
489
+ }
490
+ }
491
+
492
+ static class TimestampStringFieldWriter
493
+ extends FieldWriter
494
+ {
495
+ // to format timestamp values to string by "%Y-%m-%d %H:%M:%S.%3N"
496
+ private final TimestampFormatter defaultFormatter;
497
+
498
+ public TimestampStringFieldWriter(ScriptingContainer jruby, String keyName)
499
+ {
500
+ super(keyName);
501
+ this.defaultFormatter = new TimestampFormatter(jruby, "%Y-%m-%d %H:%M:%S.%3N", DateTimeZone.UTC);
502
+ }
503
+
504
+ @Override
505
+ public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
506
+ throws IOException
507
+ {
508
+ builder.writeString(defaultFormatter.format(reader.getTimestamp(column)));
509
+ }
510
+ }
511
+
512
+ static class TimestampLongFieldWriter
513
+ extends FieldWriter
514
+ {
515
+ public TimestampLongFieldWriter(String keyName)
516
+ {
517
+ super(keyName);
518
+ }
519
+
520
+ @Override
521
+ public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
522
+ throws IOException
523
+ {
524
+ builder.writeLong(reader.getTimestamp(column).getEpochSecond());
525
+ }
526
+ }
527
+
528
+ static class LongFieldDuplicator
529
+ extends LongFieldWriter
530
+ {
531
+ private final LongFieldWriter timeFieldWriter;
532
+
533
+ public LongFieldDuplicator(String keyName, String duplicateKeyName)
534
+ {
535
+ super(keyName);
536
+ timeFieldWriter = new LongFieldWriter(duplicateKeyName);
537
+ }
538
+
539
+ @Override
540
+ public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
541
+ throws IOException
542
+ {
543
+ super.writeValue(builder, reader, column);
544
+ timeFieldWriter.writeKeyValue(builder, reader, column);
545
+ }
546
+ }
547
+
548
+ static class TimestampFieldLongDuplicator
549
+ extends TimestampStringFieldWriter
550
+ {
551
+ private final TimestampLongFieldWriter timeFieldWriter;
552
+
553
+ public TimestampFieldLongDuplicator(ScriptingContainer jruby, String keyName, String longDuplicateKeyName)
554
+ {
555
+ super(jruby, keyName);
556
+ timeFieldWriter = new TimestampLongFieldWriter(longDuplicateKeyName);
557
+ }
558
+
559
+ @Override
560
+ public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
561
+ throws IOException
562
+ {
563
+ super.writeValue(builder, reader, column);
564
+ timeFieldWriter.writeKeyValue(builder, reader, column);
565
+ }
566
+ }
567
+ }