embulk-output-td 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -1
- data/README.md +8 -0
- data/build.gradle +1 -1
- data/embulk-output-td.gemspec +1 -1
- data/src/main/java/com/treasuredata/api/TdApiClient.java +38 -2
- data/src/main/java/com/treasuredata/api/model/TDColumn.java +45 -1
- data/src/main/java/com/treasuredata/api/model/TDTable.java +2 -2
- data/src/main/java/org/embulk/output/td/RecordWriter.java +4 -52
- data/src/main/java/org/embulk/output/td/TdOutputPlugin.java +166 -24
- data/src/main/java/org/embulk/output/td/writer/FieldWriterSet.java +123 -4
- data/src/test/java/org/embulk/output/td/TestRecordWriter.java +52 -5
- data/src/test/java/org/embulk/output/td/TestTdOutputPlugin.java +23 -13
- data/src/test/java/org/embulk/output/td/writer/TestFieldWriterSet.java +13 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7f75918c0833bfac5a887d29736ec84c572e9594
|
4
|
+
data.tar.gz: b4079b5ed3740e5f36c3f3c60763549dd2921ff8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 80584f78e0fc8c85255e4a10bc53e2e39819253634f22d109e0c5542da4c4e83f7eb43fefc94f9d21ac8cdfe5be6ae3704684f6622b5728f5d6a66c4a9975602
|
7
|
+
data.tar.gz: 5134cd1a02a2c96c8330206d5ced8780ebae83954b825d8bf606b76cf44b9a31bb1801a9133ac5cc4b56c7a5305c66093f1c01839ee36d1113111edf8b2b4d0a
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,12 @@
|
|
1
|
-
## 0.1.
|
1
|
+
## 0.1.7 - 2016-01-07
|
2
|
+
|
3
|
+
* [new feature] Add time_value option [#16](https://github.com/treasure-data/embulk-output-td/pull/16)
|
4
|
+
* [new feature] Merge exact column types into the table schema [#25](https://github.com/treasure-data/embulk-output-td/pull/25)
|
5
|
+
* [new feature] Add stop_on_invalid_record option [#26](https://github.com/treasure-data/embulk-output-td/pull/26)
|
6
|
+
* [new feature] Show skipped records by a perform job [#28](https://github.com/treasure-data/embulk-output-td/pull/28)
|
7
|
+
* [maintenance] Use rename endpoint with 'overwrite' parameter [#23](https://github.com/treasure-data/embulk-output-td/pull/23)
|
8
|
+
|
9
|
+
## 0.1.6 - 2015-12-08
|
2
10
|
|
3
11
|
* [maintenance] Upgrade Embulk v0.7.10 [#22](https://github.com/treasure-data/embulk-output-td/pull/22)
|
4
12
|
* [maintenance] Upgrade Embulk v0.7.5 [#21](https://github.com/treasure-data/embulk-output-td/pull/21)
|
data/README.md
CHANGED
@@ -24,6 +24,8 @@ TODO: Write short description here
|
|
24
24
|
- **tmpdir**: temporary directory
|
25
25
|
- **upload_concurrency**: upload concurrency (int, default=2). max concurrency is 8.
|
26
26
|
- **file_split_size**: split size (long, default=16384 (16MB)).
|
27
|
+
- **stop_on_invalid_record**: stop the bulk load transaction if a file includes an invalid record (such as an invalid timestamp) (boolean, default=false).
|
28
|
+
- **displayed_error_records_count_limit**: limit the count of the shown error records skipped by the perform job (int, default=10).
|
27
29
|
- **default_timestamp_type_convert_to**: configure output type of timestamp columns. Available options are "sec" (convert timestamp to UNIX timestamp in seconds) and "string" (convert timestamp to string). (string, default: `"string"`)
|
28
30
|
- **default_timezone**: default timezone (string, default='UTC')
|
29
31
|
- **default_timestamp_format**: default timestamp format (string, default=`%Y-%m-%d %H:%M:%S.%6N`)
|
@@ -43,6 +45,12 @@ out:
|
|
43
45
|
time_column: created_at
|
44
46
|
```
|
45
47
|
|
48
|
+
## Install
|
49
|
+
|
50
|
+
```
|
51
|
+
$ embulk gem install embulk-output-td
|
52
|
+
```
|
53
|
+
|
46
54
|
### Http Proxy Configuration
|
47
55
|
If you want to add your Http Proxy configuration, you can use `http_proxy` parameter:
|
48
56
|
```yaml
|
data/build.gradle
CHANGED
data/embulk-output-td.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-output-td"
|
4
|
-
spec.version = "0.1.
|
4
|
+
spec.version = "0.1.7"
|
5
5
|
spec.authors = ["Muga Nishizawa"]
|
6
6
|
spec.summary = %[TreasureData output plugin for Embulk]
|
7
7
|
spec.description = %[TreasureData output plugin is an Embulk plugin that loads records to TreasureData read by any input plugins. Search the input plugins by 'embulk-output' keyword.]
|
@@ -3,8 +3,10 @@ package com.treasuredata.api;
|
|
3
3
|
import com.fasterxml.jackson.databind.DeserializationFeature;
|
4
4
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
5
5
|
import com.google.common.annotations.VisibleForTesting;
|
6
|
+
import com.google.common.base.Throwables;
|
6
7
|
import com.google.common.collect.ImmutableMap;
|
7
8
|
import com.treasuredata.api.model.TDBulkImportSession;
|
9
|
+
import com.treasuredata.api.model.TDColumn;
|
8
10
|
import com.treasuredata.api.model.TDDatabase;
|
9
11
|
import com.treasuredata.api.model.TDDatabaseList;
|
10
12
|
import com.treasuredata.api.model.TDTable;
|
@@ -16,6 +18,8 @@ import org.eclipse.jetty.client.Origin;
|
|
16
18
|
import org.eclipse.jetty.client.ProxyConfiguration;
|
17
19
|
import org.eclipse.jetty.client.api.ContentResponse;
|
18
20
|
import org.eclipse.jetty.client.api.Request;
|
21
|
+
import org.eclipse.jetty.client.util.InputStreamContentProvider;
|
22
|
+
import org.eclipse.jetty.client.util.InputStreamResponseListener;
|
19
23
|
import org.eclipse.jetty.client.util.StringContentProvider;
|
20
24
|
import org.eclipse.jetty.http.HttpMethod;
|
21
25
|
import org.eclipse.jetty.util.HttpCookieStore;
|
@@ -27,6 +31,7 @@ import java.io.ByteArrayOutputStream;
|
|
27
31
|
import java.io.Closeable;
|
28
32
|
import java.io.File;
|
29
33
|
import java.io.IOException;
|
34
|
+
import java.io.InputStream;
|
30
35
|
import java.io.UnsupportedEncodingException;
|
31
36
|
import java.net.URLEncoder;
|
32
37
|
import java.security.MessageDigest;
|
@@ -37,6 +42,9 @@ import java.util.Date;
|
|
37
42
|
import java.util.List;
|
38
43
|
import java.util.Locale;
|
39
44
|
import java.util.Map;
|
45
|
+
import java.util.concurrent.ExecutionException;
|
46
|
+
import java.util.concurrent.TimeUnit;
|
47
|
+
import java.util.concurrent.TimeoutException;
|
40
48
|
|
41
49
|
public class TdApiClient
|
42
50
|
implements Closeable
|
@@ -157,10 +165,21 @@ public class TdApiClient
|
|
157
165
|
ContentResponse response = executeExchange(request);
|
158
166
|
}
|
159
167
|
|
160
|
-
public void renameTable(String databaseName, String oldName, String newName)
|
168
|
+
public void renameTable(String databaseName, String oldName, String newName, boolean overwrite)
|
161
169
|
{
|
162
170
|
Request request = prepareExchange(HttpMethod.POST,
|
163
|
-
buildUrl("/v3/table/rename", databaseName, oldName, newName)
|
171
|
+
buildUrl("/v3/table/rename", databaseName, oldName, newName),
|
172
|
+
ImmutableMap.<String, String>of(),
|
173
|
+
ImmutableMap.of("overwrite", Boolean.toString(overwrite)));
|
174
|
+
ContentResponse response = executeExchange(request);
|
175
|
+
}
|
176
|
+
|
177
|
+
public void updateSchema(String databaseName, String tableName, List<TDColumn> newSchema)
|
178
|
+
{
|
179
|
+
Request request = prepareExchange(HttpMethod.POST,
|
180
|
+
buildUrl("/v3/table/update-schema", databaseName, tableName),
|
181
|
+
ImmutableMap.<String, String>of(),
|
182
|
+
ImmutableMap.of("schema", formatRequestParameterObject(newSchema)));
|
164
183
|
ContentResponse response = executeExchange(request);
|
165
184
|
}
|
166
185
|
|
@@ -231,6 +250,23 @@ public class TdApiClient
|
|
231
250
|
ContentResponse response = executeExchange(request);
|
232
251
|
}
|
233
252
|
|
253
|
+
public InputStream getBulkImportErrorRecords(String sessionName)
|
254
|
+
{
|
255
|
+
// TODO use td-client-java v0.7
|
256
|
+
|
257
|
+
Request request = prepareExchange(HttpMethod.GET,
|
258
|
+
buildUrl("/v3/bulk_import/error_records", sessionName));
|
259
|
+
InputStreamResponseListener listener = new InputStreamResponseListener();
|
260
|
+
request.send(listener);
|
261
|
+
try {
|
262
|
+
listener.get(60000, TimeUnit.MILLISECONDS); // 60 sec.
|
263
|
+
return listener.getInputStream();
|
264
|
+
}
|
265
|
+
catch (InterruptedException | ExecutionException | TimeoutException e) {
|
266
|
+
throw Throwables.propagate(e);
|
267
|
+
}
|
268
|
+
}
|
269
|
+
|
234
270
|
private Request prepareExchange(HttpMethod method, String url)
|
235
271
|
{
|
236
272
|
return prepareExchange(method, url, Collections.<String, String>emptyMap(),
|
@@ -4,6 +4,14 @@ import com.fasterxml.jackson.annotation.JsonCreator;
|
|
4
4
|
import com.fasterxml.jackson.annotation.JsonValue;
|
5
5
|
import com.fasterxml.jackson.databind.RuntimeJsonMappingException;
|
6
6
|
import com.google.common.base.Objects;
|
7
|
+
import com.google.common.base.Strings;
|
8
|
+
import com.google.common.collect.Lists;
|
9
|
+
import org.json.simple.JSONArray;
|
10
|
+
import org.json.simple.parser.JSONParser;
|
11
|
+
import org.json.simple.parser.ParseException;
|
12
|
+
|
13
|
+
import java.util.ArrayList;
|
14
|
+
import java.util.List;
|
7
15
|
|
8
16
|
public class TDColumn
|
9
17
|
{
|
@@ -33,8 +41,44 @@ public class TDColumn
|
|
33
41
|
return key;
|
34
42
|
}
|
35
43
|
|
44
|
+
private static JSONArray castToArray(Object obj)
|
45
|
+
{
|
46
|
+
if (obj instanceof JSONArray) {
|
47
|
+
return (JSONArray) obj;
|
48
|
+
}
|
49
|
+
else {
|
50
|
+
throw new RuntimeJsonMappingException("Not an json array: " + obj);
|
51
|
+
}
|
52
|
+
}
|
53
|
+
|
54
|
+
public static List<TDColumn> parseTuple(String jsonStr)
|
55
|
+
{
|
56
|
+
if (Strings.isNullOrEmpty(jsonStr)) {
|
57
|
+
return new ArrayList<>(0);
|
58
|
+
}
|
59
|
+
|
60
|
+
// unescape json quotation
|
61
|
+
try {
|
62
|
+
String unescaped = jsonStr.replaceAll("\\\"", "\"");
|
63
|
+
JSONArray arr = castToArray(new JSONParser().parse(unescaped));
|
64
|
+
List<TDColumn> columnList = new ArrayList<>(arr.size());
|
65
|
+
for (Object e : arr) {
|
66
|
+
JSONArray columnNameAndType = castToArray(e);
|
67
|
+
String[] s = new String[columnNameAndType.size()];
|
68
|
+
for (int i = 0; i < columnNameAndType.size(); ++i) {
|
69
|
+
s[i] = columnNameAndType.get(i).toString();
|
70
|
+
}
|
71
|
+
columnList.add(parseTuple(s));
|
72
|
+
}
|
73
|
+
return columnList;
|
74
|
+
}
|
75
|
+
catch (ParseException e) {
|
76
|
+
return new ArrayList<>(0);
|
77
|
+
}
|
78
|
+
}
|
79
|
+
|
36
80
|
@JsonCreator
|
37
|
-
public static TDColumn
|
81
|
+
public static TDColumn parseTuple(String[] tuple)
|
38
82
|
{
|
39
83
|
// TODO encode key in some ways
|
40
84
|
if (tuple != null && tuple.length == 2) {
|
@@ -16,11 +16,11 @@ public class TDTable
|
|
16
16
|
public TDTable(
|
17
17
|
@JsonProperty("name") String name,
|
18
18
|
@JsonProperty("type") TDTableType type,
|
19
|
-
@JsonProperty("
|
19
|
+
@JsonProperty("schema") String schema)
|
20
20
|
{
|
21
21
|
this.name = name;
|
22
22
|
this.type = type;
|
23
|
-
this.columns =
|
23
|
+
this.columns = TDColumn.parseTuple(schema);
|
24
24
|
}
|
25
25
|
|
26
26
|
@JsonProperty
|
@@ -5,11 +5,7 @@ import com.google.common.base.Stopwatch;
|
|
5
5
|
import com.google.common.base.Throwables;
|
6
6
|
import com.treasuredata.api.TdApiClient;
|
7
7
|
import org.embulk.config.TaskReport;
|
8
|
-
import org.embulk.output.td.writer.FieldWriter;
|
9
|
-
import org.embulk.output.td.writer.IFieldWriter;
|
10
8
|
import org.embulk.output.td.writer.FieldWriterSet;
|
11
|
-
import org.embulk.spi.Column;
|
12
|
-
import org.embulk.spi.ColumnVisitor;
|
13
9
|
import org.embulk.spi.Exec;
|
14
10
|
import org.embulk.spi.Page;
|
15
11
|
import org.embulk.spi.PageReader;
|
@@ -68,7 +64,8 @@ public class RecordWriter
|
|
68
64
|
new FieldWriterSet(log, task, schema);
|
69
65
|
}
|
70
66
|
|
71
|
-
|
67
|
+
@VisibleForTesting
|
68
|
+
public void open(final Schema schema)
|
72
69
|
throws IOException
|
73
70
|
{
|
74
71
|
this.pageReader = new PageReader(checkNotNull(schema));
|
@@ -84,7 +81,7 @@ public class RecordWriter
|
|
84
81
|
}
|
85
82
|
|
86
83
|
@VisibleForTesting
|
87
|
-
MsgpackGZFileBuilder getBuilder()
|
84
|
+
public MsgpackGZFileBuilder getBuilder()
|
88
85
|
{
|
89
86
|
return builder;
|
90
87
|
}
|
@@ -96,52 +93,7 @@ public class RecordWriter
|
|
96
93
|
|
97
94
|
try {
|
98
95
|
while (pageReader.nextRecord()) {
|
99
|
-
|
100
|
-
|
101
|
-
pageReader.getSchema().visitColumns(new ColumnVisitor() {
|
102
|
-
@Override
|
103
|
-
public void booleanColumn(Column column)
|
104
|
-
{
|
105
|
-
write(column);
|
106
|
-
}
|
107
|
-
|
108
|
-
@Override
|
109
|
-
public void longColumn(Column column)
|
110
|
-
{
|
111
|
-
write(column);
|
112
|
-
}
|
113
|
-
|
114
|
-
@Override
|
115
|
-
public void doubleColumn(Column column)
|
116
|
-
{
|
117
|
-
write(column);
|
118
|
-
}
|
119
|
-
|
120
|
-
@Override
|
121
|
-
public void stringColumn(Column column)
|
122
|
-
{
|
123
|
-
write(column);
|
124
|
-
}
|
125
|
-
|
126
|
-
@Override
|
127
|
-
public void timestampColumn(Column column)
|
128
|
-
{
|
129
|
-
write(column);
|
130
|
-
}
|
131
|
-
|
132
|
-
private void write(Column column)
|
133
|
-
{
|
134
|
-
IFieldWriter fieldWriter = fieldWriters.getFieldWriter(column.getIndex());
|
135
|
-
try {
|
136
|
-
fieldWriter.writeKeyValue(builder, pageReader, column);
|
137
|
-
}
|
138
|
-
catch (IOException e) {
|
139
|
-
throw Throwables.propagate(e);
|
140
|
-
}
|
141
|
-
}
|
142
|
-
});
|
143
|
-
|
144
|
-
builder.writeMapEnd();
|
96
|
+
fieldWriters.addRecord(builder, pageReader);
|
145
97
|
|
146
98
|
if (builder.getWrittenSize() > fileSplitSize) {
|
147
99
|
flush();
|
@@ -1,8 +1,15 @@
|
|
1
1
|
package org.embulk.output.td;
|
2
2
|
|
3
3
|
import java.io.IOException;
|
4
|
+
import java.io.InputStream;
|
4
5
|
import java.util.List;
|
6
|
+
import java.util.ArrayList;
|
5
7
|
import java.util.Map;
|
8
|
+
import java.util.HashMap;
|
9
|
+
import java.nio.charset.StandardCharsets;
|
10
|
+
import java.util.regex.Pattern;
|
11
|
+
import java.util.zip.GZIPInputStream;
|
12
|
+
|
6
13
|
import javax.validation.constraints.Min;
|
7
14
|
import javax.validation.constraints.Max;
|
8
15
|
|
@@ -19,6 +26,9 @@ import com.treasuredata.api.TdApiNotFoundException;
|
|
19
26
|
import com.treasuredata.api.model.TDBulkImportSession;
|
20
27
|
import com.treasuredata.api.model.TDBulkImportSession.ImportStatus;
|
21
28
|
import com.treasuredata.api.model.TDTable;
|
29
|
+
import com.treasuredata.api.model.TDColumn;
|
30
|
+
import com.treasuredata.api.model.TDColumnType;
|
31
|
+
import com.treasuredata.api.model.TDPrimitiveColumnType;
|
22
32
|
import org.embulk.config.TaskReport;
|
23
33
|
import org.embulk.config.Config;
|
24
34
|
import org.embulk.config.ConfigDefault;
|
@@ -28,14 +38,20 @@ import org.embulk.config.ConfigException;
|
|
28
38
|
import org.embulk.config.Task;
|
29
39
|
import org.embulk.config.TaskSource;
|
30
40
|
import org.embulk.output.td.writer.FieldWriterSet;
|
41
|
+
import org.embulk.spi.DataException;
|
31
42
|
import org.embulk.spi.Exec;
|
43
|
+
import org.embulk.spi.ColumnVisitor;
|
32
44
|
import org.embulk.spi.ExecSession;
|
33
45
|
import org.embulk.spi.OutputPlugin;
|
34
46
|
import org.embulk.spi.Schema;
|
47
|
+
import org.embulk.spi.Column;
|
35
48
|
import org.embulk.spi.TransactionalPageOutput;
|
36
49
|
import org.embulk.spi.time.Timestamp;
|
37
50
|
import org.embulk.spi.time.TimestampFormatter;
|
38
51
|
import org.joda.time.format.DateTimeFormat;
|
52
|
+
import org.msgpack.MessagePack;
|
53
|
+
import org.msgpack.unpacker.Unpacker;
|
54
|
+
import org.msgpack.unpacker.UnpackerIterator;
|
39
55
|
import org.slf4j.Logger;
|
40
56
|
|
41
57
|
public class TdOutputPlugin
|
@@ -90,6 +106,10 @@ public class TdOutputPlugin
|
|
90
106
|
@ConfigDefault("null")
|
91
107
|
public Optional<String> getTimeColumn();
|
92
108
|
|
109
|
+
@Config("time_value")
|
110
|
+
@ConfigDefault("null")
|
111
|
+
public Optional<TimeValueConfig> getTimeValue(); // TODO allow timestamp format such as {from: "2015-01-01 00:00:00 UTC", to: "2015-01-02 00:00:00 UTC"} as well as unixtime integer
|
112
|
+
|
93
113
|
@Config("unix_timestamp_unit")
|
94
114
|
@ConfigDefault("\"sec\"")
|
95
115
|
public UnixTimestampUnit getUnixTimestampUnit();
|
@@ -128,6 +148,15 @@ public class TdOutputPlugin
|
|
128
148
|
@ConfigDefault("{}")
|
129
149
|
public Map<String, TimestampColumnOption> getColumnOptions();
|
130
150
|
|
151
|
+
@Config("stop_on_invalid_record")
|
152
|
+
@ConfigDefault("false")
|
153
|
+
boolean getStopOnInvalidRecord();
|
154
|
+
|
155
|
+
@Config("displayed_error_records_count_limit")
|
156
|
+
@ConfigDefault("10")
|
157
|
+
@Min(0)
|
158
|
+
int getDisplayedErrorRecordsCountLimit();
|
159
|
+
|
131
160
|
public boolean getDoUpload();
|
132
161
|
public void setDoUpload(boolean doUpload);
|
133
162
|
|
@@ -184,6 +213,19 @@ public class TdOutputPlugin
|
|
184
213
|
public boolean getUseSsl();
|
185
214
|
}
|
186
215
|
|
216
|
+
public interface TimeValueConfig
|
217
|
+
extends Task
|
218
|
+
{
|
219
|
+
@Config("from")
|
220
|
+
@Min(0)
|
221
|
+
public long getFrom();
|
222
|
+
|
223
|
+
@Config("to")
|
224
|
+
@ConfigDefault("0")
|
225
|
+
@Min(0)
|
226
|
+
public long getTo();
|
227
|
+
}
|
228
|
+
|
187
229
|
public static enum ConvertTimestampType
|
188
230
|
{
|
189
231
|
STRING(-1),
|
@@ -314,7 +356,7 @@ public class TdOutputPlugin
|
|
314
356
|
// validate FieldWriterSet configuration before transaction is started
|
315
357
|
RecordWriter.validateSchema(log, task, schema);
|
316
358
|
|
317
|
-
return doRun(client, task, control);
|
359
|
+
return doRun(client, schema, task, control);
|
318
360
|
}
|
319
361
|
}
|
320
362
|
|
@@ -324,17 +366,17 @@ public class TdOutputPlugin
|
|
324
366
|
{
|
325
367
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
326
368
|
try (TdApiClient client = newTdApiClient(task)) {
|
327
|
-
return doRun(client, task, control);
|
369
|
+
return doRun(client, schema, task, control);
|
328
370
|
}
|
329
371
|
}
|
330
372
|
|
331
373
|
@VisibleForTesting
|
332
|
-
ConfigDiff doRun(TdApiClient client, PluginTask task, OutputPlugin.Control control)
|
374
|
+
ConfigDiff doRun(TdApiClient client, Schema schema, PluginTask task, OutputPlugin.Control control)
|
333
375
|
{
|
334
376
|
boolean doUpload = startBulkImportSession(client, task.getSessionName(), task.getDatabase(), task.getLoadTargetTableName());
|
335
377
|
task.setDoUpload(doUpload);
|
336
378
|
control.run(task.dump());
|
337
|
-
completeBulkImportSession(client, task
|
379
|
+
completeBulkImportSession(client, schema, task, 0); // TODO perform job priority
|
338
380
|
|
339
381
|
// commit
|
340
382
|
switch (task.getMode()) {
|
@@ -377,7 +419,7 @@ public class TdOutputPlugin
|
|
377
419
|
}
|
378
420
|
|
379
421
|
@VisibleForTesting
|
380
|
-
TdApiClient newTdApiClient(final PluginTask task)
|
422
|
+
public TdApiClient newTdApiClient(final PluginTask task)
|
381
423
|
{
|
382
424
|
Optional<HttpProxyConfig> httpProxyConfig = newHttpProxyConfig(task.getHttpProxy());
|
383
425
|
TdApiClientConfig config = new TdApiClientConfig(task.getEndpoint(), task.getUseSsl(), httpProxyConfig);
|
@@ -519,8 +561,9 @@ public class TdOutputPlugin
|
|
519
561
|
}
|
520
562
|
|
521
563
|
@VisibleForTesting
|
522
|
-
void completeBulkImportSession(TdApiClient client,
|
564
|
+
void completeBulkImportSession(TdApiClient client, Schema schema, PluginTask task, int priority)
|
523
565
|
{
|
566
|
+
String sessionName = task.getSessionName();
|
524
567
|
TDBulkImportSession session = client.getBulkImportSession(sessionName);
|
525
568
|
|
526
569
|
switch (session.getStatus()) {
|
@@ -548,12 +591,28 @@ public class TdOutputPlugin
|
|
548
591
|
// pass
|
549
592
|
case READY:
|
550
593
|
// TODO add an option to make the transaction failed if error_records or error_parts is too large
|
551
|
-
|
594
|
+
|
595
|
+
// add Embulk's columns to the table schema
|
596
|
+
Map<String, TDColumnType> newColumns = updateSchema(client, schema, task);
|
552
597
|
log.info("Committing bulk import session '{}'", sessionName);
|
553
598
|
log.info(" valid records: {}", session.getValidRecords());
|
554
599
|
log.info(" error records: {}", session.getErrorRecords());
|
555
600
|
log.info(" valid parts: {}", session.getValidParts());
|
556
601
|
log.info(" error parts: {}", session.getErrorParts());
|
602
|
+
if (!newColumns.isEmpty()) {
|
603
|
+
log.info(" new columns:");
|
604
|
+
}
|
605
|
+
for (Map.Entry<String, TDColumnType> pair : newColumns.entrySet()) {
|
606
|
+
log.info(" - {}: {}", pair.getKey(), pair.getValue());
|
607
|
+
}
|
608
|
+
|
609
|
+
showBulkImportErrorRecords(client, sessionName, (int) Math.min(session.getErrorRecords(), task.getDisplayedErrorRecordsCountLimit()));
|
610
|
+
|
611
|
+
if (session.getErrorRecords() > 0 && task.getStopOnInvalidRecord()) {
|
612
|
+
throw new DataException(String.format("Stop committing because the perform job skipped %d error records", session.getErrorRecords()));
|
613
|
+
}
|
614
|
+
|
615
|
+
// commit
|
557
616
|
client.commitBulkImportSession(sessionName);
|
558
617
|
|
559
618
|
// pass
|
@@ -571,6 +630,105 @@ public class TdOutputPlugin
|
|
571
630
|
}
|
572
631
|
}
|
573
632
|
|
633
|
+
Map<String, TDColumnType> updateSchema(TdApiClient client, Schema inputSchema, PluginTask task)
|
634
|
+
{
|
635
|
+
String databaseName = task.getDatabase();
|
636
|
+
|
637
|
+
TDTable table = findTable(client, databaseName, task.getTable());
|
638
|
+
if (table == null) {
|
639
|
+
return new HashMap<>();
|
640
|
+
}
|
641
|
+
|
642
|
+
final Map<String, TDColumnType> guessedSchema = new HashMap<>();
|
643
|
+
inputSchema.visitColumns(new ColumnVisitor() {
|
644
|
+
public void booleanColumn(Column column)
|
645
|
+
{
|
646
|
+
guessedSchema.put(column.getName(), TDPrimitiveColumnType.LONG);
|
647
|
+
}
|
648
|
+
|
649
|
+
public void longColumn(Column column)
|
650
|
+
{
|
651
|
+
guessedSchema.put(column.getName(), TDPrimitiveColumnType.LONG);;
|
652
|
+
}
|
653
|
+
|
654
|
+
public void doubleColumn(Column column)
|
655
|
+
{
|
656
|
+
guessedSchema.put(column.getName(), TDPrimitiveColumnType.DOUBLE);
|
657
|
+
}
|
658
|
+
|
659
|
+
public void stringColumn(Column column)
|
660
|
+
{
|
661
|
+
guessedSchema.put(column.getName(), TDPrimitiveColumnType.STRING);
|
662
|
+
}
|
663
|
+
|
664
|
+
public void timestampColumn(Column column)
|
665
|
+
{
|
666
|
+
guessedSchema.put(column.getName(), TDPrimitiveColumnType.STRING);
|
667
|
+
}
|
668
|
+
});
|
669
|
+
|
670
|
+
Map<String, Integer> usedNames = new HashMap<>();
|
671
|
+
for (TDColumn existent : table.getColumns()) {
|
672
|
+
usedNames.put(new String(existent.getKey()), 1);
|
673
|
+
guessedSchema.remove(existent.getName()); // don't change type of existent columns
|
674
|
+
}
|
675
|
+
guessedSchema.remove("time"); // don't change type of 'time' column
|
676
|
+
|
677
|
+
List<TDColumn> newSchema = new ArrayList<>(table.getColumns());
|
678
|
+
for (Map.Entry<String, TDColumnType> pair : guessedSchema.entrySet()) {
|
679
|
+
String key = renameColumn(pair.getKey());
|
680
|
+
|
681
|
+
if (!usedNames.containsKey(key)) {
|
682
|
+
usedNames.put(key, 1);
|
683
|
+
} else {
|
684
|
+
int next = usedNames.get(key);
|
685
|
+
key = key + "_" + next;
|
686
|
+
usedNames.put(key, next + 1);
|
687
|
+
}
|
688
|
+
|
689
|
+
newSchema.add(new TDColumn(pair.getKey(), pair.getValue(), key.getBytes(StandardCharsets.UTF_8)));
|
690
|
+
}
|
691
|
+
|
692
|
+
client.updateSchema(databaseName, task.getLoadTargetTableName(), newSchema);
|
693
|
+
return guessedSchema;
|
694
|
+
}
|
695
|
+
|
696
|
+
private static TDTable findTable(TdApiClient client, String databaseName, String tableName)
|
697
|
+
{
|
698
|
+
for (TDTable table : client.getTables(databaseName)) {
|
699
|
+
if (table.getName().equals(tableName)) {
|
700
|
+
return table;
|
701
|
+
}
|
702
|
+
}
|
703
|
+
return null;
|
704
|
+
}
|
705
|
+
|
706
|
+
private static final Pattern COLUMN_NAME_PATTERN = Pattern.compile("\\A[a-z_][a-z0-9_]*\\z");
|
707
|
+
private static final Pattern COLUMN_NAME_SQUASH_PATTERN = Pattern.compile("(?:[^a-zA-Z0-9_]|(?:\\A[^a-zA-Z_]))+");
|
708
|
+
|
709
|
+
private static String renameColumn(String origName)
|
710
|
+
{
|
711
|
+
if (COLUMN_NAME_PATTERN.matcher(origName).matches()) {
|
712
|
+
return origName;
|
713
|
+
}
|
714
|
+
return COLUMN_NAME_SQUASH_PATTERN.matcher(origName).replaceAll("_").toLowerCase();
|
715
|
+
}
|
716
|
+
|
717
|
+
void showBulkImportErrorRecords(TdApiClient client, String sessionName, int recordCountLimit)
|
718
|
+
{
|
719
|
+
log.info("Show {} error records", recordCountLimit);
|
720
|
+
try (InputStream in = client.getBulkImportErrorRecords(sessionName)) {
|
721
|
+
Unpacker unpacker = new MessagePack().createUnpacker(new GZIPInputStream(in));
|
722
|
+
UnpackerIterator records = unpacker.iterator();
|
723
|
+
for (int i = 0; i < recordCountLimit; i++) {
|
724
|
+
log.info(" {}", records.next());
|
725
|
+
}
|
726
|
+
}
|
727
|
+
catch (Exception ignored) {
|
728
|
+
log.info("Stop downloading error records", ignored);
|
729
|
+
}
|
730
|
+
}
|
731
|
+
|
574
732
|
@VisibleForTesting
|
575
733
|
TDBulkImportSession waitForStatusChange(TdApiClient client, String sessionName,
|
576
734
|
ImportStatus current, ImportStatus expecting, String operation)
|
@@ -604,23 +762,7 @@ public class TdOutputPlugin
|
|
604
762
|
void renameTable(TdApiClient client, String databaseName, String oldName, String newName)
|
605
763
|
{
|
606
764
|
log.debug("Renaming table \"{}\".\"{}\" to \"{}\"", databaseName, oldName, newName);
|
607
|
-
|
608
|
-
client.renameTable(databaseName, oldName, newName);
|
609
|
-
}
|
610
|
-
catch (TdApiConflictException e) {
|
611
|
-
try {
|
612
|
-
client.deleteTable(databaseName, newName);
|
613
|
-
log.debug("Deleted original table \"{}\".\"{}\"", databaseName, newName);
|
614
|
-
}
|
615
|
-
catch (TdApiNotFoundException ex) {
|
616
|
-
// ignoreable error
|
617
|
-
}
|
618
|
-
catch (IOException ex) {
|
619
|
-
throw Throwables.propagate(ex);
|
620
|
-
}
|
621
|
-
|
622
|
-
client.renameTable(databaseName, oldName, newName);
|
623
|
-
}
|
765
|
+
client.renameTable(databaseName, oldName, newName, true);
|
624
766
|
}
|
625
767
|
|
626
768
|
@Override
|
@@ -1,9 +1,15 @@
|
|
1
1
|
package org.embulk.output.td.writer;
|
2
2
|
|
3
|
+
import java.io.IOException;
|
4
|
+
|
5
|
+
import com.google.common.annotations.VisibleForTesting;
|
3
6
|
import com.google.common.base.Optional;
|
7
|
+
import com.google.common.base.Throwables;
|
4
8
|
import org.embulk.config.ConfigException;
|
5
9
|
import org.embulk.output.td.TdOutputPlugin;
|
6
10
|
import org.embulk.spi.Column;
|
11
|
+
import org.embulk.spi.ColumnVisitor;
|
12
|
+
import org.embulk.spi.PageReader;
|
7
13
|
import org.embulk.spi.Schema;
|
8
14
|
import org.embulk.spi.time.TimestampFormatter;
|
9
15
|
import org.embulk.spi.type.BooleanType;
|
@@ -13,6 +19,8 @@ import org.embulk.spi.type.StringType;
|
|
13
19
|
import org.embulk.spi.type.TimestampType;
|
14
20
|
import org.embulk.spi.type.Type;
|
15
21
|
import org.embulk.spi.util.Timestamps;
|
22
|
+
import org.embulk.output.td.MsgpackGZFileBuilder;
|
23
|
+
import org.embulk.output.td.TdOutputPlugin.TimeValueConfig;
|
16
24
|
import org.slf4j.Logger;
|
17
25
|
|
18
26
|
public class FieldWriterSet
|
@@ -26,11 +34,17 @@ public class FieldWriterSet
|
|
26
34
|
|
27
35
|
private final int fieldCount;
|
28
36
|
private final IFieldWriter[] fieldWriters;
|
37
|
+
private final Optional<TimeValueGenerator> staticTimeValue;
|
29
38
|
|
30
39
|
public FieldWriterSet(Logger log, TdOutputPlugin.PluginTask task, Schema schema)
|
31
40
|
{
|
32
41
|
Optional<String> userDefinedPrimaryKeySourceColumnName = task.getTimeColumn();
|
33
42
|
TdOutputPlugin.ConvertTimestampType convertTimestamp = task.getConvertTimestampType();
|
43
|
+
Optional<TimeValueConfig> timeValueConfig = task.getTimeValue();
|
44
|
+
if (timeValueConfig.isPresent() && userDefinedPrimaryKeySourceColumnName.isPresent()) {
|
45
|
+
throw new ConfigException("Setting both time_column and time_value is invalid");
|
46
|
+
}
|
47
|
+
|
34
48
|
boolean hasPkWriter = false;
|
35
49
|
int duplicatePrimaryKeySourceIndex = -1;
|
36
50
|
int firstTimestampColumnIndex = -1;
|
@@ -62,7 +76,13 @@ public class FieldWriterSet
|
|
62
76
|
columnName = newColumnUniqueName(columnName, schema);
|
63
77
|
mode = ColumnWriterMode.SIMPLE_VALUE;
|
64
78
|
log.warn("time_column '{}' is set but 'time' column also exists. The existent 'time' column is renamed to {}",
|
65
|
-
userDefinedPrimaryKeySourceColumnName.get(),
|
79
|
+
userDefinedPrimaryKeySourceColumnName.get(), columnName);
|
80
|
+
}
|
81
|
+
else if (timeValueConfig.isPresent()) {
|
82
|
+
columnName = newColumnUniqueName(columnName, schema);
|
83
|
+
mode = ColumnWriterMode.SIMPLE_VALUE;
|
84
|
+
log.warn("time_value is set but 'time' column also exists. The existent 'time' column is renamed to {}",
|
85
|
+
columnName);
|
66
86
|
}
|
67
87
|
else {
|
68
88
|
mode = ColumnWriterMode.PRIMARY_KEY;
|
@@ -143,7 +163,11 @@ public class FieldWriterSet
|
|
143
163
|
fc += 1;
|
144
164
|
}
|
145
165
|
|
146
|
-
if (
|
166
|
+
if (timeValueConfig.isPresent()) {
|
167
|
+
// "time" column is written by RecordWriter
|
168
|
+
fc += 1;
|
169
|
+
}
|
170
|
+
else if (!hasPkWriter) {
|
147
171
|
// PRIMARY_KEY was not found.
|
148
172
|
if (duplicatePrimaryKeySourceIndex < 0) {
|
149
173
|
if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
|
@@ -195,6 +219,13 @@ public class FieldWriterSet
|
|
195
219
|
fc += 1;
|
196
220
|
}
|
197
221
|
|
222
|
+
if (timeValueConfig.isPresent()) {
|
223
|
+
staticTimeValue = Optional.of(new TimeValueGenerator(timeValueConfig.get()));
|
224
|
+
}
|
225
|
+
else {
|
226
|
+
staticTimeValue = Optional.absent();
|
227
|
+
}
|
228
|
+
|
198
229
|
fieldCount = fc;
|
199
230
|
}
|
200
231
|
|
@@ -218,13 +249,101 @@ public class FieldWriterSet
|
|
218
249
|
return false;
|
219
250
|
}
|
220
251
|
|
252
|
+
@VisibleForTesting
|
221
253
|
public IFieldWriter getFieldWriter(int index)
|
222
254
|
{
|
223
255
|
return fieldWriters[index];
|
224
256
|
}
|
225
257
|
|
226
|
-
public
|
258
|
+
public void addRecord(final MsgpackGZFileBuilder builder, final PageReader reader)
|
259
|
+
throws IOException
|
227
260
|
{
|
228
|
-
|
261
|
+
beginRecord(builder);
|
262
|
+
|
263
|
+
reader.getSchema().visitColumns(new ColumnVisitor() {
|
264
|
+
@Override
|
265
|
+
public void booleanColumn(Column column)
|
266
|
+
{
|
267
|
+
addColumn(builder, reader, column);
|
268
|
+
}
|
269
|
+
|
270
|
+
@Override
|
271
|
+
public void longColumn(Column column)
|
272
|
+
{
|
273
|
+
addColumn(builder, reader, column);
|
274
|
+
}
|
275
|
+
|
276
|
+
@Override
|
277
|
+
public void doubleColumn(Column column)
|
278
|
+
{
|
279
|
+
addColumn(builder, reader, column);
|
280
|
+
}
|
281
|
+
|
282
|
+
@Override
|
283
|
+
public void stringColumn(Column column)
|
284
|
+
{
|
285
|
+
addColumn(builder, reader, column);
|
286
|
+
}
|
287
|
+
|
288
|
+
@Override
|
289
|
+
public void timestampColumn(Column column)
|
290
|
+
{
|
291
|
+
addColumn(builder, reader, column);
|
292
|
+
}
|
293
|
+
|
294
|
+
});
|
295
|
+
|
296
|
+
endRecord(builder);
|
297
|
+
}
|
298
|
+
|
299
|
+
private void beginRecord(MsgpackGZFileBuilder builder)
|
300
|
+
throws IOException
|
301
|
+
{
|
302
|
+
builder.writeMapBegin(fieldCount);
|
303
|
+
if (staticTimeValue.isPresent()) {
|
304
|
+
builder.writeString("time");
|
305
|
+
builder.writeLong(staticTimeValue.get().next());
|
306
|
+
}
|
307
|
+
}
|
308
|
+
|
309
|
+
private void endRecord(MsgpackGZFileBuilder builder)
|
310
|
+
throws IOException
|
311
|
+
{
|
312
|
+
builder.writeMapEnd();
|
313
|
+
}
|
314
|
+
|
315
|
+
private void addColumn(MsgpackGZFileBuilder builder, PageReader reader, Column column)
|
316
|
+
{
|
317
|
+
try {
|
318
|
+
fieldWriters[column.getIndex()].writeKeyValue(builder, reader, column);
|
319
|
+
}
|
320
|
+
catch (IOException e) {
|
321
|
+
throw Throwables.propagate(e);
|
322
|
+
}
|
323
|
+
}
|
324
|
+
|
325
|
+
static class TimeValueGenerator
|
326
|
+
{
|
327
|
+
private final long from;
|
328
|
+
private final long to;
|
329
|
+
private long current;
|
330
|
+
|
331
|
+
TimeValueGenerator(TimeValueConfig config)
|
332
|
+
{
|
333
|
+
current = from = config.getFrom();
|
334
|
+
to = config.getTo();
|
335
|
+
}
|
336
|
+
|
337
|
+
long next()
|
338
|
+
{
|
339
|
+
try {
|
340
|
+
return current++;
|
341
|
+
}
|
342
|
+
finally {
|
343
|
+
if (current >= to) {
|
344
|
+
current = from;
|
345
|
+
}
|
346
|
+
}
|
347
|
+
}
|
229
348
|
}
|
230
349
|
}
|
@@ -1,9 +1,9 @@
|
|
1
1
|
package org.embulk.output.td;
|
2
2
|
|
3
|
+
import com.google.common.collect.ImmutableMap;
|
3
4
|
import com.treasuredata.api.TdApiClient;
|
4
5
|
import org.embulk.EmbulkTestRuntime;
|
5
6
|
import org.embulk.output.td.TdOutputPlugin.PluginTask;
|
6
|
-
import org.embulk.output.td.writer.FieldWriterSet;
|
7
7
|
import org.embulk.spi.Page;
|
8
8
|
import org.embulk.spi.PageTestUtils;
|
9
9
|
import org.embulk.spi.Schema;
|
@@ -27,6 +27,7 @@ import static org.embulk.output.td.TestTdOutputPlugin.plugin;
|
|
27
27
|
import static org.embulk.output.td.TestTdOutputPlugin.pluginTask;
|
28
28
|
import static org.embulk.output.td.TestTdOutputPlugin.schema;
|
29
29
|
import static org.embulk.output.td.TestTdOutputPlugin.recordWriter;
|
30
|
+
import static org.embulk.output.td.TestTdOutputPlugin.tdApiClient;
|
30
31
|
import static org.junit.Assert.assertEquals;
|
31
32
|
import static org.junit.Assert.assertTrue;
|
32
33
|
import static org.mockito.Matchers.any;
|
@@ -58,16 +59,14 @@ public class TestRecordWriter
|
|
58
59
|
|
59
60
|
plugin = plugin();
|
60
61
|
task = pluginTask(config().set("session_name", "my_session"));
|
61
|
-
|
62
|
-
TdApiClient client = plugin.newTdApiClient(task);
|
63
|
-
FieldWriterSet fieldWriters = fieldWriters(log, task, schema);
|
64
|
-
recordWriter = recordWriter(task, client, fieldWriters);
|
65
62
|
}
|
66
63
|
|
67
64
|
@Test
|
68
65
|
public void checkOpenAndClose()
|
69
66
|
throws Exception
|
70
67
|
{
|
68
|
+
recordWriter = recordWriter(task, tdApiClient(plugin, task), fieldWriters(log, task, schema));
|
69
|
+
|
71
70
|
// confirm that no error happens
|
72
71
|
try {
|
73
72
|
recordWriter.open(schema);
|
@@ -82,6 +81,7 @@ public class TestRecordWriter
|
|
82
81
|
throws Exception
|
83
82
|
{
|
84
83
|
TdApiClient client = spy(plugin.newTdApiClient(task));
|
84
|
+
recordWriter = recordWriter(task, client, fieldWriters(log, task, schema));
|
85
85
|
|
86
86
|
{ // add no record
|
87
87
|
RecordWriter recordWriter = recordWriter(task, client, fieldWriters(log, task, schema));
|
@@ -116,6 +116,8 @@ public class TestRecordWriter
|
|
116
116
|
public void addNonNullValues()
|
117
117
|
throws Exception
|
118
118
|
{
|
119
|
+
recordWriter = recordWriter(task, tdApiClient(plugin, task), fieldWriters(log, task, schema));
|
120
|
+
|
119
121
|
try {
|
120
122
|
recordWriter.open(schema);
|
121
123
|
|
@@ -152,6 +154,8 @@ public class TestRecordWriter
|
|
152
154
|
public void addNullValues()
|
153
155
|
throws Exception
|
154
156
|
{
|
157
|
+
recordWriter = recordWriter(task, tdApiClient(plugin, task), fieldWriters(log, task, schema));
|
158
|
+
|
155
159
|
try {
|
156
160
|
recordWriter.open(schema);
|
157
161
|
|
@@ -183,9 +187,51 @@ public class TestRecordWriter
|
|
183
187
|
}
|
184
188
|
}
|
185
189
|
|
190
|
+
@Test
|
191
|
+
public void checkGeneratedTimeValueByOption()
|
192
|
+
throws Exception
|
193
|
+
{
|
194
|
+
schema = schema("_c0", Types.LONG, "_c1", Types.STRING,
|
195
|
+
"_c2", Types.BOOLEAN, "_c3", Types.DOUBLE, "_c4", Types.TIMESTAMP);
|
196
|
+
task = pluginTask(config().set("session_name", "my_session").set("time_value", ImmutableMap.of("from", 0L, "to", 0L)));
|
197
|
+
recordWriter = recordWriter(task, tdApiClient(plugin, task), fieldWriters(log, task, schema));
|
198
|
+
|
199
|
+
try {
|
200
|
+
recordWriter.open(schema);
|
201
|
+
|
202
|
+
// values are not null
|
203
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
|
204
|
+
0L, "v", true, 0.0, Timestamp.ofEpochSecond(1442595600L))) {
|
205
|
+
recordWriter.add(page);
|
206
|
+
}
|
207
|
+
|
208
|
+
MsgpackGZFileBuilder builder = recordWriter.getBuilder();
|
209
|
+
builder.finish();
|
210
|
+
|
211
|
+
// record count 1
|
212
|
+
assertEquals(1, builder.getRecordCount());
|
213
|
+
|
214
|
+
Unpacker u = msgpack.createUnpacker(new GZIPInputStream(new FileInputStream(builder.getFile())));
|
215
|
+
MapValue v = u.readValue().asMapValue();
|
216
|
+
|
217
|
+
// compare actual values
|
218
|
+
assertEquals(0L, v.get(createRawValue("time")).asIntegerValue().getLong());
|
219
|
+
assertEquals(0L, v.get(createRawValue("_c0")).asIntegerValue().getLong());
|
220
|
+
assertEquals("v", v.get(createRawValue("_c1")).asRawValue().getString());
|
221
|
+
assertEquals(true, v.get(createRawValue("_c2")).asBooleanValue().getBoolean());
|
222
|
+
assertEquals(0.0, v.get(createRawValue("_c3")).asFloatValue().getDouble(), 0.000001);
|
223
|
+
assertEquals("2015-09-18 17:00:00.000", v.get(createRawValue("_c4")).asRawValue().getString());
|
224
|
+
|
225
|
+
}
|
226
|
+
finally {
|
227
|
+
recordWriter.close();
|
228
|
+
}
|
229
|
+
}
|
230
|
+
|
186
231
|
@Test
|
187
232
|
public void doAbortNorthing()
|
188
233
|
{
|
234
|
+
recordWriter = recordWriter(task, tdApiClient(plugin, task), fieldWriters(log, task, schema));
|
189
235
|
recordWriter.abort();
|
190
236
|
// no error happen
|
191
237
|
}
|
@@ -193,6 +239,7 @@ public class TestRecordWriter
|
|
193
239
|
@Test
|
194
240
|
public void checkTaskReport()
|
195
241
|
{
|
242
|
+
recordWriter = recordWriter(task, tdApiClient(plugin, task), fieldWriters(log, task, schema));
|
196
243
|
assertTrue(recordWriter.commit().isEmpty());
|
197
244
|
}
|
198
245
|
}
|
@@ -8,6 +8,7 @@ import com.treasuredata.api.TdApiConflictException;
|
|
8
8
|
import com.treasuredata.api.TdApiNotFoundException;
|
9
9
|
import com.treasuredata.api.model.TDBulkImportSession;
|
10
10
|
import com.treasuredata.api.model.TDBulkImportSession.ImportStatus;
|
11
|
+
import com.treasuredata.api.model.TDColumnType;
|
11
12
|
import com.treasuredata.api.model.TDTable;
|
12
13
|
import com.treasuredata.api.model.TDTableType;
|
13
14
|
import org.embulk.EmbulkTestRuntime;
|
@@ -34,6 +35,7 @@ import org.junit.Rule;
|
|
34
35
|
import org.junit.Test;
|
35
36
|
import org.slf4j.Logger;
|
36
37
|
|
38
|
+
import java.util.HashMap;
|
37
39
|
import java.util.List;
|
38
40
|
|
39
41
|
import static com.treasuredata.api.model.TDBulkImportSession.ImportStatus.COMMITTED;
|
@@ -112,7 +114,7 @@ public class TestTdOutputPlugin
|
|
112
114
|
{
|
113
115
|
doReturn("session_name").when(plugin).buildBulkImportSessionName(any(PluginTask.class), any(ExecSession.class));
|
114
116
|
ConfigDiff configDiff = Exec.newConfigDiff().set("last_session", "session_name");
|
115
|
-
doReturn(configDiff).when(plugin).doRun(any(TdApiClient.class), any(PluginTask.class), any(OutputPlugin.Control.class));
|
117
|
+
doReturn(configDiff).when(plugin).doRun(any(TdApiClient.class), any(Schema.class), any(PluginTask.class), any(OutputPlugin.Control.class));
|
116
118
|
Schema schema = schema("time", Types.LONG, "c0", Types.STRING, "c1", Types.STRING);
|
117
119
|
|
118
120
|
{ // auto_create_table is true
|
@@ -151,7 +153,7 @@ public class TestTdOutputPlugin
|
|
151
153
|
task.setLoadTargetTableName("my_table");
|
152
154
|
task.setDoUpload(true);
|
153
155
|
doReturn(true).when(plugin).startBulkImportSession(any(TdApiClient.class), anyString(), anyString(), anyString());
|
154
|
-
doNothing().when(plugin).completeBulkImportSession(any(TdApiClient.class),
|
156
|
+
doNothing().when(plugin).completeBulkImportSession(any(TdApiClient.class), any(Schema.class), any(PluginTask.class), anyInt());
|
155
157
|
Schema schema = schema("time", Types.LONG, "c0", Types.STRING, "c1", Types.STRING);
|
156
158
|
|
157
159
|
ConfigDiff configDiff = plugin.resume(task.dump(), schema, 0, new OutputPlugin.Control()
|
@@ -384,7 +386,10 @@ public class TestTdOutputPlugin
|
|
384
386
|
public void completeBulkImportSession()
|
385
387
|
{
|
386
388
|
PluginTask task = pluginTask(config);
|
389
|
+
Schema schema = schema("c0", Types.LONG);
|
390
|
+
|
387
391
|
doReturn(session(UNKNOWN, false)).when(plugin).waitForStatusChange(any(TdApiClient.class), anyString(), any(ImportStatus.class), any(ImportStatus.class), anyString());
|
392
|
+
doReturn(new HashMap<String, TDColumnType>()).when(plugin).updateSchema(any(TdApiClient.class), any(Schema.class), any(PluginTask.class));
|
388
393
|
|
389
394
|
TdApiClient client = spy(plugin.newTdApiClient(task));
|
390
395
|
doNothing().when(client).freezeBulkImportSession(anyString());
|
@@ -392,45 +397,45 @@ public class TestTdOutputPlugin
|
|
392
397
|
doNothing().when(client).commitBulkImportSession(anyString());
|
393
398
|
|
394
399
|
{ // uploading + unfreeze
|
395
|
-
doReturn(session(UPLOADING, false)).when(client).getBulkImportSession(
|
396
|
-
plugin.completeBulkImportSession(client,
|
400
|
+
doReturn(session(UPLOADING, false)).when(client).getBulkImportSession(anyString());
|
401
|
+
plugin.completeBulkImportSession(client, schema, task, 0);
|
397
402
|
// no error happens
|
398
403
|
}
|
399
404
|
|
400
405
|
{ // uploading + frozen
|
401
|
-
doReturn(session(UPLOADING, true)).when(client).getBulkImportSession(
|
402
|
-
plugin.completeBulkImportSession(client,
|
406
|
+
doReturn(session(UPLOADING, true)).when(client).getBulkImportSession(anyString());
|
407
|
+
plugin.completeBulkImportSession(client, schema, task, 0);
|
403
408
|
// no error happens
|
404
409
|
}
|
405
410
|
|
406
411
|
{ // performing
|
407
412
|
doReturn(session(PERFORMING, false)).when(client).getBulkImportSession(anyString());
|
408
|
-
plugin.completeBulkImportSession(client,
|
413
|
+
plugin.completeBulkImportSession(client, schema, task, 0);
|
409
414
|
// no error happens
|
410
415
|
}
|
411
416
|
|
412
417
|
{ // ready
|
413
418
|
doReturn(session(READY, false)).when(client).getBulkImportSession(anyString());
|
414
|
-
plugin.completeBulkImportSession(client,
|
419
|
+
plugin.completeBulkImportSession(client, schema, task, 0);
|
415
420
|
// no error happens
|
416
421
|
}
|
417
422
|
|
418
423
|
{ // committing
|
419
424
|
doReturn(session(COMMITTING, false)).when(client).getBulkImportSession(anyString());
|
420
|
-
plugin.completeBulkImportSession(client,
|
425
|
+
plugin.completeBulkImportSession(client, schema, task, 0);
|
421
426
|
// no error happens
|
422
427
|
}
|
423
428
|
|
424
429
|
{ // committed
|
425
430
|
doReturn(session(COMMITTED, false)).when(client).getBulkImportSession(anyString());
|
426
|
-
plugin.completeBulkImportSession(client,
|
431
|
+
plugin.completeBulkImportSession(client, schema, task, 0);
|
427
432
|
// no error happens
|
428
433
|
}
|
429
434
|
|
430
435
|
{ // unknown
|
431
436
|
doReturn(session(UNKNOWN, false)).when(client).getBulkImportSession(anyString());
|
432
437
|
try {
|
433
|
-
plugin.completeBulkImportSession(client,
|
438
|
+
plugin.completeBulkImportSession(client, schema, task, 0);
|
434
439
|
fail();
|
435
440
|
}
|
436
441
|
catch (Throwable t) {
|
@@ -439,8 +444,8 @@ public class TestTdOutputPlugin
|
|
439
444
|
|
440
445
|
{ // if freezeBulkImportSession gets a 409, it can be ignored.
|
441
446
|
doThrow(conflict()).when(client).freezeBulkImportSession(anyString());
|
442
|
-
doReturn(session(UPLOADING, true)).when(client).getBulkImportSession(
|
443
|
-
plugin.completeBulkImportSession(client,
|
447
|
+
doReturn(session(UPLOADING, true)).when(client).getBulkImportSession(anyString());
|
448
|
+
plugin.completeBulkImportSession(client, schema, task, 0);
|
444
449
|
// no error happens
|
445
450
|
}
|
446
451
|
}
|
@@ -507,6 +512,11 @@ public class TestTdOutputPlugin
|
|
507
512
|
return spy(new TdOutputPlugin());
|
508
513
|
}
|
509
514
|
|
515
|
+
public static TdApiClient tdApiClient(TdOutputPlugin plugin, PluginTask task)
|
516
|
+
{
|
517
|
+
return spy(plugin.newTdApiClient(task));
|
518
|
+
}
|
519
|
+
|
510
520
|
public static FieldWriterSet fieldWriters(Logger log, PluginTask task, Schema schema)
|
511
521
|
{
|
512
522
|
return spy(new FieldWriterSet(log, task, schema));
|
@@ -1,5 +1,6 @@
|
|
1
1
|
package org.embulk.output.td.writer;
|
2
2
|
|
3
|
+
import com.google.common.collect.ImmutableMap;
|
3
4
|
import org.embulk.EmbulkTestRuntime;
|
4
5
|
import org.embulk.config.ConfigException;
|
5
6
|
import org.embulk.config.ConfigSource;
|
@@ -79,6 +80,17 @@ public class TestFieldWriterSet
|
|
79
80
|
assertTrue(t instanceof ConfigException);
|
80
81
|
}
|
81
82
|
}
|
83
|
+
|
84
|
+
{ // if both time_column and time_value are specified, it throws ConfigException.
|
85
|
+
schema = schema("_c0", Types.STRING, "_c1", Types.LONG);
|
86
|
+
try {
|
87
|
+
new FieldWriterSet(log, pluginTask(config.deepCopy().set("time_column", "_c1").set("time_value", ImmutableMap.of("from", 0L, "to", 0L))), schema);
|
88
|
+
fail();
|
89
|
+
}
|
90
|
+
catch (Throwable t) {
|
91
|
+
assertTrue(t instanceof ConfigException);
|
92
|
+
}
|
93
|
+
}
|
82
94
|
}
|
83
95
|
|
84
96
|
@Test
|
@@ -101,7 +113,7 @@ public class TestFieldWriterSet
|
|
101
113
|
}
|
102
114
|
|
103
115
|
@Test
|
104
|
-
public void
|
116
|
+
public void specifiedTimeColumn()
|
105
117
|
{
|
106
118
|
{ // time_column option (timestamp type)
|
107
119
|
Schema schema = schema("_c0", Types.TIMESTAMP, "_c1", Types.STRING);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-td
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Muga Nishizawa
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-01-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -103,7 +103,7 @@ files:
|
|
103
103
|
- src/test/java/org/embulk/output/td/TestRecordWriter.java
|
104
104
|
- src/test/java/org/embulk/output/td/TestTdOutputPlugin.java
|
105
105
|
- src/test/java/org/embulk/output/td/writer/TestFieldWriterSet.java
|
106
|
-
- classpath/embulk-output-td-0.1.6.jar
|
106
|
+
- classpath/embulk-output-td-0.1.7.jar
|
107
107
|
- classpath/javassist-3.18.1-GA.jar
|
108
108
|
- classpath/jetty-client-9.2.2.v20140723.jar
|
109
109
|
- classpath/jetty-http-9.2.2.v20140723.jar
|