embulk-output-td 0.1.8 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7926b5de5fab7cc6f9343b39e0c5d35f8e2e160e
4
- data.tar.gz: 1f581633d6365c1043fef772f856c79e1b29397e
3
+ metadata.gz: 985479cb77ed1b156d896d1dbad073fdc6141c56
4
+ data.tar.gz: 4fde290d7d7e83a7eba0f687c22312e0ad589f8d
5
5
  SHA512:
6
- metadata.gz: 668eec6a9224c66c7bc0d750af02a8d8a577a584482fa44599722a0478d517ef09b80b214a6821eefee32ebd65ee0dc38d164fb0cd7b3dc13df208d85872aff8
7
- data.tar.gz: 94a707125cc4631a6cf71fe4af3a720370cf667374d7eb6e8772a7d478b38b1affea2d44244a91dcef9fb0c47ffb7d0ee67751ce392b622de40dd66e2a1a3d63
6
+ metadata.gz: 87aba00cd303cbe98772d3e645d2336fc15091f502829eaed3064def64ac4154bc4d39110d785f30fd046e34a183a4bc131953c5d1524732f72893ddea8c9f66
7
+ data.tar.gz: a3a2281e0bcd43ef99763ae8ab7e526f8d22b1f667bb480e5c16b52bf5bcbf6cca4caeedcf5fbf35b1bd801a1cc5c8896b55f65007d522c4f877373818bbfebd
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.0 - 2016-01-12
2
+
3
+ * [new feature] Do not use the first timestamp column as the primary key [#32](https://github.com/treasure-data/embulk-output-td/pull/32)
4
+
1
5
  ## 0.1.8 - 2016-01-09
2
6
 
3
7
  * [new feature] Add mode to time value option [#31](https://github.com/treasure-data/embulk-output-td/pull/31)
data/build.gradle CHANGED
@@ -16,7 +16,7 @@ configurations {
16
16
  provided
17
17
  }
18
18
 
19
- version = "0.1.8"
19
+ version = "0.2.0"
20
20
 
21
21
  compileJava.options.encoding = 'UTF-8' // source encoding
22
22
  sourceCompatibility = 1.7
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-output-td"
4
- spec.version = "0.1.8"
4
+ spec.version = "0.2.0"
5
5
  spec.authors = ["Muga Nishizawa"]
6
6
  spec.summary = %[TreasureData output plugin for Embulk]
7
7
  spec.description = %[TreasureData output plugin is an Embulk plugin that loads records to TreasureData read by any input plugins. Search the input plugins by 'embulk-output' keyword.]
@@ -109,6 +109,7 @@ public class TdOutputPlugin
109
109
  @Config("time_value")
110
110
  @ConfigDefault("null")
111
111
  public Optional<TimeValueConfig> getTimeValue(); // TODO allow timestamp format such as {from: "2015-01-01 00:00:00 UTC", to: "2015-01-02 00:00:00 UTC"} as well as unixtime integer
112
+ public void setTimeValue(Optional<TimeValueConfig> timeValue);
112
113
 
113
114
  @Config("unix_timestamp_unit")
114
115
  @ConfigDefault("\"sec\"")
@@ -5,9 +5,6 @@ import org.embulk.config.Config;
5
5
  import org.embulk.config.ConfigDefault;
6
6
  import org.embulk.config.Task;
7
7
 
8
- import javax.validation.constraints.Max;
9
- import javax.validation.constraints.Min;
10
-
11
8
  public interface TimeValueConfig
12
9
  extends Task
13
10
  {
@@ -17,19 +14,13 @@ public interface TimeValueConfig
17
14
 
18
15
  @Config("value")
19
16
  @ConfigDefault("null")
20
- @Min(0)
21
- @Max(253402300799L) // '9999-12-31 23:59:59 UTC'
22
17
  Optional<Long> getValue();
23
18
 
24
19
  @Config("from")
25
20
  @ConfigDefault("null")
26
- @Min(0)
27
- @Max(253402300799L) // '9999-12-31 23:59:59 UTC'
28
21
  Optional<Long> getFrom();
29
22
 
30
23
  @Config("to")
31
24
  @ConfigDefault("null")
32
- @Min(0)
33
- @Max(253402300799L) // '9999-12-31 23:59:59 UTC'
34
25
  Optional<Long> getTo();
35
26
  }
@@ -10,54 +10,73 @@ public abstract class TimeValueGenerator
10
10
  public static TimeValueGenerator newGenerator(final TimeValueConfig config)
11
11
  {
12
12
  switch (config.getMode()) {
13
- case "incremental_time": { // default mode
13
+ case "incremental_time": // default mode
14
14
  require(config.getFrom(), "'from', 'to'");
15
+ validateTimeRange(config.getFrom().get(), "'from'");
15
16
  require(config.getTo(), "'to'");
17
+ validateTimeRange(config.getTo().get(), "'to'");
16
18
  reject(config.getValue(), "'value'");
17
19
 
18
- return new TimeValueGenerator()
19
- {
20
- private final long from = config.getFrom().get();
21
- private final long to = config.getTo().get();
22
-
23
- private long current = from;
24
-
25
- @Override
26
- public long next()
27
- {
28
- try {
29
- return current++;
30
- }
31
- finally {
32
- if (current > to) {
33
- current = from;
34
- }
35
- }
36
- }
37
- };
38
- }
39
- case "fixed_time": {
20
+ return new IncrementalTimeValueGenerator(config);
21
+
22
+ case "fixed_time":
40
23
  require(config.getValue(), "'value'");
24
+ validateTimeRange(config.getValue().get(), "'value'");
41
25
  reject(config.getFrom(), "'from'");
42
26
  reject(config.getTo(), "'to'");
43
27
 
44
- return new TimeValueGenerator()
45
- {
46
- private final long fixed = config.getValue().get();
28
+ return new FixedTimeValueGenerator(config);
47
29
 
48
- @Override
49
- public long next()
50
- {
51
- return fixed;
52
- }
53
- };
54
- }
55
- default: {
30
+ default:
56
31
  throw new ConfigException(String.format("Unknwon mode '%s'. Supported methods are incremental_time, fixed_time.", config.getMode()));
32
+ }
33
+ }
34
+
35
+ public static class IncrementalTimeValueGenerator
36
+ extends TimeValueGenerator
37
+ {
38
+ private final long from;
39
+ private final long to;
40
+
41
+ private long current;
42
+
43
+ public IncrementalTimeValueGenerator(final TimeValueConfig config)
44
+ {
45
+ current = from = config.getFrom().get();
46
+ to = config.getTo().get();
47
+ }
48
+
49
+ @Override
50
+ public long next()
51
+ {
52
+ try {
53
+ return current++;
54
+ }
55
+ finally {
56
+ if (current > to) {
57
+ current = from;
58
+ }
57
59
  }
58
60
  }
59
61
  }
60
62
 
63
+ public static class FixedTimeValueGenerator
64
+ extends TimeValueGenerator
65
+ {
66
+ private final long value;
67
+
68
+ public FixedTimeValueGenerator(final TimeValueConfig config)
69
+ {
70
+ value = config.getValue().get();
71
+ }
72
+
73
+ @Override
74
+ public long next()
75
+ {
76
+ return value;
77
+ }
78
+ }
79
+
61
80
  // ported from embulk-input-s3
62
81
  private static <T> T require(Optional<T> value, String message)
63
82
  {
@@ -69,6 +88,13 @@ public abstract class TimeValueGenerator
69
88
  }
70
89
  }
71
90
 
91
+ private static void validateTimeRange(long value, String message)
92
+ {
93
+ if (value < 0 || 253402300799L < value) { // should be [1970-01-01 00:00:00, 9999-12-31 23:59:59]
94
+ throw new ConfigException("The option value must be within [0, 253402300799L]: " + message);
95
+ }
96
+ }
97
+
72
98
  // ported from embulk-input-s3
73
99
  private static <T> void reject(Optional<T> value, String message)
74
100
  {
@@ -6,11 +6,14 @@ import com.google.common.annotations.VisibleForTesting;
6
6
  import com.google.common.base.Optional;
7
7
  import com.google.common.base.Throwables;
8
8
  import org.embulk.config.ConfigException;
9
+ import org.embulk.config.ConfigSource;
9
10
  import org.embulk.output.td.TdOutputPlugin;
11
+ import org.embulk.output.td.TdOutputPlugin.ConvertTimestampType;
10
12
  import org.embulk.output.td.TimeValueConfig;
11
13
  import org.embulk.output.td.TimeValueGenerator;
12
14
  import org.embulk.spi.Column;
13
15
  import org.embulk.spi.ColumnVisitor;
16
+ import org.embulk.spi.Exec;
14
17
  import org.embulk.spi.PageReader;
15
18
  import org.embulk.spi.Schema;
16
19
  import org.embulk.spi.time.TimestampFormatter;
@@ -40,15 +43,14 @@ public class FieldWriterSet
40
43
  public FieldWriterSet(Logger log, TdOutputPlugin.PluginTask task, Schema schema)
41
44
  {
42
45
  Optional<String> userDefinedPrimaryKeySourceColumnName = task.getTimeColumn();
43
- TdOutputPlugin.ConvertTimestampType convertTimestamp = task.getConvertTimestampType();
46
+ ConvertTimestampType convertTimestampType = task.getConvertTimestampType();
44
47
  Optional<TimeValueConfig> timeValueConfig = task.getTimeValue();
45
48
  if (timeValueConfig.isPresent() && userDefinedPrimaryKeySourceColumnName.isPresent()) {
46
49
  throw new ConfigException("Setting both time_column and time_value is invalid");
47
50
  }
48
51
 
49
- boolean hasPkWriter = false;
52
+ boolean foundPrimaryKey = false;
50
53
  int duplicatePrimaryKeySourceIndex = -1;
51
- int firstTimestampColumnIndex = -1;
52
54
 
53
55
  int fc = 0;
54
56
  fieldWriters = new IFieldWriter[schema.size()];
@@ -104,12 +106,11 @@ public class FieldWriterSet
104
106
  log.warn("time column is converted from {} to seconds", task.getUnixTimestampUnit());
105
107
  }
106
108
  writer = new UnixTimestampLongFieldWriter(columnName, task.getUnixTimestampUnit().getFractionUnit());
107
- hasPkWriter = true;
109
+ foundPrimaryKey = true;
108
110
  }
109
111
  else if (columnType instanceof TimestampType) {
110
112
  writer = new TimestampLongFieldWriter(columnName);
111
-
112
- hasPkWriter = true;
113
+ foundPrimaryKey = true;
113
114
  }
114
115
  else {
115
116
  throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
@@ -118,37 +119,7 @@ public class FieldWriterSet
118
119
  break;
119
120
 
120
121
  case SIMPLE_VALUE:
121
- if (columnType instanceof BooleanType) {
122
- writer = new BooleanFieldWriter(columnName);
123
- }
124
- else if (columnType instanceof LongType) {
125
- writer = new LongFieldWriter(columnName);
126
- }
127
- else if (columnType instanceof DoubleType) {
128
- writer = new DoubleFieldWriter(columnName);
129
- }
130
- else if (columnType instanceof StringType) {
131
- writer = new StringFieldWriter(columnName);
132
- }
133
- else if (columnType instanceof TimestampType) {
134
- switch (convertTimestamp) {
135
- case STRING:
136
- writer = new TimestampStringFieldWriter(timestampFormatters[i], columnName);
137
- break;
138
- case SEC:
139
- writer = new TimestampLongFieldWriter(columnName);
140
- break;
141
- default:
142
- // Thread of control doesn't come here but, just in case, it throws ConfigException.
143
- throw new ConfigException(String.format("Unknown option {} as convert_timestamp_type", convertTimestamp));
144
- }
145
- if (firstTimestampColumnIndex < 0) {
146
- firstTimestampColumnIndex = i;
147
- }
148
- }
149
- else {
150
- throw new ConfigException("Unsupported type: " + columnType);
151
- }
122
+ writer = newSimpleFieldWriter(columnName, columnType, convertTimestampType, timestampFormatters[i]);
152
123
  break;
153
124
 
154
125
  case DUPLICATE_PRIMARY_KEY:
@@ -164,24 +135,24 @@ public class FieldWriterSet
164
135
  fc += 1;
165
136
  }
166
137
 
138
+ if (foundPrimaryKey) {
139
+ // appropriate 'time' column is found
140
+
141
+ staticTimeValue = Optional.absent();
142
+ fieldCount = fc;
143
+ return;
144
+ }
145
+
167
146
  if (timeValueConfig.isPresent()) {
168
- // "time" column is written by RecordWriter
169
- fc += 1;
147
+ // 'time_value' option is specified
148
+
149
+ staticTimeValue = Optional.of(TimeValueGenerator.newGenerator(timeValueConfig.get()));
150
+ fieldCount = fc + 1;
151
+ return;
170
152
  }
171
- else if (!hasPkWriter) {
172
- // PRIMARY_KEY was not found.
173
- if (duplicatePrimaryKeySourceIndex < 0) {
174
- if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
175
- throw new ConfigException(String.format("time_column '%s' does not exist", userDefinedPrimaryKeySourceColumnName.get()));
176
- }
177
- else if (firstTimestampColumnIndex >= 0) {
178
- // if time is not found, use the first timestamp column
179
- duplicatePrimaryKeySourceIndex = firstTimestampColumnIndex;
180
- }
181
- else {
182
- throw new ConfigException(String.format("TD output plugin requires at least one timestamp column, or a long column named 'time'"));
183
- }
184
- }
153
+
154
+ if (!foundPrimaryKey && duplicatePrimaryKeySourceIndex >= 0) {
155
+ // 'time_column' option is correctly specified
185
156
 
186
157
  String columnName = schema.getColumnName(duplicatePrimaryKeySourceIndex);
187
158
  Type columnType = schema.getColumnType(duplicatePrimaryKeySourceIndex);
@@ -196,18 +167,7 @@ public class FieldWriterSet
196
167
  else if (columnType instanceof TimestampType) {
197
168
  log.info("Duplicating {}:{} column to 'time' column as seconds for the data partitioning",
198
169
  columnName, columnType);
199
- IFieldWriter fw;
200
- switch (convertTimestamp) {
201
- case STRING:
202
- fw = new TimestampStringFieldWriter(timestampFormatters[duplicatePrimaryKeySourceIndex], columnName);
203
- break;
204
- case SEC:
205
- fw = new TimestampLongFieldWriter(columnName);
206
- break;
207
- default:
208
- // Thread of control doesn't come here but, just in case, it throws ConfigException.
209
- throw new ConfigException(String.format("Unknown option {} as convert_timestamp_type", convertTimestamp));
210
- }
170
+ IFieldWriter fw = newSimpleTimestampFieldWriter(columnName, columnType, convertTimestampType, timestampFormatters[duplicatePrimaryKeySourceIndex]);
211
171
  writer = new TimestampFieldLongDuplicator(fw, "time");
212
172
  }
213
173
  else {
@@ -217,17 +177,28 @@ public class FieldWriterSet
217
177
 
218
178
  // replace existing writer
219
179
  fieldWriters[duplicatePrimaryKeySourceIndex] = writer;
220
- fc += 1;
180
+ staticTimeValue = Optional.absent();
181
+ fieldCount = fc + 1;
182
+ return;
221
183
  }
222
184
 
223
- if (timeValueConfig.isPresent()) {
224
- staticTimeValue = Optional.of(TimeValueGenerator.newGenerator(timeValueConfig.get()));
225
- }
226
- else {
227
- staticTimeValue = Optional.absent();
185
+ if (!foundPrimaryKey) {
186
+ // primary key is not found yet
187
+
188
+ if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
189
+ throw new ConfigException(String.format("A specified time_column '%s' does not exist", userDefinedPrimaryKeySourceColumnName.get()));
190
+ }
191
+
192
+ long uploadTime = System.currentTimeMillis() / 1000;
193
+ log.info("'time' column is generated and is set to a unix time {}", uploadTime);
194
+ TimeValueConfig newConfig = Exec.newConfigSource().set("mode", "fixed_time").set("value", uploadTime).loadConfig(TimeValueConfig.class);
195
+ task.setTimeValue(Optional.of(newConfig));
196
+ staticTimeValue = Optional.of(TimeValueGenerator.newGenerator(newConfig));
197
+ fieldCount = fc + 1;
198
+ return;
228
199
  }
229
200
 
230
- fieldCount = fc;
201
+ throw new AssertionError("Cannot select primary key");
231
202
  }
232
203
 
233
204
  private static String newColumnUniqueName(String originalName, Schema schema)
@@ -250,6 +221,43 @@ public class FieldWriterSet
250
221
  return false;
251
222
  }
252
223
 
224
+ private static FieldWriter newSimpleFieldWriter(String columnName, Type columnType, ConvertTimestampType convertTimestampType, TimestampFormatter timestampFormatter)
225
+ {
226
+ if (columnType instanceof BooleanType) {
227
+ return new BooleanFieldWriter(columnName);
228
+ }
229
+ else if (columnType instanceof LongType) {
230
+ return new LongFieldWriter(columnName);
231
+ }
232
+ else if (columnType instanceof DoubleType) {
233
+ return new DoubleFieldWriter(columnName);
234
+ }
235
+ else if (columnType instanceof StringType) {
236
+ return new StringFieldWriter(columnName);
237
+ }
238
+ else if (columnType instanceof TimestampType) {
239
+ return newSimpleTimestampFieldWriter(columnName, columnType, convertTimestampType, timestampFormatter);
240
+ }
241
+ else {
242
+ throw new ConfigException("Unsupported type: " + columnType);
243
+ }
244
+ }
245
+
246
+ private static FieldWriter newSimpleTimestampFieldWriter(String columnName, Type columnType, ConvertTimestampType convertTimestampType, TimestampFormatter timestampFormatter)
247
+ {
248
+ switch (convertTimestampType) {
249
+ case STRING:
250
+ return new TimestampStringFieldWriter(timestampFormatter, columnName);
251
+
252
+ case SEC:
253
+ return new TimestampLongFieldWriter(columnName);
254
+
255
+ default:
256
+ // Thread of control doesn't come here but, just in case, it throws ConfigException.
257
+ throw new ConfigException(String.format("Unknown option {} as convert_timestamp_type", convertTimestampType));
258
+ }
259
+ }
260
+
253
261
  @VisibleForTesting
254
262
  public IFieldWriter getFieldWriter(int index)
255
263
  {
@@ -48,17 +48,6 @@ public class TestFieldWriterSet
48
48
  }
49
49
  }
50
50
 
51
- { // if schema doesn't have time column and the user doesn't specify time_column option, it throws ConfigError.
52
- schema = schema("_c0", Types.STRING, "_c1", Types.STRING);
53
- try {
54
- new FieldWriterSet(log, pluginTask(config), schema);
55
- fail();
56
- }
57
- catch (Throwable t) {
58
- assertTrue(t instanceof ConfigException);
59
- }
60
- }
61
-
62
51
  { // if schema doesn't have a column specified as time_column column, it throws ConfigError
63
52
  schema = schema("_c0", Types.STRING, "_c1", Types.STRING);
64
53
  try {
@@ -205,6 +194,7 @@ public class TestFieldWriterSet
205
194
  Schema schema = schema("_c0", Types.TIMESTAMP, "_c1", Types.LONG);
206
195
  FieldWriterSet writers = new FieldWriterSet(log, pluginTask(config), schema);
207
196
 
208
- assertTrue(writers.getFieldWriter(0) instanceof TimestampFieldLongDuplicator);
197
+ assertTrue(writers.getFieldWriter(0) instanceof TimestampStringFieldWriter); // c0
198
+ assertTrue(writers.getFieldWriter(1) instanceof LongFieldWriter); // c1
209
199
  }
210
200
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-td
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Muga Nishizawa
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-10 00:00:00.000000000 Z
11
+ date: 2016-01-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -106,7 +106,7 @@ files:
106
106
  - src/test/java/org/embulk/output/td/TestTdOutputPlugin.java
107
107
  - src/test/java/org/embulk/output/td/TestTimeValueGenerator.java
108
108
  - src/test/java/org/embulk/output/td/writer/TestFieldWriterSet.java
109
- - classpath/embulk-output-td-0.1.8.jar
109
+ - classpath/embulk-output-td-0.2.0.jar
110
110
  - classpath/javassist-3.18.1-GA.jar
111
111
  - classpath/jetty-client-9.2.2.v20140723.jar
112
112
  - classpath/jetty-http-9.2.2.v20140723.jar