embulk-output-td 0.1.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7926b5de5fab7cc6f9343b39e0c5d35f8e2e160e
4
- data.tar.gz: 1f581633d6365c1043fef772f856c79e1b29397e
3
+ metadata.gz: 985479cb77ed1b156d896d1dbad073fdc6141c56
4
+ data.tar.gz: 4fde290d7d7e83a7eba0f687c22312e0ad589f8d
5
5
  SHA512:
6
- metadata.gz: 668eec6a9224c66c7bc0d750af02a8d8a577a584482fa44599722a0478d517ef09b80b214a6821eefee32ebd65ee0dc38d164fb0cd7b3dc13df208d85872aff8
7
- data.tar.gz: 94a707125cc4631a6cf71fe4af3a720370cf667374d7eb6e8772a7d478b38b1affea2d44244a91dcef9fb0c47ffb7d0ee67751ce392b622de40dd66e2a1a3d63
6
+ metadata.gz: 87aba00cd303cbe98772d3e645d2336fc15091f502829eaed3064def64ac4154bc4d39110d785f30fd046e34a183a4bc131953c5d1524732f72893ddea8c9f66
7
+ data.tar.gz: a3a2281e0bcd43ef99763ae8ab7e526f8d22b1f667bb480e5c16b52bf5bcbf6cca4caeedcf5fbf35b1bd801a1cc5c8896b55f65007d522c4f877373818bbfebd
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.0 - 2016-01-12
2
+
3
+ * [new feature] Do not use the first timestamp column as the primary key [#32](https://github.com/treasure-data/embulk-output-td/pull/32)
4
+
1
5
  ## 0.1.8 - 2016-01-09
2
6
 
3
7
  * [new feature] Add mode to time value option [#31](https://github.com/treasure-data/embulk-output-td/pull/31)
data/build.gradle CHANGED
@@ -16,7 +16,7 @@ configurations {
16
16
  provided
17
17
  }
18
18
 
19
- version = "0.1.8"
19
+ version = "0.2.0"
20
20
 
21
21
  compileJava.options.encoding = 'UTF-8' // source encoding
22
22
  sourceCompatibility = 1.7
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-output-td"
4
- spec.version = "0.1.8"
4
+ spec.version = "0.2.0"
5
5
  spec.authors = ["Muga Nishizawa"]
6
6
  spec.summary = %[TreasureData output plugin for Embulk]
7
7
  spec.description = %[TreasureData output plugin is an Embulk plugin that loads records to TreasureData read by any input plugins. Search the input plugins by 'embulk-output' keyword.]
@@ -109,6 +109,7 @@ public class TdOutputPlugin
109
109
  @Config("time_value")
110
110
  @ConfigDefault("null")
111
111
  public Optional<TimeValueConfig> getTimeValue(); // TODO allow timestamp format such as {from: "2015-01-01 00:00:00 UTC", to: "2015-01-02 00:00:00 UTC"} as well as unixtime integer
112
+ public void setTimeValue(Optional<TimeValueConfig> timeValue);
112
113
 
113
114
  @Config("unix_timestamp_unit")
114
115
  @ConfigDefault("\"sec\"")
@@ -5,9 +5,6 @@ import org.embulk.config.Config;
5
5
  import org.embulk.config.ConfigDefault;
6
6
  import org.embulk.config.Task;
7
7
 
8
- import javax.validation.constraints.Max;
9
- import javax.validation.constraints.Min;
10
-
11
8
  public interface TimeValueConfig
12
9
  extends Task
13
10
  {
@@ -17,19 +14,13 @@ public interface TimeValueConfig
17
14
 
18
15
  @Config("value")
19
16
  @ConfigDefault("null")
20
- @Min(0)
21
- @Max(253402300799L) // '9999-12-31 23:59:59 UTC'
22
17
  Optional<Long> getValue();
23
18
 
24
19
  @Config("from")
25
20
  @ConfigDefault("null")
26
- @Min(0)
27
- @Max(253402300799L) // '9999-12-31 23:59:59 UTC'
28
21
  Optional<Long> getFrom();
29
22
 
30
23
  @Config("to")
31
24
  @ConfigDefault("null")
32
- @Min(0)
33
- @Max(253402300799L) // '9999-12-31 23:59:59 UTC'
34
25
  Optional<Long> getTo();
35
26
  }
@@ -10,54 +10,73 @@ public abstract class TimeValueGenerator
10
10
  public static TimeValueGenerator newGenerator(final TimeValueConfig config)
11
11
  {
12
12
  switch (config.getMode()) {
13
- case "incremental_time": { // default mode
13
+ case "incremental_time": // default mode
14
14
  require(config.getFrom(), "'from', 'to'");
15
+ validateTimeRange(config.getFrom().get(), "'from'");
15
16
  require(config.getTo(), "'to'");
17
+ validateTimeRange(config.getTo().get(), "'to'");
16
18
  reject(config.getValue(), "'value'");
17
19
 
18
- return new TimeValueGenerator()
19
- {
20
- private final long from = config.getFrom().get();
21
- private final long to = config.getTo().get();
22
-
23
- private long current = from;
24
-
25
- @Override
26
- public long next()
27
- {
28
- try {
29
- return current++;
30
- }
31
- finally {
32
- if (current > to) {
33
- current = from;
34
- }
35
- }
36
- }
37
- };
38
- }
39
- case "fixed_time": {
20
+ return new IncrementalTimeValueGenerator(config);
21
+
22
+ case "fixed_time":
40
23
  require(config.getValue(), "'value'");
24
+ validateTimeRange(config.getValue().get(), "'value'");
41
25
  reject(config.getFrom(), "'from'");
42
26
  reject(config.getTo(), "'to'");
43
27
 
44
- return new TimeValueGenerator()
45
- {
46
- private final long fixed = config.getValue().get();
28
+ return new FixedTimeValueGenerator(config);
47
29
 
48
- @Override
49
- public long next()
50
- {
51
- return fixed;
52
- }
53
- };
54
- }
55
- default: {
30
+ default:
56
31
  throw new ConfigException(String.format("Unknwon mode '%s'. Supported methods are incremental_time, fixed_time.", config.getMode()));
32
+ }
33
+ }
34
+
35
+ public static class IncrementalTimeValueGenerator
36
+ extends TimeValueGenerator
37
+ {
38
+ private final long from;
39
+ private final long to;
40
+
41
+ private long current;
42
+
43
+ public IncrementalTimeValueGenerator(final TimeValueConfig config)
44
+ {
45
+ current = from = config.getFrom().get();
46
+ to = config.getTo().get();
47
+ }
48
+
49
+ @Override
50
+ public long next()
51
+ {
52
+ try {
53
+ return current++;
54
+ }
55
+ finally {
56
+ if (current > to) {
57
+ current = from;
58
+ }
57
59
  }
58
60
  }
59
61
  }
60
62
 
63
+ public static class FixedTimeValueGenerator
64
+ extends TimeValueGenerator
65
+ {
66
+ private final long value;
67
+
68
+ public FixedTimeValueGenerator(final TimeValueConfig config)
69
+ {
70
+ value = config.getValue().get();
71
+ }
72
+
73
+ @Override
74
+ public long next()
75
+ {
76
+ return value;
77
+ }
78
+ }
79
+
61
80
  // ported from embulk-input-s3
62
81
  private static <T> T require(Optional<T> value, String message)
63
82
  {
@@ -69,6 +88,13 @@ public abstract class TimeValueGenerator
69
88
  }
70
89
  }
71
90
 
91
+ private static void validateTimeRange(long value, String message)
92
+ {
93
+ if (value < 0 || 253402300799L < value) { // should be [1970-01-01 00:00:00, 9999-12-31 23:59:59]
94
+ throw new ConfigException("The option value must be within [0, 253402300799L]: " + message);
95
+ }
96
+ }
97
+
72
98
  // ported from embulk-input-s3
73
99
  private static <T> void reject(Optional<T> value, String message)
74
100
  {
@@ -6,11 +6,14 @@ import com.google.common.annotations.VisibleForTesting;
6
6
  import com.google.common.base.Optional;
7
7
  import com.google.common.base.Throwables;
8
8
  import org.embulk.config.ConfigException;
9
+ import org.embulk.config.ConfigSource;
9
10
  import org.embulk.output.td.TdOutputPlugin;
11
+ import org.embulk.output.td.TdOutputPlugin.ConvertTimestampType;
10
12
  import org.embulk.output.td.TimeValueConfig;
11
13
  import org.embulk.output.td.TimeValueGenerator;
12
14
  import org.embulk.spi.Column;
13
15
  import org.embulk.spi.ColumnVisitor;
16
+ import org.embulk.spi.Exec;
14
17
  import org.embulk.spi.PageReader;
15
18
  import org.embulk.spi.Schema;
16
19
  import org.embulk.spi.time.TimestampFormatter;
@@ -40,15 +43,14 @@ public class FieldWriterSet
40
43
  public FieldWriterSet(Logger log, TdOutputPlugin.PluginTask task, Schema schema)
41
44
  {
42
45
  Optional<String> userDefinedPrimaryKeySourceColumnName = task.getTimeColumn();
43
- TdOutputPlugin.ConvertTimestampType convertTimestamp = task.getConvertTimestampType();
46
+ ConvertTimestampType convertTimestampType = task.getConvertTimestampType();
44
47
  Optional<TimeValueConfig> timeValueConfig = task.getTimeValue();
45
48
  if (timeValueConfig.isPresent() && userDefinedPrimaryKeySourceColumnName.isPresent()) {
46
49
  throw new ConfigException("Setting both time_column and time_value is invalid");
47
50
  }
48
51
 
49
- boolean hasPkWriter = false;
52
+ boolean foundPrimaryKey = false;
50
53
  int duplicatePrimaryKeySourceIndex = -1;
51
- int firstTimestampColumnIndex = -1;
52
54
 
53
55
  int fc = 0;
54
56
  fieldWriters = new IFieldWriter[schema.size()];
@@ -104,12 +106,11 @@ public class FieldWriterSet
104
106
  log.warn("time column is converted from {} to seconds", task.getUnixTimestampUnit());
105
107
  }
106
108
  writer = new UnixTimestampLongFieldWriter(columnName, task.getUnixTimestampUnit().getFractionUnit());
107
- hasPkWriter = true;
109
+ foundPrimaryKey = true;
108
110
  }
109
111
  else if (columnType instanceof TimestampType) {
110
112
  writer = new TimestampLongFieldWriter(columnName);
111
-
112
- hasPkWriter = true;
113
+ foundPrimaryKey = true;
113
114
  }
114
115
  else {
115
116
  throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
@@ -118,37 +119,7 @@ public class FieldWriterSet
118
119
  break;
119
120
 
120
121
  case SIMPLE_VALUE:
121
- if (columnType instanceof BooleanType) {
122
- writer = new BooleanFieldWriter(columnName);
123
- }
124
- else if (columnType instanceof LongType) {
125
- writer = new LongFieldWriter(columnName);
126
- }
127
- else if (columnType instanceof DoubleType) {
128
- writer = new DoubleFieldWriter(columnName);
129
- }
130
- else if (columnType instanceof StringType) {
131
- writer = new StringFieldWriter(columnName);
132
- }
133
- else if (columnType instanceof TimestampType) {
134
- switch (convertTimestamp) {
135
- case STRING:
136
- writer = new TimestampStringFieldWriter(timestampFormatters[i], columnName);
137
- break;
138
- case SEC:
139
- writer = new TimestampLongFieldWriter(columnName);
140
- break;
141
- default:
142
- // Thread of control doesn't come here but, just in case, it throws ConfigException.
143
- throw new ConfigException(String.format("Unknown option {} as convert_timestamp_type", convertTimestamp));
144
- }
145
- if (firstTimestampColumnIndex < 0) {
146
- firstTimestampColumnIndex = i;
147
- }
148
- }
149
- else {
150
- throw new ConfigException("Unsupported type: " + columnType);
151
- }
122
+ writer = newSimpleFieldWriter(columnName, columnType, convertTimestampType, timestampFormatters[i]);
152
123
  break;
153
124
 
154
125
  case DUPLICATE_PRIMARY_KEY:
@@ -164,24 +135,24 @@ public class FieldWriterSet
164
135
  fc += 1;
165
136
  }
166
137
 
138
+ if (foundPrimaryKey) {
139
+ // appropriate 'time' column is found
140
+
141
+ staticTimeValue = Optional.absent();
142
+ fieldCount = fc;
143
+ return;
144
+ }
145
+
167
146
  if (timeValueConfig.isPresent()) {
168
- // "time" column is written by RecordWriter
169
- fc += 1;
147
+ // 'time_value' option is specified
148
+
149
+ staticTimeValue = Optional.of(TimeValueGenerator.newGenerator(timeValueConfig.get()));
150
+ fieldCount = fc + 1;
151
+ return;
170
152
  }
171
- else if (!hasPkWriter) {
172
- // PRIMARY_KEY was not found.
173
- if (duplicatePrimaryKeySourceIndex < 0) {
174
- if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
175
- throw new ConfigException(String.format("time_column '%s' does not exist", userDefinedPrimaryKeySourceColumnName.get()));
176
- }
177
- else if (firstTimestampColumnIndex >= 0) {
178
- // if time is not found, use the first timestamp column
179
- duplicatePrimaryKeySourceIndex = firstTimestampColumnIndex;
180
- }
181
- else {
182
- throw new ConfigException(String.format("TD output plugin requires at least one timestamp column, or a long column named 'time'"));
183
- }
184
- }
153
+
154
+ if (!foundPrimaryKey && duplicatePrimaryKeySourceIndex >= 0) {
155
+ // 'time_column' option is correctly specified
185
156
 
186
157
  String columnName = schema.getColumnName(duplicatePrimaryKeySourceIndex);
187
158
  Type columnType = schema.getColumnType(duplicatePrimaryKeySourceIndex);
@@ -196,18 +167,7 @@ public class FieldWriterSet
196
167
  else if (columnType instanceof TimestampType) {
197
168
  log.info("Duplicating {}:{} column to 'time' column as seconds for the data partitioning",
198
169
  columnName, columnType);
199
- IFieldWriter fw;
200
- switch (convertTimestamp) {
201
- case STRING:
202
- fw = new TimestampStringFieldWriter(timestampFormatters[duplicatePrimaryKeySourceIndex], columnName);
203
- break;
204
- case SEC:
205
- fw = new TimestampLongFieldWriter(columnName);
206
- break;
207
- default:
208
- // Thread of control doesn't come here but, just in case, it throws ConfigException.
209
- throw new ConfigException(String.format("Unknown option {} as convert_timestamp_type", convertTimestamp));
210
- }
170
+ IFieldWriter fw = newSimpleTimestampFieldWriter(columnName, columnType, convertTimestampType, timestampFormatters[duplicatePrimaryKeySourceIndex]);
211
171
  writer = new TimestampFieldLongDuplicator(fw, "time");
212
172
  }
213
173
  else {
@@ -217,17 +177,28 @@ public class FieldWriterSet
217
177
 
218
178
  // replace existing writer
219
179
  fieldWriters[duplicatePrimaryKeySourceIndex] = writer;
220
- fc += 1;
180
+ staticTimeValue = Optional.absent();
181
+ fieldCount = fc + 1;
182
+ return;
221
183
  }
222
184
 
223
- if (timeValueConfig.isPresent()) {
224
- staticTimeValue = Optional.of(TimeValueGenerator.newGenerator(timeValueConfig.get()));
225
- }
226
- else {
227
- staticTimeValue = Optional.absent();
185
+ if (!foundPrimaryKey) {
186
+ // primary key is not found yet
187
+
188
+ if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
189
+ throw new ConfigException(String.format("A specified time_column '%s' does not exist", userDefinedPrimaryKeySourceColumnName.get()));
190
+ }
191
+
192
+ long uploadTime = System.currentTimeMillis() / 1000;
193
+ log.info("'time' column is generated and is set to a unix time {}", uploadTime);
194
+ TimeValueConfig newConfig = Exec.newConfigSource().set("mode", "fixed_time").set("value", uploadTime).loadConfig(TimeValueConfig.class);
195
+ task.setTimeValue(Optional.of(newConfig));
196
+ staticTimeValue = Optional.of(TimeValueGenerator.newGenerator(newConfig));
197
+ fieldCount = fc + 1;
198
+ return;
228
199
  }
229
200
 
230
- fieldCount = fc;
201
+ throw new AssertionError("Cannot select primary key");
231
202
  }
232
203
 
233
204
  private static String newColumnUniqueName(String originalName, Schema schema)
@@ -250,6 +221,43 @@ public class FieldWriterSet
250
221
  return false;
251
222
  }
252
223
 
224
+ private static FieldWriter newSimpleFieldWriter(String columnName, Type columnType, ConvertTimestampType convertTimestampType, TimestampFormatter timestampFormatter)
225
+ {
226
+ if (columnType instanceof BooleanType) {
227
+ return new BooleanFieldWriter(columnName);
228
+ }
229
+ else if (columnType instanceof LongType) {
230
+ return new LongFieldWriter(columnName);
231
+ }
232
+ else if (columnType instanceof DoubleType) {
233
+ return new DoubleFieldWriter(columnName);
234
+ }
235
+ else if (columnType instanceof StringType) {
236
+ return new StringFieldWriter(columnName);
237
+ }
238
+ else if (columnType instanceof TimestampType) {
239
+ return newSimpleTimestampFieldWriter(columnName, columnType, convertTimestampType, timestampFormatter);
240
+ }
241
+ else {
242
+ throw new ConfigException("Unsupported type: " + columnType);
243
+ }
244
+ }
245
+
246
+ private static FieldWriter newSimpleTimestampFieldWriter(String columnName, Type columnType, ConvertTimestampType convertTimestampType, TimestampFormatter timestampFormatter)
247
+ {
248
+ switch (convertTimestampType) {
249
+ case STRING:
250
+ return new TimestampStringFieldWriter(timestampFormatter, columnName);
251
+
252
+ case SEC:
253
+ return new TimestampLongFieldWriter(columnName);
254
+
255
+ default:
256
+ // Thread of control doesn't come here but, just in case, it throws ConfigException.
257
+ throw new ConfigException(String.format("Unknown option {} as convert_timestamp_type", convertTimestampType));
258
+ }
259
+ }
260
+
253
261
  @VisibleForTesting
254
262
  public IFieldWriter getFieldWriter(int index)
255
263
  {
@@ -48,17 +48,6 @@ public class TestFieldWriterSet
48
48
  }
49
49
  }
50
50
 
51
- { // if schema doesn't have time column and the user doesn't specify time_column option, it throws ConfigError.
52
- schema = schema("_c0", Types.STRING, "_c1", Types.STRING);
53
- try {
54
- new FieldWriterSet(log, pluginTask(config), schema);
55
- fail();
56
- }
57
- catch (Throwable t) {
58
- assertTrue(t instanceof ConfigException);
59
- }
60
- }
61
-
62
51
  { // if the schema doesn't have the column specified by time_column, it throws ConfigException
63
52
  schema = schema("_c0", Types.STRING, "_c1", Types.STRING);
64
53
  try {
@@ -205,6 +194,7 @@ public class TestFieldWriterSet
205
194
  Schema schema = schema("_c0", Types.TIMESTAMP, "_c1", Types.LONG);
206
195
  FieldWriterSet writers = new FieldWriterSet(log, pluginTask(config), schema);
207
196
 
208
- assertTrue(writers.getFieldWriter(0) instanceof TimestampFieldLongDuplicator);
197
+ assertTrue(writers.getFieldWriter(0) instanceof TimestampStringFieldWriter); // c0
198
+ assertTrue(writers.getFieldWriter(1) instanceof LongFieldWriter); // c1
209
199
  }
210
200
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-td
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Muga Nishizawa
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-10 00:00:00.000000000 Z
11
+ date: 2016-01-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -106,7 +106,7 @@ files:
106
106
  - src/test/java/org/embulk/output/td/TestTdOutputPlugin.java
107
107
  - src/test/java/org/embulk/output/td/TestTimeValueGenerator.java
108
108
  - src/test/java/org/embulk/output/td/writer/TestFieldWriterSet.java
109
- - classpath/embulk-output-td-0.1.8.jar
109
+ - classpath/embulk-output-td-0.2.0.jar
110
110
  - classpath/javassist-3.18.1-GA.jar
111
111
  - classpath/jetty-client-9.2.2.v20140723.jar
112
112
  - classpath/jetty-http-9.2.2.v20140723.jar