embulk-output-td 0.1.2 → 0.1.3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 4fee7360589ff074152102355e21cd14a4fd6f15
- data.tar.gz: f839023cacdc9b64382aa5d7b3fcdce51c997a86
+ metadata.gz: 5610036db652164098fcc3f828b8fb3f95542329
+ data.tar.gz: a297a28131574b748d20c49097774b6bdb607552
  SHA512:
- metadata.gz: e841eb70257ada5e8c012595968536236f606aebe110fe2b32edf203607d569c5436d001504d0c5d98da490b58580893ea8b0fe58314fc8ae51c984cc0c7f0b8
- data.tar.gz: 52af81b3e7c4d27ec844b5f74abe02a5dc2df4339945bd2da37580f5a184c5417ac1d2306fb22b62d164273df90b12e637e368eed2564d30d676443e2eea7b69
+ metadata.gz: 18bd27aedf3f1585c2c1fbbdc881f2e50157bfe61eb5d820363dd7896dc9c63f2c7fc2abae1ff1ef57c3b338b4672ee86bb1b57c097f073f0a1489a67800b3c4
+ data.tar.gz: 2d25527c5fb487970c90386c343c3dab2e4a0ad630698c9c772af247949f5affc5cae09d7edcd6ecdd43e21dc92d5c7a99055c4e9d5fc42d86b5643808867c0b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
+ ## 0.1.4 - 2015-XX-XX
+
+ ## 0.1.3 - 2015-08-05
+
+ * [maintenance] Upgrade Embulk v0.6.19
+ * [new feature] Add column_options [#11](https://github.com/treasure-data/embulk-output-td/pull/11)
+
  ## 0.1.2 - 2015-07-14
 
  ## 0.1.1 - 2015-07-14
data/README.md CHANGED
@@ -23,6 +23,11 @@ TODO: Write short description here
  - **tmpdir**: temporal directory
  - **upload_concurrency**: upload concurrency (int, default=2). max concurrency is 8.
  - **file_split_size**: split size (long, default=16384 (16MB)).
+ - **default_timezone**: default timezone (string, default='UTC')
+ - **default_timestamp_format**: default timestamp format (string, default=`%Y-%m-%d %H:%M:%S.%3N`)
+ - **column_options**: advanced: key-value pairs where the key is a column name and the value is options for that column.
+   - **timezone**: If the input column type (embulk type) is timestamp, this plugin needs to format the timestamp value into a SQL string. In this case, this timezone option is used to control the timezone. (string, value of default_timezone option is used by default)
+   - **format**: If the input column type (embulk type) is timestamp, this plugin needs to format the timestamp value into a string. This format option is used to control the format of the timestamp. (string, value of default_timestamp_format option is used by default)
 
  ## Example
  Here is sample configuration for TD output plugin.
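The hunk above documents the new options but cuts off before the README's own example block. A minimal sketch of a configuration that exercises them, assuming the plugin's usual `apikey`/`database`/`table`/`time_column` settings (all values and column names below are placeholders, not taken from this gem):

```yaml
out:
  type: td
  apikey: <your TD API key>            # placeholder
  database: my_db                      # placeholder database name
  table: my_table                      # placeholder table name
  time_column: created_at              # placeholder timestamp column
  default_timezone: 'UTC'
  default_timestamp_format: '%Y-%m-%d %H:%M:%S.%3N'
  column_options:
    # per-column overrides; keys must be column names in the input schema
    updated_at: {timezone: 'Asia/Tokyo', format: '%Y-%m-%d %H:%M:%S.%3N'}
```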
data/build.gradle CHANGED
@@ -13,19 +13,22 @@ configurations {
  provided
  }
 
- version = "0.1.2"
+ version = "0.1.3"
 
  compileJava.options.encoding = 'UTF-8' // source encoding
  sourceCompatibility = 1.7
  targetCompatibility = 1.7
 
  dependencies {
- compile "org.embulk:embulk-core:0.6.10"
- provided "org.embulk:embulk-core:0.6.10"
- compile "org.eclipse.jetty:jetty-client:9.2.2.v20140723"
- compile "org.msgpack:msgpack:0.6.11"
+ compile "org.embulk:embulk-core:0.6.19"
+ provided "org.embulk:embulk-core:0.6.19"
+ compile "org.embulk:embulk-standards:0.6.19"
+ provided "org.embulk:embulk-standards:0.6.19"
+ compile "org.eclipse.jetty:jetty-client:9.2.2.v20140723"
+ compile "org.msgpack:msgpack:0.6.11"
 
  testCompile "junit:junit:4.+"
+ testCompile "org.bigtesting:fixd:1.0.0"
  }
 
  task classpath(type: Copy, dependsOn: ["jar"]) {
@@ -1,7 +1,7 @@
 
  Gem::Specification.new do |spec|
  spec.name = "embulk-output-td"
- spec.version = "0.1.2"
+ spec.version = "0.1.3"
  spec.authors = ["Muga Nishizawa"]
  spec.summary = %[TreasureData output plugin for Embulk]
  spec.description = %[TreasureData output plugin is an Embulk plugin that loads records to TreasureData read by any input plugins. Search the input plugins by 'embulk-output' keyword.]
@@ -1,3 +1,3 @@
  Embulk::JavaPlugin.register_output(
- "td", "org.embulk.output.TdOutputPlugin",
+ "td", "org.embulk.output.td.TdOutputPlugin",
  File.expand_path('../../../../classpath', __FILE__))
@@ -1,4 +1,4 @@
- package org.embulk.output;
+ package org.embulk.output.td;
 
  import java.io.Closeable;
  import java.io.IOException;
@@ -1,4 +1,4 @@
- package org.embulk.output;
+ package org.embulk.output.td;
 
  import org.embulk.spi.Exec;
  import org.msgpack.MessagePack;
@@ -1,4 +1,4 @@
- package org.embulk.output;
+ package org.embulk.output.td;
 
  import com.google.common.base.Optional;
  import com.google.common.base.Stopwatch;
@@ -6,7 +6,6 @@ import com.google.common.base.Throwables;
  import com.treasuredata.api.TdApiClient;
  import org.embulk.config.CommitReport;
  import org.embulk.config.ConfigException;
- import org.embulk.output.TdOutputPlugin.PluginTask;
  import org.embulk.spi.Column;
  import org.embulk.spi.ColumnVisitor;
  import org.embulk.spi.Exec;
@@ -21,6 +20,7 @@ import org.embulk.spi.type.LongType;
  import org.embulk.spi.type.StringType;
  import org.embulk.spi.type.TimestampType;
  import org.embulk.spi.type.Type;
+ import org.embulk.spi.util.Timestamps;
  import org.joda.time.DateTimeZone;
  import org.jruby.embed.ScriptingContainer;
  import org.msgpack.MessagePack;
@@ -35,7 +35,6 @@ import java.util.concurrent.Callable;
  import java.util.concurrent.TimeUnit;
 
  import static com.google.common.base.Preconditions.checkNotNull;
- import org.embulk.output.TdOutputPlugin.UnixTimestampUnit;
 
  public class RecordWriter
  implements TransactionalPageOutput
@@ -57,7 +56,7 @@ public class RecordWriter
  private final int uploadConcurrency;
  private final long fileSplitSize; // unit: kb
 
- public RecordWriter(PluginTask task, int taskIndex, TdApiClient client, FieldWriterSet fieldWriters)
+ public RecordWriter(TdOutputPlugin.PluginTask task, int taskIndex, TdApiClient client, FieldWriterSet fieldWriters)
  {
  this.log = Exec.getLogger(getClass());
  this.client = checkNotNull(client);
@@ -72,7 +71,7 @@ public class RecordWriter
  this.fileSplitSize = task.getFileSplitSize() * 1024;
  }
 
- public static void validateSchema(Logger log, PluginTask task, Schema schema)
+ public static void validateSchema(Logger log, TdOutputPlugin.PluginTask task, Schema schema)
  {
  new FieldWriterSet(log, task, schema);
  }
@@ -263,7 +262,7 @@ public class RecordWriter
  private final int fieldCount;
  private final FieldWriter[] fieldWriters;
 
- public FieldWriterSet(Logger log, PluginTask task, Schema schema)
+ public FieldWriterSet(Logger log, TdOutputPlugin.PluginTask task, Schema schema)
  {
  Optional<String> userDefinedPrimaryKeySourceColumnName = task.getTimeColumn();
  boolean hasPkWriter = false;
@@ -272,6 +271,7 @@ public class RecordWriter
 
  int fc = 0;
  fieldWriters = new FieldWriter[schema.size()];
+ TimestampFormatter[] timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
 
  for (int i = 0; i < schema.size(); i++) {
  String columnName = schema.getColumnName(i);
@@ -311,13 +311,13 @@ public class RecordWriter
  case PRIMARY_KEY:
  log.info("Using {}:{} column as the data partitioning key", columnName, columnType);
  if (columnType instanceof LongType) {
- if (task.getUnixTimestampUnit() != UnixTimestampUnit.SEC) {
+ if (task.getUnixTimestampUnit() != TdOutputPlugin.UnixTimestampUnit.SEC) {
  log.warn("time column is converted from {} to seconds", task.getUnixTimestampUnit());
  }
  writer = new UnixTimestampLongFieldWriter(columnName, task.getUnixTimestampUnit().getFractionUnit());
  hasPkWriter = true;
  } else if (columnType instanceof TimestampType) {
- writer = new TimestampStringFieldWriter(task.getJRuby(), columnName);
+ writer = new TimestampStringFieldWriter(timestampFormatters[i], columnName);
  hasPkWriter = true;
  } else {
  throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
@@ -335,7 +335,7 @@ public class RecordWriter
  } else if (columnType instanceof StringType) {
  writer = new StringFieldWriter(columnName);
  } else if (columnType instanceof TimestampType) {
- writer = new TimestampStringFieldWriter(task.getJRuby(), columnName);
+ writer = new TimestampStringFieldWriter(timestampFormatters[i], columnName);
  if (firstTimestampColumnIndex < 0) {
  firstTimestampColumnIndex = i;
  }
@@ -381,7 +381,7 @@ public class RecordWriter
  } else if (columnType instanceof TimestampType) {
  log.info("Duplicating {}:{} column to 'time' column as seconds for the data partitioning",
  columnName, columnType);
- writer = new TimestampFieldLongDuplicator(task.getJRuby(), columnName, "time");
+ writer = new TimestampFieldLongDuplicator(timestampFormatters[duplicatePrimaryKeySourceIndex], columnName, "time");
  } else {
  throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
  columnName, columnType));
@@ -541,20 +541,19 @@ public class RecordWriter
  static class TimestampStringFieldWriter
  extends FieldWriter
  {
- // to format timestamp values to string by "%Y-%m-%d %H:%M:%S.%3N"
- private final TimestampFormatter defaultFormatter;
+ private final TimestampFormatter formatter;
 
- public TimestampStringFieldWriter(ScriptingContainer jruby, String keyName)
+ public TimestampStringFieldWriter(TimestampFormatter formatter, String keyName)
  {
  super(keyName);
- this.defaultFormatter = new TimestampFormatter(jruby, "%Y-%m-%d %H:%M:%S.%3N", DateTimeZone.UTC);
+ this.formatter = formatter;
  }
 
  @Override
  public void writeValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
  throws IOException
  {
- builder.writeString(defaultFormatter.format(reader.getTimestamp(column)));
+ builder.writeString(formatter.format(reader.getTimestamp(column)));
  }
  }
 
@@ -599,9 +598,9 @@ public class RecordWriter
  {
  private final TimestampLongFieldWriter timeFieldWriter;
 
- public TimestampFieldLongDuplicator(ScriptingContainer jruby, String keyName, String longDuplicateKeyName)
+ public TimestampFieldLongDuplicator(TimestampFormatter formatter, String keyName, String longDuplicateKeyName)
  {
- super(jruby, keyName);
+ super(formatter, keyName);
  timeFieldWriter = new TimestampLongFieldWriter(longDuplicateKeyName);
  }
 
@@ -1,7 +1,8 @@
- package org.embulk.output;
+ package org.embulk.output.td;
 
  import java.io.IOException;
  import java.util.List;
+ import java.util.Map;
  import javax.validation.constraints.Min;
  import javax.validation.constraints.Max;
 
@@ -14,36 +15,34 @@ import com.treasuredata.api.TdApiClientConfig;
  import com.treasuredata.api.TdApiClientConfig.HttpProxyConfig;
  import com.treasuredata.api.TdApiConflictException;
  import com.treasuredata.api.TdApiNotFoundException;
- import com.treasuredata.api.TdApiException;
  import com.treasuredata.api.model.TDBulkImportSession;
  import com.treasuredata.api.model.TDBulkImportSession.ImportStatus;
- import com.treasuredata.api.model.TDDatabase;
  import com.treasuredata.api.model.TDTable;
  import org.embulk.config.CommitReport;
  import org.embulk.config.Config;
  import org.embulk.config.ConfigDefault;
  import org.embulk.config.ConfigDiff;
- import org.embulk.config.ConfigInject;
  import org.embulk.config.ConfigSource;
  import org.embulk.config.ConfigException;
  import org.embulk.config.Task;
  import org.embulk.config.TaskSource;
- import org.embulk.output.RecordWriter.FieldWriterSet;
+ import org.embulk.output.td.RecordWriter.FieldWriterSet;
  import org.embulk.spi.Exec;
  import org.embulk.spi.ExecSession;
  import org.embulk.spi.OutputPlugin;
  import org.embulk.spi.Schema;
  import org.embulk.spi.TransactionalPageOutput;
  import org.embulk.spi.time.Timestamp;
+ import org.embulk.spi.time.TimestampFormatter;
+ import org.joda.time.DateTimeZone;
  import org.joda.time.format.DateTimeFormat;
- import org.jruby.embed.ScriptingContainer;
  import org.slf4j.Logger;
 
  public class TdOutputPlugin
  implements OutputPlugin
  {
  public interface PluginTask
- extends Task
+ extends Task, TimestampFormatter.Task
  {
  @Config("apikey")
  public String getApiKey();
@@ -62,6 +61,8 @@ public class TdOutputPlugin
 
  // TODO connect_timeout, read_timeout, send_timeout
 
+ // TODO mode[append, replace]
+
  @Config("auto_create_table")
  @ConfigDefault("true")
  public boolean getAutoCreateTable();
@@ -98,8 +99,25 @@ public class TdOutputPlugin
  @ConfigDefault("16384") // default 16MB (unit: kb)
  public long getFileSplitSize();
 
- @ConfigInject
- public ScriptingContainer getJRuby();
+ @Override
+ @Config("default_timestamp_format")
+ // SQL timestamp with milliseconds is, by default, used because Hive and Presto use
+ // that format. As timestamp type, Presto
+ // * cannot parse SQL timestamp with timezone like '2015-02-03 04:05:06.789 UTC'
+ // * cannot parse SQL timestamp with nanoseconds like '2015-02-03 04:05:06.789012345'
+ // * cannot parse SQL timestamp with microseconds like '2015-02-03 04:05:06.789012'
+ // * can parse SQL timestamp with milliseconds like '2015-02-03 04:05:06.789'
+ // On the other hand, Hive
+ // * cannot parse SQL timestamp with timezone like '2015-02-03 04:05:06.789 UTC'
+ // * can parse SQL timestamp with nanoseconds like '2015-02-03 04:05:06.789012345'
+ // * can parse SQL timestamp with microseconds like '2015-02-03 04:05:06.789012'
+ // * can parse SQL timestamp with milliseconds like '2015-02-03 04:05:06.789'
+ @ConfigDefault("\"%Y-%m-%d %H:%M:%S.%3N\"")
+ public String getDefaultTimestampFormat();
+
+ @Config("column_options")
+ @ConfigDefault("{}")
+ public Map<String, TimestampColumnOption> getColumnOptions();
 
  public boolean getDoUpload();
  public void setDoUpload(boolean doUpload);
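The comment block above is effectively a compatibility matrix: millisecond precision (`%3N`) is the only sub-second format both Presto and Hive can parse, which is why it becomes the shipped default. Where a timestamp column is only ever read from Hive, the per-column `format` override can raise the precision without touching the default. A minimal sketch, assuming a hypothetical column named `event_time` (other options omitted):

```yaml
out:
  type: td
  # apikey, database, table, etc. omitted
  default_timestamp_format: '%Y-%m-%d %H:%M:%S.%3N'   # Presto-safe milliseconds (the default)
  column_options:
    event_time: {format: '%Y-%m-%d %H:%M:%S.%6N'}     # microseconds: parseable by Hive, not by Presto
```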
@@ -108,6 +126,10 @@ public class TdOutputPlugin
  public void setSessionName(String session);
  }
 
+ public interface TimestampColumnOption
+ extends Task, TimestampFormatter.TimestampColumnOption
+ {}
+
  public interface HttpProxyTask
  extends Task
  {
@@ -175,6 +197,13 @@ public class TdOutputPlugin
  {
  final PluginTask task = config.loadConfig(PluginTask.class);
 
+ // TODO mode check
+
+ // check column_options is valid or not
+ for (String columnName : task.getColumnOptions().keySet()) {
+ schema.lookupColumn(columnName); // throws SchemaConfigException
+ }
+
  // generate session name
  task.setSessionName(buildBulkImportSessionName(task, Exec.session()));
 
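A practical consequence of the `lookupColumn` check above: a `column_options` key that does not match any input column fails the transaction up front with a `SchemaConfigException`, before any upload work starts. A hypothetical misconfiguration (assume the input schema contains `created_at` but not the misspelled `craeted_at`):

```yaml
out:
  type: td
  # apikey, database, table, etc. omitted
  column_options:
    craeted_at: {timezone: 'Asia/Tokyo'}   # typo -> SchemaConfigException at transaction time
```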
@@ -0,0 +1,79 @@
+ package com.treasuredata.api;
+
+ import com.treasuredata.api.model.TDDatabase;
+ import org.bigtesting.fixd.ServerFixture;
+ import org.bigtesting.fixd.core.Method;
+ import org.junit.After;
+ import org.junit.Before;
+ import org.junit.Test;
+
+ import java.util.List;
+
+ import static org.junit.Assert.assertEquals;
+ import static org.junit.Assert.fail;
+
+ public class TestTdApiClient
+ {
+ private ServerFixture server;
+ private TdApiClient client;
+ private TdApiClientConfig clientConfig;
+ private String apikey = "apikey";
+
+ @Before
+ public void startServer()
+ throws Exception
+ {
+ server = new ServerFixture(9490);
+ server.start();
+ }
+
+ @After
+ public void stopServer()
+ throws Exception
+ {
+ server.stop();
+ }
+
+ @Before
+ public void startTdApiClient()
+ throws Exception
+ {
+ clientConfig = new TdApiClientConfig("localhost:9490", false);
+ client = new TdApiClient(apikey, clientConfig);
+ client.start();
+ }
+
+ @After
+ public void stopTdApiClient()
+ throws Exception
+ {
+ client.close();
+ }
+
+ private static final String DATABASE_LIST_JSON =
+ "{" +
+ "\"databases\":[" +
+ "{\"name\":\"test1\"}," +
+ "{\"name\":\"test2\"}" +
+ "]" +
+ "}";
+
+ @Test
+ public void getDatabases() throws Exception
+ {
+ server.handle(Method.GET, "/v3/database/list").with(200, "text/json", DATABASE_LIST_JSON);
+ List<TDDatabase> dbs = client.getDatabases();
+ assertEquals(2, dbs.size());
+ assertEquals("test1", dbs.get(0).getName());
+ assertEquals("test2", dbs.get(1).getName());
+ }
+
+ @Test(expected = TdApiNotFoundException.class)
+ public void notFoundDatabases()
+ throws Exception
+ {
+ server.handle(Method.GET, "/v3/database/list").with(404, "text/json", "{\"message\":\"not found\"}");
+ client.getDatabases();
+ fail();
+ }
+ }
@@ -1,4 +1,4 @@
- package org.embulk.output;
+ package org.embulk.output.td;
 
  public class TestTdOutputPlugin
  {
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: embulk-output-td
  version: !ruby/object:Gem::Version
- version: 0.1.2
+ version: 0.1.3
  platform: ruby
  authors:
  - Muga Nishizawa
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-07-14 00:00:00.000000000 Z
+ date: 2015-08-05 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: bundler
@@ -80,12 +80,13 @@ files:
  - src/main/java/com/treasuredata/api/model/TDTablePermission.java
  - src/main/java/com/treasuredata/api/model/TDTableSchema.java
  - src/main/java/com/treasuredata/api/model/TDTableType.java
- - src/main/java/org/embulk/output/FinalizableExecutorService.java
- - src/main/java/org/embulk/output/MsgpackGZFileBuilder.java
- - src/main/java/org/embulk/output/RecordWriter.java
- - src/main/java/org/embulk/output/TdOutputPlugin.java
- - src/test/java/org/embulk/output/TestTdOutputPlugin.java
- - classpath/embulk-output-td-0.1.2.jar
+ - src/main/java/org/embulk/output/td/FinalizableExecutorService.java
+ - src/main/java/org/embulk/output/td/MsgpackGZFileBuilder.java
+ - src/main/java/org/embulk/output/td/RecordWriter.java
+ - src/main/java/org/embulk/output/td/TdOutputPlugin.java
+ - src/test/java/com/treasuredata/api/TestTdApiClient.java
+ - src/test/java/org/embulk/output/td/TestTdOutputPlugin.java
+ - classpath/embulk-output-td-0.1.3.jar
  - classpath/javassist-3.18.1-GA.jar
  - classpath/jetty-client-9.2.2.v20140723.jar
  - classpath/jetty-http-9.2.2.v20140723.jar