embulk-output-td 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/README.md +63 -0
  4. data/build.gradle +79 -0
  5. data/embulk-output-td.gemspec +18 -0
  6. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  7. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  8. data/gradlew +164 -0
  9. data/gradlew.bat +90 -0
  10. data/lib/embulk/output/td.rb +3 -0
  11. data/settings.gradle +1 -0
  12. data/src/main/java/com/treasuredata/api/TdApiClient.java +436 -0
  13. data/src/main/java/com/treasuredata/api/TdApiClientConfig.java +79 -0
  14. data/src/main/java/com/treasuredata/api/TdApiConflictException.java +10 -0
  15. data/src/main/java/com/treasuredata/api/TdApiConstants.java +6 -0
  16. data/src/main/java/com/treasuredata/api/TdApiException.java +20 -0
  17. data/src/main/java/com/treasuredata/api/TdApiExecutionException.java +10 -0
  18. data/src/main/java/com/treasuredata/api/TdApiExecutionInterruptedException.java +15 -0
  19. data/src/main/java/com/treasuredata/api/TdApiExecutionTimeoutException.java +17 -0
  20. data/src/main/java/com/treasuredata/api/TdApiNotFoundException.java +10 -0
  21. data/src/main/java/com/treasuredata/api/TdApiResponseException.java +32 -0
  22. data/src/main/java/com/treasuredata/api/model/TDArrayColumnType.java +80 -0
  23. data/src/main/java/com/treasuredata/api/model/TDBulkImportSession.java +155 -0
  24. data/src/main/java/com/treasuredata/api/model/TDColumn.java +83 -0
  25. data/src/main/java/com/treasuredata/api/model/TDColumnType.java +23 -0
  26. data/src/main/java/com/treasuredata/api/model/TDColumnTypeDeserializer.java +115 -0
  27. data/src/main/java/com/treasuredata/api/model/TDDatabase.java +48 -0
  28. data/src/main/java/com/treasuredata/api/model/TDDatabaseList.java +24 -0
  29. data/src/main/java/com/treasuredata/api/model/TDMapColumnType.java +88 -0
  30. data/src/main/java/com/treasuredata/api/model/TDPrimitiveColumnType.java +61 -0
  31. data/src/main/java/com/treasuredata/api/model/TDTable.java +64 -0
  32. data/src/main/java/com/treasuredata/api/model/TDTableList.java +33 -0
  33. data/src/main/java/com/treasuredata/api/model/TDTablePermission.java +48 -0
  34. data/src/main/java/com/treasuredata/api/model/TDTableSchema.java +44 -0
  35. data/src/main/java/com/treasuredata/api/model/TDTableType.java +36 -0
  36. data/src/main/java/org/embulk/output/FinalizableExecutorService.java +84 -0
  37. data/src/main/java/org/embulk/output/MsgpackGZFileBuilder.java +148 -0
  38. data/src/main/java/org/embulk/output/RecordWriter.java +567 -0
  39. data/src/main/java/org/embulk/output/TdOutputPlugin.java +390 -0
  40. data/src/test/java/org/embulk/output/TestTdOutputPlugin.java +5 -0
  41. metadata +119 -0
@@ -0,0 +1,390 @@
1
+ package org.embulk.output;
2
+
3
+ import java.io.IOException;
4
+ import java.util.List;
5
+ import javax.validation.constraints.Min;
6
+ import javax.validation.constraints.Max;
7
+
8
+ import com.google.common.base.Optional;
9
+ import com.google.common.base.Throwables;
10
+ import com.treasuredata.api.TdApiClient;
11
+ import com.treasuredata.api.TdApiClientConfig;
12
+ import com.treasuredata.api.TdApiClientConfig.HttpProxyConfig;
13
+ import com.treasuredata.api.TdApiConflictException;
14
+ import com.treasuredata.api.TdApiNotFoundException;
15
+ import com.treasuredata.api.TdApiException;
16
+ import com.treasuredata.api.model.TDBulkImportSession;
17
+ import com.treasuredata.api.model.TDBulkImportSession.ImportStatus;
18
+ import com.treasuredata.api.model.TDDatabase;
19
+ import com.treasuredata.api.model.TDTable;
20
+ import org.embulk.config.CommitReport;
21
+ import org.embulk.config.Config;
22
+ import org.embulk.config.ConfigDefault;
23
+ import org.embulk.config.ConfigDiff;
24
+ import org.embulk.config.ConfigInject;
25
+ import org.embulk.config.ConfigSource;
26
+ import org.embulk.config.ConfigException;
27
+ import org.embulk.config.Task;
28
+ import org.embulk.config.TaskSource;
29
+ import org.embulk.output.RecordWriter.FieldWriterSet;
30
+ import org.embulk.spi.Exec;
31
+ import org.embulk.spi.ExecSession;
32
+ import org.embulk.spi.OutputPlugin;
33
+ import org.embulk.spi.Schema;
34
+ import org.embulk.spi.TransactionalPageOutput;
35
+ import org.embulk.spi.time.Timestamp;
36
+ import org.joda.time.format.DateTimeFormat;
37
+ import org.jruby.embed.ScriptingContainer;
38
+ import org.slf4j.Logger;
39
+
40
+ public class TdOutputPlugin
41
+ implements OutputPlugin
42
+ {
43
+ public interface PluginTask
44
+ extends Task
45
+ {
46
+ @Config("apikey")
47
+ public String getApiKey();
48
+
49
+ @Config("endpoint")
50
+ @ConfigDefault("\"api.treasuredata.com\"")
51
+ public String getEndpoint();
52
+
53
+ @Config("use_ssl")
54
+ @ConfigDefault("true")
55
+ public boolean getUseSsl();
56
+
57
+ @Config("http_proxy")
58
+ @ConfigDefault("null")
59
+ public Optional<HttpProxyTask> getHttpProxy();
60
+
61
+ // TODO connect_timeout, read_timeout, send_timeout
62
+
63
+ @Config("auto_create_table")
64
+ @ConfigDefault("true")
65
+ public boolean getAutoCreateTable();
66
+
67
+ @Config("database")
68
+ public String getDatabase();
69
+
70
+ @Config("table")
71
+ public String getTable();
72
+
73
+ @Config("session")
74
+ @ConfigDefault("null")
75
+ public Optional<String> getSession();
76
+
77
+ @Config("time_column")
78
+ @ConfigDefault("null")
79
+ public Optional<String> getTimeColumn();
80
+
81
+ @Config("tmpdir")
82
+ @ConfigDefault("\"/tmp\"")
83
+ public String getTempDir();
84
+
85
+ @Config("upload_concurrency")
86
+ @ConfigDefault("2")
87
+ @Min(1)
88
+ @Max(8)
89
+ public int getUploadConcurrency();
90
+
91
+ @Config("file_split_size")
92
+ @ConfigDefault("16384") // default 16MB (unit: kb)
93
+ public long getFileSplitSize();
94
+
95
+ @ConfigInject
96
+ public ScriptingContainer getJRuby();
97
+
98
+ public boolean getDoUpload();
99
+ public void setDoUpload(boolean doUpload);
100
+
101
+ public String getSessionName();
102
+ public void setSessionName(String session);
103
+ }
104
+
105
+ public interface HttpProxyTask
106
+ extends Task
107
+ {
108
+ @Config("host")
109
+ public String getHost();
110
+
111
+ @Config("port")
112
+ public int getPort();
113
+
114
+ @Config("use_ssl")
115
+ @ConfigDefault("false")
116
+ public boolean getUseSsl();
117
+ }
118
+
119
/** Logger obtained from the Embulk execution context for the runtime class. */
private final Logger log;

/**
 * Creates the plugin and binds its logger via {@link Exec#getLogger}.
 */
public TdOutputPlugin()
{
    log = Exec.getLogger(this.getClass());
}
125
+
126
+ public ConfigDiff transaction(final ConfigSource config, final Schema schema, int processorCount,
127
+ OutputPlugin.Control control)
128
+ {
129
+ final PluginTask task = config.loadConfig(PluginTask.class);
130
+
131
+ // generate session name
132
+ task.setSessionName(buildBulkImportSessionName(task, Exec.session()));
133
+
134
+ try (TdApiClient client = newTdApiClient(task)) {
135
+ String databaseName = task.getDatabase();
136
+ String tableName = task.getTable();
137
+ if (task.getAutoCreateTable()) {
138
+ createTableIfNotExists(client, databaseName, tableName);
139
+ } else {
140
+ // check if the database and/or table exist or not
141
+ validateTableExists(client, databaseName, tableName);
142
+ }
143
+
144
+ // validate FieldWriterSet configuration before transaction is started
145
+ RecordWriter.validateSchema(log, task, schema);
146
+
147
+ return doRun(client, task, control);
148
+ }
149
+ }
150
+
151
+ public ConfigDiff resume(TaskSource taskSource,
152
+ Schema schema, int processorCount,
153
+ OutputPlugin.Control control) {
154
+ PluginTask task = taskSource.loadTask(PluginTask.class);
155
+ try (TdApiClient client = newTdApiClient(task)) {
156
+ return doRun(client, task, control);
157
+ }
158
+ }
159
+
160
+ private ConfigDiff doRun(TdApiClient client, PluginTask task, OutputPlugin.Control control)
161
+ {
162
+ boolean doUpload = startBulkImportSession(client, task.getSessionName(), task.getDatabase(), task.getTable());
163
+ task.setDoUpload(doUpload);
164
+ control.run(task.dump());
165
+ completeBulkImportSession(client, task.getSessionName(), 0); // TODO perform job priority
166
+
167
+ ConfigDiff configDiff = Exec.newConfigDiff();
168
+ configDiff.set("last_session", task.getSessionName());
169
+ return configDiff;
170
+ }
171
+
172
+ public void cleanup(TaskSource taskSource,
173
+ Schema schema, int processorCount,
174
+ List<CommitReport> successCommitReports)
175
+ {
176
+ PluginTask task = taskSource.loadTask(PluginTask.class);
177
+ try (TdApiClient client = newTdApiClient(task)) {
178
+ String sessionName = task.getSessionName();
179
+ log.info("Deleting bulk import session '{}'", sessionName);
180
+ client.deleteBulkImportSession(sessionName);
181
+ }
182
+ }
183
+
184
+ private TdApiClient newTdApiClient(final PluginTask task)
185
+ {
186
+ Optional<HttpProxyConfig> httpProxyConfig = newHttpProxyConfig(task.getHttpProxy());
187
+ TdApiClientConfig config = new TdApiClientConfig(task.getEndpoint(), task.getUseSsl(), httpProxyConfig);
188
+ TdApiClient client = new TdApiClient(task.getApiKey(), config);
189
+ try {
190
+ client.start();
191
+ } catch (IOException e) {
192
+ throw Throwables.propagate(e);
193
+ }
194
+ return client;
195
+ }
196
+
197
+ private Optional<HttpProxyConfig> newHttpProxyConfig(Optional<HttpProxyTask> task)
198
+ {
199
+ Optional<HttpProxyConfig> httpProxyConfig;
200
+ if (task.isPresent()) {
201
+ HttpProxyTask pt = task.get();
202
+ httpProxyConfig = Optional.of(new HttpProxyConfig(pt.getHost(), pt.getPort(), pt.getUseSsl()));
203
+ } else {
204
+ httpProxyConfig = Optional.absent();
205
+ }
206
+ return httpProxyConfig;
207
+ }
208
+
209
+ private void createTableIfNotExists(TdApiClient client, String databaseName, String tableName)
210
+ {
211
+ log.debug("Creating table \"{}\".\"{}\" if not exists", databaseName, tableName);
212
+ try {
213
+ client.createTable(databaseName, tableName);
214
+ log.debug("Created table \"{}\".\"{}\"", databaseName, tableName);
215
+ } catch (TdApiNotFoundException e) {
216
+ try {
217
+ client.createDatabase(databaseName);
218
+ log.debug("Created database \"{}\"", databaseName);
219
+ } catch (TdApiConflictException ex) {
220
+ // ignorable error
221
+ }
222
+ try {
223
+ client.createTable(databaseName, tableName);
224
+ log.debug("Created table \"{}\".\"{}\"", databaseName, tableName);
225
+ } catch (TdApiConflictException exe) {
226
+ // ignorable error
227
+ }
228
+ } catch (TdApiConflictException e) {
229
+ // ignorable error
230
+ }
231
+ }
232
+
233
+ private void validateTableExists(TdApiClient client, String databaseName, String tableName)
234
+ {
235
+ try {
236
+ for (TDTable table : client.getTables(databaseName)) {
237
+ if (table.getName().equals(tableName)) {
238
+ return;
239
+ }
240
+ }
241
+ throw new ConfigException(String.format("Table \"%s\".\"%s\" doesn't exist", databaseName, tableName));
242
+ } catch (TdApiNotFoundException ex) {
243
+ throw new ConfigException(String.format("Database \"%s\" doesn't exist", databaseName), ex);
244
+ }
245
+ }
246
+
247
+ private String buildBulkImportSessionName(PluginTask task, ExecSession exec)
248
+ {
249
+ if (task.getSession().isPresent()) {
250
+ return task.getSession().get();
251
+ } else {
252
+ Timestamp time = exec.getTransactionTime(); // TODO implement Exec.getTransactionUniqueName()
253
+ return String.format("embulk_%s_%09d",
254
+ DateTimeFormat.forPattern("yyyyMMdd_HHmmss").withZoneUTC().print(time.getEpochSecond() * 1000),
255
+ time.getNano());
256
+ }
257
+ }
258
+
259
+ // return false if all files are already uploaded
260
+ private boolean startBulkImportSession(TdApiClient client,
261
+ String sessionName, String databaseName, String tableName)
262
+ {
263
+ log.info("Create bulk_import session {}", sessionName);
264
+ TDBulkImportSession session;
265
+ try {
266
+ client.createBulkImportSession(sessionName, databaseName, tableName);
267
+ } catch (TdApiConflictException ex) {
268
+ // ignorable error
269
+ }
270
+ session = client.getBulkImportSession(sessionName);
271
+ // TODO check associated databaseName and tableName
272
+
273
+ switch (session.getStatus()) {
274
+ case UPLOADING:
275
+ if (session.getUploadFrozen()) {
276
+ return false;
277
+ }
278
+ return true;
279
+ case PERFORMING:
280
+ return false;
281
+ case READY:
282
+ return false;
283
+ case COMMITTING:
284
+ return false;
285
+ case COMMITTED:
286
+ return false;
287
+ case UNKNOWN:
288
+ default:
289
+ throw new RuntimeException("Unknown bulk import status");
290
+ }
291
+ }
292
+
293
+ private void completeBulkImportSession(TdApiClient client, String sessionName, int priority)
294
+ {
295
+ TDBulkImportSession session = client.getBulkImportSession(sessionName);
296
+
297
+ switch (session.getStatus()) {
298
+ case UPLOADING:
299
+ if (!session.getUploadFrozen()) {
300
+ // freeze
301
+ try {
302
+ client.freezeBulkImportSession(sessionName);
303
+ } catch (TdApiConflictException e) {
304
+ // ignorable error
305
+ }
306
+ }
307
+ // perform
308
+ client.performBulkImportSession(sessionName, priority);
309
+
310
+ // pass
311
+ case PERFORMING:
312
+ log.info("Performing bulk import session '{}'", sessionName);
313
+ session = waitForStatusChange(client, sessionName,
314
+ ImportStatus.PERFORMING, ImportStatus.READY,
315
+ "perform");
316
+ log.info(" job id: {}", session.getJobId());
317
+
318
+ // pass
319
+ case READY:
320
+ // TODO add an option to make the transaction failed if error_records or error_parts is too large
321
+ // commit
322
+ log.info("Committing bulk import session '{}'", sessionName);
323
+ log.info(" valid records: {}", session.getValidRecords());
324
+ log.info(" error records: {}", session.getErrorRecords());
325
+ log.info(" valid parts: {}", session.getValidParts());
326
+ log.info(" error parts: {}", session.getErrorParts());
327
+ client.commitBulkImportSession(sessionName);
328
+
329
+ // pass
330
+ case COMMITTING:
331
+ session = waitForStatusChange(client, sessionName,
332
+ ImportStatus.COMMITTING, ImportStatus.COMMITTED,
333
+ "commit");
334
+
335
+ // pass
336
+ case COMMITTED:
337
+ return;
338
+
339
+ case UNKNOWN:
340
+ throw new RuntimeException("Unknown bulk import status");
341
+ }
342
+ }
343
+
344
+ private TDBulkImportSession waitForStatusChange(TdApiClient client, String sessionName,
345
+ ImportStatus current, ImportStatus expecting, String operation)
346
+ {
347
+ TDBulkImportSession importSession;
348
+ while (true) {
349
+ importSession = client.getBulkImportSession(sessionName);
350
+
351
+ if (importSession.is(expecting)) {
352
+ return importSession;
353
+
354
+ } else if (importSession.is(current)) {
355
+ // in progress
356
+
357
+ } else {
358
+ throw new RuntimeException(String.format("Failed to %s bulk import session '%s'",
359
+ operation, sessionName));
360
+ }
361
+
362
+ try {
363
+ Thread.sleep(3000);
364
+ } catch (InterruptedException e) {
365
+ }
366
+ }
367
+ }
368
+
369
+ @Override
370
+ public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int processorIndex)
371
+ {
372
+ final PluginTask task = taskSource.loadTask(PluginTask.class);
373
+
374
+ RecordWriter closeLater = null;
375
+ try {
376
+ FieldWriterSet fieldWriters = new FieldWriterSet(log, task, schema);
377
+ RecordWriter recordWriter = closeLater = new RecordWriter(task, newTdApiClient(task), fieldWriters);
378
+ recordWriter.open(schema);
379
+ closeLater = null;
380
+ return recordWriter;
381
+
382
+ } catch (IOException e) {
383
+ throw Throwables.propagate(e);
384
+ } finally {
385
+ if (closeLater != null) {
386
+ closeLater.close();
387
+ }
388
+ }
389
+ }
390
+ }
@@ -0,0 +1,5 @@
1
+ package org.embulk.output;
2
+
3
/**
 * Placeholder for the output plugin's tests; it declares no test cases yet.
 */
public class TestTdOutputPlugin {
}
+ }
metadata ADDED
@@ -0,0 +1,119 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-output-td
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Muga Nishizawa
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-06-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: '1.0'
25
+ prerelease: false
26
+ type: :development
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '10.0'
39
+ prerelease: false
40
+ type: :development
41
+ description: TreasureData output plugin is an Embulk plugin that loads records to TreasureData read by any input plugins. Search the input plugins by 'embulk-output' keyword.
42
+ email:
43
+ - muga.nishizawa@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - README.md
50
+ - build.gradle
51
+ - embulk-output-td.gemspec
52
+ - gradle/wrapper/gradle-wrapper.jar
53
+ - gradle/wrapper/gradle-wrapper.properties
54
+ - gradlew
55
+ - gradlew.bat
56
+ - lib/embulk/output/td.rb
57
+ - settings.gradle
58
+ - src/main/java/com/treasuredata/api/TdApiClient.java
59
+ - src/main/java/com/treasuredata/api/TdApiClientConfig.java
60
+ - src/main/java/com/treasuredata/api/TdApiConflictException.java
61
+ - src/main/java/com/treasuredata/api/TdApiConstants.java
62
+ - src/main/java/com/treasuredata/api/TdApiException.java
63
+ - src/main/java/com/treasuredata/api/TdApiExecutionException.java
64
+ - src/main/java/com/treasuredata/api/TdApiExecutionInterruptedException.java
65
+ - src/main/java/com/treasuredata/api/TdApiExecutionTimeoutException.java
66
+ - src/main/java/com/treasuredata/api/TdApiNotFoundException.java
67
+ - src/main/java/com/treasuredata/api/TdApiResponseException.java
68
+ - src/main/java/com/treasuredata/api/model/TDArrayColumnType.java
69
+ - src/main/java/com/treasuredata/api/model/TDBulkImportSession.java
70
+ - src/main/java/com/treasuredata/api/model/TDColumn.java
71
+ - src/main/java/com/treasuredata/api/model/TDColumnType.java
72
+ - src/main/java/com/treasuredata/api/model/TDColumnTypeDeserializer.java
73
+ - src/main/java/com/treasuredata/api/model/TDDatabase.java
74
+ - src/main/java/com/treasuredata/api/model/TDDatabaseList.java
75
+ - src/main/java/com/treasuredata/api/model/TDMapColumnType.java
76
+ - src/main/java/com/treasuredata/api/model/TDPrimitiveColumnType.java
77
+ - src/main/java/com/treasuredata/api/model/TDTable.java
78
+ - src/main/java/com/treasuredata/api/model/TDTableList.java
79
+ - src/main/java/com/treasuredata/api/model/TDTablePermission.java
80
+ - src/main/java/com/treasuredata/api/model/TDTableSchema.java
81
+ - src/main/java/com/treasuredata/api/model/TDTableType.java
82
+ - src/main/java/org/embulk/output/FinalizableExecutorService.java
83
+ - src/main/java/org/embulk/output/MsgpackGZFileBuilder.java
84
+ - src/main/java/org/embulk/output/RecordWriter.java
85
+ - src/main/java/org/embulk/output/TdOutputPlugin.java
86
+ - src/test/java/org/embulk/output/TestTdOutputPlugin.java
87
+ - classpath/embulk-output-td-0.1.0.jar
88
+ - classpath/javassist-3.18.1-GA.jar
89
+ - classpath/jetty-client-9.2.2.v20140723.jar
90
+ - classpath/jetty-http-9.2.2.v20140723.jar
91
+ - classpath/jetty-io-9.2.2.v20140723.jar
92
+ - classpath/jetty-util-9.2.2.v20140723.jar
93
+ - classpath/json-simple-1.1.1.jar
94
+ - classpath/msgpack-0.6.11.jar
95
+ homepage: https://github.com/treasure-data/embulk-output-td
96
+ licenses:
97
+ - Apache 2.0
98
+ metadata: {}
99
+ post_install_message:
100
+ rdoc_options: []
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - '>='
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ required_rubygems_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - '>='
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ requirements: []
114
+ rubyforge_project:
115
+ rubygems_version: 2.1.9
116
+ signing_key:
117
+ specification_version: 4
118
+ summary: TreasureData output plugin for Embulk
119
+ test_files: []