embulk-output-td 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/README.md +63 -0
  4. data/build.gradle +79 -0
  5. data/embulk-output-td.gemspec +18 -0
  6. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  7. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  8. data/gradlew +164 -0
  9. data/gradlew.bat +90 -0
  10. data/lib/embulk/output/td.rb +3 -0
  11. data/settings.gradle +1 -0
  12. data/src/main/java/com/treasuredata/api/TdApiClient.java +436 -0
  13. data/src/main/java/com/treasuredata/api/TdApiClientConfig.java +79 -0
  14. data/src/main/java/com/treasuredata/api/TdApiConflictException.java +10 -0
  15. data/src/main/java/com/treasuredata/api/TdApiConstants.java +6 -0
  16. data/src/main/java/com/treasuredata/api/TdApiException.java +20 -0
  17. data/src/main/java/com/treasuredata/api/TdApiExecutionException.java +10 -0
  18. data/src/main/java/com/treasuredata/api/TdApiExecutionInterruptedException.java +15 -0
  19. data/src/main/java/com/treasuredata/api/TdApiExecutionTimeoutException.java +17 -0
  20. data/src/main/java/com/treasuredata/api/TdApiNotFoundException.java +10 -0
  21. data/src/main/java/com/treasuredata/api/TdApiResponseException.java +32 -0
  22. data/src/main/java/com/treasuredata/api/model/TDArrayColumnType.java +80 -0
  23. data/src/main/java/com/treasuredata/api/model/TDBulkImportSession.java +155 -0
  24. data/src/main/java/com/treasuredata/api/model/TDColumn.java +83 -0
  25. data/src/main/java/com/treasuredata/api/model/TDColumnType.java +23 -0
  26. data/src/main/java/com/treasuredata/api/model/TDColumnTypeDeserializer.java +115 -0
  27. data/src/main/java/com/treasuredata/api/model/TDDatabase.java +48 -0
  28. data/src/main/java/com/treasuredata/api/model/TDDatabaseList.java +24 -0
  29. data/src/main/java/com/treasuredata/api/model/TDMapColumnType.java +88 -0
  30. data/src/main/java/com/treasuredata/api/model/TDPrimitiveColumnType.java +61 -0
  31. data/src/main/java/com/treasuredata/api/model/TDTable.java +64 -0
  32. data/src/main/java/com/treasuredata/api/model/TDTableList.java +33 -0
  33. data/src/main/java/com/treasuredata/api/model/TDTablePermission.java +48 -0
  34. data/src/main/java/com/treasuredata/api/model/TDTableSchema.java +44 -0
  35. data/src/main/java/com/treasuredata/api/model/TDTableType.java +36 -0
  36. data/src/main/java/org/embulk/output/FinalizableExecutorService.java +84 -0
  37. data/src/main/java/org/embulk/output/MsgpackGZFileBuilder.java +148 -0
  38. data/src/main/java/org/embulk/output/RecordWriter.java +567 -0
  39. data/src/main/java/org/embulk/output/TdOutputPlugin.java +390 -0
  40. data/src/test/java/org/embulk/output/TestTdOutputPlugin.java +5 -0
  41. metadata +119 -0
@@ -0,0 +1,390 @@
1
+ package org.embulk.output;
2
+
3
+ import java.io.IOException;
4
+ import java.util.List;
5
+ import javax.validation.constraints.Min;
6
+ import javax.validation.constraints.Max;
7
+
8
+ import com.google.common.base.Optional;
9
+ import com.google.common.base.Throwables;
10
+ import com.treasuredata.api.TdApiClient;
11
+ import com.treasuredata.api.TdApiClientConfig;
12
+ import com.treasuredata.api.TdApiClientConfig.HttpProxyConfig;
13
+ import com.treasuredata.api.TdApiConflictException;
14
+ import com.treasuredata.api.TdApiNotFoundException;
15
+ import com.treasuredata.api.TdApiException;
16
+ import com.treasuredata.api.model.TDBulkImportSession;
17
+ import com.treasuredata.api.model.TDBulkImportSession.ImportStatus;
18
+ import com.treasuredata.api.model.TDDatabase;
19
+ import com.treasuredata.api.model.TDTable;
20
+ import org.embulk.config.CommitReport;
21
+ import org.embulk.config.Config;
22
+ import org.embulk.config.ConfigDefault;
23
+ import org.embulk.config.ConfigDiff;
24
+ import org.embulk.config.ConfigInject;
25
+ import org.embulk.config.ConfigSource;
26
+ import org.embulk.config.ConfigException;
27
+ import org.embulk.config.Task;
28
+ import org.embulk.config.TaskSource;
29
+ import org.embulk.output.RecordWriter.FieldWriterSet;
30
+ import org.embulk.spi.Exec;
31
+ import org.embulk.spi.ExecSession;
32
+ import org.embulk.spi.OutputPlugin;
33
+ import org.embulk.spi.Schema;
34
+ import org.embulk.spi.TransactionalPageOutput;
35
+ import org.embulk.spi.time.Timestamp;
36
+ import org.joda.time.format.DateTimeFormat;
37
+ import org.jruby.embed.ScriptingContainer;
38
+ import org.slf4j.Logger;
39
+
40
+ public class TdOutputPlugin
41
+ implements OutputPlugin
42
+ {
43
+ public interface PluginTask
44
+ extends Task
45
+ {
46
+ @Config("apikey")
47
+ public String getApiKey();
48
+
49
+ @Config("endpoint")
50
+ @ConfigDefault("\"api.treasuredata.com\"")
51
+ public String getEndpoint();
52
+
53
+ @Config("use_ssl")
54
+ @ConfigDefault("true")
55
+ public boolean getUseSsl();
56
+
57
+ @Config("http_proxy")
58
+ @ConfigDefault("null")
59
+ public Optional<HttpProxyTask> getHttpProxy();
60
+
61
+ // TODO connect_timeout, read_timeout, send_timeout
62
+
63
+ @Config("auto_create_table")
64
+ @ConfigDefault("true")
65
+ public boolean getAutoCreateTable();
66
+
67
+ @Config("database")
68
+ public String getDatabase();
69
+
70
+ @Config("table")
71
+ public String getTable();
72
+
73
+ @Config("session")
74
+ @ConfigDefault("null")
75
+ public Optional<String> getSession();
76
+
77
+ @Config("time_column")
78
+ @ConfigDefault("null")
79
+ public Optional<String> getTimeColumn();
80
+
81
+ @Config("tmpdir")
82
+ @ConfigDefault("\"/tmp\"")
83
+ public String getTempDir();
84
+
85
+ @Config("upload_concurrency")
86
+ @ConfigDefault("2")
87
+ @Min(1)
88
+ @Max(8)
89
+ public int getUploadConcurrency();
90
+
91
+ @Config("file_split_size")
92
+ @ConfigDefault("16384") // default 16MB (unit: kb)
93
+ public long getFileSplitSize();
94
+
95
+ @ConfigInject
96
+ public ScriptingContainer getJRuby();
97
+
98
+ public boolean getDoUpload();
99
+ public void setDoUpload(boolean doUpload);
100
+
101
+ public String getSessionName();
102
+ public void setSessionName(String session);
103
+ }
104
+
105
+ public interface HttpProxyTask
106
+ extends Task
107
+ {
108
+ @Config("host")
109
+ public String getHost();
110
+
111
+ @Config("port")
112
+ public int getPort();
113
+
114
+ @Config("use_ssl")
115
+ @ConfigDefault("false")
116
+ public boolean getUseSsl();
117
+ }
118
+
119
/** Logger obtained from {@link Exec} for the runtime class of this plugin. */
private final Logger log;

/** Creates the plugin and binds its logger. */
public TdOutputPlugin()
{
    log = Exec.getLogger(this.getClass());
}
125
+
126
+ public ConfigDiff transaction(final ConfigSource config, final Schema schema, int processorCount,
127
+ OutputPlugin.Control control)
128
+ {
129
+ final PluginTask task = config.loadConfig(PluginTask.class);
130
+
131
+ // generate session name
132
+ task.setSessionName(buildBulkImportSessionName(task, Exec.session()));
133
+
134
+ try (TdApiClient client = newTdApiClient(task)) {
135
+ String databaseName = task.getDatabase();
136
+ String tableName = task.getTable();
137
+ if (task.getAutoCreateTable()) {
138
+ createTableIfNotExists(client, databaseName, tableName);
139
+ } else {
140
+ // check if the database and/or table exist or not
141
+ validateTableExists(client, databaseName, tableName);
142
+ }
143
+
144
+ // validate FieldWriterSet configuration before transaction is started
145
+ RecordWriter.validateSchema(log, task, schema);
146
+
147
+ return doRun(client, task, control);
148
+ }
149
+ }
150
+
151
+ public ConfigDiff resume(TaskSource taskSource,
152
+ Schema schema, int processorCount,
153
+ OutputPlugin.Control control) {
154
+ PluginTask task = taskSource.loadTask(PluginTask.class);
155
+ try (TdApiClient client = newTdApiClient(task)) {
156
+ return doRun(client, task, control);
157
+ }
158
+ }
159
+
160
+ private ConfigDiff doRun(TdApiClient client, PluginTask task, OutputPlugin.Control control)
161
+ {
162
+ boolean doUpload = startBulkImportSession(client, task.getSessionName(), task.getDatabase(), task.getTable());
163
+ task.setDoUpload(doUpload);
164
+ control.run(task.dump());
165
+ completeBulkImportSession(client, task.getSessionName(), 0); // TODO perform job priority
166
+
167
+ ConfigDiff configDiff = Exec.newConfigDiff();
168
+ configDiff.set("last_session", task.getSessionName());
169
+ return configDiff;
170
+ }
171
+
172
+ public void cleanup(TaskSource taskSource,
173
+ Schema schema, int processorCount,
174
+ List<CommitReport> successCommitReports)
175
+ {
176
+ PluginTask task = taskSource.loadTask(PluginTask.class);
177
+ try (TdApiClient client = newTdApiClient(task)) {
178
+ String sessionName = task.getSessionName();
179
+ log.info("Deleting bulk import session '{}'", sessionName);
180
+ client.deleteBulkImportSession(sessionName);
181
+ }
182
+ }
183
+
184
+ private TdApiClient newTdApiClient(final PluginTask task)
185
+ {
186
+ Optional<HttpProxyConfig> httpProxyConfig = newHttpProxyConfig(task.getHttpProxy());
187
+ TdApiClientConfig config = new TdApiClientConfig(task.getEndpoint(), task.getUseSsl(), httpProxyConfig);
188
+ TdApiClient client = new TdApiClient(task.getApiKey(), config);
189
+ try {
190
+ client.start();
191
+ } catch (IOException e) {
192
+ throw Throwables.propagate(e);
193
+ }
194
+ return client;
195
+ }
196
+
197
+ private Optional<HttpProxyConfig> newHttpProxyConfig(Optional<HttpProxyTask> task)
198
+ {
199
+ Optional<HttpProxyConfig> httpProxyConfig;
200
+ if (task.isPresent()) {
201
+ HttpProxyTask pt = task.get();
202
+ httpProxyConfig = Optional.of(new HttpProxyConfig(pt.getHost(), pt.getPort(), pt.getUseSsl()));
203
+ } else {
204
+ httpProxyConfig = Optional.absent();
205
+ }
206
+ return httpProxyConfig;
207
+ }
208
+
209
+ private void createTableIfNotExists(TdApiClient client, String databaseName, String tableName)
210
+ {
211
+ log.debug("Creating table \"{}\".\"{}\" if not exists", databaseName, tableName);
212
+ try {
213
+ client.createTable(databaseName, tableName);
214
+ log.debug("Created table \"{}\".\"{}\"", databaseName, tableName);
215
+ } catch (TdApiNotFoundException e) {
216
+ try {
217
+ client.createDatabase(databaseName);
218
+ log.debug("Created database \"{}\"", databaseName);
219
+ } catch (TdApiConflictException ex) {
220
+ // ignorable error
221
+ }
222
+ try {
223
+ client.createTable(databaseName, tableName);
224
+ log.debug("Created table \"{}\".\"{}\"", databaseName, tableName);
225
+ } catch (TdApiConflictException exe) {
226
+ // ignorable error
227
+ }
228
+ } catch (TdApiConflictException e) {
229
+ // ignorable error
230
+ }
231
+ }
232
+
233
+ private void validateTableExists(TdApiClient client, String databaseName, String tableName)
234
+ {
235
+ try {
236
+ for (TDTable table : client.getTables(databaseName)) {
237
+ if (table.getName().equals(tableName)) {
238
+ return;
239
+ }
240
+ }
241
+ throw new ConfigException(String.format("Table \"%s\".\"%s\" doesn't exist", databaseName, tableName));
242
+ } catch (TdApiNotFoundException ex) {
243
+ throw new ConfigException(String.format("Database \"%s\" doesn't exist", databaseName), ex);
244
+ }
245
+ }
246
+
247
+ private String buildBulkImportSessionName(PluginTask task, ExecSession exec)
248
+ {
249
+ if (task.getSession().isPresent()) {
250
+ return task.getSession().get();
251
+ } else {
252
+ Timestamp time = exec.getTransactionTime(); // TODO implement Exec.getTransactionUniqueName()
253
+ return String.format("embulk_%s_%09d",
254
+ DateTimeFormat.forPattern("yyyyMMdd_HHmmss").withZoneUTC().print(time.getEpochSecond() * 1000),
255
+ time.getNano());
256
+ }
257
+ }
258
+
259
+ // return false if all files are already uploaded
260
+ private boolean startBulkImportSession(TdApiClient client,
261
+ String sessionName, String databaseName, String tableName)
262
+ {
263
+ log.info("Create bulk_import session {}", sessionName);
264
+ TDBulkImportSession session;
265
+ try {
266
+ client.createBulkImportSession(sessionName, databaseName, tableName);
267
+ } catch (TdApiConflictException ex) {
268
+ // ignorable error
269
+ }
270
+ session = client.getBulkImportSession(sessionName);
271
+ // TODO check associated databaseName and tableName
272
+
273
+ switch (session.getStatus()) {
274
+ case UPLOADING:
275
+ if (session.getUploadFrozen()) {
276
+ return false;
277
+ }
278
+ return true;
279
+ case PERFORMING:
280
+ return false;
281
+ case READY:
282
+ return false;
283
+ case COMMITTING:
284
+ return false;
285
+ case COMMITTED:
286
+ return false;
287
+ case UNKNOWN:
288
+ default:
289
+ throw new RuntimeException("Unknown bulk import status");
290
+ }
291
+ }
292
+
293
+ private void completeBulkImportSession(TdApiClient client, String sessionName, int priority)
294
+ {
295
+ TDBulkImportSession session = client.getBulkImportSession(sessionName);
296
+
297
+ switch (session.getStatus()) {
298
+ case UPLOADING:
299
+ if (!session.getUploadFrozen()) {
300
+ // freeze
301
+ try {
302
+ client.freezeBulkImportSession(sessionName);
303
+ } catch (TdApiConflictException e) {
304
+ // ignorable error
305
+ }
306
+ }
307
+ // perform
308
+ client.performBulkImportSession(sessionName, priority);
309
+
310
+ // pass
311
+ case PERFORMING:
312
+ log.info("Performing bulk import session '{}'", sessionName);
313
+ session = waitForStatusChange(client, sessionName,
314
+ ImportStatus.PERFORMING, ImportStatus.READY,
315
+ "perform");
316
+ log.info(" job id: {}", session.getJobId());
317
+
318
+ // pass
319
+ case READY:
320
+ // TODO add an option to make the transaction failed if error_records or error_parts is too large
321
+ // commit
322
+ log.info("Committing bulk import session '{}'", sessionName);
323
+ log.info(" valid records: {}", session.getValidRecords());
324
+ log.info(" error records: {}", session.getErrorRecords());
325
+ log.info(" valid parts: {}", session.getValidParts());
326
+ log.info(" error parts: {}", session.getErrorParts());
327
+ client.commitBulkImportSession(sessionName);
328
+
329
+ // pass
330
+ case COMMITTING:
331
+ session = waitForStatusChange(client, sessionName,
332
+ ImportStatus.COMMITTING, ImportStatus.COMMITTED,
333
+ "commit");
334
+
335
+ // pass
336
+ case COMMITTED:
337
+ return;
338
+
339
+ case UNKNOWN:
340
+ throw new RuntimeException("Unknown bulk import status");
341
+ }
342
+ }
343
+
344
+ private TDBulkImportSession waitForStatusChange(TdApiClient client, String sessionName,
345
+ ImportStatus current, ImportStatus expecting, String operation)
346
+ {
347
+ TDBulkImportSession importSession;
348
+ while (true) {
349
+ importSession = client.getBulkImportSession(sessionName);
350
+
351
+ if (importSession.is(expecting)) {
352
+ return importSession;
353
+
354
+ } else if (importSession.is(current)) {
355
+ // in progress
356
+
357
+ } else {
358
+ throw new RuntimeException(String.format("Failed to %s bulk import session '%s'",
359
+ operation, sessionName));
360
+ }
361
+
362
+ try {
363
+ Thread.sleep(3000);
364
+ } catch (InterruptedException e) {
365
+ }
366
+ }
367
+ }
368
+
369
+ @Override
370
+ public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int processorIndex)
371
+ {
372
+ final PluginTask task = taskSource.loadTask(PluginTask.class);
373
+
374
+ RecordWriter closeLater = null;
375
+ try {
376
+ FieldWriterSet fieldWriters = new FieldWriterSet(log, task, schema);
377
+ RecordWriter recordWriter = closeLater = new RecordWriter(task, newTdApiClient(task), fieldWriters);
378
+ recordWriter.open(schema);
379
+ closeLater = null;
380
+ return recordWriter;
381
+
382
+ } catch (IOException e) {
383
+ throw Throwables.propagate(e);
384
+ } finally {
385
+ if (closeLater != null) {
386
+ closeLater.close();
387
+ }
388
+ }
389
+ }
390
+ }
@@ -0,0 +1,5 @@
1
+ package org.embulk.output;
2
+
3
/**
 * Placeholder for the tests of {@code TdOutputPlugin}; no test cases are defined yet.
 */
public class TestTdOutputPlugin
{
}
metadata ADDED
@@ -0,0 +1,119 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-output-td
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Muga Nishizawa
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-06-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: '1.0'
25
+ prerelease: false
26
+ type: :development
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '10.0'
39
+ prerelease: false
40
+ type: :development
41
+ description: TreasureData output plugin is an Embulk plugin that loads records read by any input plugin to TreasureData. Search the input plugins by 'embulk-input' keyword.
42
+ email:
43
+ - muga.nishizawa@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - README.md
50
+ - build.gradle
51
+ - embulk-output-td.gemspec
52
+ - gradle/wrapper/gradle-wrapper.jar
53
+ - gradle/wrapper/gradle-wrapper.properties
54
+ - gradlew
55
+ - gradlew.bat
56
+ - lib/embulk/output/td.rb
57
+ - settings.gradle
58
+ - src/main/java/com/treasuredata/api/TdApiClient.java
59
+ - src/main/java/com/treasuredata/api/TdApiClientConfig.java
60
+ - src/main/java/com/treasuredata/api/TdApiConflictException.java
61
+ - src/main/java/com/treasuredata/api/TdApiConstants.java
62
+ - src/main/java/com/treasuredata/api/TdApiException.java
63
+ - src/main/java/com/treasuredata/api/TdApiExecutionException.java
64
+ - src/main/java/com/treasuredata/api/TdApiExecutionInterruptedException.java
65
+ - src/main/java/com/treasuredata/api/TdApiExecutionTimeoutException.java
66
+ - src/main/java/com/treasuredata/api/TdApiNotFoundException.java
67
+ - src/main/java/com/treasuredata/api/TdApiResponseException.java
68
+ - src/main/java/com/treasuredata/api/model/TDArrayColumnType.java
69
+ - src/main/java/com/treasuredata/api/model/TDBulkImportSession.java
70
+ - src/main/java/com/treasuredata/api/model/TDColumn.java
71
+ - src/main/java/com/treasuredata/api/model/TDColumnType.java
72
+ - src/main/java/com/treasuredata/api/model/TDColumnTypeDeserializer.java
73
+ - src/main/java/com/treasuredata/api/model/TDDatabase.java
74
+ - src/main/java/com/treasuredata/api/model/TDDatabaseList.java
75
+ - src/main/java/com/treasuredata/api/model/TDMapColumnType.java
76
+ - src/main/java/com/treasuredata/api/model/TDPrimitiveColumnType.java
77
+ - src/main/java/com/treasuredata/api/model/TDTable.java
78
+ - src/main/java/com/treasuredata/api/model/TDTableList.java
79
+ - src/main/java/com/treasuredata/api/model/TDTablePermission.java
80
+ - src/main/java/com/treasuredata/api/model/TDTableSchema.java
81
+ - src/main/java/com/treasuredata/api/model/TDTableType.java
82
+ - src/main/java/org/embulk/output/FinalizableExecutorService.java
83
+ - src/main/java/org/embulk/output/MsgpackGZFileBuilder.java
84
+ - src/main/java/org/embulk/output/RecordWriter.java
85
+ - src/main/java/org/embulk/output/TdOutputPlugin.java
86
+ - src/test/java/org/embulk/output/TestTdOutputPlugin.java
87
+ - classpath/embulk-output-td-0.1.0.jar
88
+ - classpath/javassist-3.18.1-GA.jar
89
+ - classpath/jetty-client-9.2.2.v20140723.jar
90
+ - classpath/jetty-http-9.2.2.v20140723.jar
91
+ - classpath/jetty-io-9.2.2.v20140723.jar
92
+ - classpath/jetty-util-9.2.2.v20140723.jar
93
+ - classpath/json-simple-1.1.1.jar
94
+ - classpath/msgpack-0.6.11.jar
95
+ homepage: https://github.com/treasure-data/embulk-output-td
96
+ licenses:
97
+ - Apache 2.0
98
+ metadata: {}
99
+ post_install_message:
100
+ rdoc_options: []
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - '>='
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ required_rubygems_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - '>='
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ requirements: []
114
+ rubyforge_project:
115
+ rubygems_version: 2.1.9
116
+ signing_key:
117
+ specification_version: 4
118
+ summary: TreasureData output plugin for Embulk
119
+ test_files: []