embulk-input-td 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/gradlew.bat ADDED
@@ -0,0 +1,90 @@
1
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem  Locates java.exe (JAVA_HOME first, then PATH) and launches
@rem  org.gradle.wrapper.GradleWrapperMain from gradle\wrapper\gradle-wrapper.jar,
@rem  forwarding the arguments given to this script.
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
@rem Strip any double quotes from JAVA_HOME before building the java.exe path
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto init

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:init
@rem Get command-line arguments, handling Windows variants

if not "%OS%" == "Windows_NT" goto win9xME_args
if "%@eval[2+2]" == "4" goto 4NT_args

:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=

:win9xME_args_slurp
if "x%~1" == "x" goto execute

set CMD_LINE_ARGS=%*
goto execute

:4NT_args
@rem Get arguments from the 4NT Shell from JP Software
set CMD_LINE_ARGS=%$

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar

@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
rem Propagate the actual failure code instead of a hard-coded 1; fall back to 1
rem when this label is reached while ERRORLEVEL is still 0 (e.g. invalid JAVA_HOME).
set EXIT_CODE=%ERRORLEVEL%
if "%EXIT_CODE%" == "0" set EXIT_CODE=1
if not "" == "%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
exit /b %EXIT_CODE%

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega
@@ -0,0 +1,3 @@
1
# Register the Java-implemented "td" input plugin with Embulk. The classpath
# directory is resolved relative to the location of this file.
plugin_classpath = File.expand_path('../../../../classpath', __FILE__)
Embulk::JavaPlugin.register_input("td", "org.embulk.input.td.TdInputPlugin", plugin_classpath)
@@ -0,0 +1,446 @@
1
+ package org.embulk.input.td;
2
+
3
+ import java.io.IOException;
4
+ import java.io.InputStream;
5
+ import java.util.List;
6
+ import java.util.Properties;
7
+ import java.util.zip.GZIPInputStream;
8
+
9
+ import com.fasterxml.jackson.databind.JsonNode;
10
+ import com.fasterxml.jackson.databind.ObjectMapper;
11
+ import com.fasterxml.jackson.databind.node.ArrayNode;
12
+ import com.google.common.base.Function;
13
+ import com.google.common.base.Optional;
14
+ import com.google.common.base.Throwables;
15
+ import com.google.inject.Inject;
16
+ import com.treasuredata.client.ProxyConfig;
17
+ import com.treasuredata.client.TDClient;
18
+ import com.treasuredata.client.TDClientBuilder;
19
+ import com.treasuredata.client.model.TDJob;
20
+ import com.treasuredata.client.model.TDJobRequest;
21
+ import com.treasuredata.client.model.TDJobSummary;
22
+ import org.embulk.config.ConfigException;
23
+ import org.embulk.config.ConfigInject;
24
+ import org.embulk.config.TaskReport;
25
+ import org.embulk.config.Config;
26
+ import org.embulk.config.ConfigDefault;
27
+ import org.embulk.config.ConfigDiff;
28
+ import org.embulk.config.ConfigSource;
29
+ import org.embulk.config.Task;
30
+ import org.embulk.config.TaskSource;
31
+ import org.embulk.input.td.writer.BooleanValueWriter;
32
+ import org.embulk.input.td.writer.DoubleValueWriter;
33
+ import org.embulk.input.td.writer.LongValueWriter;
34
+ import org.embulk.input.td.writer.StringValueWriter;
35
+ import org.embulk.input.td.writer.ValueWriter;
36
+ import org.embulk.spi.BufferAllocator;
37
+ import org.embulk.spi.Column;
38
+ import org.embulk.spi.DataException;
39
+ import org.embulk.spi.Exec;
40
+ import org.embulk.spi.InputPlugin;
41
+ import org.embulk.spi.PageBuilder;
42
+ import org.embulk.spi.PageOutput;
43
+ import org.embulk.spi.Schema;
44
+ import org.embulk.spi.type.Type;
45
+ import org.msgpack.core.MessagePack;
46
+ import org.msgpack.core.MessageUnpacker;
47
+ import org.msgpack.value.ArrayValue;
48
+ import org.msgpack.value.Value;
49
+ import org.slf4j.Logger;
50
+
51
+ import static com.google.common.base.Optional.fromNullable;
52
+ import static com.treasuredata.client.model.TDResultFormat.MESSAGE_PACK_GZ;
53
+ import static java.lang.Integer.parseInt;
54
+ import static java.util.Locale.ENGLISH;
55
+ import static org.embulk.spi.Exec.getLogger;
56
+ import static org.embulk.spi.Exec.newConfigDiff;
57
+ import static org.embulk.spi.Exec.newTaskReport;
58
+ import static org.embulk.spi.type.Types.BOOLEAN;
59
+ import static org.embulk.spi.type.Types.DOUBLE;
60
+ import static org.embulk.spi.type.Types.JSON;
61
+ import static org.embulk.spi.type.Types.LONG;
62
+ import static org.embulk.spi.type.Types.STRING;
63
+ import static org.embulk.spi.type.Types.TIMESTAMP;
64
+
65
+ public class TdInputPlugin
66
+ implements InputPlugin
67
+ {
68
+ public interface PluginTask
69
+ extends Task
70
+ {
71
+ @Config("apikey")
72
+ public String getApiKey();
73
+
74
+ @Config("endpoint")
75
+ @ConfigDefault("\"api.treasuredata.com\"")
76
+ public String getEndpoint();
77
+
78
+ @Config("use_ssl")
79
+ @ConfigDefault("true")
80
+ public boolean getUseSsl();
81
+
82
+ @Config("http_proxy")
83
+ @ConfigDefault("null")
84
+ public Optional<HttpProxyTask> getHttpProxy();
85
+
86
+ // TODO timeout
87
+ // TODO query, database
88
+
89
+ @Config("query")
90
+ @ConfigDefault("null")
91
+ public Optional<String> getQuery();
92
+
93
+ @Config("database")
94
+ @ConfigDefault("null")
95
+ public Optional<String> getDatabase();
96
+
97
+ @Config("job_id")
98
+ @ConfigDefault("null")
99
+ public Optional<String> getJobId();
100
+
101
+ @Config("stop_on_invalid_record")
102
+ @ConfigDefault("false")
103
+ public boolean getStopOnInvalidRecord();
104
+
105
+ // TODO column_options
106
+
107
+ @ConfigInject
108
+ BufferAllocator getBufferAllocator();
109
+ }
110
+
111
+ public interface HttpProxyTask
112
+ extends Task
113
+ {
114
+ @Config("host")
115
+ public String getHost();
116
+
117
+ @Config("port")
118
+ public int getPort();
119
+
120
+ @Config("use_ssl")
121
+ @ConfigDefault("false")
122
+ public boolean getUseSsl();
123
+
124
+ @Config("user")
125
+ @ConfigDefault("null")
126
+ public Optional<String> getUser();
127
+
128
+ @Config("password")
129
+ @ConfigDefault("null")
130
+ public Optional<String> getPassword();
131
+ }
132
+
133
+ private final Logger log;
134
+
135
/**
 * Creates the plugin and obtains the logger for this class from Embulk's {@code Exec}.
 */
@Inject
public TdInputPlugin()
{
    log = getLogger(getClass());
}
140
+
141
+ @Override
142
+ public ConfigDiff transaction(ConfigSource config, InputPlugin.Control control)
143
+ {
144
+ PluginTask task = config.loadConfig(PluginTask.class);
145
+ try (TDClient client = newTDClient(task)) {
146
+ TDJob job = getTDJob(task, client);
147
+
148
+ Optional<String> jobResultSchema = job.getResultSchema();
149
+ if (!jobResultSchema.isPresent()) {
150
+ throw new ConfigException(String.format("Not found result schema of job %s", job.getJobId()));
151
+ }
152
+
153
+ Schema inputSchema = convertSchema(job.getType(), toJsonNode(jobResultSchema.get()));
154
+ newValueWriters(inputSchema); // validate if value writers can be created according to the input schema
155
+
156
+ TaskSource taskSource = task.dump().set("job_id", job.getJobId()); // overwrite job_id
157
+ return resume(taskSource, inputSchema, 1, control);
158
+ }
159
+ }
160
+
161
+ private TDClient newTDClient(PluginTask task)
162
+ {
163
+ TDClientBuilder builder = TDClient.newBuilder();
164
+ builder.setApiKey(task.getApiKey());
165
+ builder.setEndpoint(task.getEndpoint());
166
+ builder.setUseSSL(task.getUseSsl());
167
+
168
+ Optional<ProxyConfig>proxyConfig = newProxyConfig(task.getHttpProxy());
169
+ if (proxyConfig.isPresent()) {
170
+ builder.setProxy(proxyConfig.get());
171
+ }
172
+
173
+ return builder.build();
174
+ }
175
+
176
+ private Optional<ProxyConfig> newProxyConfig(Optional<HttpProxyTask> task)
177
+ {
178
+ // This plugin searches http proxy settings and configures them to TDClient. The order of proxy setting searching is:
179
+ // 1. System properties
180
+ // 2. http_proxy config option provided by this plugin
181
+
182
+ Properties props = System.getProperties();
183
+ if (props.containsKey("http.proxyHost") || props.containsKey("https.proxyHost")) {
184
+ boolean useSsl = props.containsKey("https.proxyHost");
185
+ String proto = !useSsl ? "http" : "https";
186
+ String host = props.getProperty(proto + ".proxyHost");
187
+ int port = parseInt(props.getProperty(proto + ".proxyPort", !useSsl ? "80" : "443"));
188
+ Optional<String> user = fromNullable(props.getProperty(proto + ".proxyUser"));
189
+ Optional<String> password = fromNullable(props.getProperty(proto + ".proxyPassword"));
190
+ return Optional.of(new ProxyConfig(host, port, useSsl, user, password));
191
+ }
192
+ else if (task.isPresent()) {
193
+ HttpProxyTask proxyTask = task.get();
194
+ return Optional.of(new ProxyConfig(proxyTask.getHost(), proxyTask.getPort(), proxyTask.getUseSsl(),
195
+ proxyTask.getUser(), proxyTask.getPassword()));
196
+ }
197
+ else {
198
+ return Optional.absent();
199
+ }
200
+ }
201
+
202
+ private TDJob getTDJob(PluginTask task, TDClient client)
203
+ {
204
+ String jobId;
205
+ if (!task.getJobId().isPresent()) {
206
+ if (!task.getQuery().isPresent() || !task.getDatabase().isPresent()) {
207
+ throw new ConfigException("Must specify both of 'query' and 'database' options if 'job_id' option is not used.");
208
+ }
209
+ jobId = submitJob(task, client);
210
+ }
211
+ else {
212
+ jobId = task.getJobId().get();
213
+ }
214
+
215
+ waitJobCompletion(task, client, jobId);
216
+ return client.jobInfo(jobId);
217
+ }
218
+
219
+ private String submitJob(PluginTask task, TDClient client)
220
+ {
221
+ String query = task.getQuery().get();
222
+ String database = task.getDatabase().get();
223
+
224
+ log.info(String.format(ENGLISH, "Submit a query for database '%s': %s", database, query));
225
+ String jobId = client.submit(TDJobRequest.newPrestoQuery(database, query));
226
+ log.info(String.format(ENGLISH, "Job %s is queued.", jobId));
227
+ return jobId;
228
+ }
229
+
230
+ private void waitJobCompletion(PluginTask task, TDClient client, String jobId)
231
+ {
232
+ TDJobSummary js;
233
+ long waitTime = 5 * 1000; // 5 secs
234
+
235
+ // wait for job finish
236
+ log.info(String.format(ENGLISH, "Confirm that job %s finished", jobId));
237
+ while (true) {
238
+ js = client.jobStatus(jobId);
239
+ if (js.getStatus().isFinished()) {
240
+ break;
241
+ }
242
+
243
+ log.debug("Wait for job finished");
244
+ try {
245
+ Thread.sleep(waitTime);
246
+ }
247
+ catch (InterruptedException ignored) {
248
+ }
249
+ }
250
+
251
+ // confirm if the job status is 'success'
252
+ if (js.getStatus() != TDJob.Status.SUCCESS) {
253
+ throw new ConfigException(String.format(ENGLISH, "Cannot download job result because the job was '%s'.", js.getStatus()));
254
+ }
255
+ }
256
+
257
+ private static JsonNode toJsonNode(String schema)
258
+ {
259
+ try {
260
+ return new ObjectMapper().readTree(schema);
261
+ }
262
+ catch (IOException e) {
263
+ throw new ConfigException(String.format(ENGLISH, "Failed to parse job result schema as JSON: %s", schema));
264
+ }
265
+ }
266
+
267
+ private Schema convertSchema(TDJob.Type jobType, JsonNode from)
268
+ {
269
+ Schema.Builder schema = new Schema.Builder();
270
+ ArrayNode a = (ArrayNode) from;
271
+ for (int i = 0; i < a.size(); i++) {
272
+ ArrayNode column = (ArrayNode)a.get(i);
273
+ String name = column.get(0).asText();
274
+ Type type = convertColumnType(jobType, column.get(1).asText());
275
+ schema.add(name, type);
276
+ }
277
+ return schema.build();
278
+ }
279
+
280
+ private Type convertColumnType(TDJob.Type jobType, String from)
281
+ {
282
+ switch (jobType) {
283
+ case PRESTO:
284
+ return convertPrestoColumnType(from);
285
+ case HIVE:
286
+ default:
287
+ throw new ConfigException(String.format(ENGLISH, "Unsupported job type '%s'. Supported types are [presto].", jobType)); // TODO hive
288
+ }
289
+ }
290
+
291
+ private Type convertPrestoColumnType(String from)
292
+ {
293
+ String t = from.toUpperCase(ENGLISH);
294
+ if (t.equals("BOOLEAN")) {
295
+ return BOOLEAN;
296
+ }
297
+ else if (t.equals("BIGINT")) {
298
+ return LONG;
299
+ }
300
+ else if (t.equals("DOUBLE") || t.equals("DECIMAL") || t.startsWith("DECIMAL")) {
301
+ return DOUBLE;
302
+ }
303
+ else if (t.equals("VARCHAR") || t.startsWith("VARCHAR")) {
304
+ return STRING;
305
+ }
306
+ else {
307
+ throw new ConfigException(String.format(ENGLISH, "Unsupported presto type '%s'", from)); // TODO other types
308
+ }
309
+ }
310
+
311
+ @Override
312
+ public ConfigDiff resume(TaskSource taskSource,
313
+ Schema schema, int taskCount,
314
+ InputPlugin.Control control)
315
+ {
316
+ control.run(taskSource, schema, taskCount);
317
+ return newConfigDiff();
318
+ }
319
+
320
+ @Override
321
+ public void cleanup(TaskSource taskSource,
322
+ Schema schema, int taskCount,
323
+ List<TaskReport> successTaskReports)
324
+ {
325
+ // do nothing
326
+ }
327
+
328
+ @Override
329
+ public TaskReport run(TaskSource taskSource,
330
+ final Schema schema, int taskIndex,
331
+ PageOutput output)
332
+ {
333
+ final PluginTask task = taskSource.loadTask(PluginTask.class);
334
+ final BufferAllocator allocator = task.getBufferAllocator();
335
+ final ValueWriter[] writers = newValueWriters(schema);
336
+ final String jobId = taskSource.get(String.class, "job_id");
337
+ final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
338
+
339
+ try (final PageBuilder pageBuilder = new PageBuilder(allocator, schema, output);
340
+ final TDClient client = newTDClient(task)) {
341
+ client.jobResult(jobId, MESSAGE_PACK_GZ, new Function<InputStream, Void>() {
342
+ @Override
343
+ public Void apply(InputStream input)
344
+ {
345
+ try (MessageUnpacker unpacker = MessagePack.newDefaultUnpacker(new GZIPInputStream(input))) {
346
+ while (unpacker.hasNext()) {
347
+ try {
348
+ Value v;
349
+ try {
350
+ v = unpacker.unpackValue();
351
+ }
352
+ catch (IOException e) {
353
+ throw new InvalidRecordException("Cannot unpack value", e);
354
+ }
355
+
356
+ if (!v.isArrayValue()) {
357
+ throw new InvalidRecordException(String.format(ENGLISH, "Must be array value: ", v.toString()));
358
+ }
359
+
360
+ ArrayValue record = v.asArrayValue();
361
+ if (record.size() != schema.size()) {
362
+ throw new InvalidRecordException(String.format(ENGLISH, "The size (%d) of the record is invalid", record.size()));
363
+ }
364
+
365
+ // write records to the page
366
+ for (int i = 0; i < writers.length; i++) {
367
+ writers[i].write(record.get(i), pageBuilder);
368
+ }
369
+
370
+ pageBuilder.addRecord();
371
+ }
372
+ catch (InvalidRecordException e) {
373
+ if (stopOnInvalidRecord) {
374
+ throw new DataException(String.format(ENGLISH, "Invalid record (%s)", e.getMessage()), e);
375
+ }
376
+ log.warn(String.format(ENGLISH, "Skipped record (%s)", e.getMessage()));
377
+ }
378
+ }
379
+ }
380
+ catch (IOException e) {
381
+ throw Throwables.propagate(e);
382
+ }
383
+
384
+ return null;
385
+ }
386
+ });
387
+
388
+ pageBuilder.finish();
389
+ }
390
+
391
+ return newTaskReport();
392
+ }
393
+
394
+ private ValueWriter[] newValueWriters(Schema schema)
395
+ {
396
+ ValueWriter[] writers = new ValueWriter[schema.size()];
397
+ for (int i = 0; i < schema.size(); i++) {
398
+ writers[i] = newValueWriter(schema.getColumn(i));
399
+ }
400
+ return writers;
401
+ }
402
+
403
+ private ValueWriter newValueWriter(Column column)
404
+ {
405
+ Type type = column.getType();
406
+ if (type.equals(BOOLEAN)) {
407
+ return new BooleanValueWriter(column);
408
+ }
409
+ else if (type.equals(DOUBLE)) {
410
+ return new DoubleValueWriter(column);
411
+ }
412
+ else if (type.equals(JSON)) {
413
+ throw new ConfigException(String.format(ENGLISH, "Unsupported column type (%s:%s)", column.getName(), type)); // TODO
414
+ }
415
+ else if (type.equals(LONG)) {
416
+ return new LongValueWriter(column);
417
+ }
418
+ else if (type.equals(STRING)) {
419
+ return new StringValueWriter(column);
420
+ }
421
+ else if (type.equals(TIMESTAMP)) {
422
+ throw new ConfigException(String.format(ENGLISH, "Unsupported column type (%s:%s)", column.getName(), type)); // TODO
423
+ }
424
+ else {
425
+ throw new ConfigException(String.format(ENGLISH, "Unsupported column type (%s:%s)", column.getName(), type)); // TODO
426
+ }
427
+ }
428
+
429
+ @Override
430
+ public ConfigDiff guess(ConfigSource config)
431
+ {
432
+ return newConfigDiff(); // do nothing
433
+ }
434
+
435
+ static class InvalidRecordException
436
+ extends RuntimeException
437
+ {
438
+ InvalidRecordException(String cause) {
439
+ super(cause);
440
+ }
441
+
442
+ InvalidRecordException(String cause, Throwable t) {
443
+ super(cause, t);
444
+ }
445
+ }
446
+ }