embulk-input-athena 0.1.0

Files changed (45)
  1. checksums.yaml +7 -0
  2. data/.gitignore +13 -0
  3. data/Dockerfile +8 -0
  4. data/LICENSE +21 -0
  5. data/README.md +46 -0
  6. data/build.gradle +101 -0
  7. data/config/checkstyle/checkstyle.xml +128 -0
  8. data/config/checkstyle/default.xml +108 -0
  9. data/docker-compose.yml +10 -0
  10. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  11. data/gradle/wrapper/gradle-wrapper.properties +5 -0
  12. data/gradlew +172 -0
  13. data/gradlew.bat +84 -0
  14. data/lib/embulk/input/athena.rb +3 -0
  15. data/src/main/java/org/embulk/input/athena/AthenaInputConnection.java +49 -0
  16. data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java +202 -0
  17. data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1 +192 -0
  18. data/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java +674 -0
  19. data/src/main/java/org/embulk/input/jdbc/JdbcColumn.java +58 -0
  20. data/src/main/java/org/embulk/input/jdbc/JdbcColumnOption.java +31 -0
  21. data/src/main/java/org/embulk/input/jdbc/JdbcInputConnection.java +397 -0
  22. data/src/main/java/org/embulk/input/jdbc/JdbcLiteral.java +38 -0
  23. data/src/main/java/org/embulk/input/jdbc/JdbcSchema.java +55 -0
  24. data/src/main/java/org/embulk/input/jdbc/Ssl.java +37 -0
  25. data/src/main/java/org/embulk/input/jdbc/ToString.java +54 -0
  26. data/src/main/java/org/embulk/input/jdbc/ToStringMap.java +35 -0
  27. data/src/main/java/org/embulk/input/jdbc/getter/AbstractColumnGetter.java +105 -0
  28. data/src/main/java/org/embulk/input/jdbc/getter/AbstractIncrementalHandler.java +45 -0
  29. data/src/main/java/org/embulk/input/jdbc/getter/AbstractTimestampColumnGetter.java +38 -0
  30. data/src/main/java/org/embulk/input/jdbc/getter/BigDecimalColumnGetter.java +59 -0
  31. data/src/main/java/org/embulk/input/jdbc/getter/BooleanColumnGetter.java +56 -0
  32. data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetter.java +21 -0
  33. data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetterFactory.java +207 -0
  34. data/src/main/java/org/embulk/input/jdbc/getter/DateColumnGetter.java +37 -0
  35. data/src/main/java/org/embulk/input/jdbc/getter/DoubleColumnGetter.java +66 -0
  36. data/src/main/java/org/embulk/input/jdbc/getter/FloatColumnGetter.java +66 -0
  37. data/src/main/java/org/embulk/input/jdbc/getter/JsonColumnGetter.java +57 -0
  38. data/src/main/java/org/embulk/input/jdbc/getter/LongColumnGetter.java +70 -0
  39. data/src/main/java/org/embulk/input/jdbc/getter/StringColumnGetter.java +96 -0
  40. data/src/main/java/org/embulk/input/jdbc/getter/TimeColumnGetter.java +37 -0
  41. data/src/main/java/org/embulk/input/jdbc/getter/TimestampColumnGetter.java +36 -0
  42. data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithTimeZoneIncrementalHandler.java +83 -0
  43. data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithoutTimeZoneIncrementalHandler.java +75 -0
  44. data/src/test/java/org/embulk/input/athena/TestAthenaInputPlugin.java +5 -0
  45. metadata +258 -0
data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1
@@ -0,0 +1,192 @@
+ package org.embulk.input.athena;
+
+ import java.sql.Connection;
+ import java.sql.DriverManager;
+ import java.sql.ResultSet;
+ import java.sql.SQLException;
+ import java.sql.Statement;
+ import java.util.List;
+ import java.util.Properties;
+
+ import com.google.common.base.Optional;
+
+ import org.embulk.config.Config;
+ import org.embulk.config.ConfigDefault;
+ import org.embulk.config.ConfigDiff;
+ import org.embulk.config.ConfigSource;
+ import org.embulk.config.Task;
+ import org.embulk.config.TaskReport;
+ import org.embulk.config.TaskSource;
+ import org.embulk.input.jdbc.AbstractJdbcInputPlugin;
+ import org.embulk.input.jdbc.JdbcInputConnection;
+ import org.embulk.spi.Exec;
+ import org.embulk.spi.InputPlugin;
+ import org.embulk.spi.PageOutput;
+ import org.embulk.spi.Schema;
+ import org.embulk.spi.SchemaConfig;
+
+ public class AthenaInputPlugin
+         extends AbstractJdbcInputPlugin
+ {
+     public interface AthenaPluginTask
+             extends AbstractJdbcInputPlugin.PluginTask
+     {
+         @Config("driver_path")
+         @ConfigDefault("null")
+         public Optional<String> getDriverPath();
+
+         // athena_url (required string)
+         @Config("athena_url")
+         public String getAthenaUrl();
+
+         // s3_staging_dir (required string)
+         @Config("s3_staging_dir")
+         public String getS3StagingDir();
+
+         // access_key (required string)
+         @Config("access_key")
+         public String getAccessKey();
+
+         // secret_key (required string)
+         @Config("secret_key")
+         public String getSecretKey();
+
+         // configuration option 2 (optional string, null is not allowed)
+         // @Config("option2")
+         // @ConfigDefault("\"myvalue\"")
+         // public String getOption2();
+
+         // configuration option 3 (optional string, null is allowed)
+         // @Config("option3")
+         // @ConfigDefault("null")
+         // public Optional<String> getOption3();
+
+         // if you get schema from config
+         // @Config("columns")
+         // public SchemaConfig getColumns();
+     }
+
+     @Override
+     protected Class<? extends PluginTask> getTaskClass() {
+         return AthenaPluginTask.class;
+     }
+
+     @Override
+     protected AthenaInputConnection newConnection(PluginTask pluginTask) throws SQLException {
+         AthenaPluginTask task = (AthenaPluginTask) pluginTask;
+         loadDriver("com.amazonaws.athena.jdbc.AthenaDriver", task.getDriverPath());
+         //Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");
+         Properties properties = new Properties();
+         properties.put("s3_staging_dir", task.getS3StagingDir());
+         properties.put("user", task.getAccessKey());
+         properties.put("password", task.getSecretKey());
+         properties.putAll(task.getOptions());
+
+         Connection connection = DriverManager.getConnection(task.getAthenaUrl(), properties);
+         try {
+             AthenaInputConnection c = new AthenaInputConnection(connection);
+             connection = null;
+             return c;
+         } finally {
+             if (connection != null) {
+                 connection.close();
+             }
+         }
+     }
+
+     /*
+     @Override
+     public ConfigDiff transaction(ConfigSource config,
+             InputPlugin.Control control)
+     {
+         PluginTask task = config.loadConfig(PluginTask.class);
+
+         // Schema schema = task.getColumns().toSchema();
+         Schema schema = Schema.builder().build();
+         int taskCount = 1; // number of run() method calls
+
+         return resume(task.dump(), schema, taskCount, control);
+     }
+
+     @Override
+     public ConfigDiff resume(TaskSource taskSource,
+             Schema schema, int taskCount,
+             InputPlugin.Control control)
+     {
+         control.run(taskSource, schema, taskCount);
+         return Exec.newConfigDiff();
+     }
+
+     @Override
+     public void cleanup(TaskSource taskSource,
+             Schema schema, int taskCount,
+             List<TaskReport> successTaskReports)
+     {
+     }
+
+     @Override
+     public TaskReport run(TaskSource taskSource,
+             Schema schema, int taskIndex,
+             PageOutput output)
+     {
+         PluginTask task = taskSource.loadTask(PluginTask.class);
+
+         // Write your code here :)
+
+         Connection connection = null;
+         Statement statement = null;
+         try {
+             connection = getAthenaConnection(task);
+             statement = connection.createStatement();
+             ResultSet resultSet = statement.executeQuery("select * from default.sample");
+
+             while (resultSet.next()) {
+                 String statusCode = resultSet.getString("created_at");
+                 System.out.println("st code" + statusCode);
+             }
+             resultSet.close();
+             connection.close();
+         } catch (Exception e) {
+             e.printStackTrace();
+         } finally {
+             try {
+                 if (statement != null)
+                     statement.close();
+             } catch (Exception ex) {
+
+             }
+             try {
+                 if (connection != null)
+                     connection.close();
+             } catch (Exception ex) {
+                 ex.printStackTrace();
+             }
+         }
+
+         return Exec.newTaskReport();
+     }
+
+     @Override
+     public ConfigDiff guess(ConfigSource config)
+     {
+         return Exec.newConfigDiff();
+     }
+     */
+     /*
+     protected Connection getAthenaConnection(PluginTask task) throws ClassNotFoundException, SQLException{
+         Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");
+         Properties properties = new Properties();
+         properties.put("s3_staging_dir", task.getS3StagingDir());
+         properties.put("user", task.getAccessKey());
+         properties.put("password", task.getSecretKey());
+         if (task.getLogPath() != null) {
+             properties.put("log_path", task.getLogPath());
+         }
+         if (task.getLogLevel() != null) {
+             properties.put("log_level", task.getLogLevel());
+         }
+
+         return DriverManager.getConnection(task.getAthenaUrl(), properties);
+     }
+     */
+ }
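
For orientation: the task options declared in AthenaPluginTask above translate into an Embulk config roughly like the one below. This is a minimal sketch, not taken from the gem's README; the `type: athena` plugin name is inferred from the usual embulk-input-xxx naming convention, and the endpoint URL, bucket, and credential values are illustrative placeholders.

  in:
    type: athena
    athena_url: jdbc:awsathena://athena.us-east-1.amazonaws.com:443
    s3_staging_dir: s3://your-bucket/athena-staging/
    access_key: YOUR_AWS_ACCESS_KEY_ID
    secret_key: YOUR_AWS_SECRET_ACCESS_KEY
    # driver_path is optional (@ConfigDefault("null")); point it at a local Athena JDBC jar
    # driver_path: /path/to/AthenaJDBC.jar
    query: select * from default.sample
  out:
    type: stdout

newConnection above maps access_key/secret_key to the driver's user/password connection properties, and any extra driver properties can be passed through the base class's options map.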
data/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java
@@ -0,0 +1,674 @@
+ package org.embulk.input.jdbc;
+
+ import java.io.File;
+ import java.io.FileFilter;
+ import java.net.MalformedURLException;
+ import java.net.URISyntaxException;
+ import java.net.URL;
+ import java.nio.file.Path;
+ import java.util.List;
+ import java.util.Map;
+ import java.util.Properties;
+ import java.nio.file.Paths;
+ import java.sql.ResultSet;
+ import java.sql.SQLException;
+
+ import org.slf4j.Logger;
+
+ import com.fasterxml.jackson.databind.JsonNode;
+ import com.google.common.base.Optional;
+ import com.google.common.base.Supplier;
+ import com.google.common.base.Throwables;
+ import com.google.common.collect.ImmutableList;
+
+ import org.embulk.config.Config;
+ import org.embulk.config.ConfigException;
+ import org.embulk.config.ConfigDefault;
+ import org.embulk.config.ConfigDiff;
+ import org.embulk.config.ConfigInject;
+ import org.embulk.config.ConfigSource;
+ import org.embulk.config.Task;
+ import org.embulk.config.TaskReport;
+ import org.embulk.config.TaskSource;
+ import org.embulk.plugin.PluginClassLoader;
+ import org.embulk.spi.BufferAllocator;
+ import org.embulk.spi.Column;
+ import org.embulk.spi.DataException;
+ import org.embulk.spi.PageBuilder;
+ import org.embulk.spi.InputPlugin;
+ import org.embulk.spi.PageOutput;
+ import org.embulk.spi.Schema;
+ import org.embulk.spi.Exec;
+ import org.embulk.input.jdbc.getter.ColumnGetter;
+ import org.embulk.input.jdbc.getter.ColumnGetterFactory;
+ import org.embulk.input.jdbc.JdbcInputConnection.BatchSelect;
+ import org.embulk.input.jdbc.JdbcInputConnection.PreparedQuery;
+ import org.joda.time.DateTimeZone;
+
+ import static java.util.Locale.ENGLISH;
+
+ public abstract class AbstractJdbcInputPlugin
+         implements InputPlugin
+ {
+     protected final Logger logger = Exec.getLogger(getClass());
+
+     public interface PluginTask extends Task
+     {
+         @Config("options")
+         @ConfigDefault("{}")
+         public ToStringMap getOptions();
+
+         @Config("table")
+         @ConfigDefault("null")
+         public Optional<String> getTable();
+         public void setTable(Optional<String> normalizedTableName);
+
+         @Config("query")
+         @ConfigDefault("null")
+         public Optional<String> getQuery();
+
+         @Config("select")
+         @ConfigDefault("null")
+         public Optional<String> getSelect();
+
+         @Config("where")
+         @ConfigDefault("null")
+         public Optional<String> getWhere();
+
+         @Config("order_by")
+         @ConfigDefault("null")
+         public Optional<String> getOrderBy();
+
+         @Config("incremental")
+         @ConfigDefault("false")
+         public boolean getIncremental();
+
+         @Config("incremental_columns")
+         @ConfigDefault("[]")
+         public List<String> getIncrementalColumns();
+         public void setIncrementalColumns(List<String> indexes);
+
+         @Config("last_record")
+         @ConfigDefault("null")
+         public Optional<List<JsonNode>> getLastRecord();
+
+         // TODO limit_value is necessary to make sure repeated bulk load transactions
+         // don't load the same record twice or miss records when the column
+         // specified at order_by parameter is not unique.
+         // For example, if the order_by column is "timestamp created_at"
+         // column whose precision is second, the table can include multiple
+         // records with the same created_at time. At the first bulk load
+         // transaction, it loads a record with created_at=2015-01-02 00:00:02.
+         // Then next transaction will use WHERE created_at > '2015-01-02 00:00:02'.
+         // However, if another record with created_at=2014-01-01 23:59:59 is
+         // inserted between the 2 transactions, the new record will be skipped.
+         // To prevent this scenario, we want to specify
+         // limit_value=2015-01-02 00:00:00 (exclusive). With this way, as long as
+         // a transaction runs after 2015-01-02 00:00:00 + some minutes, we don't
+         // skip records. Ideally, to automate the scheduling, we want to set
+         // limit_value="today".
+         //
+         //@Config("limit_value")
+         //@ConfigDefault("null")
+         //public Optional<String> getLimitValue();
+
+         //// TODO probably limit_rows is unnecessary as long as this has
+         // supports parallel execution (partition_by option) and resuming.
+         //@Config("limit_rows")
+         //@ConfigDefault("null")
+         //public Optional<Integer> getLimitRows();
+
+         @Config("connect_timeout")
+         @ConfigDefault("300")
+         public int getConnectTimeout();
+
+         @Config("socket_timeout")
+         @ConfigDefault("1800")
+         public int getSocketTimeout();
+
+         @Config("fetch_rows")
+         @ConfigDefault("10000")
+         // TODO set minimum number
+         public int getFetchRows();
+
+         // TODO parallel execution using "partition_by" config
+
+         @Config("column_options")
+         @ConfigDefault("{}")
+         public Map<String, JdbcColumnOption> getColumnOptions();
+
+         @Config("default_timezone")
+         @ConfigDefault("\"UTC\"")
+         public DateTimeZone getDefaultTimeZone();
+
+         @Config("default_column_options")
+         @ConfigDefault("{}")
+         public Map<String, JdbcColumnOption> getDefaultColumnOptions();
+
+         @Config("after_select")
+         @ConfigDefault("null")
+         public Optional<String> getAfterSelect();
+
+         public PreparedQuery getBuiltQuery();
+         public void setBuiltQuery(PreparedQuery query);
+
+         public JdbcSchema getQuerySchema();
+         public void setQuerySchema(JdbcSchema schema);
+
+         public List<Integer> getIncrementalColumnIndexes();
+         public void setIncrementalColumnIndexes(List<Integer> indexes);
+
+         @ConfigInject
+         public BufferAllocator getBufferAllocator();
+     }
+
+     // for subclasses to add @Config
+     protected Class<? extends PluginTask> getTaskClass()
+     {
+         return PluginTask.class;
+     }
+
+     protected abstract JdbcInputConnection newConnection(PluginTask task) throws SQLException;
+
+     @Override
+     public ConfigDiff transaction(ConfigSource config,
+             InputPlugin.Control control)
+     {
+         PluginTask task = config.loadConfig(getTaskClass());
+
+         if (task.getIncremental()) {
+             if (task.getOrderBy().isPresent()) {
+                 throw new ConfigException("order_by option must not be set if incremental is true");
+             }
+         }
+         else {
+             if (!task.getIncrementalColumns().isEmpty()) {
+                 throw new ConfigException("'incremental: true' must be set if incremental_columns is set");
+             }
+         }
+
+         Schema schema;
+         try (JdbcInputConnection con = newConnection(task)) {
+             con.showDriverVersion();
+
+             // TODO incremental_columns is not set => get primary key
+             schema = setupTask(con, task);
+         } catch (SQLException ex) {
+             throw Throwables.propagate(ex);
+         }
+
+         return buildNextConfigDiff(task, control.run(task.dump(), schema, 1));
+     }
+
+     protected Schema setupTask(JdbcInputConnection con, PluginTask task) throws SQLException
+     {
+         if (task.getTable().isPresent()) {
+             String actualTableName = normalizeTableNameCase(con, task.getTable().get());
+             task.setTable(Optional.of(actualTableName));
+         }
+
+         // build SELECT query and gets schema of its result
+         String rawQuery = getRawQuery(task, con);
+
+         JdbcSchema querySchema = con.getSchemaOfQuery(rawQuery);
+         task.setQuerySchema(querySchema);
+         // query schema should not change after incremental query
+
+         PreparedQuery preparedQuery;
+         if (task.getIncremental()) {
+             // build incremental query
+
+             List<String> incrementalColumns = task.getIncrementalColumns();
+             if (incrementalColumns.isEmpty()) {
+                 // incremental_columns is not set
+                 if (!task.getTable().isPresent()) {
+                     throw new ConfigException("incremental_columns option must be set if incremental is true and custom query option is set");
+                 }
+                 // get primary keys from the target table to use them as incremental_columns
+                 List<String> primaryKeys = con.getPrimaryKeys(task.getTable().get());
+                 if (primaryKeys.isEmpty()) {
+                     throw new ConfigException(String.format(ENGLISH,
+                             "Primary key is not available at the table '%s'. incremental_columns option must be set",
+                             task.getTable().get()));
+                 }
+                 logger.info("Using primary keys as incremental_columns: {}", primaryKeys);
+                 task.setIncrementalColumns(primaryKeys);
+                 incrementalColumns = primaryKeys;
+             }
+
+             List<Integer> incrementalColumnIndexes = findIncrementalColumnIndexes(querySchema, incrementalColumns);
+             task.setIncrementalColumnIndexes(incrementalColumnIndexes);
+
+             List<JsonNode> lastRecord;
+             if (task.getLastRecord().isPresent()) {
+                 lastRecord = task.getLastRecord().get();
+                 if (lastRecord.size() != incrementalColumnIndexes.size()) {
+                     throw new ConfigException("Number of values set at last_record must be same with number of columns set at incremental_columns");
+                 }
+             }
+             else {
+                 lastRecord = null;
+             }
+
+             if (task.getQuery().isPresent()) {
+                 preparedQuery = con.wrapIncrementalQuery(rawQuery, querySchema, incrementalColumnIndexes, lastRecord);
+             }
+             else {
+                 preparedQuery = con.rebuildIncrementalQuery(
+                         task.getTable().get(), task.getSelect(),
+                         task.getWhere(),
+                         querySchema, incrementalColumnIndexes, lastRecord);
+             }
+         }
+         else {
+             task.setIncrementalColumnIndexes(ImmutableList.<Integer>of());
+             preparedQuery = new PreparedQuery(rawQuery, ImmutableList.<JdbcLiteral>of());
+         }
+
+         task.setBuiltQuery(preparedQuery);
+
+         // validate column_options
+         newColumnGetters(con, task, querySchema, null);
+
+         ColumnGetterFactory factory = newColumnGetterFactory(null, task.getDefaultTimeZone());
+         ImmutableList.Builder<Column> columns = ImmutableList.builder();
+         for (int i = 0; i < querySchema.getCount(); i++) {
+             JdbcColumn column = querySchema.getColumn(i);
+             JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), column, factory.getJdbcType(column.getSqlType()));
+             columns.add(new Column(i,
+                     column.getName(),
+                     factory.newColumnGetter(con, task, column, columnOption).getToType()));
+         }
+         return new Schema(columns.build());
+     }
+
+     private String normalizeTableNameCase(JdbcInputConnection con, String tableName)
+             throws SQLException
+     {
+         if (con.tableExists(tableName)) {
+             return tableName;
+         } else {
+             String upperTableName = tableName.toUpperCase();
+             String lowerTableName = tableName.toLowerCase();
+             boolean upperExists = con.tableExists(upperTableName);
+             boolean lowerExists = con.tableExists(lowerTableName);
+             if (upperExists && lowerExists) {
+                 throw new ConfigException(String.format("Cannot specify table '%s' because both '%s' and '%s' exist.",
+                         tableName, upperTableName, lowerTableName));
+             } else if (upperExists) {
+                 return upperTableName;
+             } else if (lowerExists) {
+                 return lowerTableName;
+             } else {
+                 // fallback to the given table name. this may throw error later at getSchemaOfQuery
+                 return tableName;
+             }
+         }
+     }
+
+     private List<Integer> findIncrementalColumnIndexes(JdbcSchema schema, List<String> incrementalColumns)
+             throws SQLException
+     {
+         ImmutableList.Builder<Integer> builder = ImmutableList.builder();
+         for (String name : incrementalColumns) {
+             Optional<Integer> index = schema.findColumn(name);
+             if (index.isPresent()) {
+                 builder.add(index.get());
+             }
+             else {
+                 throw new ConfigException(String.format(ENGLISH,
+ "Column name '%s' is in incremental_columns option does not exist",
321
+                         name));
+             }
+         }
+         return builder.build();
+     }
+
+     private String getRawQuery(PluginTask task, JdbcInputConnection con) throws SQLException
+     {
+         if (task.getQuery().isPresent()) {
+             if (task.getTable().isPresent() || task.getSelect().isPresent() ||
+                     task.getWhere().isPresent() || task.getOrderBy().isPresent()) {
+                 throw new ConfigException("'table', 'select', 'where' and 'order_by' parameters are unnecessary if 'query' parameter is set.");
+             } else if (!task.getIncrementalColumns().isEmpty() || task.getLastRecord().isPresent()) {
+                 throw new ConfigException("'incremental_columns' and 'last_record' parameters are not supported if 'query' parameter is set.");
+             }
+             return task.getQuery().get();
+         } else if (task.getTable().isPresent()) {
+             return con.buildSelectQuery(task.getTable().get(), task.getSelect(),
+                     task.getWhere(), task.getOrderBy());
+         } else {
+             throw new ConfigException("'table' or 'query' parameter is required");
+         }
+     }
+
+     @Override
+     public ConfigDiff resume(TaskSource taskSource,
+             Schema schema, int taskCount,
+             InputPlugin.Control control)
+     {
+         PluginTask task = taskSource.loadTask(getTaskClass());
+
+         // TODO when parallel execution is implemented and enabled, (maybe) order_by
+         // is necessary to resume. transaction() gets the range of order_by
+         // column and set it to WHERE condition to make the operation deterministic
+
+         return buildNextConfigDiff(task, control.run(taskSource, schema, taskCount));
+     }
+
+     public ConfigDiff guess(ConfigSource config)
+     {
+         return Exec.newConfigDiff();
+     }
+
+     protected ConfigDiff buildNextConfigDiff(PluginTask task, List<TaskReport> reports)
+     {
+         ConfigDiff next = Exec.newConfigDiff();
+         if (reports.size() > 0 && reports.get(0).has("last_record")) {
+             next.set("last_record", reports.get(0).get(JsonNode.class, "last_record"));
+         } else if (task.getLastRecord().isPresent()) {
+             next.set("last_record", task.getLastRecord().get());
+         }
+         return next;
+     }
+
+     @Override
+     public void cleanup(TaskSource taskSource,
+             Schema schema, int taskCount,
+             List<TaskReport> successTaskReports)
+     {
+         // do nothing
+     }
+
+     private static class LastRecordStore
+     {
+         private final List<Integer> columnIndexes;
+         private final JsonNode[] lastValues;
+         private final List<String> columnNames;
+
+         public LastRecordStore(List<Integer> columnIndexes, List<String> columnNames)
+         {
+             this.columnIndexes = columnIndexes;
+             this.lastValues = new JsonNode[columnIndexes.size()];
+             this.columnNames = columnNames;
+         }
+
+         public void accept(List<ColumnGetter> getters)
+                 throws SQLException
+         {
+             for (int i = 0; i < columnIndexes.size(); i++) {
+                 lastValues[i] = getters.get(columnIndexes.get(i)).encodeToJson();
+             }
+         }
+
+         public List<JsonNode> getList()
+         {
+             ImmutableList.Builder<JsonNode> builder = ImmutableList.builder();
+             for (int i = 0; i < lastValues.length; i++) {
+                 if (lastValues[i] == null || lastValues[i].isNull()) {
+                     throw new DataException(String.format(ENGLISH,
+                             "incremental_columns can't include null values but the last row is null at column '%s'",
+                             columnNames.get(i)));
+                 }
+                 builder.add(lastValues[i]);
+             }
+             return builder.build();
+         }
+     }
+
+     @Override
+     public TaskReport run(TaskSource taskSource,
+             Schema schema, int taskIndex,
+             PageOutput output)
+     {
+         PluginTask task = taskSource.loadTask(getTaskClass());
+
+         PreparedQuery builtQuery = task.getBuiltQuery();
+         JdbcSchema querySchema = task.getQuerySchema();
+         BufferAllocator allocator = task.getBufferAllocator();
+         PageBuilder pageBuilder = new PageBuilder(allocator, schema, output);
+
+         long totalRows = 0;
+
+         LastRecordStore lastRecordStore = null;
+
+         try (JdbcInputConnection con = newConnection(task)) {
+             List<ColumnGetter> getters = newColumnGetters(con, task, querySchema, pageBuilder);
+             try (BatchSelect cursor = con.newSelectCursor(builtQuery, getters, task.getFetchRows(), task.getSocketTimeout())) {
+                 while (true) {
+                     long rows = fetch(cursor, getters, pageBuilder);
+                     if (rows <= 0L) {
+                         break;
+                     }
+                     totalRows += rows;
+                 }
+             }
+
+             if (task.getIncremental() && totalRows > 0) {
+                 lastRecordStore = new LastRecordStore(task.getIncrementalColumnIndexes(), task.getIncrementalColumns());
+                 lastRecordStore.accept(getters);
+             }
+
+             pageBuilder.finish();
+
+             // after_select runs after pageBuilder.finish because pageBuilder.finish may fail.
+             // TODO Output plugin's transaction might still fail. In that case, after_select is
+             // already done but output plugin didn't commit the data to the target storage.
+             // This means inconsistency between data source and destination. To avoid this
+             // issue, we need another option like `after_commit` that runs after output plugin's
+             // commit. after_commit can't run in the same transaction with SELECT. So,
+             // after_select gets values and store them in TaskReport, and after_commit take
+             // them as placeholder. Or, after_select puts values to an intermediate table, and
+             // after_commit moves those values to the actual table.
+             if (task.getAfterSelect().isPresent()) {
+                 con.executeUpdate(task.getAfterSelect().get());
+                 con.connection.commit();
+             }
+         } catch (SQLException ex) {
+             throw Throwables.propagate(ex);
+         }
+
+         TaskReport report = Exec.newTaskReport();
+         if (lastRecordStore != null) {
+             report.set("last_record", lastRecordStore.getList());
+         }
+
+         return report;
+     }
+
+     protected ColumnGetterFactory newColumnGetterFactory(PageBuilder pageBuilder, DateTimeZone dateTimeZone)
+     {
+         return new ColumnGetterFactory(pageBuilder, dateTimeZone);
+     }
+
+     private List<ColumnGetter> newColumnGetters(JdbcInputConnection con, PluginTask task, JdbcSchema querySchema, PageBuilder pageBuilder)
+             throws SQLException
+     {
+         ColumnGetterFactory factory = newColumnGetterFactory(pageBuilder, task.getDefaultTimeZone());
+         ImmutableList.Builder<ColumnGetter> getters = ImmutableList.builder();
+         for (JdbcColumn c : querySchema.getColumns()) {
+             JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), c, factory.getJdbcType(c.getSqlType()));
+             getters.add(factory.newColumnGetter(con, task, c, columnOption));
+         }
+         return getters.build();
+     }
+
+     private static JdbcColumnOption columnOptionOf(Map<String, JdbcColumnOption> columnOptions, Map<String, JdbcColumnOption> defaultColumnOptions, JdbcColumn targetColumn, String targetColumnSQLType)
+     {
+         JdbcColumnOption columnOption = columnOptions.get(targetColumn.getName());
+         if (columnOption == null) {
+             String foundName = null;
+             for (Map.Entry<String, JdbcColumnOption> entry : columnOptions.entrySet()) {
+                 if (entry.getKey().equalsIgnoreCase(targetColumn.getName())) {
+                     if (columnOption != null) {
+                         throw new ConfigException(String.format("Cannot specify column '%s' because both '%s' and '%s' exist in column_options.",
+                                 targetColumn.getName(), foundName, entry.getKey()));
+                     }
+                     foundName = entry.getKey();
+                     columnOption = entry.getValue();
+                 }
+             }
+         }
+
+         return Optional
+                 .fromNullable(columnOption)
+                 .or(Optional.fromNullable(defaultColumnOptions.get(targetColumnSQLType)))
+                 .or(
+                         // default column option
+                         new Supplier<JdbcColumnOption>()
+                         {
+                             public JdbcColumnOption get()
+                             {
+                                 return Exec.newConfigSource().loadConfig(JdbcColumnOption.class);
+                             }
+                         });
+     }
+
+     private long fetch(BatchSelect cursor,
+             List<ColumnGetter> getters, PageBuilder pageBuilder) throws SQLException
+     {
+         ResultSet result = cursor.fetch();
+         if (result == null || !result.next()) {
+             return 0;
+         }
+
+         List<Column> columns = pageBuilder.getSchema().getColumns();
+         long rows = 0;
+         long reportRows = 500;
+         do {
+             for (int i = 0; i < getters.size(); i++) {
+                 int index = i + 1; // JDBC column index begins from 1
+                 getters.get(i).getAndSet(result, index, columns.get(i));
+             }
+             pageBuilder.addRecord();
+             rows++;
+             if (rows % reportRows == 0) {
+                 logger.info(String.format("Fetched %,d rows.", rows));
+                 reportRows *= 2;
+             }
+         } while (result.next());
+
+         return rows;
+     }
+
+     //// TODO move to embulk.spi.util?
+     //private static class ListPageOutput
+     //{
+     //    public ImmutableList.Builder<Page> pages;
+     //
+     //    public ListPageOutput()
+     //    {
+     //        reset();
+     //    }
+     //
+     //    @Override
+     //    public void add(Page page)
+     //    {
+     //        pages.add(page);
+     //    }
+     //
+     //    @Override
+     //    public void finish()
+     //    {
+     //    }
+     //
+     //    @Override
+     //    public void close()
+     //    {
+     //    }
+     //
+     //    public List<Page> getPages()
+     //    {
+     //        return pages.build();
+     //    }
+     //
+     //    public void reset()
+     //    {
+     //        pages = ImmutableList.builder();
+     //    }
+     //}
+
+     protected void loadDriver(String className, Optional<String> driverPath)
+     {
+         if (driverPath.isPresent()) {
+             addDriverJarToClasspath(driverPath.get());
+         } else {
+             try {
+                 // Gradle test task will add JDBC driver to classpath
+                 Class.forName(className);
+
+             } catch (ClassNotFoundException ex) {
+                 File root = findPluginRoot();
+                 File driverLib = new File(root, "default_jdbc_driver");
+                 File[] files = driverLib.listFiles(new FileFilter() {
+                     @Override
+                     public boolean accept(File file) {
+                         return file.isFile() && file.getName().endsWith(".jar");
+                     }
+                 });
+                 if (files == null || files.length == 0) {
+                     throw new RuntimeException("Cannot find JDBC driver in '" + root.getAbsolutePath() + "'.");
+                 } else {
+                     for (File file : files) {
+                         logger.info("JDBC Driver = " + file.getAbsolutePath());
+                         addDriverJarToClasspath(file.getAbsolutePath());
+                     }
+                 }
+             }
+         }
+
+         // Load JDBC Driver
+         try {
+             Class.forName(className);
+         } catch (ClassNotFoundException ex) {
+             throw new RuntimeException(ex);
+         }
+     }
+
+     protected void addDriverJarToClasspath(String glob)
+     {
+         // TODO match glob
+         PluginClassLoader loader = (PluginClassLoader) getClass().getClassLoader();
+         Path path = Paths.get(glob);
+         if (!path.toFile().exists()) {
+             throw new ConfigException("The specified driver jar doesn't exist: " + glob);
+         }
+         loader.addPath(Paths.get(glob));
+     }
+
+     protected File findPluginRoot()
+     {
+         try {
+             URL url = getClass().getResource("/" + getClass().getName().replace('.', '/') + ".class");
+             if (url.toString().startsWith("jar:")) {
+                 url = new URL(url.toString().replaceAll("^jar:", "").replaceAll("![^!]*$", ""));
+             }
+
+             File folder = new File(url.toURI()).getParentFile();
+             for (;; folder = folder.getParentFile()) {
+                 if (folder == null) {
+                     throw new RuntimeException("Cannot find 'embulk-input-xxx' folder.");
+                 }
+
+                 if (folder.getName().startsWith("embulk-input-")) {
+                     return folder;
+                 }
+             }
+         } catch (MalformedURLException | URISyntaxException e) {
+             throw new RuntimeException(e);
+         }
+     }
+
+     protected void logConnectionProperties(String url, Properties props)
+     {
+         Properties maskedProps = new Properties();
+         for (String key : props.stringPropertyNames()) {
+             if (key.equals("password")) {
+                 maskedProps.setProperty(key, "***");
+             } else {
+                 maskedProps.setProperty(key, props.getProperty(key));
+             }
+         }
+         logger.info("Connecting to {} options {}", url, maskedProps);
+     }
+ }
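
Taken together with the Athena-specific options, the base class's incremental mode can be layered on as in the sketch below (placeholders throughout; this is illustrative, not from the gem's documentation). Note the validation in transaction(): order_by must not be set when incremental is true, and last_record is normally carried forward automatically via the ConfigDiff returned by buildNextConfigDiff rather than written by hand.

  in:
    type: athena
    athena_url: jdbc:awsathena://athena.us-east-1.amazonaws.com:443
    s3_staging_dir: s3://your-bucket/athena-staging/
    access_key: YOUR_AWS_ACCESS_KEY_ID
    secret_key: YOUR_AWS_SECRET_ACCESS_KEY
    table: sample
    incremental: true
    incremental_columns: [created_at]
    # emitted by the previous run's ConfigDiff; the exact value format depends on the
    # column getter's encodeToJson for that column type
    # last_record: ["2015-01-02 00:00:02"]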