embulk-input-athena 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +13 -0
  3. data/Dockerfile +8 -0
  4. data/LICENSE +21 -0
  5. data/README.md +46 -0
  6. data/build.gradle +101 -0
  7. data/config/checkstyle/checkstyle.xml +128 -0
  8. data/config/checkstyle/default.xml +108 -0
  9. data/docker-compose.yml +10 -0
  10. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  11. data/gradle/wrapper/gradle-wrapper.properties +5 -0
  12. data/gradlew +172 -0
  13. data/gradlew.bat +84 -0
  14. data/lib/embulk/input/athena.rb +3 -0
  15. data/src/main/java/org/embulk/input/athena/AthenaInputConnection.java +49 -0
  16. data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java +202 -0
  17. data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1 +192 -0
  18. data/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java +674 -0
  19. data/src/main/java/org/embulk/input/jdbc/JdbcColumn.java +58 -0
  20. data/src/main/java/org/embulk/input/jdbc/JdbcColumnOption.java +31 -0
  21. data/src/main/java/org/embulk/input/jdbc/JdbcInputConnection.java +397 -0
  22. data/src/main/java/org/embulk/input/jdbc/JdbcLiteral.java +38 -0
  23. data/src/main/java/org/embulk/input/jdbc/JdbcSchema.java +55 -0
  24. data/src/main/java/org/embulk/input/jdbc/Ssl.java +37 -0
  25. data/src/main/java/org/embulk/input/jdbc/ToString.java +54 -0
  26. data/src/main/java/org/embulk/input/jdbc/ToStringMap.java +35 -0
  27. data/src/main/java/org/embulk/input/jdbc/getter/AbstractColumnGetter.java +105 -0
  28. data/src/main/java/org/embulk/input/jdbc/getter/AbstractIncrementalHandler.java +45 -0
  29. data/src/main/java/org/embulk/input/jdbc/getter/AbstractTimestampColumnGetter.java +38 -0
  30. data/src/main/java/org/embulk/input/jdbc/getter/BigDecimalColumnGetter.java +59 -0
  31. data/src/main/java/org/embulk/input/jdbc/getter/BooleanColumnGetter.java +56 -0
  32. data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetter.java +21 -0
  33. data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetterFactory.java +207 -0
  34. data/src/main/java/org/embulk/input/jdbc/getter/DateColumnGetter.java +37 -0
  35. data/src/main/java/org/embulk/input/jdbc/getter/DoubleColumnGetter.java +66 -0
  36. data/src/main/java/org/embulk/input/jdbc/getter/FloatColumnGetter.java +66 -0
  37. data/src/main/java/org/embulk/input/jdbc/getter/JsonColumnGetter.java +57 -0
  38. data/src/main/java/org/embulk/input/jdbc/getter/LongColumnGetter.java +70 -0
  39. data/src/main/java/org/embulk/input/jdbc/getter/StringColumnGetter.java +96 -0
  40. data/src/main/java/org/embulk/input/jdbc/getter/TimeColumnGetter.java +37 -0
  41. data/src/main/java/org/embulk/input/jdbc/getter/TimestampColumnGetter.java +36 -0
  42. data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithTimeZoneIncrementalHandler.java +83 -0
  43. data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithoutTimeZoneIncrementalHandler.java +75 -0
  44. data/src/test/java/org/embulk/input/athena/TestAthenaInputPlugin.java +5 -0
  45. metadata +258 -0
@@ -0,0 +1,192 @@
1
+ package org.embulk.input.athena;
2
+
3
+ import java.sql.Connection;
4
+ import java.sql.DriverManager;
5
+ import java.sql.ResultSet;
6
+ import java.sql.SQLException;
7
+ import java.sql.Statement;
8
+ import java.util.List;
9
+ import java.util.Properties;
10
+
11
+ import com.google.common.base.Optional;
12
+
13
+ import org.embulk.config.Config;
14
+ import org.embulk.config.ConfigDefault;
15
+ import org.embulk.config.ConfigDiff;
16
+ import org.embulk.config.ConfigSource;
17
+ import org.embulk.config.Task;
18
+ import org.embulk.config.TaskReport;
19
+ import org.embulk.config.TaskSource;
20
+ import org.embulk.input.jdbc.AbstractJdbcInputPlugin;
21
+ import org.embulk.input.jdbc.JdbcInputConnection;
22
+ import org.embulk.spi.Exec;
23
+ import org.embulk.spi.InputPlugin;
24
+ import org.embulk.spi.PageOutput;
25
+ import org.embulk.spi.Schema;
26
+ import org.embulk.spi.SchemaConfig;
27
+
28
+ public class AthenaInputPlugin
29
+ extends AbstractJdbcInputPlugin
30
+ {
31
+ public interface AthenaPluginTask
32
+ extends AbstractJdbcInputPlugin.PluginTask
33
+ {
34
+ @Config("driver_path")
35
+ @ConfigDefault("null")
36
+ public Optional<String> getDriverPath();
37
+
38
+ // athena_url (required string)
39
+ @Config("athena_url")
40
+ public String getAthenaUrl();
41
+
42
+ // s3_staging_dir (required string)
43
+ @Config("s3_staging_dir")
44
+ public String getS3StagingDir();
45
+
46
+ // access_key (required string)
47
+ @Config("access_key")
48
+ public String getAccessKey();
49
+
50
+ // secret_key (required string)
51
+ @Config("secret_key")
52
+ public String getSecretKey();
53
+
54
+ // configuration option 2 (optional string, null is not allowed)
55
+ // @Config("option2")
56
+ // @ConfigDefault("\"myvalue\"")
57
+ // public String getOption2();
58
+
59
+ // configuration option 3 (optional string, null is allowed)
60
+ // @Config("option3")
61
+ // @ConfigDefault("null")
62
+ // public Optional<String> getOption3();
63
+
64
+ // if you get schema from config
65
+ // @Config("columns")
66
+ // public SchemaConfig getColumns();
67
+ }
68
+
69
+ @Override
70
+ protected Class<? extends PluginTask> getTaskClass() {
71
+ return AthenaPluginTask.class;
72
+ }
73
+
74
+ @Override
75
+ protected AthenaInputConnection newConnection(PluginTask pluginTask) throws SQLException {
76
+ AthenaPluginTask task = (AthenaPluginTask) pluginTask;
77
+ loadDriver("com.amazonaws.athena.jdbc.AthenaDriver", task.getDriverPath());
78
+ //Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");
79
+ Properties properties = new Properties();
80
+ properties.put("s3_staging_dir", task.getS3StagingDir());
81
+ properties.put("user", task.getAccessKey());
82
+ properties.put("password", task.getSecretKey());
83
+ properties.putAll(task.getOptions());
84
+
85
+ Connection connection = DriverManager.getConnection(task.getAthenaUrl(), properties);
86
+ try {
87
+ AthenaInputConnection c = new AthenaInputConnection(connection);
88
+ connection = null;
89
+ return c;
90
+ } finally {
91
+ if (connection != null) {
92
+ connection.close();
93
+ }
94
+ }
95
+ }
96
+
97
+ /*
98
+ @Override
99
+ public ConfigDiff transaction(ConfigSource config,
100
+ InputPlugin.Control control)
101
+ {
102
+ PluginTask task = config.loadConfig(PluginTask.class);
103
+
104
+ // Schema schema = task.getColumns().toSchema();
105
+ Schema schema = Schema.builder().build();
106
+ int taskCount = 1; // number of run() method calls
107
+
108
+ return resume(task.dump(), schema, taskCount, control);
109
+ }
110
+
111
+ @Override
112
+ public ConfigDiff resume(TaskSource taskSource,
113
+ Schema schema, int taskCount,
114
+ InputPlugin.Control control)
115
+ {
116
+ control.run(taskSource, schema, taskCount);
117
+ return Exec.newConfigDiff();
118
+ }
119
+
120
+ @Override
121
+ public void cleanup(TaskSource taskSource,
122
+ Schema schema, int taskCount,
123
+ List<TaskReport> successTaskReports)
124
+ {
125
+ }
126
+
127
+ @Override
128
+ public TaskReport run(TaskSource taskSource,
129
+ Schema schema, int taskIndex,
130
+ PageOutput output)
131
+ {
132
+ PluginTask task = taskSource.loadTask(PluginTask.class);
133
+
134
+ // Write your code here :)
135
+
136
+ Connection connection = null;
137
+ Statement statement = null;
138
+ try {
139
+ connection = getAthenaConnection(task);
140
+ statement = connection.createStatement();
141
+ ResultSet resultSet = statement.executeQuery("select * from default.sample");
142
+
143
+ while(resultSet.next()){
144
+ String statusCode = resultSet.getString("created_at");
145
+ System.out.println("st code" + statusCode);
146
+ }
147
+ resultSet.close();
148
+ connection.close();
149
+ } catch (Exception e){
150
+ e.printStackTrace();
151
+ } finally {
152
+ try {
153
+ if (statement != null)
154
+ statement.close();
155
+ } catch (Exception ex) {
156
+
157
+ }
158
+ try {
159
+ if (connection != null)
160
+ connection.close();
161
+ } catch (Exception ex) {
162
+ ex.printStackTrace();
163
+ }
164
+ }
165
+
166
+ return Exec.newTaskReport();
167
+ }
168
+
169
+ @Override
170
+ public ConfigDiff guess(ConfigSource config)
171
+ {
172
+ return Exec.newConfigDiff();
173
+ }
174
+ */
175
+ /*
176
+ protected Connection getAthenaConnection(PluginTask task) throws ClassNotFoundException, SQLException{
177
+ Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");
178
+ Properties properties = new Properties();
179
+ properties.put("s3_staging_dir", task.getS3StagingDir());
180
+ properties.put("user", task.getAccessKey());
181
+ properties.put("password", task.getSecretKey());
182
+ if (task.getLogPath() != null){
183
+ properties.put("log_path", task.getLogPath());
184
+ }
185
+ if (task.getLogLevel() != null){
186
+ properties.put("log_level", task.getLogLevel());
187
+ }
188
+
189
+ return DriverManager.getConnection(task.getAthenaUrl(), properties);
190
+ }
191
+ */
192
+ }
@@ -0,0 +1,674 @@
1
+ package org.embulk.input.jdbc;
2
+
3
+ import java.io.File;
4
+ import java.io.FileFilter;
5
+ import java.net.MalformedURLException;
6
+ import java.net.URISyntaxException;
7
+ import java.net.URL;
8
+ import java.nio.file.Path;
9
+ import java.util.List;
10
+ import java.util.Map;
11
+ import java.util.Properties;
12
+ import java.nio.file.Paths;
13
+ import java.sql.ResultSet;
14
+ import java.sql.SQLException;
15
+
16
+ import org.slf4j.Logger;
17
+
18
+ import com.fasterxml.jackson.databind.JsonNode;
19
+ import com.google.common.base.Optional;
20
+ import com.google.common.base.Supplier;
21
+ import com.google.common.base.Throwables;
22
+ import com.google.common.collect.ImmutableList;
23
+
24
+ import org.embulk.config.Config;
25
+ import org.embulk.config.ConfigException;
26
+ import org.embulk.config.ConfigDefault;
27
+ import org.embulk.config.ConfigDiff;
28
+ import org.embulk.config.ConfigInject;
29
+ import org.embulk.config.ConfigSource;
30
+ import org.embulk.config.Task;
31
+ import org.embulk.config.TaskReport;
32
+ import org.embulk.config.TaskSource;
33
+ import org.embulk.plugin.PluginClassLoader;
34
+ import org.embulk.spi.BufferAllocator;
35
+ import org.embulk.spi.Column;
36
+ import org.embulk.spi.DataException;
37
+ import org.embulk.spi.PageBuilder;
38
+ import org.embulk.spi.InputPlugin;
39
+ import org.embulk.spi.PageOutput;
40
+ import org.embulk.spi.Schema;
41
+ import org.embulk.spi.Exec;
42
+ import org.embulk.input.jdbc.getter.ColumnGetter;
43
+ import org.embulk.input.jdbc.getter.ColumnGetterFactory;
44
+ import org.embulk.input.jdbc.JdbcInputConnection.BatchSelect;
45
+ import org.embulk.input.jdbc.JdbcInputConnection.PreparedQuery;
46
+ import org.joda.time.DateTimeZone;
47
+
48
+ import static java.util.Locale.ENGLISH;
49
+
50
/**
 * Base class for JDBC-based Embulk input plugins.
 *
 * <p>Implements the full {@link InputPlugin} lifecycle: builds a SELECT query
 * from config (or takes a raw query), optionally wraps it for incremental
 * loading keyed on incremental_columns, fetches rows through a cursor into a
 * {@link PageBuilder}, and reports the last fetched key values in the task
 * report so the next run can resume via last_record.
 *
 * <p>Subclasses supply the driver-specific connection through
 * {@link #newConnection(PluginTask)} and may extend {@link PluginTask}.
 */
public abstract class AbstractJdbcInputPlugin
        implements InputPlugin
{
    protected final Logger logger = Exec.getLogger(getClass());

    /** Configuration shared by all JDBC input plugins. */
    public interface PluginTask extends Task
    {
        // Extra driver properties merged into the JDBC connection Properties.
        @Config("options")
        @ConfigDefault("{}")
        public ToStringMap getOptions();

        // Target table name; mutually exclusive with "query".
        @Config("table")
        @ConfigDefault("null")
        public Optional<String> getTable();
        // Setter used by setupTask to store the case-normalized table name.
        public void setTable(Optional<String> normalizedTableName);

        // Raw SQL to run instead of building SELECT from table/select/where/order_by.
        @Config("query")
        @ConfigDefault("null")
        public Optional<String> getQuery();

        // SELECT expression list (defaults to * when building the query).
        @Config("select")
        @ConfigDefault("null")
        public Optional<String> getSelect();

        @Config("where")
        @ConfigDefault("null")
        public Optional<String> getWhere();

        // Not allowed together with incremental: true (ordering is derived
        // from incremental_columns in that mode).
        @Config("order_by")
        @ConfigDefault("null")
        public Optional<String> getOrderBy();

        @Config("incremental")
        @ConfigDefault("false")
        public boolean getIncremental();

        // Columns whose last values bound the next incremental run; falls back
        // to the table's primary keys when empty.
        @Config("incremental_columns")
        @ConfigDefault("[]")
        public List<String> getIncrementalColumns();
        public void setIncrementalColumns(List<String> indexes);

        // Values of incremental_columns from the previous run; must have the
        // same arity as incremental_columns.
        @Config("last_record")
        @ConfigDefault("null")
        public Optional<List<JsonNode>> getLastRecord();

        // TODO limit_value is necessary to make sure repeated bulk load transactions
        // don't load a same record twice or miss records when the column
        // specified at order_by parameter is not unique.
        // For example, if the order_by column is "timestamp created_at"
        // column whose precision is second, the table can include multiple
        // records with the same created_at time. At the first bulk load
        // transaction, it loads a record with created_at=2015-01-02 00:00:02.
        // Then next transaction will use WHERE created_at > '2015-01-02 00:00:02'.
        // However, if another record with created_at=2014-01-01 23:59:59 is
        // inserted between the 2 transactions, the new record will be skipped.
        // To prevent this scenario, we want to specify
        // limit_value=2015-01-02 00:00:00 (exclusive). With this way, as long as
        // a transaction runs after 2015-01-02 00:00:00 + some minutes, we don't
        // skip records. Ideally, to automate the scheduling, we want to set
        // limit_value="today".
        //
        //@Config("limit_value")
        //@ConfigDefault("null")
        //public Optional<String> getLimitValue();

        //// TODO probably limit_rows is unnecessary as long as this has
        // supports parallel execution (partition_by option) and resuming.
        //@Config("limit_rows")
        //@ConfigDefault("null")
        //public Optional<Integer> getLimitRows();

        // Connect timeout in seconds.
        @Config("connect_timeout")
        @ConfigDefault("300")
        public int getConnectTimeout();

        // Socket (statement) timeout in seconds, passed to newSelectCursor.
        @Config("socket_timeout")
        @ConfigDefault("1800")
        public int getSocketTimeout();

        // JDBC fetch size hint for the cursor.
        @Config("fetch_rows")
        @ConfigDefault("10000")
        // TODO set minimum number
        public int getFetchRows();

        // TODO parallel execution using "partition_by" config

        // Per-column type/format overrides, keyed by column name.
        @Config("column_options")
        @ConfigDefault("{}")
        public Map<String, JdbcColumnOption> getColumnOptions();

        @Config("default_timezone")
        @ConfigDefault("\"UTC\"")
        public DateTimeZone getDefaultTimeZone();

        // Fallback column options keyed by JDBC SQL type name.
        @Config("default_column_options")
        @ConfigDefault("{}")
        public Map<String, JdbcColumnOption> getDefaultColumnOptions();

        // SQL executed (and committed) after the SELECT finishes successfully.
        @Config("after_select")
        @ConfigDefault("null")
        public Optional<String> getAfterSelect();

        // Values below are computed in setupTask and carried to run() via TaskSource.
        public PreparedQuery getBuiltQuery();
        public void setBuiltQuery(PreparedQuery query);

        public JdbcSchema getQuerySchema();
        public void setQuerySchema(JdbcSchema schema);

        public List<Integer> getIncrementalColumnIndexes();
        public void setIncrementalColumnIndexes(List<Integer> indexes);

        @ConfigInject
        public BufferAllocator getBufferAllocator();
    }

    // for subclasses to add @Config
    protected Class<? extends PluginTask> getTaskClass()
    {
        return PluginTask.class;
    }

    /** Opens a driver-specific connection; implemented by concrete plugins. */
    protected abstract JdbcInputConnection newConnection(PluginTask task) throws SQLException;

    /**
     * Validates config, connects once to resolve the output schema and build
     * the (possibly incremental) query, then hands control to Embulk.
     * Always runs with a single task (taskCount = 1).
     */
    @Override
    public ConfigDiff transaction(ConfigSource config,
            InputPlugin.Control control)
    {
        PluginTask task = config.loadConfig(getTaskClass());

        if (task.getIncremental()) {
            if (task.getOrderBy().isPresent()) {
                throw new ConfigException("order_by option must not be set if incremental is true");
            }
        }
        else {
            if (!task.getIncrementalColumns().isEmpty()) {
                throw new ConfigException("'incremental: true' must be set if incremental_columns is set");
            }
        }

        Schema schema;
        try (JdbcInputConnection con = newConnection(task)) {
            con.showDriverVersion();

            // TODO incremental_columns is not set => get primary key
            schema = setupTask(con, task);
        } catch (SQLException ex) {
            throw Throwables.propagate(ex);
        }

        return buildNextConfigDiff(task, control.run(task.dump(), schema, 1));
    }

    /**
     * Builds the SELECT query, resolves its result schema, and (when
     * incremental) wraps or rebuilds it with WHERE/ORDER BY clauses derived
     * from incremental_columns and last_record. Stores the prepared query and
     * schema on the task and returns the Embulk schema.
     */
    protected Schema setupTask(JdbcInputConnection con, PluginTask task) throws SQLException
    {
        if (task.getTable().isPresent()) {
            String actualTableName = normalizeTableNameCase(con, task.getTable().get());
            task.setTable(Optional.of(actualTableName));
        }

        // build SELECT query and gets schema of its result
        String rawQuery = getRawQuery(task, con);

        JdbcSchema querySchema = con.getSchemaOfQuery(rawQuery);
        task.setQuerySchema(querySchema);
        // query schema should not change after incremental query

        PreparedQuery preparedQuery;
        if (task.getIncremental()) {
            // build incremental query

            List<String> incrementalColumns = task.getIncrementalColumns();
            if (incrementalColumns.isEmpty()) {
                // incremental_columns is not set
                if (!task.getTable().isPresent()) {
                    throw new ConfigException("incremental_columns option must be set if incremental is true and custom query option is set");
                }
                // get primary keys from the target table to use them as incremental_columns
                List<String> primaryKeys = con.getPrimaryKeys(task.getTable().get());
                if (primaryKeys.isEmpty()) {
                    throw new ConfigException(String.format(ENGLISH,
                                "Primary key is not available at the table '%s'. incremental_columns option must be set",
                                task.getTable().get()));
                }
                logger.info("Using primary keys as incremental_columns: {}", primaryKeys);
                task.setIncrementalColumns(primaryKeys);
                incrementalColumns = primaryKeys;
            }

            List<Integer> incrementalColumnIndexes = findIncrementalColumnIndexes(querySchema, incrementalColumns);
            task.setIncrementalColumnIndexes(incrementalColumnIndexes);

            List<JsonNode> lastRecord;
            if (task.getLastRecord().isPresent()) {
                lastRecord = task.getLastRecord().get();
                if (lastRecord.size() != incrementalColumnIndexes.size()) {
                    throw new ConfigException("Number of values set at last_record must be same with number of columns set at incremental_columns");
                }
            }
            else {
                lastRecord = null;
            }

            if (task.getQuery().isPresent()) {
                // custom query: wrap it as a subquery with incremental conditions
                preparedQuery = con.wrapIncrementalQuery(rawQuery, querySchema, incrementalColumnIndexes, lastRecord);
            }
            else {
                // table mode: rebuild SELECT with incremental WHERE/ORDER BY
                preparedQuery = con.rebuildIncrementalQuery(
                        task.getTable().get(), task.getSelect(),
                        task.getWhere(),
                        querySchema, incrementalColumnIndexes, lastRecord);
            }
        }
        else {
            task.setIncrementalColumnIndexes(ImmutableList.<Integer>of());
            preparedQuery = new PreparedQuery(rawQuery, ImmutableList.<JdbcLiteral>of());
        }

        task.setBuiltQuery(preparedQuery);

        // validate column_options
        newColumnGetters(con, task, querySchema, null);

        ColumnGetterFactory factory = newColumnGetterFactory(null, task.getDefaultTimeZone());
        ImmutableList.Builder<Column> columns = ImmutableList.builder();
        for (int i = 0; i < querySchema.getCount(); i++) {
            JdbcColumn column = querySchema.getColumn(i);
            JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), column, factory.getJdbcType(column.getSqlType()));
            columns.add(new Column(i,
                    column.getName(),
                    factory.newColumnGetter(con, task, column, columnOption).getToType()));
        }
        return new Schema(columns.build());
    }

    /**
     * Resolves the given table name against the database's identifier casing:
     * tries the name as-is, then all-upper and all-lower variants. Throws when
     * both cased variants exist (ambiguous); falls back to the original name
     * when none exists.
     */
    private String normalizeTableNameCase(JdbcInputConnection con, String tableName)
            throws SQLException
    {
        if (con.tableExists(tableName)) {
            return tableName;
        } else {
            String upperTableName = tableName.toUpperCase();
            String lowerTableName = tableName.toLowerCase();
            boolean upperExists = con.tableExists(upperTableName);
            boolean lowerExists = con.tableExists(lowerTableName);
            if (upperExists && lowerExists) {
                throw new ConfigException(String.format("Cannot specify table '%s' because both '%s' and '%s' exist.",
                            tableName, upperTableName, lowerTableName));
            } else if (upperExists) {
                return upperTableName;
            } else if (lowerExists) {
                return lowerTableName;
            } else {
                // fallback to the given table name. this may throw error later at getSchemaOfQuery
                return tableName;
            }
        }
    }

    /** Maps each incremental column name to its index in the query schema. */
    private List<Integer> findIncrementalColumnIndexes(JdbcSchema schema, List<String> incrementalColumns)
            throws SQLException
    {
        ImmutableList.Builder<Integer> builder = ImmutableList.builder();
        for (String name : incrementalColumns) {
            Optional<Integer> index = schema.findColumn(name);
            if (index.isPresent()) {
                builder.add(index.get());
            }
            else {
                throw new ConfigException(String.format(ENGLISH,
                            "Column name '%s' is in incremental_columns option does not exist",
                            name));
            }
        }
        return builder.build();
    }

    /**
     * Returns the SQL to run: either the user's raw "query" (rejecting
     * conflicting options) or a SELECT built from table/select/where/order_by.
     */
    private String getRawQuery(PluginTask task, JdbcInputConnection con) throws SQLException
    {
        if (task.getQuery().isPresent()) {
            if (task.getTable().isPresent() || task.getSelect().isPresent() ||
                    task.getWhere().isPresent() || task.getOrderBy().isPresent()) {
                throw new ConfigException("'table', 'select', 'where' and 'order_by' parameters are unnecessary if 'query' parameter is set.");
            } else if (!task.getIncrementalColumns().isEmpty() || task.getLastRecord().isPresent()) {
                throw new ConfigException("'incremental_columns' and 'last_record' parameters are not supported if 'query' parameter is set.");
            }
            return task.getQuery().get();
        } else if (task.getTable().isPresent()) {
            return con.buildSelectQuery(task.getTable().get(), task.getSelect(),
                    task.getWhere(), task.getOrderBy());
        } else {
            throw new ConfigException("'table' or 'query' parameter is required");
        }
    }

    @Override
    public ConfigDiff resume(TaskSource taskSource,
            Schema schema, int taskCount,
            InputPlugin.Control control)
    {
        PluginTask task = taskSource.loadTask(getTaskClass());

        // TODO when parallel execution is implemented and enabled, (maybe) order_by
        // is necessary to resume. transaction() gets the range of order_by
        // column and set it to WHERE condition to make the operation deterministic

        return buildNextConfigDiff(task, control.run(taskSource, schema, taskCount));
    }

    // Guessing is not supported; returns an empty diff.
    public ConfigDiff guess(ConfigSource config)
    {
        return Exec.newConfigDiff();
    }

    /**
     * Builds the ConfigDiff for the next run: propagates the newest
     * last_record from the task report, or keeps the current one when the run
     * produced none.
     */
    protected ConfigDiff buildNextConfigDiff(PluginTask task, List<TaskReport> reports)
    {
        ConfigDiff next = Exec.newConfigDiff();
        if (reports.size() > 0 && reports.get(0).has("last_record")) {
            next.set("last_record", reports.get(0).get(JsonNode.class, "last_record"));
        } else if (task.getLastRecord().isPresent()) {
            next.set("last_record", task.getLastRecord().get());
        }
        return next;
    }

    @Override
    public void cleanup(TaskSource taskSource,
            Schema schema, int taskCount,
            List<TaskReport> successTaskReports)
    {
        // do nothing
    }

    /**
     * Captures the values of the incremental columns from the most recently
     * fetched row, to be reported as last_record. Null values are rejected at
     * getList() time because they cannot bound the next incremental query.
     */
    private static class LastRecordStore
    {
        private final List<Integer> columnIndexes;
        private final JsonNode[] lastValues;
        private final List<String> columnNames;

        public LastRecordStore(List<Integer> columnIndexes, List<String> columnNames)
        {
            this.columnIndexes = columnIndexes;
            this.lastValues = new JsonNode[columnIndexes.size()];
            this.columnNames = columnNames;
        }

        // Snapshots the incremental-column values held by the getters
        // (i.e. the last row they processed).
        public void accept(List<ColumnGetter> getters)
                throws SQLException
        {
            for (int i = 0; i < columnIndexes.size(); i++) {
                lastValues[i] = getters.get(columnIndexes.get(i)).encodeToJson();
            }
        }

        public List<JsonNode> getList()
        {
            ImmutableList.Builder<JsonNode> builder = ImmutableList.builder();
            for (int i = 0; i < lastValues.length; i++) {
                if (lastValues[i] == null || lastValues[i].isNull()) {
                    throw new DataException(String.format(ENGLISH,
                                "incremental_columns can't include null values but the last row is null at column '%s'",
                                columnNames.get(i)));
                }
                builder.add(lastValues[i]);
            }
            return builder.build();
        }
    }

    /**
     * Executes the prepared query, streams all rows into the PageBuilder, and
     * returns a report carrying last_record when incremental loading fetched
     * at least one row. Runs after_select (committed) after a successful page
     * flush.
     */
    @Override
    public TaskReport run(TaskSource taskSource,
            Schema schema, int taskIndex,
            PageOutput output)
    {
        PluginTask task = taskSource.loadTask(getTaskClass());

        PreparedQuery builtQuery = task.getBuiltQuery();
        JdbcSchema querySchema = task.getQuerySchema();
        BufferAllocator allocator = task.getBufferAllocator();
        PageBuilder pageBuilder = new PageBuilder(allocator, schema, output);

        long totalRows = 0;

        LastRecordStore lastRecordStore = null;

        try (JdbcInputConnection con = newConnection(task)) {
            List<ColumnGetter> getters = newColumnGetters(con, task, querySchema, pageBuilder);
            try (BatchSelect cursor = con.newSelectCursor(builtQuery, getters, task.getFetchRows(), task.getSocketTimeout())) {
                while (true) {
                    long rows = fetch(cursor, getters, pageBuilder);
                    if (rows <= 0L) {
                        break;
                    }
                    totalRows += rows;
                }
            }

            if (task.getIncremental() && totalRows > 0) {
                // Getters still hold the values of the last row fetched.
                lastRecordStore = new LastRecordStore(task.getIncrementalColumnIndexes(), task.getIncrementalColumns());
                lastRecordStore.accept(getters);
            }

            pageBuilder.finish();

            // after_select runs after pageBuilder.finish because pageBuilder.finish may fail.
            // TODO Output plugin's transaction might still fail. In that case, after_select is
            // already done but output plugin didn't commit the data to the target storage.
            // This means inconsistency between data source and destination. To avoid this
            // issue, we need another option like `after_commit` that runs after output plugin's
            // commit. after_commit can't run in the same transaction with SELECT. So,
            // after_select gets values and store them in TaskReport, and after_commit take
            // them as placeholder. Or, after_select puts values to an intermediate table, and
            // after_commit moves those values to the actual table.
            if (task.getAfterSelect().isPresent()) {
                con.executeUpdate(task.getAfterSelect().get());
                con.connection.commit();
            }
        } catch (SQLException ex) {
            throw Throwables.propagate(ex);
        }

        TaskReport report = Exec.newTaskReport();
        if (lastRecordStore != null) {
            report.set("last_record", lastRecordStore.getList());
        }

        return report;
    }

    // Extension point: subclasses may return a driver-specific factory.
    protected ColumnGetterFactory newColumnGetterFactory(PageBuilder pageBuilder, DateTimeZone dateTimeZone)
    {
        return new ColumnGetterFactory(pageBuilder, dateTimeZone);
    }

    /**
     * Creates one ColumnGetter per query-schema column, resolving per-column
     * options. Also used with a null pageBuilder just to validate
     * column_options during setup.
     */
    private List<ColumnGetter> newColumnGetters(JdbcInputConnection con, PluginTask task, JdbcSchema querySchema, PageBuilder pageBuilder)
            throws SQLException
    {
        ColumnGetterFactory factory = newColumnGetterFactory(pageBuilder, task.getDefaultTimeZone());
        ImmutableList.Builder<ColumnGetter> getters = ImmutableList.builder();
        for (JdbcColumn c : querySchema.getColumns()) {
            JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), c, factory.getJdbcType(c.getSqlType()));
            getters.add(factory.newColumnGetter(con, task, c, columnOption));
        }
        return getters.build();
    }

    /**
     * Resolves the option for one column: exact column_options match first,
     * then a unique case-insensitive match (ambiguity is an error), then the
     * default_column_options entry for the column's SQL type, finally an
     * all-defaults option.
     */
    private static JdbcColumnOption columnOptionOf(Map<String, JdbcColumnOption> columnOptions, Map<String, JdbcColumnOption> defaultColumnOptions, JdbcColumn targetColumn, String targetColumnSQLType)
    {
        JdbcColumnOption columnOption = columnOptions.get(targetColumn.getName());
        if (columnOption == null) {
            String foundName = null;
            for (Map.Entry<String, JdbcColumnOption> entry : columnOptions.entrySet()) {
                if (entry.getKey().equalsIgnoreCase(targetColumn.getName())) {
                    if (columnOption != null) {
                        throw new ConfigException(String.format("Cannot specify column '%s' because both '%s' and '%s' exist in column_options.",
                                    targetColumn.getName(), foundName, entry.getKey()));
                    }
                    foundName = entry.getKey();
                    columnOption = entry.getValue();
                }
            }
        }

        return Optional
                .fromNullable(columnOption)
                .or(Optional.fromNullable(defaultColumnOptions.get(targetColumnSQLType)))
                .or(
                    // default column option
                    new Supplier<JdbcColumnOption>()
                    {
                        public JdbcColumnOption get()
                        {
                            return Exec.newConfigSource().loadConfig(JdbcColumnOption.class);
                        }
                    });
    }

    /**
     * Drains one batch from the cursor into the page builder.
     *
     * @return number of rows added; 0 when the cursor is exhausted
     */
    private long fetch(BatchSelect cursor,
            List<ColumnGetter> getters, PageBuilder pageBuilder) throws SQLException
    {
        ResultSet result = cursor.fetch();
        if (result == null || !result.next()) {
            return 0;
        }

        List<Column> columns = pageBuilder.getSchema().getColumns();
        long rows = 0;
        long reportRows = 500;
        do {
            for (int i=0; i < getters.size(); i++) {
                int index = i + 1; // JDBC column index begins from 1
                getters.get(i).getAndSet(result, index, columns.get(i));
            }
            pageBuilder.addRecord();
            rows++;
            // Progress logging with exponential backoff (500, 1000, 2000, ...).
            if (rows % reportRows == 0) {
                logger.info(String.format("Fetched %,d rows.", rows));
                reportRows *= 2;
            }
        } while (result.next());

        return rows;
    }

    //// TODO move to embulk.spi.util?
    //private static class ListPageOutput
    //{
    //  public ImmutableList.Builder<Page> pages;
    //
    //  public ListPageOutput()
    //  {
    //      reset();
    //  }
    //
    //  @Override
    //  public void add(Page page)
    //  {
    //      pages.add(page);
    //  }
    //
    //  @Override
    //  public void finish()
    //  {
    //  }
    //
    //  @Override
    //  public void close()
    //  {
    //  }
    //
    //  public List<Page> getPages()
    //  {
    //      return pages.build();
    //  }
    //
    //  public void reset()
    //  {
    //      pages = ImmutableList.builder();
    //  }
    //}

    /**
     * Makes the JDBC driver class loadable: from driver_path when given,
     * otherwise from the classpath, otherwise from jars under the plugin's
     * default_jdbc_driver directory. Then loads (registers) the driver class.
     */
    protected void loadDriver(String className, Optional<String> driverPath)
    {
        if (driverPath.isPresent()) {
            addDriverJarToClasspath(driverPath.get());
        } else {
            try {
                // Gradle test task will add JDBC driver to classpath
                Class.forName(className);

            } catch (ClassNotFoundException ex) {
                File root = findPluginRoot();
                File driverLib = new File(root, "default_jdbc_driver");
                File[] files = driverLib.listFiles(new FileFilter() {
                    @Override
                    public boolean accept(File file) {
                        return file.isFile() && file.getName().endsWith(".jar");
                    }
                });
                // NOTE(review): the message reports the plugin root, but the
                // search actually happened under default_jdbc_driver/.
                if (files == null || files.length == 0) {
                    throw new RuntimeException("Cannot find JDBC driver in '" + root.getAbsolutePath() + "'.");
                } else {
                    for (File file : files) {
                        logger.info("JDBC Driver = " + file.getAbsolutePath());
                        addDriverJarToClasspath(file.getAbsolutePath());
                    }
                }
            }
        }

        // Load JDBC Driver
        try {
            Class.forName(className);
        } catch (ClassNotFoundException ex) {
            throw new RuntimeException(ex);
        }
    }

    /**
     * Adds a driver jar to the plugin's class loader. Despite the parameter
     * name, glob patterns are not expanded yet (see TODO).
     */
    protected void addDriverJarToClasspath(String glob)
    {
        // TODO match glob
        PluginClassLoader loader = (PluginClassLoader) getClass().getClassLoader();
        Path path = Paths.get(glob);
        if (!path.toFile().exists()) {
             throw new ConfigException("The specified driver jar doesn't exist: " + glob);
        }
        loader.addPath(Paths.get(glob));
    }

    /**
     * Locates the plugin's installation directory by walking up from this
     * class's .class file location until a folder named "embulk-input-*" is
     * found.
     */
    protected File findPluginRoot()
    {
        try {
            URL url = getClass().getResource("/" + getClass().getName().replace('.', '/') + ".class");
            if (url.toString().startsWith("jar:")) {
                // Strip the jar: wrapper and the !/entry suffix to get the jar file URL.
                url = new URL(url.toString().replaceAll("^jar:", "").replaceAll("![^!]*$", ""));
            }

            File folder = new File(url.toURI()).getParentFile();
            for (;; folder = folder.getParentFile()) {
                if (folder == null) {
                    throw new RuntimeException("Cannot find 'embulk-input-xxx' folder.");
                }

                if (folder.getName().startsWith("embulk-input-")) {
                    return folder;
                }
            }
        } catch (MalformedURLException | URISyntaxException e) {
            throw new RuntimeException(e);
        }
    }

    /** Logs the connection URL and properties with the password masked. */
    protected void logConnectionProperties(String url, Properties props)
    {
        Properties maskedProps = new Properties();
        for(String key : props.stringPropertyNames()) {
            if (key.equals("password")) {
                maskedProps.setProperty(key, "***");
            } else {
                maskedProps.setProperty(key, props.getProperty(key));
            }
        }
        logger.info("Connecting to {} options {}", url, maskedProps);
    }
}