embulk-input-athena 0.1.0
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/Dockerfile +8 -0
- data/LICENSE +21 -0
- data/README.md +46 -0
- data/build.gradle +101 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/docker-compose.yml +10 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +5 -0
- data/gradlew +172 -0
- data/gradlew.bat +84 -0
- data/lib/embulk/input/athena.rb +3 -0
- data/src/main/java/org/embulk/input/athena/AthenaInputConnection.java +49 -0
- data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java +202 -0
- data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1 +192 -0
- data/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java +674 -0
- data/src/main/java/org/embulk/input/jdbc/JdbcColumn.java +58 -0
- data/src/main/java/org/embulk/input/jdbc/JdbcColumnOption.java +31 -0
- data/src/main/java/org/embulk/input/jdbc/JdbcInputConnection.java +397 -0
- data/src/main/java/org/embulk/input/jdbc/JdbcLiteral.java +38 -0
- data/src/main/java/org/embulk/input/jdbc/JdbcSchema.java +55 -0
- data/src/main/java/org/embulk/input/jdbc/Ssl.java +37 -0
- data/src/main/java/org/embulk/input/jdbc/ToString.java +54 -0
- data/src/main/java/org/embulk/input/jdbc/ToStringMap.java +35 -0
- data/src/main/java/org/embulk/input/jdbc/getter/AbstractColumnGetter.java +105 -0
- data/src/main/java/org/embulk/input/jdbc/getter/AbstractIncrementalHandler.java +45 -0
- data/src/main/java/org/embulk/input/jdbc/getter/AbstractTimestampColumnGetter.java +38 -0
- data/src/main/java/org/embulk/input/jdbc/getter/BigDecimalColumnGetter.java +59 -0
- data/src/main/java/org/embulk/input/jdbc/getter/BooleanColumnGetter.java +56 -0
- data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetter.java +21 -0
- data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetterFactory.java +207 -0
- data/src/main/java/org/embulk/input/jdbc/getter/DateColumnGetter.java +37 -0
- data/src/main/java/org/embulk/input/jdbc/getter/DoubleColumnGetter.java +66 -0
- data/src/main/java/org/embulk/input/jdbc/getter/FloatColumnGetter.java +66 -0
- data/src/main/java/org/embulk/input/jdbc/getter/JsonColumnGetter.java +57 -0
- data/src/main/java/org/embulk/input/jdbc/getter/LongColumnGetter.java +70 -0
- data/src/main/java/org/embulk/input/jdbc/getter/StringColumnGetter.java +96 -0
- data/src/main/java/org/embulk/input/jdbc/getter/TimeColumnGetter.java +37 -0
- data/src/main/java/org/embulk/input/jdbc/getter/TimestampColumnGetter.java +36 -0
- data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithTimeZoneIncrementalHandler.java +83 -0
- data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithoutTimeZoneIncrementalHandler.java +75 -0
- data/src/test/java/org/embulk/input/athena/TestAthenaInputPlugin.java +5 -0
- metadata +258 -0
--- /dev/null
+++ data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1
@@ -0,0 +1,192 @@
+package org.embulk.input.athena;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.List;
+import java.util.Properties;
+
+import com.google.common.base.Optional;
+
+import org.embulk.config.Config;
+import org.embulk.config.ConfigDefault;
+import org.embulk.config.ConfigDiff;
+import org.embulk.config.ConfigSource;
+import org.embulk.config.Task;
+import org.embulk.config.TaskReport;
+import org.embulk.config.TaskSource;
+import org.embulk.input.jdbc.AbstractJdbcInputPlugin;
+import org.embulk.input.jdbc.JdbcInputConnection;
+import org.embulk.spi.Exec;
+import org.embulk.spi.InputPlugin;
+import org.embulk.spi.PageOutput;
+import org.embulk.spi.Schema;
+import org.embulk.spi.SchemaConfig;
+
+public class AthenaInputPlugin
+        extends AbstractJdbcInputPlugin
+{
+    public interface AthenaPluginTask
+            extends AbstractJdbcInputPlugin.PluginTask
+    {
+        @Config("driver_path")
+        @ConfigDefault("null")
+        public Optional<String> getDriverPath();
+
+        // athena_url (required string)
+        @Config("athena_url")
+        public String getAthenaUrl();
+
+        // s3_staging_dir (required string)
+        @Config("s3_staging_dir")
+        public String getS3StagingDir();
+
+        // access_key (required string)
+        @Config("access_key")
+        public String getAccessKey();
+
+        // secret_key (required string)
+        @Config("secret_key")
+        public String getSecretKey();
+
+        // configuration option 2 (optional string, null is not allowed)
+        // @Config("option2")
+        // @ConfigDefault("\"myvalue\"")
+        // public String getOption2();
+
+        // configuration option 3 (optional string, null is allowed)
+        // @Config("option3")
+        // @ConfigDefault("null")
+        // public Optional<String> getOption3();
+
+        // if you get schema from config
+        // @Config("columns")
+        // public SchemaConfig getColumns();
+    }
+
+    @Override
+    protected Class<? extends PluginTask> getTaskClass() {
+        return AthenaPluginTask.class;
+    }
+
+    @Override
+    protected AthenaInputConnection newConnection(PluginTask pluginTask) throws SQLException {
+        AthenaPluginTask task = (AthenaPluginTask) pluginTask;
+        loadDriver("com.amazonaws.athena.jdbc.AthenaDriver", task.getDriverPath());
+        //Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");
+        Properties properties = new Properties();
+        properties.put("s3_staging_dir", task.getS3StagingDir());
+        properties.put("user", task.getAccessKey());
+        properties.put("password", task.getSecretKey());
+        properties.putAll(task.getOptions());
+
+        Connection connection = DriverManager.getConnection(task.getAthenaUrl(), properties);
+        try {
+            AthenaInputConnection c = new AthenaInputConnection(connection);
+            connection = null;
+            return c;
+        } finally {
+            if (connection != null) {
+                connection.close();
+            }
+        }
+    }
+
+    /*
+    @Override
+    public ConfigDiff transaction(ConfigSource config,
+            InputPlugin.Control control)
+    {
+        PluginTask task = config.loadConfig(PluginTask.class);
+
+        // Schema schema = task.getColumns().toSchema();
+        Schema schema = Schema.builder().build();
+        int taskCount = 1; // number of run() method calls
+
+        return resume(task.dump(), schema, taskCount, control);
+    }
+
+    @Override
+    public ConfigDiff resume(TaskSource taskSource,
+            Schema schema, int taskCount,
+            InputPlugin.Control control)
+    {
+        control.run(taskSource, schema, taskCount);
+        return Exec.newConfigDiff();
+    }
+
+    @Override
+    public void cleanup(TaskSource taskSource,
+            Schema schema, int taskCount,
+            List<TaskReport> successTaskReports)
+    {
+    }
+
+    @Override
+    public TaskReport run(TaskSource taskSource,
+            Schema schema, int taskIndex,
+            PageOutput output)
+    {
+        PluginTask task = taskSource.loadTask(PluginTask.class);
+
+        // Write your code here :)
+
+        Connection connection = null;
+        Statement statement = null;
+        try {
+            connection = getAthenaConnection(task);
+            statement = connection.createStatement();
+            ResultSet resultSet = statement.executeQuery("select * from default.sample");
+
+            while (resultSet.next()) {
+                String statusCode = resultSet.getString("created_at");
+                System.out.println("st code" + statusCode);
+            }
+            resultSet.close();
+            connection.close();
+        } catch (Exception e) {
+            e.printStackTrace();
+        } finally {
+            try {
+                if (statement != null)
+                    statement.close();
+            } catch (Exception ex) {
+
+            }
+            try {
+                if (connection != null)
+                    connection.close();
+            } catch (Exception ex) {
+                ex.printStackTrace();
+            }
+        }
+
+        return Exec.newTaskReport();
+    }
+
+    @Override
+    public ConfigDiff guess(ConfigSource config)
+    {
+        return Exec.newConfigDiff();
+    }
+    */
+    /*
+    protected Connection getAthenaConnection(PluginTask task) throws ClassNotFoundException, SQLException {
+        Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");
+        Properties properties = new Properties();
+        properties.put("s3_staging_dir", task.getS3StagingDir());
+        properties.put("user", task.getAccessKey());
+        properties.put("password", task.getSecretKey());
+        if (task.getLogPath() != null) {
+            properties.put("log_path", task.getLogPath());
+        }
+        if (task.getLogLevel() != null) {
+            properties.put("log_level", task.getLogLevel());
+        }
+
+        return DriverManager.getConnection(task.getAthenaUrl(), properties);
+    }
+    */
+}
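Editor's note: for context, here is a minimal standalone sketch of the connection setup that newConnection above performs. This code is not part of the gem; the endpoint URL, staging bucket, and credentials are placeholders (the URL form shown is the commonly documented one for the legacy com.amazonaws.athena.jdbc.AthenaDriver, and the plugin itself takes the URL verbatim from its athena_url option).

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Properties;

// Standalone sketch of the properties-based connection that
// AthenaInputPlugin.newConnection builds. All values marked
// "placeholder" are assumptions for illustration only.
public class AthenaConnectionSketch
{
    public static void main(String[] args) throws Exception
    {
        // Same legacy driver class the plugin loads via loadDriver(...)
        Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");

        Properties properties = new Properties();
        properties.put("s3_staging_dir", "s3://YOUR_BUCKET/athena-staging/"); // placeholder
        properties.put("user", "YOUR_AWS_ACCESS_KEY_ID");                     // placeholder
        properties.put("password", "YOUR_AWS_SECRET_ACCESS_KEY");             // placeholder

        String url = "jdbc:awsathena://athena.us-east-1.amazonaws.com:443";   // placeholder region
        try (Connection connection = DriverManager.getConnection(url, properties);
             Statement statement = connection.createStatement();
             ResultSet resultSet = statement.executeQuery("SELECT 1")) {
            while (resultSet.next()) {
                System.out.println(resultSet.getInt(1));
            }
        }
    }
}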
--- /dev/null
+++ data/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java
@@ -0,0 +1,674 @@
+package org.embulk.input.jdbc;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.net.MalformedURLException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.nio.file.Paths;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.slf4j.Logger;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.base.Optional;
+import com.google.common.base.Supplier;
+import com.google.common.base.Throwables;
+import com.google.common.collect.ImmutableList;
+
+import org.embulk.config.Config;
+import org.embulk.config.ConfigException;
+import org.embulk.config.ConfigDefault;
+import org.embulk.config.ConfigDiff;
+import org.embulk.config.ConfigInject;
+import org.embulk.config.ConfigSource;
+import org.embulk.config.Task;
+import org.embulk.config.TaskReport;
+import org.embulk.config.TaskSource;
+import org.embulk.plugin.PluginClassLoader;
+import org.embulk.spi.BufferAllocator;
+import org.embulk.spi.Column;
+import org.embulk.spi.DataException;
+import org.embulk.spi.PageBuilder;
+import org.embulk.spi.InputPlugin;
+import org.embulk.spi.PageOutput;
+import org.embulk.spi.Schema;
+import org.embulk.spi.Exec;
+import org.embulk.input.jdbc.getter.ColumnGetter;
+import org.embulk.input.jdbc.getter.ColumnGetterFactory;
+import org.embulk.input.jdbc.JdbcInputConnection.BatchSelect;
+import org.embulk.input.jdbc.JdbcInputConnection.PreparedQuery;
+import org.joda.time.DateTimeZone;
+
+import static java.util.Locale.ENGLISH;
+
+public abstract class AbstractJdbcInputPlugin
+        implements InputPlugin
+{
+    protected final Logger logger = Exec.getLogger(getClass());
+
+    public interface PluginTask extends Task
+    {
+        @Config("options")
+        @ConfigDefault("{}")
+        public ToStringMap getOptions();
+
+        @Config("table")
+        @ConfigDefault("null")
+        public Optional<String> getTable();
+        public void setTable(Optional<String> normalizedTableName);
+
+        @Config("query")
+        @ConfigDefault("null")
+        public Optional<String> getQuery();
+
+        @Config("select")
+        @ConfigDefault("null")
+        public Optional<String> getSelect();
+
+        @Config("where")
+        @ConfigDefault("null")
+        public Optional<String> getWhere();
+
+        @Config("order_by")
+        @ConfigDefault("null")
+        public Optional<String> getOrderBy();
+
+        @Config("incremental")
+        @ConfigDefault("false")
+        public boolean getIncremental();
+
+        @Config("incremental_columns")
+        @ConfigDefault("[]")
+        public List<String> getIncrementalColumns();
+        public void setIncrementalColumns(List<String> indexes);
+
+        @Config("last_record")
+        @ConfigDefault("null")
+        public Optional<List<JsonNode>> getLastRecord();
+
+        // TODO limit_value is necessary to make sure repeated bulk load transactions
+        // don't load the same record twice or miss records when the column
+        // specified at order_by parameter is not unique.
+        // For example, if the order_by column is "timestamp created_at"
+        // column whose precision is second, the table can include multiple
+        // records with the same created_at time. At the first bulk load
+        // transaction, it loads a record with created_at=2015-01-02 00:00:02.
+        // Then next transaction will use WHERE created_at > '2015-01-02 00:00:02'.
+        // However, if another record with created_at=2014-01-01 23:59:59 is
+        // inserted between the 2 transactions, the new record will be skipped.
+        // To prevent this scenario, we want to specify
+        // limit_value=2015-01-02 00:00:00 (exclusive). With this way, as long as
+        // a transaction runs after 2015-01-02 00:00:00 + some minutes, we don't
+        // skip records. Ideally, to automate the scheduling, we want to set
+        // limit_value="today".
+        //
+        //@Config("limit_value")
+        //@ConfigDefault("null")
+        //public Optional<String> getLimitValue();
+
+        //// TODO probably limit_rows is unnecessary as long as this
+        // supports parallel execution (partition_by option) and resuming.
+        //@Config("limit_rows")
+        //@ConfigDefault("null")
+        //public Optional<Integer> getLimitRows();
+
+        @Config("connect_timeout")
+        @ConfigDefault("300")
+        public int getConnectTimeout();
+
+        @Config("socket_timeout")
+        @ConfigDefault("1800")
+        public int getSocketTimeout();
+
+        @Config("fetch_rows")
+        @ConfigDefault("10000")
+        // TODO set minimum number
+        public int getFetchRows();
+
+        // TODO parallel execution using "partition_by" config
+
+        @Config("column_options")
+        @ConfigDefault("{}")
+        public Map<String, JdbcColumnOption> getColumnOptions();
+
+        @Config("default_timezone")
+        @ConfigDefault("\"UTC\"")
+        public DateTimeZone getDefaultTimeZone();
+
+        @Config("default_column_options")
+        @ConfigDefault("{}")
+        public Map<String, JdbcColumnOption> getDefaultColumnOptions();
+
+        @Config("after_select")
+        @ConfigDefault("null")
+        public Optional<String> getAfterSelect();
+
+        public PreparedQuery getBuiltQuery();
+        public void setBuiltQuery(PreparedQuery query);
+
+        public JdbcSchema getQuerySchema();
+        public void setQuerySchema(JdbcSchema schema);
+
+        public List<Integer> getIncrementalColumnIndexes();
+        public void setIncrementalColumnIndexes(List<Integer> indexes);
+
+        @ConfigInject
+        public BufferAllocator getBufferAllocator();
+    }
+
+    // for subclasses to add @Config
+    protected Class<? extends PluginTask> getTaskClass()
+    {
+        return PluginTask.class;
+    }
+
+    protected abstract JdbcInputConnection newConnection(PluginTask task) throws SQLException;
+
+    @Override
+    public ConfigDiff transaction(ConfigSource config,
+            InputPlugin.Control control)
+    {
+        PluginTask task = config.loadConfig(getTaskClass());
+
+        if (task.getIncremental()) {
+            if (task.getOrderBy().isPresent()) {
+                throw new ConfigException("order_by option must not be set if incremental is true");
+            }
+        }
+        else {
+            if (!task.getIncrementalColumns().isEmpty()) {
+                throw new ConfigException("'incremental: true' must be set if incremental_columns is set");
+            }
+        }
+
+        Schema schema;
+        try (JdbcInputConnection con = newConnection(task)) {
+            con.showDriverVersion();
+
+            // TODO incremental_columns is not set => get primary key
+            schema = setupTask(con, task);
+        } catch (SQLException ex) {
+            throw Throwables.propagate(ex);
+        }
+
+        return buildNextConfigDiff(task, control.run(task.dump(), schema, 1));
+    }
+
+    protected Schema setupTask(JdbcInputConnection con, PluginTask task) throws SQLException
+    {
+        if (task.getTable().isPresent()) {
+            String actualTableName = normalizeTableNameCase(con, task.getTable().get());
+            task.setTable(Optional.of(actualTableName));
+        }
+
+        // build SELECT query and get schema of its result
+        String rawQuery = getRawQuery(task, con);
+
+        JdbcSchema querySchema = con.getSchemaOfQuery(rawQuery);
+        task.setQuerySchema(querySchema);
+        // query schema should not change after incremental query
+
+        PreparedQuery preparedQuery;
+        if (task.getIncremental()) {
+            // build incremental query
+
+            List<String> incrementalColumns = task.getIncrementalColumns();
+            if (incrementalColumns.isEmpty()) {
+                // incremental_columns is not set
+                if (!task.getTable().isPresent()) {
+                    throw new ConfigException("incremental_columns option must be set if incremental is true and custom query option is set");
+                }
+                // get primary keys from the target table to use them as incremental_columns
+                List<String> primaryKeys = con.getPrimaryKeys(task.getTable().get());
+                if (primaryKeys.isEmpty()) {
+                    throw new ConfigException(String.format(ENGLISH,
+                            "Primary key is not available at the table '%s'. incremental_columns option must be set",
+                            task.getTable().get()));
+                }
+                logger.info("Using primary keys as incremental_columns: {}", primaryKeys);
+                task.setIncrementalColumns(primaryKeys);
+                incrementalColumns = primaryKeys;
+            }
+
+            List<Integer> incrementalColumnIndexes = findIncrementalColumnIndexes(querySchema, incrementalColumns);
+            task.setIncrementalColumnIndexes(incrementalColumnIndexes);
+
+            List<JsonNode> lastRecord;
+            if (task.getLastRecord().isPresent()) {
+                lastRecord = task.getLastRecord().get();
+                if (lastRecord.size() != incrementalColumnIndexes.size()) {
+                    throw new ConfigException("Number of values set at last_record must be the same as the number of columns set at incremental_columns");
+                }
+            }
+            else {
+                lastRecord = null;
+            }
+
+            if (task.getQuery().isPresent()) {
+                preparedQuery = con.wrapIncrementalQuery(rawQuery, querySchema, incrementalColumnIndexes, lastRecord);
+            }
+            else {
+                preparedQuery = con.rebuildIncrementalQuery(
+                        task.getTable().get(), task.getSelect(),
+                        task.getWhere(),
+                        querySchema, incrementalColumnIndexes, lastRecord);
+            }
+        }
+        else {
+            task.setIncrementalColumnIndexes(ImmutableList.<Integer>of());
+            preparedQuery = new PreparedQuery(rawQuery, ImmutableList.<JdbcLiteral>of());
+        }
+
+        task.setBuiltQuery(preparedQuery);
+
+        // validate column_options
+        newColumnGetters(con, task, querySchema, null);
+
+        ColumnGetterFactory factory = newColumnGetterFactory(null, task.getDefaultTimeZone());
+        ImmutableList.Builder<Column> columns = ImmutableList.builder();
+        for (int i = 0; i < querySchema.getCount(); i++) {
+            JdbcColumn column = querySchema.getColumn(i);
+            JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), column, factory.getJdbcType(column.getSqlType()));
+            columns.add(new Column(i,
+                    column.getName(),
+                    factory.newColumnGetter(con, task, column, columnOption).getToType()));
+        }
+        return new Schema(columns.build());
+    }
+
+    private String normalizeTableNameCase(JdbcInputConnection con, String tableName)
+        throws SQLException
+    {
+        if (con.tableExists(tableName)) {
+            return tableName;
+        } else {
+            String upperTableName = tableName.toUpperCase();
+            String lowerTableName = tableName.toLowerCase();
+            boolean upperExists = con.tableExists(upperTableName);
+            boolean lowerExists = con.tableExists(lowerTableName);
+            if (upperExists && lowerExists) {
+                throw new ConfigException(String.format("Cannot specify table '%s' because both '%s' and '%s' exist.",
+                        tableName, upperTableName, lowerTableName));
+            } else if (upperExists) {
+                return upperTableName;
+            } else if (lowerExists) {
+                return lowerTableName;
+            } else {
+                // fallback to the given table name. this may throw error later at getSchemaOfQuery
+                return tableName;
+            }
+        }
+    }
+
+    private List<Integer> findIncrementalColumnIndexes(JdbcSchema schema, List<String> incrementalColumns)
+        throws SQLException
+    {
+        ImmutableList.Builder<Integer> builder = ImmutableList.builder();
+        for (String name : incrementalColumns) {
+            Optional<Integer> index = schema.findColumn(name);
+            if (index.isPresent()) {
+                builder.add(index.get());
+            }
+            else {
+                throw new ConfigException(String.format(ENGLISH,
+                        "Column name '%s' in incremental_columns option does not exist",
+                        name));
+            }
+        }
+        return builder.build();
+    }
+
+    private String getRawQuery(PluginTask task, JdbcInputConnection con) throws SQLException
+    {
+        if (task.getQuery().isPresent()) {
+            if (task.getTable().isPresent() || task.getSelect().isPresent() ||
+                    task.getWhere().isPresent() || task.getOrderBy().isPresent()) {
+                throw new ConfigException("'table', 'select', 'where' and 'order_by' parameters are unnecessary if 'query' parameter is set.");
+            } else if (!task.getIncrementalColumns().isEmpty() || task.getLastRecord().isPresent()) {
+                throw new ConfigException("'incremental_columns' and 'last_record' parameters are not supported if 'query' parameter is set.");
+            }
+            return task.getQuery().get();
+        } else if (task.getTable().isPresent()) {
+            return con.buildSelectQuery(task.getTable().get(), task.getSelect(),
+                    task.getWhere(), task.getOrderBy());
+        } else {
+            throw new ConfigException("'table' or 'query' parameter is required");
+        }
+    }
+
+    @Override
+    public ConfigDiff resume(TaskSource taskSource,
+            Schema schema, int taskCount,
+            InputPlugin.Control control)
+    {
+        PluginTask task = taskSource.loadTask(getTaskClass());
+
+        // TODO when parallel execution is implemented and enabled, (maybe) order_by
+        // is necessary to resume. transaction() gets the range of the order_by
+        // column and sets it to a WHERE condition to make the operation deterministic
+
+        return buildNextConfigDiff(task, control.run(taskSource, schema, taskCount));
+    }
+
+    public ConfigDiff guess(ConfigSource config)
+    {
+        return Exec.newConfigDiff();
+    }
+
+    protected ConfigDiff buildNextConfigDiff(PluginTask task, List<TaskReport> reports)
+    {
+        ConfigDiff next = Exec.newConfigDiff();
+        if (reports.size() > 0 && reports.get(0).has("last_record")) {
+            next.set("last_record", reports.get(0).get(JsonNode.class, "last_record"));
+        } else if (task.getLastRecord().isPresent()) {
+            next.set("last_record", task.getLastRecord().get());
+        }
+        return next;
+    }
+
+    @Override
+    public void cleanup(TaskSource taskSource,
+            Schema schema, int taskCount,
+            List<TaskReport> successTaskReports)
+    {
+        // do nothing
+    }
+
+    private static class LastRecordStore
+    {
+        private final List<Integer> columnIndexes;
+        private final JsonNode[] lastValues;
+        private final List<String> columnNames;
+
+        public LastRecordStore(List<Integer> columnIndexes, List<String> columnNames)
+        {
+            this.columnIndexes = columnIndexes;
+            this.lastValues = new JsonNode[columnIndexes.size()];
+            this.columnNames = columnNames;
+        }
+
+        public void accept(List<ColumnGetter> getters)
+            throws SQLException
+        {
+            for (int i = 0; i < columnIndexes.size(); i++) {
+                lastValues[i] = getters.get(columnIndexes.get(i)).encodeToJson();
+            }
+        }
+
+        public List<JsonNode> getList()
+        {
+            ImmutableList.Builder<JsonNode> builder = ImmutableList.builder();
+            for (int i = 0; i < lastValues.length; i++) {
+                if (lastValues[i] == null || lastValues[i].isNull()) {
+                    throw new DataException(String.format(ENGLISH,
+                            "incremental_columns can't include null values but the last row is null at column '%s'",
+                            columnNames.get(i)));
+                }
+                builder.add(lastValues[i]);
+            }
+            return builder.build();
+        }
+    }
+
+    @Override
+    public TaskReport run(TaskSource taskSource,
+            Schema schema, int taskIndex,
+            PageOutput output)
+    {
+        PluginTask task = taskSource.loadTask(getTaskClass());
+
+        PreparedQuery builtQuery = task.getBuiltQuery();
+        JdbcSchema querySchema = task.getQuerySchema();
+        BufferAllocator allocator = task.getBufferAllocator();
+        PageBuilder pageBuilder = new PageBuilder(allocator, schema, output);
+
+        long totalRows = 0;
+
+        LastRecordStore lastRecordStore = null;
+
+        try (JdbcInputConnection con = newConnection(task)) {
+            List<ColumnGetter> getters = newColumnGetters(con, task, querySchema, pageBuilder);
+            try (BatchSelect cursor = con.newSelectCursor(builtQuery, getters, task.getFetchRows(), task.getSocketTimeout())) {
+                while (true) {
+                    long rows = fetch(cursor, getters, pageBuilder);
+                    if (rows <= 0L) {
+                        break;
+                    }
+                    totalRows += rows;
+                }
+            }
+
+            if (task.getIncremental() && totalRows > 0) {
+                lastRecordStore = new LastRecordStore(task.getIncrementalColumnIndexes(), task.getIncrementalColumns());
+                lastRecordStore.accept(getters);
+            }
+
+            pageBuilder.finish();
+
+            // after_select runs after pageBuilder.finish because pageBuilder.finish may fail.
+            // TODO Output plugin's transaction might still fail. In that case, after_select is
+            // already done but output plugin didn't commit the data to the target storage.
+            // This means inconsistency between data source and destination. To avoid this
+            // issue, we need another option like `after_commit` that runs after output plugin's
+            // commit. after_commit can't run in the same transaction with SELECT. So,
+            // after_select gets values and store them in TaskReport, and after_commit take
+            // them as placeholder. Or, after_select puts values to an intermediate table, and
+            // after_commit moves those values to the actual table.
+            if (task.getAfterSelect().isPresent()) {
+                con.executeUpdate(task.getAfterSelect().get());
+                con.connection.commit();
+            }
+        } catch (SQLException ex) {
+            throw Throwables.propagate(ex);
+        }
+
+        TaskReport report = Exec.newTaskReport();
+        if (lastRecordStore != null) {
+            report.set("last_record", lastRecordStore.getList());
+        }
+
+        return report;
+    }
+
+    protected ColumnGetterFactory newColumnGetterFactory(PageBuilder pageBuilder, DateTimeZone dateTimeZone)
+    {
+        return new ColumnGetterFactory(pageBuilder, dateTimeZone);
+    }
+
+    private List<ColumnGetter> newColumnGetters(JdbcInputConnection con, PluginTask task, JdbcSchema querySchema, PageBuilder pageBuilder)
+        throws SQLException
+    {
+        ColumnGetterFactory factory = newColumnGetterFactory(pageBuilder, task.getDefaultTimeZone());
+        ImmutableList.Builder<ColumnGetter> getters = ImmutableList.builder();
+        for (JdbcColumn c : querySchema.getColumns()) {
+            JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), c, factory.getJdbcType(c.getSqlType()));
+            getters.add(factory.newColumnGetter(con, task, c, columnOption));
+        }
+        return getters.build();
+    }
+
+    private static JdbcColumnOption columnOptionOf(Map<String, JdbcColumnOption> columnOptions, Map<String, JdbcColumnOption> defaultColumnOptions, JdbcColumn targetColumn, String targetColumnSQLType)
+    {
+        JdbcColumnOption columnOption = columnOptions.get(targetColumn.getName());
+        if (columnOption == null) {
+            String foundName = null;
+            for (Map.Entry<String, JdbcColumnOption> entry : columnOptions.entrySet()) {
+                if (entry.getKey().equalsIgnoreCase(targetColumn.getName())) {
+                    if (columnOption != null) {
+                        throw new ConfigException(String.format("Cannot specify column '%s' because both '%s' and '%s' exist in column_options.",
+                                targetColumn.getName(), foundName, entry.getKey()));
+                    }
+                    foundName = entry.getKey();
+                    columnOption = entry.getValue();
+                }
+            }
+        }
+
+        return Optional
+                .fromNullable(columnOption)
+                .or(Optional.fromNullable(defaultColumnOptions.get(targetColumnSQLType)))
+                .or(
+                        // default column option
+                        new Supplier<JdbcColumnOption>()
+                        {
+                            public JdbcColumnOption get()
+                            {
+                                return Exec.newConfigSource().loadConfig(JdbcColumnOption.class);
+                            }
+                        });
+    }
+
+    private long fetch(BatchSelect cursor,
+            List<ColumnGetter> getters, PageBuilder pageBuilder) throws SQLException
+    {
+        ResultSet result = cursor.fetch();
+        if (result == null || !result.next()) {
+            return 0;
+        }
+
+        List<Column> columns = pageBuilder.getSchema().getColumns();
+        long rows = 0;
+        long reportRows = 500;
+        do {
+            for (int i = 0; i < getters.size(); i++) {
+                int index = i + 1; // JDBC column index begins from 1
+                getters.get(i).getAndSet(result, index, columns.get(i));
+            }
+            pageBuilder.addRecord();
+            rows++;
+            if (rows % reportRows == 0) {
+                logger.info(String.format("Fetched %,d rows.", rows));
+                reportRows *= 2;
+            }
+        } while (result.next());
+
+        return rows;
+    }
+
+    //// TODO move to embulk.spi.util?
+    //private static class ListPageOutput
+    //{
+    //  public ImmutableList.Builder<Page> pages;
+    //
+    //  public ListPageOutput()
+    //  {
+    //      reset();
+    //  }
+    //
+    //  @Override
+    //  public void add(Page page)
+    //  {
+    //      pages.add(page);
+    //  }
+    //
+    //  @Override
+    //  public void finish()
+    //  {
+    //  }
+    //
+    //  @Override
+    //  public void close()
+    //  {
+    //  }
+    //
+    //  public List<Page> getPages()
+    //  {
+    //      return pages.build();
+    //  }
+    //
+    //  public void reset()
+    //  {
+    //      pages = ImmutableList.builder();
+    //  }
+    //}
+
+    protected void loadDriver(String className, Optional<String> driverPath)
+    {
+        if (driverPath.isPresent()) {
+            addDriverJarToClasspath(driverPath.get());
+        } else {
+            try {
+                // Gradle test task will add JDBC driver to classpath
+                Class.forName(className);
+
+            } catch (ClassNotFoundException ex) {
+                File root = findPluginRoot();
+                File driverLib = new File(root, "default_jdbc_driver");
+                File[] files = driverLib.listFiles(new FileFilter() {
+                    @Override
+                    public boolean accept(File file) {
+                        return file.isFile() && file.getName().endsWith(".jar");
+                    }
+                });
+                if (files == null || files.length == 0) {
+                    throw new RuntimeException("Cannot find JDBC driver in '" + root.getAbsolutePath() + "'.");
+                } else {
+                    for (File file : files) {
+                        logger.info("JDBC Driver = " + file.getAbsolutePath());
+                        addDriverJarToClasspath(file.getAbsolutePath());
+                    }
+                }
+            }
+        }
+
+        // Load JDBC Driver
+        try {
+            Class.forName(className);
+        } catch (ClassNotFoundException ex) {
+            throw new RuntimeException(ex);
+        }
+    }
+
+    protected void addDriverJarToClasspath(String glob)
+    {
+        // TODO match glob
+        PluginClassLoader loader = (PluginClassLoader) getClass().getClassLoader();
+        Path path = Paths.get(glob);
+        if (!path.toFile().exists()) {
+            throw new ConfigException("The specified driver jar doesn't exist: " + glob);
+        }
+        loader.addPath(Paths.get(glob));
+    }
+
+    protected File findPluginRoot()
+    {
+        try {
+            URL url = getClass().getResource("/" + getClass().getName().replace('.', '/') + ".class");
+            if (url.toString().startsWith("jar:")) {
+                url = new URL(url.toString().replaceAll("^jar:", "").replaceAll("![^!]*$", ""));
+            }
+
+            File folder = new File(url.toURI()).getParentFile();
+            for (;; folder = folder.getParentFile()) {
+                if (folder == null) {
+                    throw new RuntimeException("Cannot find 'embulk-input-xxx' folder.");
+                }
+
+                if (folder.getName().startsWith("embulk-input-")) {
+                    return folder;
+                }
+            }
+        } catch (MalformedURLException | URISyntaxException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    protected void logConnectionProperties(String url, Properties props)
+    {
+        Properties maskedProps = new Properties();
+        for (String key : props.stringPropertyNames()) {
+            if (key.equals("password")) {
+                maskedProps.setProperty(key, "***");
+            } else {
+                maskedProps.setProperty(key, props.getProperty(key));
+            }
+        }
+        logger.info("Connecting to {} options {}", url, maskedProps);
+    }
+}
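Editor's note: one detail worth calling out in AbstractJdbcInputPlugin is the incremental bookkeeping. A run persists its position by writing last_record into the task report, and buildNextConfigDiff carries that value into the next transaction's config, falling back to the previous last_record when a run fetched nothing. Below is a simplified, self-contained model of that hand-off using plain JDK types rather than the Embulk API; the class and method names are invented for illustration.

import java.util.Arrays;
import java.util.List;
import java.util.Optional;

// Simplified model (not the Embulk API) of the last_record hand-off
// implemented by buildNextConfigDiff above.
public class LastRecordHandOff
{
    static Optional<List<String>> nextLastRecord(
            Optional<List<String>> reported,  // "last_record" from the run's TaskReport
            Optional<List<String>> previous)  // "last_record" from the current config
    {
        // A run that fetched rows reports a newer cursor position; otherwise
        // the previous position is carried forward so no records are skipped.
        return reported.isPresent() ? reported : previous;
    }

    public static void main(String[] args)
    {
        Optional<List<String>> previous =
                Optional.of(Arrays.asList("10", "2015-01-02 00:00:02"));

        // Rows were fetched: the next run resumes from the newest values.
        System.out.println(nextLastRecord(
                Optional.of(Arrays.asList("42", "2015-01-03 08:15:00")), previous));

        // Nothing was fetched: the next run keeps the old position.
        System.out.println(nextLastRecord(Optional.empty(), previous));
    }
}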