embulk-input-athena 0.1.0
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/Dockerfile +8 -0
- data/LICENSE +21 -0
- data/README.md +46 -0
- data/build.gradle +101 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/docker-compose.yml +10 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +5 -0
- data/gradlew +172 -0
- data/gradlew.bat +84 -0
- data/lib/embulk/input/athena.rb +3 -0
- data/src/main/java/org/embulk/input/athena/AthenaInputConnection.java +49 -0
- data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java +202 -0
- data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1 +192 -0
- data/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java +674 -0
- data/src/main/java/org/embulk/input/jdbc/JdbcColumn.java +58 -0
- data/src/main/java/org/embulk/input/jdbc/JdbcColumnOption.java +31 -0
- data/src/main/java/org/embulk/input/jdbc/JdbcInputConnection.java +397 -0
- data/src/main/java/org/embulk/input/jdbc/JdbcLiteral.java +38 -0
- data/src/main/java/org/embulk/input/jdbc/JdbcSchema.java +55 -0
- data/src/main/java/org/embulk/input/jdbc/Ssl.java +37 -0
- data/src/main/java/org/embulk/input/jdbc/ToString.java +54 -0
- data/src/main/java/org/embulk/input/jdbc/ToStringMap.java +35 -0
- data/src/main/java/org/embulk/input/jdbc/getter/AbstractColumnGetter.java +105 -0
- data/src/main/java/org/embulk/input/jdbc/getter/AbstractIncrementalHandler.java +45 -0
- data/src/main/java/org/embulk/input/jdbc/getter/AbstractTimestampColumnGetter.java +38 -0
- data/src/main/java/org/embulk/input/jdbc/getter/BigDecimalColumnGetter.java +59 -0
- data/src/main/java/org/embulk/input/jdbc/getter/BooleanColumnGetter.java +56 -0
- data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetter.java +21 -0
- data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetterFactory.java +207 -0
- data/src/main/java/org/embulk/input/jdbc/getter/DateColumnGetter.java +37 -0
- data/src/main/java/org/embulk/input/jdbc/getter/DoubleColumnGetter.java +66 -0
- data/src/main/java/org/embulk/input/jdbc/getter/FloatColumnGetter.java +66 -0
- data/src/main/java/org/embulk/input/jdbc/getter/JsonColumnGetter.java +57 -0
- data/src/main/java/org/embulk/input/jdbc/getter/LongColumnGetter.java +70 -0
- data/src/main/java/org/embulk/input/jdbc/getter/StringColumnGetter.java +96 -0
- data/src/main/java/org/embulk/input/jdbc/getter/TimeColumnGetter.java +37 -0
- data/src/main/java/org/embulk/input/jdbc/getter/TimestampColumnGetter.java +36 -0
- data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithTimeZoneIncrementalHandler.java +83 -0
- data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithoutTimeZoneIncrementalHandler.java +75 -0
- data/src/test/java/org/embulk/input/athena/TestAthenaInputPlugin.java +5 -0
- metadata +258 -0
data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1
@@ -0,0 +1,192 @@
package org.embulk.input.athena;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.List;
import java.util.Properties;

import com.google.common.base.Optional;

import org.embulk.config.Config;
import org.embulk.config.ConfigDefault;
import org.embulk.config.ConfigDiff;
import org.embulk.config.ConfigSource;
import org.embulk.config.Task;
import org.embulk.config.TaskReport;
import org.embulk.config.TaskSource;
import org.embulk.input.jdbc.AbstractJdbcInputPlugin;
import org.embulk.input.jdbc.JdbcInputConnection;
import org.embulk.spi.Exec;
import org.embulk.spi.InputPlugin;
import org.embulk.spi.PageOutput;
import org.embulk.spi.Schema;
import org.embulk.spi.SchemaConfig;

public class AthenaInputPlugin
        extends AbstractJdbcInputPlugin
{
    public interface AthenaPluginTask
            extends AbstractJdbcInputPlugin.PluginTask
    {
        @Config("driver_path")
        @ConfigDefault("null")
        public Optional<String> getDriverPath();

        // athena_url (required string)
        @Config("athena_url")
        public String getAthenaUrl();

        // s3_staging_dir (required string)
        @Config("s3_staging_dir")
        public String getS3StagingDir();

        // access_key (required string)
        @Config("access_key")
        public String getAccessKey();

        // secret_key (required string)
        @Config("secret_key")
        public String getSecretKey();

        // configuration option 2 (optional string, null is not allowed)
        // @Config("option2")
        // @ConfigDefault("\"myvalue\"")
        // public String getOption2();

        // configuration option 3 (optional string, null is allowed)
        // @Config("option3")
        // @ConfigDefault("null")
        // public Optional<String> getOption3();

        // if you get schema from config
        // @Config("columns")
        // public SchemaConfig getColumns();
    }

    @Override
    protected Class<? extends PluginTask> getTaskClass() {
        return AthenaPluginTask.class;
    }

    @Override
    protected AthenaInputConnection newConnection(PluginTask pluginTask) throws SQLException {
        AthenaPluginTask task = (AthenaPluginTask) pluginTask;
        loadDriver("com.amazonaws.athena.jdbc.AthenaDriver", task.getDriverPath());
        //Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");
        Properties properties = new Properties();
        properties.put("s3_staging_dir", task.getS3StagingDir());
        properties.put("user", task.getAccessKey());
        properties.put("password", task.getSecretKey());
        properties.putAll(task.getOptions());

        Connection connection = DriverManager.getConnection(task.getAthenaUrl(), properties);
        try {
            AthenaInputConnection c = new AthenaInputConnection(connection);
            connection = null;
            return c;
        } finally {
            if (connection != null) {
                connection.close();
            }
        }
    }

    /*
    @Override
    public ConfigDiff transaction(ConfigSource config,
            InputPlugin.Control control)
    {
        PluginTask task = config.loadConfig(PluginTask.class);

        // Schema schema = task.getColumns().toSchema();
        Schema schema = Schema.builder().build();
        int taskCount = 1;  // number of run() method calls

        return resume(task.dump(), schema, taskCount, control);
    }

    @Override
    public ConfigDiff resume(TaskSource taskSource,
            Schema schema, int taskCount,
            InputPlugin.Control control)
    {
        control.run(taskSource, schema, taskCount);
        return Exec.newConfigDiff();
    }

    @Override
    public void cleanup(TaskSource taskSource,
            Schema schema, int taskCount,
            List<TaskReport> successTaskReports)
    {
    }

    @Override
    public TaskReport run(TaskSource taskSource,
            Schema schema, int taskIndex,
            PageOutput output)
    {
        PluginTask task = taskSource.loadTask(PluginTask.class);

        // Write your code here :)

        Connection connection = null;
        Statement statement = null;
        try {
            connection = getAthenaConnection(task);
            statement = connection.createStatement();
            ResultSet resultSet = statement.executeQuery("select * from default.sample");

            while (resultSet.next()) {
                String statusCode = resultSet.getString("created_at");
                System.out.println("st code" + statusCode);
            }
            resultSet.close();
            connection.close();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (statement != null)
                    statement.close();
            } catch (Exception ex) {
            }
            try {
                if (connection != null)
                    connection.close();
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }

        return Exec.newTaskReport();
    }

    @Override
    public ConfigDiff guess(ConfigSource config)
    {
        return Exec.newConfigDiff();
    }
    */
    /*
    protected Connection getAthenaConnection(PluginTask task) throws ClassNotFoundException, SQLException {
        Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");
        Properties properties = new Properties();
        properties.put("s3_staging_dir", task.getS3StagingDir());
        properties.put("user", task.getAccessKey());
        properties.put("password", task.getSecretKey());
        if (task.getLogPath() != null) {
            properties.put("log_path", task.getLogPath());
        }
        if (task.getLogLevel() != null) {
            properties.put("log_level", task.getLogLevel());
        }

        return DriverManager.getConnection(task.getAthenaUrl(), properties);
    }
    */
}
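The newConnection override above is the core of the plugin: it loads the Athena JDBC driver, passes the S3 staging directory and AWS credentials as JDBC properties, and transfers ownership of the raw connection to AthenaInputConnection (the local reference is nulled so the finally block only closes the connection on failure). The following standalone sketch mirrors that connection flow outside Embulk. It is illustration only, not part of the gem; the JDBC URL format, region, bucket name, and environment variables are assumptions (the plugin takes the URL verbatim from its required athena_url option).

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Properties;

public class AthenaConnectionSketch
{
    public static void main(String[] args) throws Exception
    {
        // Same driver class and property keys as newConnection above.
        Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");

        Properties props = new Properties();
        props.put("s3_staging_dir", "s3://example-bucket/athena-results/"); // hypothetical bucket
        props.put("user", System.getenv("AWS_ACCESS_KEY_ID"));
        props.put("password", System.getenv("AWS_SECRET_ACCESS_KEY"));

        // Assumed URL format; the plugin reads this verbatim from athena_url.
        String url = "jdbc:awsathena://athena.us-east-1.amazonaws.com:443";

        try (Connection conn = DriverManager.getConnection(url, props);
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("SELECT 1")) {
            while (rs.next()) {
                System.out.println(rs.getInt(1));
            }
        }
    }
}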
data/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java
@@ -0,0 +1,674 @@
package org.embulk.input.jdbc;

import java.io.File;
import java.io.FileFilter;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.nio.file.Paths;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.slf4j.Logger;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.base.Optional;
import com.google.common.base.Supplier;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;

import org.embulk.config.Config;
import org.embulk.config.ConfigException;
import org.embulk.config.ConfigDefault;
import org.embulk.config.ConfigDiff;
import org.embulk.config.ConfigInject;
import org.embulk.config.ConfigSource;
import org.embulk.config.Task;
import org.embulk.config.TaskReport;
import org.embulk.config.TaskSource;
import org.embulk.plugin.PluginClassLoader;
import org.embulk.spi.BufferAllocator;
import org.embulk.spi.Column;
import org.embulk.spi.DataException;
import org.embulk.spi.PageBuilder;
import org.embulk.spi.InputPlugin;
import org.embulk.spi.PageOutput;
import org.embulk.spi.Schema;
import org.embulk.spi.Exec;
import org.embulk.input.jdbc.getter.ColumnGetter;
import org.embulk.input.jdbc.getter.ColumnGetterFactory;
import org.embulk.input.jdbc.JdbcInputConnection.BatchSelect;
import org.embulk.input.jdbc.JdbcInputConnection.PreparedQuery;
import org.joda.time.DateTimeZone;

import static java.util.Locale.ENGLISH;

public abstract class AbstractJdbcInputPlugin
        implements InputPlugin
{
    protected final Logger logger = Exec.getLogger(getClass());

    public interface PluginTask extends Task
    {
        @Config("options")
        @ConfigDefault("{}")
        public ToStringMap getOptions();

        @Config("table")
        @ConfigDefault("null")
        public Optional<String> getTable();
        public void setTable(Optional<String> normalizedTableName);

        @Config("query")
        @ConfigDefault("null")
        public Optional<String> getQuery();

        @Config("select")
        @ConfigDefault("null")
        public Optional<String> getSelect();

        @Config("where")
        @ConfigDefault("null")
        public Optional<String> getWhere();

        @Config("order_by")
        @ConfigDefault("null")
        public Optional<String> getOrderBy();

        @Config("incremental")
        @ConfigDefault("false")
        public boolean getIncremental();

        @Config("incremental_columns")
        @ConfigDefault("[]")
        public List<String> getIncrementalColumns();
        public void setIncrementalColumns(List<String> indexes);

        @Config("last_record")
        @ConfigDefault("null")
        public Optional<List<JsonNode>> getLastRecord();

        // TODO limit_value is necessary to make sure repeated bulk load transactions
        // don't load the same record twice or miss records when the column
        // specified at the order_by parameter is not unique.
        // For example, if the order_by column is a "timestamp created_at"
        // column whose precision is second, the table can include multiple
        // records with the same created_at time. At the first bulk load
        // transaction, it loads a record with created_at=2015-01-02 00:00:02.
        // Then the next transaction will use WHERE created_at > '2015-01-02 00:00:02'.
        // However, if another record with created_at=2014-01-01 23:59:59 is
        // inserted between the 2 transactions, the new record will be skipped.
        // To prevent this scenario, we want to specify
        // limit_value=2015-01-02 00:00:00 (exclusive). This way, as long as
        // a transaction runs after 2015-01-02 00:00:00 + some minutes, we don't
        // skip records. Ideally, to automate the scheduling, we want to set
        // limit_value="today".
        //
        //@Config("limit_value")
        //@ConfigDefault("null")
        //public Optional<String> getLimitValue();

        //// TODO probably limit_rows is unnecessary as long as this plugin
        // supports parallel execution (partition_by option) and resuming.
        //@Config("limit_rows")
        //@ConfigDefault("null")
        //public Optional<Integer> getLimitRows();

        @Config("connect_timeout")
        @ConfigDefault("300")
        public int getConnectTimeout();

        @Config("socket_timeout")
        @ConfigDefault("1800")
        public int getSocketTimeout();

        @Config("fetch_rows")
        @ConfigDefault("10000")
        // TODO set minimum number
        public int getFetchRows();

        // TODO parallel execution using "partition_by" config

        @Config("column_options")
        @ConfigDefault("{}")
        public Map<String, JdbcColumnOption> getColumnOptions();

        @Config("default_timezone")
        @ConfigDefault("\"UTC\"")
        public DateTimeZone getDefaultTimeZone();

        @Config("default_column_options")
        @ConfigDefault("{}")
        public Map<String, JdbcColumnOption> getDefaultColumnOptions();

        @Config("after_select")
        @ConfigDefault("null")
        public Optional<String> getAfterSelect();

        public PreparedQuery getBuiltQuery();
        public void setBuiltQuery(PreparedQuery query);

        public JdbcSchema getQuerySchema();
        public void setQuerySchema(JdbcSchema schema);

        public List<Integer> getIncrementalColumnIndexes();
        public void setIncrementalColumnIndexes(List<Integer> indexes);

        @ConfigInject
        public BufferAllocator getBufferAllocator();
    }

    // for subclasses to add @Config
    protected Class<? extends PluginTask> getTaskClass()
    {
        return PluginTask.class;
    }

    protected abstract JdbcInputConnection newConnection(PluginTask task) throws SQLException;

    @Override
    public ConfigDiff transaction(ConfigSource config,
            InputPlugin.Control control)
    {
        PluginTask task = config.loadConfig(getTaskClass());

        if (task.getIncremental()) {
            if (task.getOrderBy().isPresent()) {
                throw new ConfigException("order_by option must not be set if incremental is true");
            }
        }
        else {
            if (!task.getIncrementalColumns().isEmpty()) {
                throw new ConfigException("'incremental: true' must be set if incremental_columns is set");
            }
        }

        Schema schema;
        try (JdbcInputConnection con = newConnection(task)) {
            con.showDriverVersion();

            // TODO incremental_columns is not set => get primary key
            schema = setupTask(con, task);
        } catch (SQLException ex) {
            throw Throwables.propagate(ex);
        }

        return buildNextConfigDiff(task, control.run(task.dump(), schema, 1));
    }

    protected Schema setupTask(JdbcInputConnection con, PluginTask task) throws SQLException
    {
        if (task.getTable().isPresent()) {
            String actualTableName = normalizeTableNameCase(con, task.getTable().get());
            task.setTable(Optional.of(actualTableName));
        }

        // build SELECT query and get schema of its result
        String rawQuery = getRawQuery(task, con);

        JdbcSchema querySchema = con.getSchemaOfQuery(rawQuery);
        task.setQuerySchema(querySchema);
        // query schema should not change after incremental query

        PreparedQuery preparedQuery;
        if (task.getIncremental()) {
            // build incremental query

            List<String> incrementalColumns = task.getIncrementalColumns();
            if (incrementalColumns.isEmpty()) {
                // incremental_columns is not set
                if (!task.getTable().isPresent()) {
                    throw new ConfigException("incremental_columns option must be set if incremental is true and custom query option is set");
                }
                // get primary keys from the target table to use them as incremental_columns
                List<String> primaryKeys = con.getPrimaryKeys(task.getTable().get());
                if (primaryKeys.isEmpty()) {
                    throw new ConfigException(String.format(ENGLISH,
                            "Primary key is not available at the table '%s'. incremental_columns option must be set",
                            task.getTable().get()));
                }
                logger.info("Using primary keys as incremental_columns: {}", primaryKeys);
                task.setIncrementalColumns(primaryKeys);
                incrementalColumns = primaryKeys;
            }

            List<Integer> incrementalColumnIndexes = findIncrementalColumnIndexes(querySchema, incrementalColumns);
            task.setIncrementalColumnIndexes(incrementalColumnIndexes);

            List<JsonNode> lastRecord;
            if (task.getLastRecord().isPresent()) {
                lastRecord = task.getLastRecord().get();
                if (lastRecord.size() != incrementalColumnIndexes.size()) {
                    throw new ConfigException("Number of values set at last_record must be the same as the number of columns set at incremental_columns");
                }
            }
            else {
                lastRecord = null;
            }

            if (task.getQuery().isPresent()) {
                preparedQuery = con.wrapIncrementalQuery(rawQuery, querySchema, incrementalColumnIndexes, lastRecord);
            }
            else {
                preparedQuery = con.rebuildIncrementalQuery(
                        task.getTable().get(), task.getSelect(),
                        task.getWhere(),
                        querySchema, incrementalColumnIndexes, lastRecord);
            }
        }
        else {
            task.setIncrementalColumnIndexes(ImmutableList.<Integer>of());
            preparedQuery = new PreparedQuery(rawQuery, ImmutableList.<JdbcLiteral>of());
        }

        task.setBuiltQuery(preparedQuery);

        // validate column_options
        newColumnGetters(con, task, querySchema, null);

        ColumnGetterFactory factory = newColumnGetterFactory(null, task.getDefaultTimeZone());
        ImmutableList.Builder<Column> columns = ImmutableList.builder();
        for (int i = 0; i < querySchema.getCount(); i++) {
            JdbcColumn column = querySchema.getColumn(i);
            JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), column, factory.getJdbcType(column.getSqlType()));
            columns.add(new Column(i,
                    column.getName(),
                    factory.newColumnGetter(con, task, column, columnOption).getToType()));
        }
        return new Schema(columns.build());
    }

    private String normalizeTableNameCase(JdbcInputConnection con, String tableName)
        throws SQLException
    {
        if (con.tableExists(tableName)) {
            return tableName;
        } else {
            String upperTableName = tableName.toUpperCase();
            String lowerTableName = tableName.toLowerCase();
            boolean upperExists = con.tableExists(upperTableName);
            boolean lowerExists = con.tableExists(lowerTableName);
            if (upperExists && lowerExists) {
                throw new ConfigException(String.format("Cannot specify table '%s' because both '%s' and '%s' exist.",
                        tableName, upperTableName, lowerTableName));
            } else if (upperExists) {
                return upperTableName;
            } else if (lowerExists) {
                return lowerTableName;
            } else {
                // fall back to the given table name. this may throw an error later at getSchemaOfQuery
                return tableName;
            }
        }
    }

    private List<Integer> findIncrementalColumnIndexes(JdbcSchema schema, List<String> incrementalColumns)
        throws SQLException
    {
        ImmutableList.Builder<Integer> builder = ImmutableList.builder();
        for (String name : incrementalColumns) {
            Optional<Integer> index = schema.findColumn(name);
            if (index.isPresent()) {
                builder.add(index.get());
            }
            else {
                throw new ConfigException(String.format(ENGLISH,
                        "Column name '%s' set in incremental_columns option does not exist",
                        name));
            }
        }
        return builder.build();
    }

    private String getRawQuery(PluginTask task, JdbcInputConnection con) throws SQLException
    {
        if (task.getQuery().isPresent()) {
            if (task.getTable().isPresent() || task.getSelect().isPresent() ||
                    task.getWhere().isPresent() || task.getOrderBy().isPresent()) {
                throw new ConfigException("'table', 'select', 'where' and 'order_by' parameters are unnecessary if 'query' parameter is set.");
            } else if (!task.getIncrementalColumns().isEmpty() || task.getLastRecord().isPresent()) {
                throw new ConfigException("'incremental_columns' and 'last_record' parameters are not supported if 'query' parameter is set.");
            }
            return task.getQuery().get();
        } else if (task.getTable().isPresent()) {
            return con.buildSelectQuery(task.getTable().get(), task.getSelect(),
                    task.getWhere(), task.getOrderBy());
        } else {
            throw new ConfigException("'table' or 'query' parameter is required");
        }
    }

    @Override
    public ConfigDiff resume(TaskSource taskSource,
            Schema schema, int taskCount,
            InputPlugin.Control control)
    {
        PluginTask task = taskSource.loadTask(getTaskClass());

        // TODO when parallel execution is implemented and enabled, (maybe) order_by
        // is necessary to resume. transaction() gets the range of the order_by
        // column and sets it as a WHERE condition to make the operation deterministic

        return buildNextConfigDiff(task, control.run(taskSource, schema, taskCount));
    }

    public ConfigDiff guess(ConfigSource config)
    {
        return Exec.newConfigDiff();
    }

    protected ConfigDiff buildNextConfigDiff(PluginTask task, List<TaskReport> reports)
    {
        ConfigDiff next = Exec.newConfigDiff();
        if (reports.size() > 0 && reports.get(0).has("last_record")) {
            next.set("last_record", reports.get(0).get(JsonNode.class, "last_record"));
        } else if (task.getLastRecord().isPresent()) {
            next.set("last_record", task.getLastRecord().get());
        }
        return next;
    }

    @Override
    public void cleanup(TaskSource taskSource,
            Schema schema, int taskCount,
            List<TaskReport> successTaskReports)
    {
        // do nothing
    }

    private static class LastRecordStore
    {
        private final List<Integer> columnIndexes;
        private final JsonNode[] lastValues;
        private final List<String> columnNames;

        public LastRecordStore(List<Integer> columnIndexes, List<String> columnNames)
        {
            this.columnIndexes = columnIndexes;
            this.lastValues = new JsonNode[columnIndexes.size()];
            this.columnNames = columnNames;
        }

        public void accept(List<ColumnGetter> getters)
            throws SQLException
        {
            for (int i = 0; i < columnIndexes.size(); i++) {
                lastValues[i] = getters.get(columnIndexes.get(i)).encodeToJson();
            }
        }

        public List<JsonNode> getList()
        {
            ImmutableList.Builder<JsonNode> builder = ImmutableList.builder();
            for (int i = 0; i < lastValues.length; i++) {
                if (lastValues[i] == null || lastValues[i].isNull()) {
                    throw new DataException(String.format(ENGLISH,
                            "incremental_columns can't include null values but the last row is null at column '%s'",
                            columnNames.get(i)));
                }
                builder.add(lastValues[i]);
            }
            return builder.build();
        }
    }

    @Override
    public TaskReport run(TaskSource taskSource,
            Schema schema, int taskIndex,
            PageOutput output)
    {
        PluginTask task = taskSource.loadTask(getTaskClass());

        PreparedQuery builtQuery = task.getBuiltQuery();
        JdbcSchema querySchema = task.getQuerySchema();
        BufferAllocator allocator = task.getBufferAllocator();
        PageBuilder pageBuilder = new PageBuilder(allocator, schema, output);

        long totalRows = 0;

        LastRecordStore lastRecordStore = null;

        try (JdbcInputConnection con = newConnection(task)) {
            List<ColumnGetter> getters = newColumnGetters(con, task, querySchema, pageBuilder);
            try (BatchSelect cursor = con.newSelectCursor(builtQuery, getters, task.getFetchRows(), task.getSocketTimeout())) {
                while (true) {
                    long rows = fetch(cursor, getters, pageBuilder);
                    if (rows <= 0L) {
                        break;
                    }
                    totalRows += rows;
                }
            }

            if (task.getIncremental() && totalRows > 0) {
                lastRecordStore = new LastRecordStore(task.getIncrementalColumnIndexes(), task.getIncrementalColumns());
                lastRecordStore.accept(getters);
            }

            pageBuilder.finish();

            // after_select runs after pageBuilder.finish because pageBuilder.finish may fail.
            // TODO Output plugin's transaction might still fail. In that case, after_select is
            // already done but the output plugin didn't commit the data to the target storage.
            // This means inconsistency between data source and destination. To avoid this
            // issue, we need another option like `after_commit` that runs after the output
            // plugin's commit. after_commit can't run in the same transaction with SELECT. So,
            // after_select gets values and stores them in TaskReport, and after_commit takes
            // them as placeholders. Or, after_select puts values to an intermediate table, and
            // after_commit moves those values to the actual table.
            if (task.getAfterSelect().isPresent()) {
                con.executeUpdate(task.getAfterSelect().get());
                con.connection.commit();
            }
        } catch (SQLException ex) {
            throw Throwables.propagate(ex);
        }

        TaskReport report = Exec.newTaskReport();
        if (lastRecordStore != null) {
            report.set("last_record", lastRecordStore.getList());
        }

        return report;
    }

    protected ColumnGetterFactory newColumnGetterFactory(PageBuilder pageBuilder, DateTimeZone dateTimeZone)
    {
        return new ColumnGetterFactory(pageBuilder, dateTimeZone);
    }

    private List<ColumnGetter> newColumnGetters(JdbcInputConnection con, PluginTask task, JdbcSchema querySchema, PageBuilder pageBuilder)
        throws SQLException
    {
        ColumnGetterFactory factory = newColumnGetterFactory(pageBuilder, task.getDefaultTimeZone());
        ImmutableList.Builder<ColumnGetter> getters = ImmutableList.builder();
        for (JdbcColumn c : querySchema.getColumns()) {
            JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), c, factory.getJdbcType(c.getSqlType()));
            getters.add(factory.newColumnGetter(con, task, c, columnOption));
        }
        return getters.build();
    }

    private static JdbcColumnOption columnOptionOf(Map<String, JdbcColumnOption> columnOptions, Map<String, JdbcColumnOption> defaultColumnOptions, JdbcColumn targetColumn, String targetColumnSQLType)
    {
        JdbcColumnOption columnOption = columnOptions.get(targetColumn.getName());
        if (columnOption == null) {
            String foundName = null;
            for (Map.Entry<String, JdbcColumnOption> entry : columnOptions.entrySet()) {
                if (entry.getKey().equalsIgnoreCase(targetColumn.getName())) {
                    if (columnOption != null) {
                        throw new ConfigException(String.format("Cannot specify column '%s' because both '%s' and '%s' exist in column_options.",
                                targetColumn.getName(), foundName, entry.getKey()));
                    }
                    foundName = entry.getKey();
                    columnOption = entry.getValue();
                }
            }
        }

        return Optional
                .fromNullable(columnOption)
                .or(Optional.fromNullable(defaultColumnOptions.get(targetColumnSQLType)))
                .or(
                        // default column option
                        new Supplier<JdbcColumnOption>()
                        {
                            public JdbcColumnOption get()
                            {
                                return Exec.newConfigSource().loadConfig(JdbcColumnOption.class);
                            }
                        });
    }

    private long fetch(BatchSelect cursor,
            List<ColumnGetter> getters, PageBuilder pageBuilder) throws SQLException
    {
        ResultSet result = cursor.fetch();
        if (result == null || !result.next()) {
            return 0;
        }

        List<Column> columns = pageBuilder.getSchema().getColumns();
        long rows = 0;
        long reportRows = 500;
        do {
            for (int i = 0; i < getters.size(); i++) {
                int index = i + 1;  // JDBC column index begins from 1
                getters.get(i).getAndSet(result, index, columns.get(i));
            }
            pageBuilder.addRecord();
            rows++;
            if (rows % reportRows == 0) {
                logger.info(String.format("Fetched %,d rows.", rows));
                reportRows *= 2;
            }
        } while (result.next());

        return rows;
    }

    //// TODO move to embulk.spi.util?
    //private static class ListPageOutput
    //{
    //    public ImmutableList.Builder<Page> pages;
    //
    //    public ListPageOutput()
    //    {
    //        reset();
    //    }
    //
    //    @Override
    //    public void add(Page page)
    //    {
    //        pages.add(page);
    //    }
    //
    //    @Override
    //    public void finish()
    //    {
    //    }
    //
    //    @Override
    //    public void close()
    //    {
    //    }
    //
    //    public List<Page> getPages()
    //    {
    //        return pages.build();
    //    }
    //
    //    public void reset()
    //    {
    //        pages = ImmutableList.builder();
    //    }
    //}

    protected void loadDriver(String className, Optional<String> driverPath)
    {
        if (driverPath.isPresent()) {
            addDriverJarToClasspath(driverPath.get());
        } else {
            try {
                // Gradle test task will add JDBC driver to classpath
                Class.forName(className);

            } catch (ClassNotFoundException ex) {
                File root = findPluginRoot();
                File driverLib = new File(root, "default_jdbc_driver");
                File[] files = driverLib.listFiles(new FileFilter() {
                    @Override
                    public boolean accept(File file) {
                        return file.isFile() && file.getName().endsWith(".jar");
                    }
                });
                if (files == null || files.length == 0) {
                    throw new RuntimeException("Cannot find JDBC driver in '" + root.getAbsolutePath() + "'.");
                } else {
                    for (File file : files) {
                        logger.info("JDBC Driver = " + file.getAbsolutePath());
                        addDriverJarToClasspath(file.getAbsolutePath());
                    }
                }
            }
        }

        // Load JDBC Driver
        try {
            Class.forName(className);
        } catch (ClassNotFoundException ex) {
            throw new RuntimeException(ex);
        }
    }

    protected void addDriverJarToClasspath(String glob)
    {
        // TODO match glob
        PluginClassLoader loader = (PluginClassLoader) getClass().getClassLoader();
        Path path = Paths.get(glob);
        if (!path.toFile().exists()) {
            throw new ConfigException("The specified driver jar doesn't exist: " + glob);
        }
        loader.addPath(Paths.get(glob));
    }

    protected File findPluginRoot()
    {
        try {
            URL url = getClass().getResource("/" + getClass().getName().replace('.', '/') + ".class");
            if (url.toString().startsWith("jar:")) {
                url = new URL(url.toString().replaceAll("^jar:", "").replaceAll("![^!]*$", ""));
            }

            File folder = new File(url.toURI()).getParentFile();
            for (;; folder = folder.getParentFile()) {
                if (folder == null) {
                    throw new RuntimeException("Cannot find 'embulk-input-xxx' folder.");
                }

                if (folder.getName().startsWith("embulk-input-")) {
                    return folder;
                }
            }
        } catch (MalformedURLException | URISyntaxException e) {
            throw new RuntimeException(e);
        }
    }

    protected void logConnectionProperties(String url, Properties props)
    {
        Properties maskedProps = new Properties();
        for (String key : props.stringPropertyNames()) {
            if (key.equals("password")) {
                maskedProps.setProperty(key, "***");
            } else {
                maskedProps.setProperty(key, props.getProperty(key));
            }
        }
        logger.info("Connecting to {} options {}", url, maskedProps);
    }
}
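The columnOptionOf helper above resolves each column's option in three steps: an exact (or unique case-insensitive) match in column_options, then a default_column_options entry keyed by the column's SQL type name, then an empty JdbcColumnOption loaded from a fresh config source. A minimal standalone demo of that Guava Optional fallback chain, with plain strings standing in for JdbcColumnOption (illustration only, not part of the gem):

import com.google.common.base.Optional;
import com.google.common.base.Supplier;

public class FallbackChainDemo
{
    // Mirrors the precedence in columnOptionOf: per-column option first,
    // then the per-SQL-type default, then a lazily built default.
    static String resolve(String perColumnOption, String perTypeDefault)
    {
        return Optional.fromNullable(perColumnOption)
                .or(Optional.fromNullable(perTypeDefault))
                .or(new Supplier<String>() {
                    public String get() { return "built-in default"; }
                });
    }

    public static void main(String[] args)
    {
        System.out.println(resolve("explicit", "by-type")); // -> explicit
        System.out.println(resolve(null, "by-type"));       // -> by-type
        System.out.println(resolve(null, null));            // -> built-in default
    }
}

The Supplier overload of or() matters here: the built-in default is only constructed when both lookups miss, which is why the original code can afford to load a fresh JdbcColumnOption in that branch.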