embulk-input-athena 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/build.gradle +7 -3
- data/src/main/java/org/embulk/input/athena/AthenaInputConnection.java +1 -0
- metadata +4 -28
- data/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java +0 -674
- data/src/main/java/org/embulk/input/jdbc/JdbcColumn.java +0 -58
- data/src/main/java/org/embulk/input/jdbc/JdbcColumnOption.java +0 -31
- data/src/main/java/org/embulk/input/jdbc/JdbcInputConnection.java +0 -397
- data/src/main/java/org/embulk/input/jdbc/JdbcLiteral.java +0 -38
- data/src/main/java/org/embulk/input/jdbc/JdbcSchema.java +0 -55
- data/src/main/java/org/embulk/input/jdbc/Ssl.java +0 -37
- data/src/main/java/org/embulk/input/jdbc/ToString.java +0 -54
- data/src/main/java/org/embulk/input/jdbc/ToStringMap.java +0 -35
- data/src/main/java/org/embulk/input/jdbc/getter/AbstractColumnGetter.java +0 -105
- data/src/main/java/org/embulk/input/jdbc/getter/AbstractIncrementalHandler.java +0 -45
- data/src/main/java/org/embulk/input/jdbc/getter/AbstractTimestampColumnGetter.java +0 -38
- data/src/main/java/org/embulk/input/jdbc/getter/BigDecimalColumnGetter.java +0 -59
- data/src/main/java/org/embulk/input/jdbc/getter/BooleanColumnGetter.java +0 -56
- data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetter.java +0 -21
- data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetterFactory.java +0 -207
- data/src/main/java/org/embulk/input/jdbc/getter/DateColumnGetter.java +0 -37
- data/src/main/java/org/embulk/input/jdbc/getter/DoubleColumnGetter.java +0 -66
- data/src/main/java/org/embulk/input/jdbc/getter/FloatColumnGetter.java +0 -66
- data/src/main/java/org/embulk/input/jdbc/getter/JsonColumnGetter.java +0 -57
- data/src/main/java/org/embulk/input/jdbc/getter/LongColumnGetter.java +0 -70
- data/src/main/java/org/embulk/input/jdbc/getter/StringColumnGetter.java +0 -96
- data/src/main/java/org/embulk/input/jdbc/getter/TimeColumnGetter.java +0 -37
- data/src/main/java/org/embulk/input/jdbc/getter/TimestampColumnGetter.java +0 -36
- data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithTimeZoneIncrementalHandler.java +0 -83
- data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithoutTimeZoneIncrementalHandler.java +0 -75
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ad4b3382cf370da50743fdef54e85cbaf9ee3c6b
|
4
|
+
data.tar.gz: 8efd48a28b3dc003aff73c4253abccccdc465df9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b8dcea7434f50c191421d6bc79d91ab0c0725ead169e86fb40846906e243657cf722d01bba4f9437dfe8e1d40ca901da97583696f42ac8cce268a904038fd1a4
|
7
|
+
data.tar.gz: 69d636d9f53a252db12ff0106e1dfa9b3cfcb00f7c8de005f70137007f850272b54519154dd4db5cf6f3bd35b327dcfddf421389854e6256d1d4aad0358b54f8
|
data/.gitignore
CHANGED
data/build.gradle
CHANGED
@@ -10,12 +10,13 @@ repositories {
|
|
10
10
|
jcenter()
|
11
11
|
// for athena jdbc
|
12
12
|
maven { url "https://maven.atlassian.com/repository/public" }
|
13
|
+
maven { url "https://dl.bintray.com/embulk-input-jdbc/maven" }
|
13
14
|
}
|
14
15
|
configurations {
|
15
16
|
provided
|
16
17
|
}
|
17
18
|
|
18
|
-
version = "0.1.0"
|
19
|
+
version = "0.1.1"
|
19
20
|
|
20
21
|
sourceCompatibility = 1.8
|
21
22
|
targetCompatibility = 1.8
|
@@ -24,15 +25,18 @@ dependencies {
|
|
24
25
|
compile "org.embulk:embulk-core:0.8.39"
|
25
26
|
provided "org.embulk:embulk-core:0.8.39"
|
26
27
|
// https://mvnrepository.com/artifact/com.amazonaws.athena.jdbc/AthenaJDBC41
|
27
|
-
//
|
28
|
-
compile
|
28
|
+
// TODO: update jdbc
|
29
|
+
compile group: 'com.amazonaws.athena.jdbc', name: 'AthenaJDBC41', version: '1.0.1-atlassian-hosted'
|
30
|
+
//compile files ('build/AthenaJDBC41-1.1.0.jar')
|
29
31
|
compile group: 'com.amazonaws', name: 'aws-java-sdk', version: '1.11.301'
|
32
|
+
compile 'org.embulk.input.jdbc:embulk-input-jdbc:0.9.1'
|
30
33
|
testCompile "junit:junit:4.+"
|
31
34
|
}
|
32
35
|
|
33
36
|
task classpath(type: Copy, dependsOn: ["jar"]) {
|
34
37
|
doFirst { file("classpath").deleteDir() }
|
35
38
|
from (configurations.runtime - configurations.provided + files(jar.archivePath))
|
39
|
+
// from ("build/AthenaJDBC41-1.1.0.jar'")
|
36
40
|
into "classpath"
|
37
41
|
}
|
38
42
|
clean { delete "classpath" }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-athena
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- shinji19
|
@@ -61,33 +61,8 @@ files:
|
|
61
61
|
- src/main/java/org/embulk/input/athena/AthenaInputConnection.java
|
62
62
|
- src/main/java/org/embulk/input/athena/AthenaInputPlugin.java
|
63
63
|
- src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1
|
64
|
-
- src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java
|
65
|
-
- src/main/java/org/embulk/input/jdbc/JdbcColumn.java
|
66
|
-
- src/main/java/org/embulk/input/jdbc/JdbcColumnOption.java
|
67
|
-
- src/main/java/org/embulk/input/jdbc/JdbcInputConnection.java
|
68
|
-
- src/main/java/org/embulk/input/jdbc/JdbcLiteral.java
|
69
|
-
- src/main/java/org/embulk/input/jdbc/JdbcSchema.java
|
70
|
-
- src/main/java/org/embulk/input/jdbc/Ssl.java
|
71
|
-
- src/main/java/org/embulk/input/jdbc/ToString.java
|
72
|
-
- src/main/java/org/embulk/input/jdbc/ToStringMap.java
|
73
|
-
- src/main/java/org/embulk/input/jdbc/getter/AbstractColumnGetter.java
|
74
|
-
- src/main/java/org/embulk/input/jdbc/getter/AbstractIncrementalHandler.java
|
75
|
-
- src/main/java/org/embulk/input/jdbc/getter/AbstractTimestampColumnGetter.java
|
76
|
-
- src/main/java/org/embulk/input/jdbc/getter/BigDecimalColumnGetter.java
|
77
|
-
- src/main/java/org/embulk/input/jdbc/getter/BooleanColumnGetter.java
|
78
|
-
- src/main/java/org/embulk/input/jdbc/getter/ColumnGetter.java
|
79
|
-
- src/main/java/org/embulk/input/jdbc/getter/ColumnGetterFactory.java
|
80
|
-
- src/main/java/org/embulk/input/jdbc/getter/DateColumnGetter.java
|
81
|
-
- src/main/java/org/embulk/input/jdbc/getter/DoubleColumnGetter.java
|
82
|
-
- src/main/java/org/embulk/input/jdbc/getter/FloatColumnGetter.java
|
83
|
-
- src/main/java/org/embulk/input/jdbc/getter/JsonColumnGetter.java
|
84
|
-
- src/main/java/org/embulk/input/jdbc/getter/LongColumnGetter.java
|
85
|
-
- src/main/java/org/embulk/input/jdbc/getter/StringColumnGetter.java
|
86
|
-
- src/main/java/org/embulk/input/jdbc/getter/TimeColumnGetter.java
|
87
|
-
- src/main/java/org/embulk/input/jdbc/getter/TimestampColumnGetter.java
|
88
|
-
- src/main/java/org/embulk/input/jdbc/getter/TimestampWithTimeZoneIncrementalHandler.java
|
89
|
-
- src/main/java/org/embulk/input/jdbc/getter/TimestampWithoutTimeZoneIncrementalHandler.java
|
90
64
|
- src/test/java/org/embulk/input/athena/TestAthenaInputPlugin.java
|
65
|
+
- classpath/AthenaJDBC41-1.0.1-atlassian-hosted.jar
|
91
66
|
- classpath/aws-java-sdk-1.11.301.jar
|
92
67
|
- classpath/aws-java-sdk-acm-1.11.301.jar
|
93
68
|
- classpath/aws-java-sdk-alexaforbusiness-1.11.301.jar
|
@@ -217,7 +192,8 @@ files:
|
|
217
192
|
- classpath/aws-java-sdk-xray-1.11.301.jar
|
218
193
|
- classpath/commons-codec-1.10.jar
|
219
194
|
- classpath/commons-logging-1.2.jar
|
220
|
-
- classpath/embulk-input-athena-0.1.0.jar
|
195
|
+
- classpath/embulk-input-athena-0.1.1.jar
|
196
|
+
- classpath/embulk-input-jdbc-0.9.1.jar
|
221
197
|
- classpath/httpclient-4.5.5.jar
|
222
198
|
- classpath/httpcore-4.4.9.jar
|
223
199
|
- classpath/ion-java-1.0.2.jar
|
@@ -1,674 +0,0 @@
|
|
1
|
-
package org.embulk.input.jdbc;
|
2
|
-
|
3
|
-
import java.io.File;
|
4
|
-
import java.io.FileFilter;
|
5
|
-
import java.net.MalformedURLException;
|
6
|
-
import java.net.URISyntaxException;
|
7
|
-
import java.net.URL;
|
8
|
-
import java.nio.file.Path;
|
9
|
-
import java.util.List;
|
10
|
-
import java.util.Map;
|
11
|
-
import java.util.Properties;
|
12
|
-
import java.nio.file.Paths;
|
13
|
-
import java.sql.ResultSet;
|
14
|
-
import java.sql.SQLException;
|
15
|
-
|
16
|
-
import org.slf4j.Logger;
|
17
|
-
|
18
|
-
import com.fasterxml.jackson.databind.JsonNode;
|
19
|
-
import com.google.common.base.Optional;
|
20
|
-
import com.google.common.base.Supplier;
|
21
|
-
import com.google.common.base.Throwables;
|
22
|
-
import com.google.common.collect.ImmutableList;
|
23
|
-
|
24
|
-
import org.embulk.config.Config;
|
25
|
-
import org.embulk.config.ConfigException;
|
26
|
-
import org.embulk.config.ConfigDefault;
|
27
|
-
import org.embulk.config.ConfigDiff;
|
28
|
-
import org.embulk.config.ConfigInject;
|
29
|
-
import org.embulk.config.ConfigSource;
|
30
|
-
import org.embulk.config.Task;
|
31
|
-
import org.embulk.config.TaskReport;
|
32
|
-
import org.embulk.config.TaskSource;
|
33
|
-
import org.embulk.plugin.PluginClassLoader;
|
34
|
-
import org.embulk.spi.BufferAllocator;
|
35
|
-
import org.embulk.spi.Column;
|
36
|
-
import org.embulk.spi.DataException;
|
37
|
-
import org.embulk.spi.PageBuilder;
|
38
|
-
import org.embulk.spi.InputPlugin;
|
39
|
-
import org.embulk.spi.PageOutput;
|
40
|
-
import org.embulk.spi.Schema;
|
41
|
-
import org.embulk.spi.Exec;
|
42
|
-
import org.embulk.input.jdbc.getter.ColumnGetter;
|
43
|
-
import org.embulk.input.jdbc.getter.ColumnGetterFactory;
|
44
|
-
import org.embulk.input.jdbc.JdbcInputConnection.BatchSelect;
|
45
|
-
import org.embulk.input.jdbc.JdbcInputConnection.PreparedQuery;
|
46
|
-
import org.joda.time.DateTimeZone;
|
47
|
-
|
48
|
-
import static java.util.Locale.ENGLISH;
|
49
|
-
|
50
|
-
public abstract class AbstractJdbcInputPlugin
|
51
|
-
implements InputPlugin
|
52
|
-
{
|
53
|
-
protected final Logger logger = Exec.getLogger(getClass());
|
54
|
-
|
55
|
-
public interface PluginTask extends Task
|
56
|
-
{
|
57
|
-
@Config("options")
|
58
|
-
@ConfigDefault("{}")
|
59
|
-
public ToStringMap getOptions();
|
60
|
-
|
61
|
-
@Config("table")
|
62
|
-
@ConfigDefault("null")
|
63
|
-
public Optional<String> getTable();
|
64
|
-
public void setTable(Optional<String> normalizedTableName);
|
65
|
-
|
66
|
-
@Config("query")
|
67
|
-
@ConfigDefault("null")
|
68
|
-
public Optional<String> getQuery();
|
69
|
-
|
70
|
-
@Config("select")
|
71
|
-
@ConfigDefault("null")
|
72
|
-
public Optional<String> getSelect();
|
73
|
-
|
74
|
-
@Config("where")
|
75
|
-
@ConfigDefault("null")
|
76
|
-
public Optional<String> getWhere();
|
77
|
-
|
78
|
-
@Config("order_by")
|
79
|
-
@ConfigDefault("null")
|
80
|
-
public Optional<String> getOrderBy();
|
81
|
-
|
82
|
-
@Config("incremental")
|
83
|
-
@ConfigDefault("false")
|
84
|
-
public boolean getIncremental();
|
85
|
-
|
86
|
-
@Config("incremental_columns")
|
87
|
-
@ConfigDefault("[]")
|
88
|
-
public List<String> getIncrementalColumns();
|
89
|
-
public void setIncrementalColumns(List<String> indexes);
|
90
|
-
|
91
|
-
@Config("last_record")
|
92
|
-
@ConfigDefault("null")
|
93
|
-
public Optional<List<JsonNode>> getLastRecord();
|
94
|
-
|
95
|
-
// TODO limit_value is necessary to make sure repeated bulk load transactions
|
96
|
-
// don't a same record twice or miss records when the column
|
97
|
-
// specified at order_by parameter is not unique.
|
98
|
-
// For example, if the order_by column is "timestamp created_at"
|
99
|
-
// column whose precision is second, the table can include multiple
|
100
|
-
// records with the same created_at time. At the first bulk load
|
101
|
-
// transaction, it loads a record with created_at=2015-01-02 00:00:02.
|
102
|
-
// Then next transaction will use WHERE created_at > '2015-01-02 00:00:02'.
|
103
|
-
// However, if another record with created_at=2014-01-01 23:59:59 is
|
104
|
-
// inserted between the 2 transactions, the new record will be skipped.
|
105
|
-
// To prevent this scenario, we want to specify
|
106
|
-
// limit_value=2015-01-02 00:00:00 (exclusive). With this way, as long as
|
107
|
-
// a transaction runs after 2015-01-02 00:00:00 + some minutes, we don't
|
108
|
-
// skip records. Ideally, to automate the scheduling, we want to set
|
109
|
-
// limit_value="today".
|
110
|
-
//
|
111
|
-
//@Config("limit_value")
|
112
|
-
//@ConfigDefault("null")
|
113
|
-
//public Optional<String> getLimitValue();
|
114
|
-
|
115
|
-
//// TODO probably limit_rows is unnecessary as long as this has
|
116
|
-
// supports parallel execution (partition_by option) and resuming.
|
117
|
-
//@Config("limit_rows")
|
118
|
-
//@ConfigDefault("null")
|
119
|
-
//public Optional<Integer> getLimitRows();
|
120
|
-
|
121
|
-
@Config("connect_timeout")
|
122
|
-
@ConfigDefault("300")
|
123
|
-
public int getConnectTimeout();
|
124
|
-
|
125
|
-
@Config("socket_timeout")
|
126
|
-
@ConfigDefault("1800")
|
127
|
-
public int getSocketTimeout();
|
128
|
-
|
129
|
-
@Config("fetch_rows")
|
130
|
-
@ConfigDefault("10000")
|
131
|
-
// TODO set minimum number
|
132
|
-
public int getFetchRows();
|
133
|
-
|
134
|
-
// TODO parallel execution using "partition_by" config
|
135
|
-
|
136
|
-
@Config("column_options")
|
137
|
-
@ConfigDefault("{}")
|
138
|
-
public Map<String, JdbcColumnOption> getColumnOptions();
|
139
|
-
|
140
|
-
@Config("default_timezone")
|
141
|
-
@ConfigDefault("\"UTC\"")
|
142
|
-
public DateTimeZone getDefaultTimeZone();
|
143
|
-
|
144
|
-
@Config("default_column_options")
|
145
|
-
@ConfigDefault("{}")
|
146
|
-
public Map<String, JdbcColumnOption> getDefaultColumnOptions();
|
147
|
-
|
148
|
-
@Config("after_select")
|
149
|
-
@ConfigDefault("null")
|
150
|
-
public Optional<String> getAfterSelect();
|
151
|
-
|
152
|
-
public PreparedQuery getBuiltQuery();
|
153
|
-
public void setBuiltQuery(PreparedQuery query);
|
154
|
-
|
155
|
-
public JdbcSchema getQuerySchema();
|
156
|
-
public void setQuerySchema(JdbcSchema schema);
|
157
|
-
|
158
|
-
public List<Integer> getIncrementalColumnIndexes();
|
159
|
-
public void setIncrementalColumnIndexes(List<Integer> indexes);
|
160
|
-
|
161
|
-
@ConfigInject
|
162
|
-
public BufferAllocator getBufferAllocator();
|
163
|
-
}
|
164
|
-
|
165
|
-
// for subclasses to add @Config
|
166
|
-
protected Class<? extends PluginTask> getTaskClass()
|
167
|
-
{
|
168
|
-
return PluginTask.class;
|
169
|
-
}
|
170
|
-
|
171
|
-
protected abstract JdbcInputConnection newConnection(PluginTask task) throws SQLException;
|
172
|
-
|
173
|
-
@Override
|
174
|
-
public ConfigDiff transaction(ConfigSource config,
|
175
|
-
InputPlugin.Control control)
|
176
|
-
{
|
177
|
-
PluginTask task = config.loadConfig(getTaskClass());
|
178
|
-
|
179
|
-
if (task.getIncremental()) {
|
180
|
-
if (task.getOrderBy().isPresent()) {
|
181
|
-
throw new ConfigException("order_by option must not be set if incremental is true");
|
182
|
-
}
|
183
|
-
}
|
184
|
-
else {
|
185
|
-
if (!task.getIncrementalColumns().isEmpty()) {
|
186
|
-
throw new ConfigException("'incremental: true' must be set if incremental_columns is set");
|
187
|
-
}
|
188
|
-
}
|
189
|
-
|
190
|
-
Schema schema;
|
191
|
-
try (JdbcInputConnection con = newConnection(task)) {
|
192
|
-
con.showDriverVersion();
|
193
|
-
|
194
|
-
// TODO incremental_columns is not set => get primary key
|
195
|
-
schema = setupTask(con, task);
|
196
|
-
} catch (SQLException ex) {
|
197
|
-
throw Throwables.propagate(ex);
|
198
|
-
}
|
199
|
-
|
200
|
-
return buildNextConfigDiff(task, control.run(task.dump(), schema, 1));
|
201
|
-
}
|
202
|
-
|
203
|
-
protected Schema setupTask(JdbcInputConnection con, PluginTask task) throws SQLException
|
204
|
-
{
|
205
|
-
if (task.getTable().isPresent()) {
|
206
|
-
String actualTableName = normalizeTableNameCase(con, task.getTable().get());
|
207
|
-
task.setTable(Optional.of(actualTableName));
|
208
|
-
}
|
209
|
-
|
210
|
-
// build SELECT query and gets schema of its result
|
211
|
-
String rawQuery = getRawQuery(task, con);
|
212
|
-
|
213
|
-
JdbcSchema querySchema = con.getSchemaOfQuery(rawQuery);
|
214
|
-
task.setQuerySchema(querySchema);
|
215
|
-
// query schema should not change after incremental query
|
216
|
-
|
217
|
-
PreparedQuery preparedQuery;
|
218
|
-
if (task.getIncremental()) {
|
219
|
-
// build incremental query
|
220
|
-
|
221
|
-
List<String> incrementalColumns = task.getIncrementalColumns();
|
222
|
-
if (incrementalColumns.isEmpty()) {
|
223
|
-
// incremental_columns is not set
|
224
|
-
if (!task.getTable().isPresent()) {
|
225
|
-
throw new ConfigException("incremental_columns option must be set if incremental is true and custom query option is set");
|
226
|
-
}
|
227
|
-
// get primary keys from the target table to use them as incremental_columns
|
228
|
-
List<String> primaryKeys = con.getPrimaryKeys(task.getTable().get());
|
229
|
-
if (primaryKeys.isEmpty()) {
|
230
|
-
throw new ConfigException(String.format(ENGLISH,
|
231
|
-
"Primary key is not available at the table '%s'. incremental_columns option must be set",
|
232
|
-
task.getTable().get()));
|
233
|
-
}
|
234
|
-
logger.info("Using primary keys as incremental_columns: {}", primaryKeys);
|
235
|
-
task.setIncrementalColumns(primaryKeys);
|
236
|
-
incrementalColumns = primaryKeys;
|
237
|
-
}
|
238
|
-
|
239
|
-
List<Integer> incrementalColumnIndexes = findIncrementalColumnIndexes(querySchema, incrementalColumns);
|
240
|
-
task.setIncrementalColumnIndexes(incrementalColumnIndexes);
|
241
|
-
|
242
|
-
List<JsonNode> lastRecord;
|
243
|
-
if (task.getLastRecord().isPresent()) {
|
244
|
-
lastRecord = task.getLastRecord().get();
|
245
|
-
if (lastRecord.size() != incrementalColumnIndexes.size()) {
|
246
|
-
throw new ConfigException("Number of values set at last_record must be same with number of columns set at incremental_columns");
|
247
|
-
}
|
248
|
-
}
|
249
|
-
else {
|
250
|
-
lastRecord = null;
|
251
|
-
}
|
252
|
-
|
253
|
-
if (task.getQuery().isPresent()) {
|
254
|
-
preparedQuery = con.wrapIncrementalQuery(rawQuery, querySchema, incrementalColumnIndexes, lastRecord);
|
255
|
-
}
|
256
|
-
else {
|
257
|
-
preparedQuery = con.rebuildIncrementalQuery(
|
258
|
-
task.getTable().get(), task.getSelect(),
|
259
|
-
task.getWhere(),
|
260
|
-
querySchema, incrementalColumnIndexes, lastRecord);
|
261
|
-
}
|
262
|
-
}
|
263
|
-
else {
|
264
|
-
task.setIncrementalColumnIndexes(ImmutableList.<Integer>of());
|
265
|
-
preparedQuery = new PreparedQuery(rawQuery, ImmutableList.<JdbcLiteral>of());
|
266
|
-
}
|
267
|
-
|
268
|
-
task.setBuiltQuery(preparedQuery);
|
269
|
-
|
270
|
-
// validate column_options
|
271
|
-
newColumnGetters(con, task, querySchema, null);
|
272
|
-
|
273
|
-
ColumnGetterFactory factory = newColumnGetterFactory(null, task.getDefaultTimeZone());
|
274
|
-
ImmutableList.Builder<Column> columns = ImmutableList.builder();
|
275
|
-
for (int i = 0; i < querySchema.getCount(); i++) {
|
276
|
-
JdbcColumn column = querySchema.getColumn(i);
|
277
|
-
JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), column, factory.getJdbcType(column.getSqlType()));
|
278
|
-
columns.add(new Column(i,
|
279
|
-
column.getName(),
|
280
|
-
factory.newColumnGetter(con, task, column, columnOption).getToType()));
|
281
|
-
}
|
282
|
-
return new Schema(columns.build());
|
283
|
-
}
|
284
|
-
|
285
|
-
private String normalizeTableNameCase(JdbcInputConnection con, String tableName)
|
286
|
-
throws SQLException
|
287
|
-
{
|
288
|
-
if (con.tableExists(tableName)) {
|
289
|
-
return tableName;
|
290
|
-
} else {
|
291
|
-
String upperTableName = tableName.toUpperCase();
|
292
|
-
String lowerTableName = tableName.toLowerCase();
|
293
|
-
boolean upperExists = con.tableExists(upperTableName);
|
294
|
-
boolean lowerExists = con.tableExists(lowerTableName);
|
295
|
-
if (upperExists && lowerExists) {
|
296
|
-
throw new ConfigException(String.format("Cannot specify table '%s' because both '%s' and '%s' exist.",
|
297
|
-
tableName, upperTableName, lowerTableName));
|
298
|
-
} else if (upperExists) {
|
299
|
-
return upperTableName;
|
300
|
-
} else if (lowerExists) {
|
301
|
-
return lowerTableName;
|
302
|
-
} else {
|
303
|
-
// fallback to the given table name. this may throw error later at getSchemaOfQuery
|
304
|
-
return tableName;
|
305
|
-
}
|
306
|
-
}
|
307
|
-
}
|
308
|
-
|
309
|
-
private List<Integer> findIncrementalColumnIndexes(JdbcSchema schema, List<String> incrementalColumns)
|
310
|
-
throws SQLException
|
311
|
-
{
|
312
|
-
ImmutableList.Builder<Integer> builder = ImmutableList.builder();
|
313
|
-
for (String name : incrementalColumns) {
|
314
|
-
Optional<Integer> index = schema.findColumn(name);
|
315
|
-
if (index.isPresent()) {
|
316
|
-
builder.add(index.get());
|
317
|
-
}
|
318
|
-
else {
|
319
|
-
throw new ConfigException(String.format(ENGLISH,
|
320
|
-
"Column name '%s' is in incremental_columns option does not exist",
|
321
|
-
name));
|
322
|
-
}
|
323
|
-
}
|
324
|
-
return builder.build();
|
325
|
-
}
|
326
|
-
|
327
|
-
private String getRawQuery(PluginTask task, JdbcInputConnection con) throws SQLException
|
328
|
-
{
|
329
|
-
if (task.getQuery().isPresent()) {
|
330
|
-
if (task.getTable().isPresent() || task.getSelect().isPresent() ||
|
331
|
-
task.getWhere().isPresent() || task.getOrderBy().isPresent()) {
|
332
|
-
throw new ConfigException("'table', 'select', 'where' and 'order_by' parameters are unnecessary if 'query' parameter is set.");
|
333
|
-
} else if (!task.getIncrementalColumns().isEmpty() || task.getLastRecord().isPresent()) {
|
334
|
-
throw new ConfigException("'incremental_columns' and 'last_record' parameters are not supported if 'query' parameter is set.");
|
335
|
-
}
|
336
|
-
return task.getQuery().get();
|
337
|
-
} else if (task.getTable().isPresent()) {
|
338
|
-
return con.buildSelectQuery(task.getTable().get(), task.getSelect(),
|
339
|
-
task.getWhere(), task.getOrderBy());
|
340
|
-
} else {
|
341
|
-
throw new ConfigException("'table' or 'query' parameter is required");
|
342
|
-
}
|
343
|
-
}
|
344
|
-
|
345
|
-
@Override
|
346
|
-
public ConfigDiff resume(TaskSource taskSource,
|
347
|
-
Schema schema, int taskCount,
|
348
|
-
InputPlugin.Control control)
|
349
|
-
{
|
350
|
-
PluginTask task = taskSource.loadTask(getTaskClass());
|
351
|
-
|
352
|
-
// TODO when parallel execution is implemented and enabled, (maybe) order_by
|
353
|
-
// is necessary to resume. transaction() gets the range of order_by
|
354
|
-
// colum and set it to WHERE condition to make the operation deterministic
|
355
|
-
|
356
|
-
return buildNextConfigDiff(task, control.run(taskSource, schema, taskCount));
|
357
|
-
}
|
358
|
-
|
359
|
-
public ConfigDiff guess(ConfigSource config)
|
360
|
-
{
|
361
|
-
return Exec.newConfigDiff();
|
362
|
-
}
|
363
|
-
|
364
|
-
protected ConfigDiff buildNextConfigDiff(PluginTask task, List<TaskReport> reports)
|
365
|
-
{
|
366
|
-
ConfigDiff next = Exec.newConfigDiff();
|
367
|
-
if (reports.size() > 0 && reports.get(0).has("last_record")) {
|
368
|
-
next.set("last_record", reports.get(0).get(JsonNode.class, "last_record"));
|
369
|
-
} else if (task.getLastRecord().isPresent()) {
|
370
|
-
next.set("last_record", task.getLastRecord().get());
|
371
|
-
}
|
372
|
-
return next;
|
373
|
-
}
|
374
|
-
|
375
|
-
@Override
|
376
|
-
public void cleanup(TaskSource taskSource,
|
377
|
-
Schema schema, int taskCount,
|
378
|
-
List<TaskReport> successTaskReports)
|
379
|
-
{
|
380
|
-
// do nothing
|
381
|
-
}
|
382
|
-
|
383
|
-
private static class LastRecordStore
|
384
|
-
{
|
385
|
-
private final List<Integer> columnIndexes;
|
386
|
-
private final JsonNode[] lastValues;
|
387
|
-
private final List<String> columnNames;
|
388
|
-
|
389
|
-
public LastRecordStore(List<Integer> columnIndexes, List<String> columnNames)
|
390
|
-
{
|
391
|
-
this.columnIndexes = columnIndexes;
|
392
|
-
this.lastValues = new JsonNode[columnIndexes.size()];
|
393
|
-
this.columnNames = columnNames;
|
394
|
-
}
|
395
|
-
|
396
|
-
public void accept(List<ColumnGetter> getters)
|
397
|
-
throws SQLException
|
398
|
-
{
|
399
|
-
for (int i = 0; i < columnIndexes.size(); i++) {
|
400
|
-
lastValues[i] = getters.get(columnIndexes.get(i)).encodeToJson();
|
401
|
-
}
|
402
|
-
}
|
403
|
-
|
404
|
-
public List<JsonNode> getList()
|
405
|
-
{
|
406
|
-
ImmutableList.Builder<JsonNode> builder = ImmutableList.builder();
|
407
|
-
for (int i = 0; i < lastValues.length; i++) {
|
408
|
-
if (lastValues[i] == null || lastValues[i].isNull()) {
|
409
|
-
throw new DataException(String.format(ENGLISH,
|
410
|
-
"incremental_columns can't include null values but the last row is null at column '%s'",
|
411
|
-
columnNames.get(i)));
|
412
|
-
}
|
413
|
-
builder.add(lastValues[i]);
|
414
|
-
}
|
415
|
-
return builder.build();
|
416
|
-
}
|
417
|
-
}
|
418
|
-
|
419
|
-
@Override
|
420
|
-
public TaskReport run(TaskSource taskSource,
|
421
|
-
Schema schema, int taskIndex,
|
422
|
-
PageOutput output)
|
423
|
-
{
|
424
|
-
PluginTask task = taskSource.loadTask(getTaskClass());
|
425
|
-
|
426
|
-
PreparedQuery builtQuery = task.getBuiltQuery();
|
427
|
-
JdbcSchema querySchema = task.getQuerySchema();
|
428
|
-
BufferAllocator allocator = task.getBufferAllocator();
|
429
|
-
PageBuilder pageBuilder = new PageBuilder(allocator, schema, output);
|
430
|
-
|
431
|
-
long totalRows = 0;
|
432
|
-
|
433
|
-
LastRecordStore lastRecordStore = null;
|
434
|
-
|
435
|
-
try (JdbcInputConnection con = newConnection(task)) {
|
436
|
-
List<ColumnGetter> getters = newColumnGetters(con, task, querySchema, pageBuilder);
|
437
|
-
try (BatchSelect cursor = con.newSelectCursor(builtQuery, getters, task.getFetchRows(), task.getSocketTimeout())) {
|
438
|
-
while (true) {
|
439
|
-
long rows = fetch(cursor, getters, pageBuilder);
|
440
|
-
if (rows <= 0L) {
|
441
|
-
break;
|
442
|
-
}
|
443
|
-
totalRows += rows;
|
444
|
-
}
|
445
|
-
}
|
446
|
-
|
447
|
-
if (task.getIncremental() && totalRows > 0) {
|
448
|
-
lastRecordStore = new LastRecordStore(task.getIncrementalColumnIndexes(), task.getIncrementalColumns());
|
449
|
-
lastRecordStore.accept(getters);
|
450
|
-
}
|
451
|
-
|
452
|
-
pageBuilder.finish();
|
453
|
-
|
454
|
-
// after_select runs after pageBuilder.finish because pageBuilder.finish may fail.
|
455
|
-
// TODO Output plugin's transaction might still fail. In that case, after_select is
|
456
|
-
// already done but output plugin didn't commit the data to the target storage.
|
457
|
-
// This means inconsistency between data source and destination. To avoid this
|
458
|
-
// issue, we need another option like `after_commit` that runs after output plugin's
|
459
|
-
// commit. after_commit can't run in the same transaction with SELECT. So,
|
460
|
-
// after_select gets values and store them in TaskReport, and after_commit take
|
461
|
-
// them as placeholder. Or, after_select puts values to an intermediate table, and
|
462
|
-
// after_commit moves those values to the actual table.
|
463
|
-
if (task.getAfterSelect().isPresent()) {
|
464
|
-
con.executeUpdate(task.getAfterSelect().get());
|
465
|
-
con.connection.commit();
|
466
|
-
}
|
467
|
-
} catch (SQLException ex) {
|
468
|
-
throw Throwables.propagate(ex);
|
469
|
-
}
|
470
|
-
|
471
|
-
TaskReport report = Exec.newTaskReport();
|
472
|
-
if (lastRecordStore != null) {
|
473
|
-
report.set("last_record", lastRecordStore.getList());
|
474
|
-
}
|
475
|
-
|
476
|
-
return report;
|
477
|
-
}
|
478
|
-
|
479
|
-
protected ColumnGetterFactory newColumnGetterFactory(PageBuilder pageBuilder, DateTimeZone dateTimeZone)
|
480
|
-
{
|
481
|
-
return new ColumnGetterFactory(pageBuilder, dateTimeZone);
|
482
|
-
}
|
483
|
-
|
484
|
-
private List<ColumnGetter> newColumnGetters(JdbcInputConnection con, PluginTask task, JdbcSchema querySchema, PageBuilder pageBuilder)
|
485
|
-
throws SQLException
|
486
|
-
{
|
487
|
-
ColumnGetterFactory factory = newColumnGetterFactory(pageBuilder, task.getDefaultTimeZone());
|
488
|
-
ImmutableList.Builder<ColumnGetter> getters = ImmutableList.builder();
|
489
|
-
for (JdbcColumn c : querySchema.getColumns()) {
|
490
|
-
JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), c, factory.getJdbcType(c.getSqlType()));
|
491
|
-
getters.add(factory.newColumnGetter(con, task, c, columnOption));
|
492
|
-
}
|
493
|
-
return getters.build();
|
494
|
-
}
|
495
|
-
|
496
|
-
private static JdbcColumnOption columnOptionOf(Map<String, JdbcColumnOption> columnOptions, Map<String, JdbcColumnOption> defaultColumnOptions, JdbcColumn targetColumn, String targetColumnSQLType)
|
497
|
-
{
|
498
|
-
JdbcColumnOption columnOption = columnOptions.get(targetColumn.getName());
|
499
|
-
if (columnOption == null) {
|
500
|
-
String foundName = null;
|
501
|
-
for (Map.Entry<String, JdbcColumnOption> entry : columnOptions.entrySet()) {
|
502
|
-
if (entry.getKey().equalsIgnoreCase(targetColumn.getName())) {
|
503
|
-
if (columnOption != null) {
|
504
|
-
throw new ConfigException(String.format("Cannot specify column '%s' because both '%s' and '%s' exist in column_options.",
|
505
|
-
targetColumn.getName(), foundName, entry.getKey()));
|
506
|
-
}
|
507
|
-
foundName = entry.getKey();
|
508
|
-
columnOption = entry.getValue();
|
509
|
-
}
|
510
|
-
}
|
511
|
-
}
|
512
|
-
|
513
|
-
return Optional
|
514
|
-
.fromNullable(columnOption)
|
515
|
-
.or(Optional.fromNullable(defaultColumnOptions.get(targetColumnSQLType)))
|
516
|
-
.or(
|
517
|
-
// default column option
|
518
|
-
new Supplier<JdbcColumnOption>()
|
519
|
-
{
|
520
|
-
public JdbcColumnOption get()
|
521
|
-
{
|
522
|
-
return Exec.newConfigSource().loadConfig(JdbcColumnOption.class);
|
523
|
-
}
|
524
|
-
});
|
525
|
-
}
|
526
|
-
|
527
|
-
private long fetch(BatchSelect cursor,
|
528
|
-
List<ColumnGetter> getters, PageBuilder pageBuilder) throws SQLException
|
529
|
-
{
|
530
|
-
ResultSet result = cursor.fetch();
|
531
|
-
if (result == null || !result.next()) {
|
532
|
-
return 0;
|
533
|
-
}
|
534
|
-
|
535
|
-
List<Column> columns = pageBuilder.getSchema().getColumns();
|
536
|
-
long rows = 0;
|
537
|
-
long reportRows = 500;
|
538
|
-
do {
|
539
|
-
for (int i=0; i < getters.size(); i++) {
|
540
|
-
int index = i + 1; // JDBC column index begins from 1
|
541
|
-
getters.get(i).getAndSet(result, index, columns.get(i));
|
542
|
-
}
|
543
|
-
pageBuilder.addRecord();
|
544
|
-
rows++;
|
545
|
-
if (rows % reportRows == 0) {
|
546
|
-
logger.info(String.format("Fetched %,d rows.", rows));
|
547
|
-
reportRows *= 2;
|
548
|
-
}
|
549
|
-
} while (result.next());
|
550
|
-
|
551
|
-
return rows;
|
552
|
-
}
|
553
|
-
|
554
|
-
//// TODO move to embulk.spi.util?
|
555
|
-
//private static class ListPageOutput
|
556
|
-
//{
|
557
|
-
// public ImmutableList.Builder<Page> pages;
|
558
|
-
//
|
559
|
-
// public ListPageOutput()
|
560
|
-
// {
|
561
|
-
// reset();
|
562
|
-
// }
|
563
|
-
//
|
564
|
-
// @Override
|
565
|
-
// public void add(Page page)
|
566
|
-
// {
|
567
|
-
// pages.add(page);
|
568
|
-
// }
|
569
|
-
//
|
570
|
-
// @Override
|
571
|
-
// public void finish()
|
572
|
-
// {
|
573
|
-
// }
|
574
|
-
//
|
575
|
-
// @Override
|
576
|
-
// public void close()
|
577
|
-
// {
|
578
|
-
// }
|
579
|
-
//
|
580
|
-
// public List<Page> getPages()
|
581
|
-
// {
|
582
|
-
// return pages.build();
|
583
|
-
// }
|
584
|
-
//
|
585
|
-
// public void reset()
|
586
|
-
// {
|
587
|
-
// pages = ImmutableList.builder();
|
588
|
-
// }
|
589
|
-
//}
|
590
|
-
|
591
|
-
protected void loadDriver(String className, Optional<String> driverPath)
|
592
|
-
{
|
593
|
-
if (driverPath.isPresent()) {
|
594
|
-
addDriverJarToClasspath(driverPath.get());
|
595
|
-
} else {
|
596
|
-
try {
|
597
|
-
// Gradle test task will add JDBC driver to classpath
|
598
|
-
Class.forName(className);
|
599
|
-
|
600
|
-
} catch (ClassNotFoundException ex) {
|
601
|
-
File root = findPluginRoot();
|
602
|
-
File driverLib = new File(root, "default_jdbc_driver");
|
603
|
-
File[] files = driverLib.listFiles(new FileFilter() {
|
604
|
-
@Override
|
605
|
-
public boolean accept(File file) {
|
606
|
-
return file.isFile() && file.getName().endsWith(".jar");
|
607
|
-
}
|
608
|
-
});
|
609
|
-
if (files == null || files.length == 0) {
|
610
|
-
throw new RuntimeException("Cannot find JDBC driver in '" + root.getAbsolutePath() + "'.");
|
611
|
-
} else {
|
612
|
-
for (File file : files) {
|
613
|
-
logger.info("JDBC Driver = " + file.getAbsolutePath());
|
614
|
-
addDriverJarToClasspath(file.getAbsolutePath());
|
615
|
-
}
|
616
|
-
}
|
617
|
-
}
|
618
|
-
}
|
619
|
-
|
620
|
-
// Load JDBC Driver
|
621
|
-
try {
|
622
|
-
Class.forName(className);
|
623
|
-
} catch (ClassNotFoundException ex) {
|
624
|
-
throw new RuntimeException(ex);
|
625
|
-
}
|
626
|
-
}
|
627
|
-
|
628
|
-
protected void addDriverJarToClasspath(String glob)
|
629
|
-
{
|
630
|
-
// TODO match glob
|
631
|
-
PluginClassLoader loader = (PluginClassLoader) getClass().getClassLoader();
|
632
|
-
Path path = Paths.get(glob);
|
633
|
-
if (!path.toFile().exists()) {
|
634
|
-
throw new ConfigException("The specified driver jar doesn't exist: " + glob);
|
635
|
-
}
|
636
|
-
loader.addPath(Paths.get(glob));
|
637
|
-
}
|
638
|
-
|
639
|
-
protected File findPluginRoot()
|
640
|
-
{
|
641
|
-
try {
|
642
|
-
URL url = getClass().getResource("/" + getClass().getName().replace('.', '/') + ".class");
|
643
|
-
if (url.toString().startsWith("jar:")) {
|
644
|
-
url = new URL(url.toString().replaceAll("^jar:", "").replaceAll("![^!]*$", ""));
|
645
|
-
}
|
646
|
-
|
647
|
-
File folder = new File(url.toURI()).getParentFile();
|
648
|
-
for (;; folder = folder.getParentFile()) {
|
649
|
-
if (folder == null) {
|
650
|
-
throw new RuntimeException("Cannot find 'embulk-input-xxx' folder.");
|
651
|
-
}
|
652
|
-
|
653
|
-
if (folder.getName().startsWith("embulk-input-")) {
|
654
|
-
return folder;
|
655
|
-
}
|
656
|
-
}
|
657
|
-
} catch (MalformedURLException | URISyntaxException e) {
|
658
|
-
throw new RuntimeException(e);
|
659
|
-
}
|
660
|
-
}
|
661
|
-
|
662
|
-
protected void logConnectionProperties(String url, Properties props)
|
663
|
-
{
|
664
|
-
Properties maskedProps = new Properties();
|
665
|
-
for(String key : props.stringPropertyNames()) {
|
666
|
-
if (key.equals("password")) {
|
667
|
-
maskedProps.setProperty(key, "***");
|
668
|
-
} else {
|
669
|
-
maskedProps.setProperty(key, props.getProperty(key));
|
670
|
-
}
|
671
|
-
}
|
672
|
-
logger.info("Connecting to {} options {}", url, maskedProps);
|
673
|
-
}
|
674
|
-
}
|