embulk-input-athena 0.1.0 → 0.1.1

Files changed (31)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -1
  3. data/build.gradle +7 -3
  4. data/src/main/java/org/embulk/input/athena/AthenaInputConnection.java +1 -0
  5. metadata +4 -28
  6. data/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java +0 -674
  7. data/src/main/java/org/embulk/input/jdbc/JdbcColumn.java +0 -58
  8. data/src/main/java/org/embulk/input/jdbc/JdbcColumnOption.java +0 -31
  9. data/src/main/java/org/embulk/input/jdbc/JdbcInputConnection.java +0 -397
  10. data/src/main/java/org/embulk/input/jdbc/JdbcLiteral.java +0 -38
  11. data/src/main/java/org/embulk/input/jdbc/JdbcSchema.java +0 -55
  12. data/src/main/java/org/embulk/input/jdbc/Ssl.java +0 -37
  13. data/src/main/java/org/embulk/input/jdbc/ToString.java +0 -54
  14. data/src/main/java/org/embulk/input/jdbc/ToStringMap.java +0 -35
  15. data/src/main/java/org/embulk/input/jdbc/getter/AbstractColumnGetter.java +0 -105
  16. data/src/main/java/org/embulk/input/jdbc/getter/AbstractIncrementalHandler.java +0 -45
  17. data/src/main/java/org/embulk/input/jdbc/getter/AbstractTimestampColumnGetter.java +0 -38
  18. data/src/main/java/org/embulk/input/jdbc/getter/BigDecimalColumnGetter.java +0 -59
  19. data/src/main/java/org/embulk/input/jdbc/getter/BooleanColumnGetter.java +0 -56
  20. data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetter.java +0 -21
  21. data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetterFactory.java +0 -207
  22. data/src/main/java/org/embulk/input/jdbc/getter/DateColumnGetter.java +0 -37
  23. data/src/main/java/org/embulk/input/jdbc/getter/DoubleColumnGetter.java +0 -66
  24. data/src/main/java/org/embulk/input/jdbc/getter/FloatColumnGetter.java +0 -66
  25. data/src/main/java/org/embulk/input/jdbc/getter/JsonColumnGetter.java +0 -57
  26. data/src/main/java/org/embulk/input/jdbc/getter/LongColumnGetter.java +0 -70
  27. data/src/main/java/org/embulk/input/jdbc/getter/StringColumnGetter.java +0 -96
  28. data/src/main/java/org/embulk/input/jdbc/getter/TimeColumnGetter.java +0 -37
  29. data/src/main/java/org/embulk/input/jdbc/getter/TimestampColumnGetter.java +0 -36
  30. data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithTimeZoneIncrementalHandler.java +0 -83
  31. data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithoutTimeZoneIncrementalHandler.java +0 -75
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: c3fb336d83e38770353f87f111859738619bb3e6
- data.tar.gz: 7fe07d1b0fd72c8e08ba963e21b590c26242da79
+ metadata.gz: ad4b3382cf370da50743fdef54e85cbaf9ee3c6b
+ data.tar.gz: 8efd48a28b3dc003aff73c4253abccccdc465df9
  SHA512:
- metadata.gz: 4483e0985f1741775a6abbf8a9183231be12f52b5f28dc278c850fbf926fba5a2090e9fd660fd6d5b93782dec551b644828e6b767c096254ccc269deb755728f
- data.tar.gz: bb989515c90ebdd73c651099fcfe6b136d1bfcdc1a5c30c979bbb5f0b3e2d782b8212110cb282f3129888bd347cc6e9878ae5ab2deb4118d108d58201a765f2c
+ metadata.gz: b8dcea7434f50c191421d6bc79d91ab0c0725ead169e86fb40846906e243657cf722d01bba4f9437dfe8e1d40ca901da97583696f42ac8cce268a904038fd1a4
+ data.tar.gz: 69d636d9f53a252db12ff0106e1dfa9b3cfcb00f7c8de005f70137007f850272b54519154dd4db5cf6f3bd35b327dcfddf421389854e6256d1d4aad0358b54f8
data/.gitignore CHANGED
@@ -10,4 +10,5 @@ build/
  /.metadata/
  .classpath
  .project
- bin
+ bin/
+ .vscode/
data/build.gradle CHANGED
@@ -10,12 +10,13 @@ repositories {
  jcenter()
  // for athena jdbc
  maven { url "https://maven.atlassian.com/repository/public" }
+ maven { url "https://dl.bintray.com/embulk-input-jdbc/maven" }
  }
  configurations {
  provided
  }
 
- version = "0.1.0"
+ version = "0.1.1"
 
  sourceCompatibility = 1.8
  targetCompatibility = 1.8
@@ -24,15 +25,18 @@ dependencies {
  compile "org.embulk:embulk-core:0.8.39"
  provided "org.embulk:embulk-core:0.8.39"
  // https://mvnrepository.com/artifact/com.amazonaws.athena.jdbc/AthenaJDBC41
- // compile group: 'com.amazonaws.athena.jdbc', name: 'AthenaJDBC41', version: '1.0.1-atlassian-hosted'
- compile files ('build/AthenaJDBC41-1.1.0.jar')
+ // TODO: update jdbc
+ compile group: 'com.amazonaws.athena.jdbc', name: 'AthenaJDBC41', version: '1.0.1-atlassian-hosted'
+ //compile files ('build/AthenaJDBC41-1.1.0.jar')
  compile group: 'com.amazonaws', name: 'aws-java-sdk', version: '1.11.301'
+ compile 'org.embulk.input.jdbc:embulk-input-jdbc:0.9.1'
  testCompile "junit:junit:4.+"
  }
 
  task classpath(type: Copy, dependsOn: ["jar"]) {
  doFirst { file("classpath").deleteDir() }
  from (configurations.runtime - configurations.provided + files(jar.archivePath))
+ // from ("build/AthenaJDBC41-1.1.0.jar'")
  into "classpath"
  }
  clean { delete "classpath" }
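
Note: this release stops compiling against a jar checked into build/ and instead resolves AthenaJDBC41 1.0.1 from the Atlassian-hosted Maven repository, alongside the shared JDBC base classes from the new embulk-input-jdbc 0.9.1 dependency (hence the added bintray repository above). The sketch below shows what the resolved driver is expected to provide. It is a hedged smoke test, not part of the plugin: the driver class name com.amazonaws.athena.jdbc.AthenaDriver, the jdbc:awsathena:// URL scheme, and the s3_staging_dir property are assumptions about the 1.0.x AthenaJDBC41 driver, and the region, bucket, and credentials are placeholders.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Properties;

public class AthenaDriverSmokeTest
{
    public static void main(String[] args) throws Exception
    {
        // Assumed driver class name for the 1.0.x AthenaJDBC41 driver.
        Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");

        Properties props = new Properties();
        props.setProperty("user", "YOUR_AWS_ACCESS_KEY_ID");         // placeholder
        props.setProperty("password", "YOUR_AWS_SECRET_ACCESS_KEY"); // placeholder
        // Athena writes query results to S3, so a staging dir is required.
        props.setProperty("s3_staging_dir", "s3://your-bucket/athena-results/");

        try (Connection conn = DriverManager.getConnection(
                "jdbc:awsathena://athena.us-east-1.amazonaws.com:443", props);
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("SELECT 1")) {
            rs.next();
            System.out.println("Athena answered: " + rs.getInt(1));
        }
    }
}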
data/src/main/java/org/embulk/input/athena/AthenaInputConnection.java CHANGED
@@ -17,6 +17,7 @@ public class AthenaInputConnection
  throws SQLException
  {
  super(connection, null);
+ connection.setAutoCommit(true);
  }
 
  @Override
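
This is the single code change in the release: the Athena connection is pinned to auto-commit mode. Athena does not support transactions, so leaving auto-commit off and later issuing commit() (as the after_select path of the vendored AbstractJdbcInputPlugin below does) can fail against the Athena driver. A minimal sketch of the resulting constructor, assuming the class extends JdbcInputConnection as the super(connection, null) call suggests; the rest of the file is not shown in this diff.

import java.sql.Connection;
import java.sql.SQLException;

import org.embulk.input.jdbc.JdbcInputConnection;

public class AthenaInputConnection
        extends JdbcInputConnection
{
    public AthenaInputConnection(Connection connection)
            throws SQLException
    {
        super(connection, null);         // no schema name for Athena
        connection.setAutoCommit(true);  // Athena is non-transactional
    }
}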
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: embulk-input-athena
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.1.1
  platform: ruby
  authors:
  - shinji19
@@ -61,33 +61,8 @@ files:
  - src/main/java/org/embulk/input/athena/AthenaInputConnection.java
  - src/main/java/org/embulk/input/athena/AthenaInputPlugin.java
  - src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1
- - src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java
- - src/main/java/org/embulk/input/jdbc/JdbcColumn.java
- - src/main/java/org/embulk/input/jdbc/JdbcColumnOption.java
- - src/main/java/org/embulk/input/jdbc/JdbcInputConnection.java
- - src/main/java/org/embulk/input/jdbc/JdbcLiteral.java
- - src/main/java/org/embulk/input/jdbc/JdbcSchema.java
- - src/main/java/org/embulk/input/jdbc/Ssl.java
- - src/main/java/org/embulk/input/jdbc/ToString.java
- - src/main/java/org/embulk/input/jdbc/ToStringMap.java
- - src/main/java/org/embulk/input/jdbc/getter/AbstractColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/AbstractIncrementalHandler.java
- - src/main/java/org/embulk/input/jdbc/getter/AbstractTimestampColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/BigDecimalColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/BooleanColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/ColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/ColumnGetterFactory.java
- - src/main/java/org/embulk/input/jdbc/getter/DateColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/DoubleColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/FloatColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/JsonColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/LongColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/StringColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/TimeColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/TimestampColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/TimestampWithTimeZoneIncrementalHandler.java
- - src/main/java/org/embulk/input/jdbc/getter/TimestampWithoutTimeZoneIncrementalHandler.java
  - src/test/java/org/embulk/input/athena/TestAthenaInputPlugin.java
+ - classpath/AthenaJDBC41-1.0.1-atlassian-hosted.jar
  - classpath/aws-java-sdk-1.11.301.jar
  - classpath/aws-java-sdk-acm-1.11.301.jar
  - classpath/aws-java-sdk-alexaforbusiness-1.11.301.jar
@@ -217,7 +192,8 @@ files:
  - classpath/aws-java-sdk-xray-1.11.301.jar
  - classpath/commons-codec-1.10.jar
  - classpath/commons-logging-1.2.jar
- - classpath/embulk-input-athena-0.1.0.jar
+ - classpath/embulk-input-athena-0.1.1.jar
+ - classpath/embulk-input-jdbc-0.9.1.jar
  - classpath/httpclient-4.5.5.jar
  - classpath/httpcore-4.4.9.jar
  - classpath/ion-java-1.0.2.jar
data/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java DELETED
@@ -1,674 +0,0 @@
- package org.embulk.input.jdbc;
-
- import java.io.File;
- import java.io.FileFilter;
- import java.net.MalformedURLException;
- import java.net.URISyntaxException;
- import java.net.URL;
- import java.nio.file.Path;
- import java.util.List;
- import java.util.Map;
- import java.util.Properties;
- import java.nio.file.Paths;
- import java.sql.ResultSet;
- import java.sql.SQLException;
-
- import org.slf4j.Logger;
-
- import com.fasterxml.jackson.databind.JsonNode;
- import com.google.common.base.Optional;
- import com.google.common.base.Supplier;
- import com.google.common.base.Throwables;
- import com.google.common.collect.ImmutableList;
-
- import org.embulk.config.Config;
- import org.embulk.config.ConfigException;
- import org.embulk.config.ConfigDefault;
- import org.embulk.config.ConfigDiff;
- import org.embulk.config.ConfigInject;
- import org.embulk.config.ConfigSource;
- import org.embulk.config.Task;
- import org.embulk.config.TaskReport;
- import org.embulk.config.TaskSource;
- import org.embulk.plugin.PluginClassLoader;
- import org.embulk.spi.BufferAllocator;
- import org.embulk.spi.Column;
- import org.embulk.spi.DataException;
- import org.embulk.spi.PageBuilder;
- import org.embulk.spi.InputPlugin;
- import org.embulk.spi.PageOutput;
- import org.embulk.spi.Schema;
- import org.embulk.spi.Exec;
- import org.embulk.input.jdbc.getter.ColumnGetter;
- import org.embulk.input.jdbc.getter.ColumnGetterFactory;
- import org.embulk.input.jdbc.JdbcInputConnection.BatchSelect;
- import org.embulk.input.jdbc.JdbcInputConnection.PreparedQuery;
- import org.joda.time.DateTimeZone;
-
- import static java.util.Locale.ENGLISH;
-
- public abstract class AbstractJdbcInputPlugin
- implements InputPlugin
- {
- protected final Logger logger = Exec.getLogger(getClass());
-
- public interface PluginTask extends Task
- {
- @Config("options")
- @ConfigDefault("{}")
- public ToStringMap getOptions();
-
- @Config("table")
- @ConfigDefault("null")
- public Optional<String> getTable();
- public void setTable(Optional<String> normalizedTableName);
-
- @Config("query")
- @ConfigDefault("null")
- public Optional<String> getQuery();
-
- @Config("select")
- @ConfigDefault("null")
- public Optional<String> getSelect();
-
- @Config("where")
- @ConfigDefault("null")
- public Optional<String> getWhere();
-
- @Config("order_by")
- @ConfigDefault("null")
- public Optional<String> getOrderBy();
-
- @Config("incremental")
- @ConfigDefault("false")
- public boolean getIncremental();
-
- @Config("incremental_columns")
- @ConfigDefault("[]")
- public List<String> getIncrementalColumns();
- public void setIncrementalColumns(List<String> indexes);
-
- @Config("last_record")
- @ConfigDefault("null")
- public Optional<List<JsonNode>> getLastRecord();
-
- // TODO limit_value is necessary to make sure repeated bulk load transactions
- // don't a same record twice or miss records when the column
- // specified at order_by parameter is not unique.
- // For example, if the order_by column is "timestamp created_at"
- // column whose precision is second, the table can include multiple
- // records with the same created_at time. At the first bulk load
- // transaction, it loads a record with created_at=2015-01-02 00:00:02.
- // Then next transaction will use WHERE created_at > '2015-01-02 00:00:02'.
- // However, if another record with created_at=2014-01-01 23:59:59 is
- // inserted between the 2 transactions, the new record will be skipped.
- // To prevent this scenario, we want to specify
- // limit_value=2015-01-02 00:00:00 (exclusive). With this way, as long as
- // a transaction runs after 2015-01-02 00:00:00 + some minutes, we don't
- // skip records. Ideally, to automate the scheduling, we want to set
- // limit_value="today".
- //
- //@Config("limit_value")
- //@ConfigDefault("null")
- //public Optional<String> getLimitValue();
-
- //// TODO probably limit_rows is unnecessary as long as this has
- // supports parallel execution (partition_by option) and resuming.
- //@Config("limit_rows")
- //@ConfigDefault("null")
- //public Optional<Integer> getLimitRows();
-
- @Config("connect_timeout")
- @ConfigDefault("300")
- public int getConnectTimeout();
-
- @Config("socket_timeout")
- @ConfigDefault("1800")
- public int getSocketTimeout();
-
- @Config("fetch_rows")
- @ConfigDefault("10000")
- // TODO set minimum number
- public int getFetchRows();
-
- // TODO parallel execution using "partition_by" config
-
- @Config("column_options")
- @ConfigDefault("{}")
- public Map<String, JdbcColumnOption> getColumnOptions();
-
- @Config("default_timezone")
- @ConfigDefault("\"UTC\"")
- public DateTimeZone getDefaultTimeZone();
-
- @Config("default_column_options")
- @ConfigDefault("{}")
- public Map<String, JdbcColumnOption> getDefaultColumnOptions();
-
- @Config("after_select")
- @ConfigDefault("null")
- public Optional<String> getAfterSelect();
-
- public PreparedQuery getBuiltQuery();
- public void setBuiltQuery(PreparedQuery query);
-
- public JdbcSchema getQuerySchema();
- public void setQuerySchema(JdbcSchema schema);
-
- public List<Integer> getIncrementalColumnIndexes();
- public void setIncrementalColumnIndexes(List<Integer> indexes);
-
- @ConfigInject
- public BufferAllocator getBufferAllocator();
- }
-
- // for subclasses to add @Config
- protected Class<? extends PluginTask> getTaskClass()
- {
- return PluginTask.class;
- }
-
- protected abstract JdbcInputConnection newConnection(PluginTask task) throws SQLException;
-
- @Override
- public ConfigDiff transaction(ConfigSource config,
- InputPlugin.Control control)
- {
- PluginTask task = config.loadConfig(getTaskClass());
-
- if (task.getIncremental()) {
- if (task.getOrderBy().isPresent()) {
- throw new ConfigException("order_by option must not be set if incremental is true");
- }
- }
- else {
- if (!task.getIncrementalColumns().isEmpty()) {
- throw new ConfigException("'incremental: true' must be set if incremental_columns is set");
- }
- }
-
- Schema schema;
- try (JdbcInputConnection con = newConnection(task)) {
- con.showDriverVersion();
-
- // TODO incremental_columns is not set => get primary key
- schema = setupTask(con, task);
- } catch (SQLException ex) {
- throw Throwables.propagate(ex);
- }
-
- return buildNextConfigDiff(task, control.run(task.dump(), schema, 1));
- }
-
- protected Schema setupTask(JdbcInputConnection con, PluginTask task) throws SQLException
- {
- if (task.getTable().isPresent()) {
- String actualTableName = normalizeTableNameCase(con, task.getTable().get());
- task.setTable(Optional.of(actualTableName));
- }
-
- // build SELECT query and gets schema of its result
- String rawQuery = getRawQuery(task, con);
-
- JdbcSchema querySchema = con.getSchemaOfQuery(rawQuery);
- task.setQuerySchema(querySchema);
- // query schema should not change after incremental query
-
- PreparedQuery preparedQuery;
- if (task.getIncremental()) {
- // build incremental query
-
- List<String> incrementalColumns = task.getIncrementalColumns();
- if (incrementalColumns.isEmpty()) {
- // incremental_columns is not set
- if (!task.getTable().isPresent()) {
- throw new ConfigException("incremental_columns option must be set if incremental is true and custom query option is set");
- }
- // get primary keys from the target table to use them as incremental_columns
- List<String> primaryKeys = con.getPrimaryKeys(task.getTable().get());
- if (primaryKeys.isEmpty()) {
- throw new ConfigException(String.format(ENGLISH,
- "Primary key is not available at the table '%s'. incremental_columns option must be set",
- task.getTable().get()));
- }
- logger.info("Using primary keys as incremental_columns: {}", primaryKeys);
- task.setIncrementalColumns(primaryKeys);
- incrementalColumns = primaryKeys;
- }
-
- List<Integer> incrementalColumnIndexes = findIncrementalColumnIndexes(querySchema, incrementalColumns);
- task.setIncrementalColumnIndexes(incrementalColumnIndexes);
-
- List<JsonNode> lastRecord;
- if (task.getLastRecord().isPresent()) {
- lastRecord = task.getLastRecord().get();
- if (lastRecord.size() != incrementalColumnIndexes.size()) {
- throw new ConfigException("Number of values set at last_record must be same with number of columns set at incremental_columns");
- }
- }
- else {
- lastRecord = null;
- }
-
- if (task.getQuery().isPresent()) {
- preparedQuery = con.wrapIncrementalQuery(rawQuery, querySchema, incrementalColumnIndexes, lastRecord);
- }
- else {
- preparedQuery = con.rebuildIncrementalQuery(
- task.getTable().get(), task.getSelect(),
- task.getWhere(),
- querySchema, incrementalColumnIndexes, lastRecord);
- }
- }
- else {
- task.setIncrementalColumnIndexes(ImmutableList.<Integer>of());
- preparedQuery = new PreparedQuery(rawQuery, ImmutableList.<JdbcLiteral>of());
- }
-
- task.setBuiltQuery(preparedQuery);
-
- // validate column_options
- newColumnGetters(con, task, querySchema, null);
-
- ColumnGetterFactory factory = newColumnGetterFactory(null, task.getDefaultTimeZone());
- ImmutableList.Builder<Column> columns = ImmutableList.builder();
- for (int i = 0; i < querySchema.getCount(); i++) {
- JdbcColumn column = querySchema.getColumn(i);
- JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), column, factory.getJdbcType(column.getSqlType()));
- columns.add(new Column(i,
- column.getName(),
- factory.newColumnGetter(con, task, column, columnOption).getToType()));
- }
- return new Schema(columns.build());
- }
-
- private String normalizeTableNameCase(JdbcInputConnection con, String tableName)
- throws SQLException
- {
- if (con.tableExists(tableName)) {
- return tableName;
- } else {
- String upperTableName = tableName.toUpperCase();
- String lowerTableName = tableName.toLowerCase();
- boolean upperExists = con.tableExists(upperTableName);
- boolean lowerExists = con.tableExists(lowerTableName);
- if (upperExists && lowerExists) {
- throw new ConfigException(String.format("Cannot specify table '%s' because both '%s' and '%s' exist.",
- tableName, upperTableName, lowerTableName));
- } else if (upperExists) {
- return upperTableName;
- } else if (lowerExists) {
- return lowerTableName;
- } else {
- // fallback to the given table name. this may throw error later at getSchemaOfQuery
- return tableName;
- }
- }
- }
-
- private List<Integer> findIncrementalColumnIndexes(JdbcSchema schema, List<String> incrementalColumns)
- throws SQLException
- {
- ImmutableList.Builder<Integer> builder = ImmutableList.builder();
- for (String name : incrementalColumns) {
- Optional<Integer> index = schema.findColumn(name);
- if (index.isPresent()) {
- builder.add(index.get());
- }
- else {
- throw new ConfigException(String.format(ENGLISH,
- "Column name '%s' is in incremental_columns option does not exist",
- name));
- }
- }
- return builder.build();
- }
-
- private String getRawQuery(PluginTask task, JdbcInputConnection con) throws SQLException
- {
- if (task.getQuery().isPresent()) {
- if (task.getTable().isPresent() || task.getSelect().isPresent() ||
- task.getWhere().isPresent() || task.getOrderBy().isPresent()) {
- throw new ConfigException("'table', 'select', 'where' and 'order_by' parameters are unnecessary if 'query' parameter is set.");
- } else if (!task.getIncrementalColumns().isEmpty() || task.getLastRecord().isPresent()) {
- throw new ConfigException("'incremental_columns' and 'last_record' parameters are not supported if 'query' parameter is set.");
- }
- return task.getQuery().get();
- } else if (task.getTable().isPresent()) {
- return con.buildSelectQuery(task.getTable().get(), task.getSelect(),
- task.getWhere(), task.getOrderBy());
- } else {
- throw new ConfigException("'table' or 'query' parameter is required");
- }
- }
-
- @Override
- public ConfigDiff resume(TaskSource taskSource,
- Schema schema, int taskCount,
- InputPlugin.Control control)
- {
- PluginTask task = taskSource.loadTask(getTaskClass());
-
- // TODO when parallel execution is implemented and enabled, (maybe) order_by
- // is necessary to resume. transaction() gets the range of order_by
- // colum and set it to WHERE condition to make the operation deterministic
-
- return buildNextConfigDiff(task, control.run(taskSource, schema, taskCount));
- }
-
- public ConfigDiff guess(ConfigSource config)
- {
- return Exec.newConfigDiff();
- }
-
- protected ConfigDiff buildNextConfigDiff(PluginTask task, List<TaskReport> reports)
- {
- ConfigDiff next = Exec.newConfigDiff();
- if (reports.size() > 0 && reports.get(0).has("last_record")) {
- next.set("last_record", reports.get(0).get(JsonNode.class, "last_record"));
- } else if (task.getLastRecord().isPresent()) {
- next.set("last_record", task.getLastRecord().get());
- }
- return next;
- }
-
- @Override
- public void cleanup(TaskSource taskSource,
- Schema schema, int taskCount,
- List<TaskReport> successTaskReports)
- {
- // do nothing
- }
-
- private static class LastRecordStore
- {
- private final List<Integer> columnIndexes;
- private final JsonNode[] lastValues;
- private final List<String> columnNames;
-
- public LastRecordStore(List<Integer> columnIndexes, List<String> columnNames)
- {
- this.columnIndexes = columnIndexes;
- this.lastValues = new JsonNode[columnIndexes.size()];
- this.columnNames = columnNames;
- }
-
- public void accept(List<ColumnGetter> getters)
- throws SQLException
- {
- for (int i = 0; i < columnIndexes.size(); i++) {
- lastValues[i] = getters.get(columnIndexes.get(i)).encodeToJson();
- }
- }
-
- public List<JsonNode> getList()
- {
- ImmutableList.Builder<JsonNode> builder = ImmutableList.builder();
- for (int i = 0; i < lastValues.length; i++) {
- if (lastValues[i] == null || lastValues[i].isNull()) {
- throw new DataException(String.format(ENGLISH,
- "incremental_columns can't include null values but the last row is null at column '%s'",
- columnNames.get(i)));
- }
- builder.add(lastValues[i]);
- }
- return builder.build();
- }
- }
-
- @Override
- public TaskReport run(TaskSource taskSource,
- Schema schema, int taskIndex,
- PageOutput output)
- {
- PluginTask task = taskSource.loadTask(getTaskClass());
-
- PreparedQuery builtQuery = task.getBuiltQuery();
- JdbcSchema querySchema = task.getQuerySchema();
- BufferAllocator allocator = task.getBufferAllocator();
- PageBuilder pageBuilder = new PageBuilder(allocator, schema, output);
-
- long totalRows = 0;
-
- LastRecordStore lastRecordStore = null;
-
- try (JdbcInputConnection con = newConnection(task)) {
- List<ColumnGetter> getters = newColumnGetters(con, task, querySchema, pageBuilder);
- try (BatchSelect cursor = con.newSelectCursor(builtQuery, getters, task.getFetchRows(), task.getSocketTimeout())) {
- while (true) {
- long rows = fetch(cursor, getters, pageBuilder);
- if (rows <= 0L) {
- break;
- }
- totalRows += rows;
- }
- }
-
- if (task.getIncremental() && totalRows > 0) {
- lastRecordStore = new LastRecordStore(task.getIncrementalColumnIndexes(), task.getIncrementalColumns());
- lastRecordStore.accept(getters);
- }
-
- pageBuilder.finish();
-
- // after_select runs after pageBuilder.finish because pageBuilder.finish may fail.
- // TODO Output plugin's transaction might still fail. In that case, after_select is
- // already done but output plugin didn't commit the data to the target storage.
- // This means inconsistency between data source and destination. To avoid this
- // issue, we need another option like `after_commit` that runs after output plugin's
- // commit. after_commit can't run in the same transaction with SELECT. So,
- // after_select gets values and store them in TaskReport, and after_commit take
- // them as placeholder. Or, after_select puts values to an intermediate table, and
- // after_commit moves those values to the actual table.
- if (task.getAfterSelect().isPresent()) {
- con.executeUpdate(task.getAfterSelect().get());
- con.connection.commit();
- }
- } catch (SQLException ex) {
- throw Throwables.propagate(ex);
- }
-
- TaskReport report = Exec.newTaskReport();
- if (lastRecordStore != null) {
- report.set("last_record", lastRecordStore.getList());
- }
-
- return report;
- }
-
- protected ColumnGetterFactory newColumnGetterFactory(PageBuilder pageBuilder, DateTimeZone dateTimeZone)
- {
- return new ColumnGetterFactory(pageBuilder, dateTimeZone);
- }
-
- private List<ColumnGetter> newColumnGetters(JdbcInputConnection con, PluginTask task, JdbcSchema querySchema, PageBuilder pageBuilder)
- throws SQLException
- {
- ColumnGetterFactory factory = newColumnGetterFactory(pageBuilder, task.getDefaultTimeZone());
- ImmutableList.Builder<ColumnGetter> getters = ImmutableList.builder();
- for (JdbcColumn c : querySchema.getColumns()) {
- JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), c, factory.getJdbcType(c.getSqlType()));
- getters.add(factory.newColumnGetter(con, task, c, columnOption));
- }
- return getters.build();
- }
-
- private static JdbcColumnOption columnOptionOf(Map<String, JdbcColumnOption> columnOptions, Map<String, JdbcColumnOption> defaultColumnOptions, JdbcColumn targetColumn, String targetColumnSQLType)
- {
- JdbcColumnOption columnOption = columnOptions.get(targetColumn.getName());
- if (columnOption == null) {
- String foundName = null;
- for (Map.Entry<String, JdbcColumnOption> entry : columnOptions.entrySet()) {
- if (entry.getKey().equalsIgnoreCase(targetColumn.getName())) {
- if (columnOption != null) {
- throw new ConfigException(String.format("Cannot specify column '%s' because both '%s' and '%s' exist in column_options.",
- targetColumn.getName(), foundName, entry.getKey()));
- }
- foundName = entry.getKey();
- columnOption = entry.getValue();
- }
- }
- }
-
- return Optional
- .fromNullable(columnOption)
- .or(Optional.fromNullable(defaultColumnOptions.get(targetColumnSQLType)))
- .or(
- // default column option
- new Supplier<JdbcColumnOption>()
- {
- public JdbcColumnOption get()
- {
- return Exec.newConfigSource().loadConfig(JdbcColumnOption.class);
- }
- });
- }
-
- private long fetch(BatchSelect cursor,
- List<ColumnGetter> getters, PageBuilder pageBuilder) throws SQLException
- {
- ResultSet result = cursor.fetch();
- if (result == null || !result.next()) {
- return 0;
- }
-
- List<Column> columns = pageBuilder.getSchema().getColumns();
- long rows = 0;
- long reportRows = 500;
- do {
- for (int i=0; i < getters.size(); i++) {
- int index = i + 1; // JDBC column index begins from 1
- getters.get(i).getAndSet(result, index, columns.get(i));
- }
- pageBuilder.addRecord();
- rows++;
- if (rows % reportRows == 0) {
- logger.info(String.format("Fetched %,d rows.", rows));
- reportRows *= 2;
- }
- } while (result.next());
-
- return rows;
- }
-
- //// TODO move to embulk.spi.util?
- //private static class ListPageOutput
- //{
- // public ImmutableList.Builder<Page> pages;
- //
- // public ListPageOutput()
- // {
- // reset();
- // }
- //
- // @Override
- // public void add(Page page)
- // {
- // pages.add(page);
- // }
- //
- // @Override
- // public void finish()
- // {
- // }
- //
- // @Override
- // public void close()
- // {
- // }
- //
- // public List<Page> getPages()
- // {
- // return pages.build();
- // }
- //
- // public void reset()
- // {
- // pages = ImmutableList.builder();
- // }
- //}
-
- protected void loadDriver(String className, Optional<String> driverPath)
- {
- if (driverPath.isPresent()) {
- addDriverJarToClasspath(driverPath.get());
- } else {
- try {
- // Gradle test task will add JDBC driver to classpath
- Class.forName(className);
-
- } catch (ClassNotFoundException ex) {
- File root = findPluginRoot();
- File driverLib = new File(root, "default_jdbc_driver");
- File[] files = driverLib.listFiles(new FileFilter() {
- @Override
- public boolean accept(File file) {
- return file.isFile() && file.getName().endsWith(".jar");
- }
- });
- if (files == null || files.length == 0) {
- throw new RuntimeException("Cannot find JDBC driver in '" + root.getAbsolutePath() + "'.");
- } else {
- for (File file : files) {
- logger.info("JDBC Driver = " + file.getAbsolutePath());
- addDriverJarToClasspath(file.getAbsolutePath());
- }
- }
- }
- }
-
- // Load JDBC Driver
- try {
- Class.forName(className);
- } catch (ClassNotFoundException ex) {
- throw new RuntimeException(ex);
- }
- }
-
- protected void addDriverJarToClasspath(String glob)
- {
- // TODO match glob
- PluginClassLoader loader = (PluginClassLoader) getClass().getClassLoader();
- Path path = Paths.get(glob);
- if (!path.toFile().exists()) {
- throw new ConfigException("The specified driver jar doesn't exist: " + glob);
- }
- loader.addPath(Paths.get(glob));
- }
-
- protected File findPluginRoot()
- {
- try {
- URL url = getClass().getResource("/" + getClass().getName().replace('.', '/') + ".class");
- if (url.toString().startsWith("jar:")) {
- url = new URL(url.toString().replaceAll("^jar:", "").replaceAll("![^!]*$", ""));
- }
-
- File folder = new File(url.toURI()).getParentFile();
- for (;; folder = folder.getParentFile()) {
- if (folder == null) {
- throw new RuntimeException("Cannot find 'embulk-input-xxx' folder.");
- }
-
- if (folder.getName().startsWith("embulk-input-")) {
- return folder;
- }
- }
- } catch (MalformedURLException | URISyntaxException e) {
- throw new RuntimeException(e);
- }
- }
-
- protected void logConnectionProperties(String url, Properties props)
- {
- Properties maskedProps = new Properties();
- for(String key : props.stringPropertyNames()) {
- if (key.equals("password")) {
- maskedProps.setProperty(key, "***");
- } else {
- maskedProps.setProperty(key, props.getProperty(key));
- }
- }
- logger.info("Connecting to {} options {}", url, maskedProps);
- }
- }
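
From here the diff deletes the vendored copy of embulk-input-jdbc's sources (this file plus the 25 other org.embulk.input.jdbc files in the list above); in 0.1.1 the identical classes come from the embulk-input-jdbc-0.9.1.jar dependency added in build.gradle, so Athena-specific behavior is the only code left in this gem. For orientation, a sketch of how a concrete plugin satisfies the one abstract method of the class deleted above. This is an illustrative outline under assumptions, not the actual AthenaInputPlugin source: the driver class name, endpoint, and credentials are placeholders, and the real plugin would take them from its PluginTask configuration.

package org.embulk.input.athena;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

import com.google.common.base.Optional;

import org.embulk.input.jdbc.AbstractJdbcInputPlugin;
import org.embulk.input.jdbc.JdbcInputConnection;

public class AthenaInputPlugin
        extends AbstractJdbcInputPlugin
{
    @Override
    protected JdbcInputConnection newConnection(PluginTask task)
            throws SQLException
    {
        // loadDriver() is inherited from the (now external) base class shown
        // above; with an absent driver path it falls back to Class.forName.
        loadDriver("com.amazonaws.athena.jdbc.AthenaDriver", Optional.<String>absent());

        // Placeholder endpoint and credentials; the real plugin builds these
        // from its task configuration.
        Connection connection = DriverManager.getConnection(
                "jdbc:awsathena://athena.us-east-1.amazonaws.com:443",
                "YOUR_AWS_ACCESS_KEY_ID", "YOUR_AWS_SECRET_ACCESS_KEY");
        return new AthenaInputConnection(connection);
    }
}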