embulk-input-athena 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -1
  3. data/build.gradle +7 -3
  4. data/src/main/java/org/embulk/input/athena/AthenaInputConnection.java +1 -0
  5. metadata +4 -28
  6. data/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java +0 -674
  7. data/src/main/java/org/embulk/input/jdbc/JdbcColumn.java +0 -58
  8. data/src/main/java/org/embulk/input/jdbc/JdbcColumnOption.java +0 -31
  9. data/src/main/java/org/embulk/input/jdbc/JdbcInputConnection.java +0 -397
  10. data/src/main/java/org/embulk/input/jdbc/JdbcLiteral.java +0 -38
  11. data/src/main/java/org/embulk/input/jdbc/JdbcSchema.java +0 -55
  12. data/src/main/java/org/embulk/input/jdbc/Ssl.java +0 -37
  13. data/src/main/java/org/embulk/input/jdbc/ToString.java +0 -54
  14. data/src/main/java/org/embulk/input/jdbc/ToStringMap.java +0 -35
  15. data/src/main/java/org/embulk/input/jdbc/getter/AbstractColumnGetter.java +0 -105
  16. data/src/main/java/org/embulk/input/jdbc/getter/AbstractIncrementalHandler.java +0 -45
  17. data/src/main/java/org/embulk/input/jdbc/getter/AbstractTimestampColumnGetter.java +0 -38
  18. data/src/main/java/org/embulk/input/jdbc/getter/BigDecimalColumnGetter.java +0 -59
  19. data/src/main/java/org/embulk/input/jdbc/getter/BooleanColumnGetter.java +0 -56
  20. data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetter.java +0 -21
  21. data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetterFactory.java +0 -207
  22. data/src/main/java/org/embulk/input/jdbc/getter/DateColumnGetter.java +0 -37
  23. data/src/main/java/org/embulk/input/jdbc/getter/DoubleColumnGetter.java +0 -66
  24. data/src/main/java/org/embulk/input/jdbc/getter/FloatColumnGetter.java +0 -66
  25. data/src/main/java/org/embulk/input/jdbc/getter/JsonColumnGetter.java +0 -57
  26. data/src/main/java/org/embulk/input/jdbc/getter/LongColumnGetter.java +0 -70
  27. data/src/main/java/org/embulk/input/jdbc/getter/StringColumnGetter.java +0 -96
  28. data/src/main/java/org/embulk/input/jdbc/getter/TimeColumnGetter.java +0 -37
  29. data/src/main/java/org/embulk/input/jdbc/getter/TimestampColumnGetter.java +0 -36
  30. data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithTimeZoneIncrementalHandler.java +0 -83
  31. data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithoutTimeZoneIncrementalHandler.java +0 -75
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: c3fb336d83e38770353f87f111859738619bb3e6
- data.tar.gz: 7fe07d1b0fd72c8e08ba963e21b590c26242da79
+ metadata.gz: ad4b3382cf370da50743fdef54e85cbaf9ee3c6b
+ data.tar.gz: 8efd48a28b3dc003aff73c4253abccccdc465df9
  SHA512:
- metadata.gz: 4483e0985f1741775a6abbf8a9183231be12f52b5f28dc278c850fbf926fba5a2090e9fd660fd6d5b93782dec551b644828e6b767c096254ccc269deb755728f
- data.tar.gz: bb989515c90ebdd73c651099fcfe6b136d1bfcdc1a5c30c979bbb5f0b3e2d782b8212110cb282f3129888bd347cc6e9878ae5ab2deb4118d108d58201a765f2c
+ metadata.gz: b8dcea7434f50c191421d6bc79d91ab0c0725ead169e86fb40846906e243657cf722d01bba4f9437dfe8e1d40ca901da97583696f42ac8cce268a904038fd1a4
+ data.tar.gz: 69d636d9f53a252db12ff0106e1dfa9b3cfcb00f7c8de005f70137007f850272b54519154dd4db5cf6f3bd35b327dcfddf421389854e6256d1d4aad0358b54f8
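
These are the RubyGems digests of the two archives inside the published .gem file. To check a local copy against them, a minimal sketch using the JDK's built-in MessageDigest API (the file paths are illustrative, not part of this diff):

    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.security.MessageDigest;

    public class GemChecksum {
        // Hex-encode a digest for comparison with the values above.
        static String hex(byte[] digest) {
            StringBuilder sb = new StringBuilder();
            for (byte b : digest) sb.append(String.format("%02x", b));
            return sb.toString();
        }

        public static void main(String[] args) throws Exception {
            for (String name : new String[] {"metadata.gz", "data.tar.gz"}) {
                byte[] data = Files.readAllBytes(Paths.get(name));
                System.out.println(name + " SHA1:   " + hex(MessageDigest.getInstance("SHA-1").digest(data)));
                System.out.println(name + " SHA512: " + hex(MessageDigest.getInstance("SHA-512").digest(data)));
            }
        }
    }
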
data/.gitignore CHANGED
@@ -10,4 +10,5 @@ build/
  /.metadata/
  .classpath
  .project
- bin
+ bin/
+ .vscode/
data/build.gradle CHANGED
@@ -10,12 +10,13 @@ repositories {
  jcenter()
  // for athena jdbc
  maven { url "https://maven.atlassian.com/repository/public" }
+ maven { url "https://dl.bintray.com/embulk-input-jdbc/maven" }
  }
  configurations {
  provided
  }

- version = "0.1.0"
+ version = "0.1.1"

  sourceCompatibility = 1.8
  targetCompatibility = 1.8
@@ -24,15 +25,18 @@ dependencies {
  compile "org.embulk:embulk-core:0.8.39"
  provided "org.embulk:embulk-core:0.8.39"
  // https://mvnrepository.com/artifact/com.amazonaws.athena.jdbc/AthenaJDBC41
- // compile group: 'com.amazonaws.athena.jdbc', name: 'AthenaJDBC41', version: '1.0.1-atlassian-hosted'
- compile files ('build/AthenaJDBC41-1.1.0.jar')
+ // TODO: update jdbc
+ compile group: 'com.amazonaws.athena.jdbc', name: 'AthenaJDBC41', version: '1.0.1-atlassian-hosted'
+ //compile files ('build/AthenaJDBC41-1.1.0.jar')
  compile group: 'com.amazonaws', name: 'aws-java-sdk', version: '1.11.301'
+ compile 'org.embulk.input.jdbc:embulk-input-jdbc:0.9.1'
  testCompile "junit:junit:4.+"
  }

  task classpath(type: Copy, dependsOn: ["jar"]) {
  doFirst { file("classpath").deleteDir() }
  from (configurations.runtime - configurations.provided + files(jar.archivePath))
+ // from ("build/AthenaJDBC41-1.1.0.jar'")
  into "classpath"
  }
  clean { delete "classpath" }
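
With these changes, the Athena JDBC driver is resolved from the Atlassian-hosted Maven repository instead of a jar checked into build/, and the shared JDBC plumbing comes from the published embulk-input-jdbc 0.9.1 artifact (served from the Bintray repository added above) rather than from the vendored org.embulk.input.jdbc sources that are removed later in this diff.
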
data/src/main/java/org/embulk/input/athena/AthenaInputConnection.java CHANGED
@@ -17,6 +17,7 @@ public class AthenaInputConnection
  throws SQLException
  {
  super(connection, null);
+ connection.setAutoCommit(true);
  }

  @Override
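
This is the only source change in 0.1.1. The shared run() in AbstractJdbcInputPlugin (the deleted copy appears below) calls con.connection.commit() after after_select; enabling auto-commit presumably keeps that path from failing on Athena, which does not support transactions. A minimal sketch of the constructor after this hunk, with the signature reconstructed from the visible diff lines only:

    // Sketch; surrounding class context is assumed, not shown in this diff.
    public AthenaInputConnection(Connection connection)
            throws SQLException
    {
        super(connection, null);
        // Athena has no transaction support, so execute every statement in
        // auto-commit mode rather than relying on an explicit commit().
        connection.setAutoCommit(true);
    }
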
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: embulk-input-athena
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.1.1
  platform: ruby
  authors:
  - shinji19
@@ -61,33 +61,8 @@ files:
  - src/main/java/org/embulk/input/athena/AthenaInputConnection.java
  - src/main/java/org/embulk/input/athena/AthenaInputPlugin.java
  - src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1
- - src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java
- - src/main/java/org/embulk/input/jdbc/JdbcColumn.java
- - src/main/java/org/embulk/input/jdbc/JdbcColumnOption.java
- - src/main/java/org/embulk/input/jdbc/JdbcInputConnection.java
- - src/main/java/org/embulk/input/jdbc/JdbcLiteral.java
- - src/main/java/org/embulk/input/jdbc/JdbcSchema.java
- - src/main/java/org/embulk/input/jdbc/Ssl.java
- - src/main/java/org/embulk/input/jdbc/ToString.java
- - src/main/java/org/embulk/input/jdbc/ToStringMap.java
- - src/main/java/org/embulk/input/jdbc/getter/AbstractColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/AbstractIncrementalHandler.java
- - src/main/java/org/embulk/input/jdbc/getter/AbstractTimestampColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/BigDecimalColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/BooleanColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/ColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/ColumnGetterFactory.java
- - src/main/java/org/embulk/input/jdbc/getter/DateColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/DoubleColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/FloatColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/JsonColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/LongColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/StringColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/TimeColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/TimestampColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/TimestampWithTimeZoneIncrementalHandler.java
- - src/main/java/org/embulk/input/jdbc/getter/TimestampWithoutTimeZoneIncrementalHandler.java
  - src/test/java/org/embulk/input/athena/TestAthenaInputPlugin.java
+ - classpath/AthenaJDBC41-1.0.1-atlassian-hosted.jar
  - classpath/aws-java-sdk-1.11.301.jar
  - classpath/aws-java-sdk-acm-1.11.301.jar
  - classpath/aws-java-sdk-alexaforbusiness-1.11.301.jar
@@ -217,7 +192,8 @@ files:
  - classpath/aws-java-sdk-xray-1.11.301.jar
  - classpath/commons-codec-1.10.jar
  - classpath/commons-logging-1.2.jar
- - classpath/embulk-input-athena-0.1.0.jar
+ - classpath/embulk-input-athena-0.1.1.jar
+ - classpath/embulk-input-jdbc-0.9.1.jar
  - classpath/httpclient-4.5.5.jar
  - classpath/httpcore-4.4.9.jar
  - classpath/ion-java-1.0.2.jar
data/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java DELETED
@@ -1,674 +0,0 @@
- package org.embulk.input.jdbc;
-
- import java.io.File;
- import java.io.FileFilter;
- import java.net.MalformedURLException;
- import java.net.URISyntaxException;
- import java.net.URL;
- import java.nio.file.Path;
- import java.util.List;
- import java.util.Map;
- import java.util.Properties;
- import java.nio.file.Paths;
- import java.sql.ResultSet;
- import java.sql.SQLException;
-
- import org.slf4j.Logger;
-
- import com.fasterxml.jackson.databind.JsonNode;
- import com.google.common.base.Optional;
- import com.google.common.base.Supplier;
- import com.google.common.base.Throwables;
- import com.google.common.collect.ImmutableList;
-
- import org.embulk.config.Config;
- import org.embulk.config.ConfigException;
- import org.embulk.config.ConfigDefault;
- import org.embulk.config.ConfigDiff;
- import org.embulk.config.ConfigInject;
- import org.embulk.config.ConfigSource;
- import org.embulk.config.Task;
- import org.embulk.config.TaskReport;
- import org.embulk.config.TaskSource;
- import org.embulk.plugin.PluginClassLoader;
- import org.embulk.spi.BufferAllocator;
- import org.embulk.spi.Column;
- import org.embulk.spi.DataException;
- import org.embulk.spi.PageBuilder;
- import org.embulk.spi.InputPlugin;
- import org.embulk.spi.PageOutput;
- import org.embulk.spi.Schema;
- import org.embulk.spi.Exec;
- import org.embulk.input.jdbc.getter.ColumnGetter;
- import org.embulk.input.jdbc.getter.ColumnGetterFactory;
- import org.embulk.input.jdbc.JdbcInputConnection.BatchSelect;
- import org.embulk.input.jdbc.JdbcInputConnection.PreparedQuery;
- import org.joda.time.DateTimeZone;
-
- import static java.util.Locale.ENGLISH;
-
- public abstract class AbstractJdbcInputPlugin
- implements InputPlugin
- {
- protected final Logger logger = Exec.getLogger(getClass());
-
- public interface PluginTask extends Task
- {
- @Config("options")
- @ConfigDefault("{}")
- public ToStringMap getOptions();
-
- @Config("table")
- @ConfigDefault("null")
- public Optional<String> getTable();
- public void setTable(Optional<String> normalizedTableName);
-
- @Config("query")
- @ConfigDefault("null")
- public Optional<String> getQuery();
-
- @Config("select")
- @ConfigDefault("null")
- public Optional<String> getSelect();
-
- @Config("where")
- @ConfigDefault("null")
- public Optional<String> getWhere();
-
- @Config("order_by")
- @ConfigDefault("null")
- public Optional<String> getOrderBy();
-
- @Config("incremental")
- @ConfigDefault("false")
- public boolean getIncremental();
-
- @Config("incremental_columns")
- @ConfigDefault("[]")
- public List<String> getIncrementalColumns();
- public void setIncrementalColumns(List<String> indexes);
-
- @Config("last_record")
- @ConfigDefault("null")
- public Optional<List<JsonNode>> getLastRecord();
-
- // TODO limit_value is necessary to make sure repeated bulk load transactions
- // don't a same record twice or miss records when the column
- // specified at order_by parameter is not unique.
- // For example, if the order_by column is "timestamp created_at"
- // column whose precision is second, the table can include multiple
- // records with the same created_at time. At the first bulk load
- // transaction, it loads a record with created_at=2015-01-02 00:00:02.
- // Then next transaction will use WHERE created_at > '2015-01-02 00:00:02'.
- // However, if another record with created_at=2014-01-01 23:59:59 is
- // inserted between the 2 transactions, the new record will be skipped.
- // To prevent this scenario, we want to specify
- // limit_value=2015-01-02 00:00:00 (exclusive). With this way, as long as
- // a transaction runs after 2015-01-02 00:00:00 + some minutes, we don't
- // skip records. Ideally, to automate the scheduling, we want to set
- // limit_value="today".
- //
- //@Config("limit_value")
- //@ConfigDefault("null")
- //public Optional<String> getLimitValue();
-
- //// TODO probably limit_rows is unnecessary as long as this has
- // supports parallel execution (partition_by option) and resuming.
- //@Config("limit_rows")
- //@ConfigDefault("null")
- //public Optional<Integer> getLimitRows();
-
- @Config("connect_timeout")
- @ConfigDefault("300")
- public int getConnectTimeout();
-
- @Config("socket_timeout")
- @ConfigDefault("1800")
- public int getSocketTimeout();
-
- @Config("fetch_rows")
- @ConfigDefault("10000")
- // TODO set minimum number
- public int getFetchRows();
-
- // TODO parallel execution using "partition_by" config
-
- @Config("column_options")
- @ConfigDefault("{}")
- public Map<String, JdbcColumnOption> getColumnOptions();
-
- @Config("default_timezone")
- @ConfigDefault("\"UTC\"")
- public DateTimeZone getDefaultTimeZone();
-
- @Config("default_column_options")
- @ConfigDefault("{}")
- public Map<String, JdbcColumnOption> getDefaultColumnOptions();
-
- @Config("after_select")
- @ConfigDefault("null")
- public Optional<String> getAfterSelect();
-
- public PreparedQuery getBuiltQuery();
- public void setBuiltQuery(PreparedQuery query);
-
- public JdbcSchema getQuerySchema();
- public void setQuerySchema(JdbcSchema schema);
-
- public List<Integer> getIncrementalColumnIndexes();
- public void setIncrementalColumnIndexes(List<Integer> indexes);
-
- @ConfigInject
- public BufferAllocator getBufferAllocator();
- }
-
- // for subclasses to add @Config
- protected Class<? extends PluginTask> getTaskClass()
- {
- return PluginTask.class;
- }
-
- protected abstract JdbcInputConnection newConnection(PluginTask task) throws SQLException;
-
- @Override
- public ConfigDiff transaction(ConfigSource config,
- InputPlugin.Control control)
- {
- PluginTask task = config.loadConfig(getTaskClass());
-
- if (task.getIncremental()) {
- if (task.getOrderBy().isPresent()) {
- throw new ConfigException("order_by option must not be set if incremental is true");
- }
- }
- else {
- if (!task.getIncrementalColumns().isEmpty()) {
- throw new ConfigException("'incremental: true' must be set if incremental_columns is set");
- }
- }
-
- Schema schema;
- try (JdbcInputConnection con = newConnection(task)) {
- con.showDriverVersion();
-
- // TODO incremental_columns is not set => get primary key
- schema = setupTask(con, task);
- } catch (SQLException ex) {
- throw Throwables.propagate(ex);
- }
-
- return buildNextConfigDiff(task, control.run(task.dump(), schema, 1));
- }
-
- protected Schema setupTask(JdbcInputConnection con, PluginTask task) throws SQLException
- {
- if (task.getTable().isPresent()) {
- String actualTableName = normalizeTableNameCase(con, task.getTable().get());
- task.setTable(Optional.of(actualTableName));
- }
-
- // build SELECT query and gets schema of its result
- String rawQuery = getRawQuery(task, con);
-
- JdbcSchema querySchema = con.getSchemaOfQuery(rawQuery);
- task.setQuerySchema(querySchema);
- // query schema should not change after incremental query
-
- PreparedQuery preparedQuery;
- if (task.getIncremental()) {
- // build incremental query
-
- List<String> incrementalColumns = task.getIncrementalColumns();
- if (incrementalColumns.isEmpty()) {
- // incremental_columns is not set
- if (!task.getTable().isPresent()) {
- throw new ConfigException("incremental_columns option must be set if incremental is true and custom query option is set");
- }
- // get primary keys from the target table to use them as incremental_columns
- List<String> primaryKeys = con.getPrimaryKeys(task.getTable().get());
- if (primaryKeys.isEmpty()) {
- throw new ConfigException(String.format(ENGLISH,
- "Primary key is not available at the table '%s'. incremental_columns option must be set",
- task.getTable().get()));
- }
- logger.info("Using primary keys as incremental_columns: {}", primaryKeys);
- task.setIncrementalColumns(primaryKeys);
- incrementalColumns = primaryKeys;
- }
-
- List<Integer> incrementalColumnIndexes = findIncrementalColumnIndexes(querySchema, incrementalColumns);
- task.setIncrementalColumnIndexes(incrementalColumnIndexes);
-
- List<JsonNode> lastRecord;
- if (task.getLastRecord().isPresent()) {
- lastRecord = task.getLastRecord().get();
- if (lastRecord.size() != incrementalColumnIndexes.size()) {
- throw new ConfigException("Number of values set at last_record must be same with number of columns set at incremental_columns");
- }
- }
- else {
- lastRecord = null;
- }
-
- if (task.getQuery().isPresent()) {
- preparedQuery = con.wrapIncrementalQuery(rawQuery, querySchema, incrementalColumnIndexes, lastRecord);
- }
- else {
- preparedQuery = con.rebuildIncrementalQuery(
- task.getTable().get(), task.getSelect(),
- task.getWhere(),
- querySchema, incrementalColumnIndexes, lastRecord);
- }
- }
- else {
- task.setIncrementalColumnIndexes(ImmutableList.<Integer>of());
- preparedQuery = new PreparedQuery(rawQuery, ImmutableList.<JdbcLiteral>of());
- }
-
- task.setBuiltQuery(preparedQuery);
-
- // validate column_options
- newColumnGetters(con, task, querySchema, null);
-
- ColumnGetterFactory factory = newColumnGetterFactory(null, task.getDefaultTimeZone());
- ImmutableList.Builder<Column> columns = ImmutableList.builder();
- for (int i = 0; i < querySchema.getCount(); i++) {
- JdbcColumn column = querySchema.getColumn(i);
- JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), column, factory.getJdbcType(column.getSqlType()));
- columns.add(new Column(i,
- column.getName(),
- factory.newColumnGetter(con, task, column, columnOption).getToType()));
- }
- return new Schema(columns.build());
- }
-
- private String normalizeTableNameCase(JdbcInputConnection con, String tableName)
- throws SQLException
- {
- if (con.tableExists(tableName)) {
- return tableName;
- } else {
- String upperTableName = tableName.toUpperCase();
- String lowerTableName = tableName.toLowerCase();
- boolean upperExists = con.tableExists(upperTableName);
- boolean lowerExists = con.tableExists(lowerTableName);
- if (upperExists && lowerExists) {
- throw new ConfigException(String.format("Cannot specify table '%s' because both '%s' and '%s' exist.",
- tableName, upperTableName, lowerTableName));
- } else if (upperExists) {
- return upperTableName;
- } else if (lowerExists) {
- return lowerTableName;
- } else {
- // fallback to the given table name. this may throw error later at getSchemaOfQuery
- return tableName;
- }
- }
- }
-
- private List<Integer> findIncrementalColumnIndexes(JdbcSchema schema, List<String> incrementalColumns)
- throws SQLException
- {
- ImmutableList.Builder<Integer> builder = ImmutableList.builder();
- for (String name : incrementalColumns) {
- Optional<Integer> index = schema.findColumn(name);
- if (index.isPresent()) {
- builder.add(index.get());
- }
- else {
- throw new ConfigException(String.format(ENGLISH,
- "Column name '%s' is in incremental_columns option does not exist",
- name));
- }
- }
- return builder.build();
- }
-
- private String getRawQuery(PluginTask task, JdbcInputConnection con) throws SQLException
- {
- if (task.getQuery().isPresent()) {
- if (task.getTable().isPresent() || task.getSelect().isPresent() ||
- task.getWhere().isPresent() || task.getOrderBy().isPresent()) {
- throw new ConfigException("'table', 'select', 'where' and 'order_by' parameters are unnecessary if 'query' parameter is set.");
- } else if (!task.getIncrementalColumns().isEmpty() || task.getLastRecord().isPresent()) {
- throw new ConfigException("'incremental_columns' and 'last_record' parameters are not supported if 'query' parameter is set.");
- }
- return task.getQuery().get();
- } else if (task.getTable().isPresent()) {
- return con.buildSelectQuery(task.getTable().get(), task.getSelect(),
- task.getWhere(), task.getOrderBy());
- } else {
- throw new ConfigException("'table' or 'query' parameter is required");
- }
- }
-
- @Override
- public ConfigDiff resume(TaskSource taskSource,
- Schema schema, int taskCount,
- InputPlugin.Control control)
- {
- PluginTask task = taskSource.loadTask(getTaskClass());
-
- // TODO when parallel execution is implemented and enabled, (maybe) order_by
- // is necessary to resume. transaction() gets the range of order_by
- // colum and set it to WHERE condition to make the operation deterministic
-
- return buildNextConfigDiff(task, control.run(taskSource, schema, taskCount));
- }
-
- public ConfigDiff guess(ConfigSource config)
- {
- return Exec.newConfigDiff();
- }
-
- protected ConfigDiff buildNextConfigDiff(PluginTask task, List<TaskReport> reports)
- {
- ConfigDiff next = Exec.newConfigDiff();
- if (reports.size() > 0 && reports.get(0).has("last_record")) {
- next.set("last_record", reports.get(0).get(JsonNode.class, "last_record"));
- } else if (task.getLastRecord().isPresent()) {
- next.set("last_record", task.getLastRecord().get());
- }
- return next;
- }
-
- @Override
- public void cleanup(TaskSource taskSource,
- Schema schema, int taskCount,
- List<TaskReport> successTaskReports)
- {
- // do nothing
- }
-
- private static class LastRecordStore
- {
- private final List<Integer> columnIndexes;
- private final JsonNode[] lastValues;
- private final List<String> columnNames;
-
- public LastRecordStore(List<Integer> columnIndexes, List<String> columnNames)
- {
- this.columnIndexes = columnIndexes;
- this.lastValues = new JsonNode[columnIndexes.size()];
- this.columnNames = columnNames;
- }
-
- public void accept(List<ColumnGetter> getters)
- throws SQLException
- {
- for (int i = 0; i < columnIndexes.size(); i++) {
- lastValues[i] = getters.get(columnIndexes.get(i)).encodeToJson();
- }
- }
-
- public List<JsonNode> getList()
- {
- ImmutableList.Builder<JsonNode> builder = ImmutableList.builder();
- for (int i = 0; i < lastValues.length; i++) {
- if (lastValues[i] == null || lastValues[i].isNull()) {
- throw new DataException(String.format(ENGLISH,
- "incremental_columns can't include null values but the last row is null at column '%s'",
- columnNames.get(i)));
- }
- builder.add(lastValues[i]);
- }
- return builder.build();
- }
- }
-
- @Override
- public TaskReport run(TaskSource taskSource,
- Schema schema, int taskIndex,
- PageOutput output)
- {
- PluginTask task = taskSource.loadTask(getTaskClass());
-
- PreparedQuery builtQuery = task.getBuiltQuery();
- JdbcSchema querySchema = task.getQuerySchema();
- BufferAllocator allocator = task.getBufferAllocator();
- PageBuilder pageBuilder = new PageBuilder(allocator, schema, output);
-
- long totalRows = 0;
-
- LastRecordStore lastRecordStore = null;
-
- try (JdbcInputConnection con = newConnection(task)) {
- List<ColumnGetter> getters = newColumnGetters(con, task, querySchema, pageBuilder);
- try (BatchSelect cursor = con.newSelectCursor(builtQuery, getters, task.getFetchRows(), task.getSocketTimeout())) {
- while (true) {
- long rows = fetch(cursor, getters, pageBuilder);
- if (rows <= 0L) {
- break;
- }
- totalRows += rows;
- }
- }
-
- if (task.getIncremental() && totalRows > 0) {
- lastRecordStore = new LastRecordStore(task.getIncrementalColumnIndexes(), task.getIncrementalColumns());
- lastRecordStore.accept(getters);
- }
-
- pageBuilder.finish();
-
- // after_select runs after pageBuilder.finish because pageBuilder.finish may fail.
- // TODO Output plugin's transaction might still fail. In that case, after_select is
- // already done but output plugin didn't commit the data to the target storage.
- // This means inconsistency between data source and destination. To avoid this
- // issue, we need another option like `after_commit` that runs after output plugin's
- // commit. after_commit can't run in the same transaction with SELECT. So,
- // after_select gets values and store them in TaskReport, and after_commit take
- // them as placeholder. Or, after_select puts values to an intermediate table, and
- // after_commit moves those values to the actual table.
- if (task.getAfterSelect().isPresent()) {
- con.executeUpdate(task.getAfterSelect().get());
- con.connection.commit();
- }
- } catch (SQLException ex) {
- throw Throwables.propagate(ex);
- }
-
- TaskReport report = Exec.newTaskReport();
- if (lastRecordStore != null) {
- report.set("last_record", lastRecordStore.getList());
- }
-
- return report;
- }
-
- protected ColumnGetterFactory newColumnGetterFactory(PageBuilder pageBuilder, DateTimeZone dateTimeZone)
- {
- return new ColumnGetterFactory(pageBuilder, dateTimeZone);
- }
-
- private List<ColumnGetter> newColumnGetters(JdbcInputConnection con, PluginTask task, JdbcSchema querySchema, PageBuilder pageBuilder)
- throws SQLException
- {
- ColumnGetterFactory factory = newColumnGetterFactory(pageBuilder, task.getDefaultTimeZone());
- ImmutableList.Builder<ColumnGetter> getters = ImmutableList.builder();
- for (JdbcColumn c : querySchema.getColumns()) {
- JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), c, factory.getJdbcType(c.getSqlType()));
- getters.add(factory.newColumnGetter(con, task, c, columnOption));
- }
- return getters.build();
- }
-
- private static JdbcColumnOption columnOptionOf(Map<String, JdbcColumnOption> columnOptions, Map<String, JdbcColumnOption> defaultColumnOptions, JdbcColumn targetColumn, String targetColumnSQLType)
- {
- JdbcColumnOption columnOption = columnOptions.get(targetColumn.getName());
- if (columnOption == null) {
- String foundName = null;
- for (Map.Entry<String, JdbcColumnOption> entry : columnOptions.entrySet()) {
- if (entry.getKey().equalsIgnoreCase(targetColumn.getName())) {
- if (columnOption != null) {
- throw new ConfigException(String.format("Cannot specify column '%s' because both '%s' and '%s' exist in column_options.",
- targetColumn.getName(), foundName, entry.getKey()));
- }
- foundName = entry.getKey();
- columnOption = entry.getValue();
- }
- }
- }
-
- return Optional
- .fromNullable(columnOption)
- .or(Optional.fromNullable(defaultColumnOptions.get(targetColumnSQLType)))
- .or(
- // default column option
- new Supplier<JdbcColumnOption>()
- {
- public JdbcColumnOption get()
- {
- return Exec.newConfigSource().loadConfig(JdbcColumnOption.class);
- }
- });
- }
-
- private long fetch(BatchSelect cursor,
- List<ColumnGetter> getters, PageBuilder pageBuilder) throws SQLException
- {
- ResultSet result = cursor.fetch();
- if (result == null || !result.next()) {
- return 0;
- }
-
- List<Column> columns = pageBuilder.getSchema().getColumns();
- long rows = 0;
- long reportRows = 500;
- do {
- for (int i=0; i < getters.size(); i++) {
- int index = i + 1; // JDBC column index begins from 1
- getters.get(i).getAndSet(result, index, columns.get(i));
- }
- pageBuilder.addRecord();
- rows++;
- if (rows % reportRows == 0) {
- logger.info(String.format("Fetched %,d rows.", rows));
- reportRows *= 2;
- }
- } while (result.next());
-
- return rows;
- }
-
- //// TODO move to embulk.spi.util?
- //private static class ListPageOutput
- //{
- // public ImmutableList.Builder<Page> pages;
- //
- // public ListPageOutput()
- // {
- // reset();
- // }
- //
- // @Override
- // public void add(Page page)
- // {
- // pages.add(page);
- // }
- //
- // @Override
- // public void finish()
- // {
- // }
- //
- // @Override
- // public void close()
- // {
- // }
- //
- // public List<Page> getPages()
- // {
- // return pages.build();
- // }
- //
- // public void reset()
- // {
- // pages = ImmutableList.builder();
- // }
- //}
-
- protected void loadDriver(String className, Optional<String> driverPath)
- {
- if (driverPath.isPresent()) {
- addDriverJarToClasspath(driverPath.get());
- } else {
- try {
- // Gradle test task will add JDBC driver to classpath
- Class.forName(className);
-
- } catch (ClassNotFoundException ex) {
- File root = findPluginRoot();
- File driverLib = new File(root, "default_jdbc_driver");
- File[] files = driverLib.listFiles(new FileFilter() {
- @Override
- public boolean accept(File file) {
- return file.isFile() && file.getName().endsWith(".jar");
- }
- });
- if (files == null || files.length == 0) {
- throw new RuntimeException("Cannot find JDBC driver in '" + root.getAbsolutePath() + "'.");
- } else {
- for (File file : files) {
- logger.info("JDBC Driver = " + file.getAbsolutePath());
- addDriverJarToClasspath(file.getAbsolutePath());
- }
- }
- }
- }
-
- // Load JDBC Driver
- try {
- Class.forName(className);
- } catch (ClassNotFoundException ex) {
- throw new RuntimeException(ex);
- }
- }
-
- protected void addDriverJarToClasspath(String glob)
- {
- // TODO match glob
- PluginClassLoader loader = (PluginClassLoader) getClass().getClassLoader();
- Path path = Paths.get(glob);
- if (!path.toFile().exists()) {
- throw new ConfigException("The specified driver jar doesn't exist: " + glob);
- }
- loader.addPath(Paths.get(glob));
- }
-
- protected File findPluginRoot()
- {
- try {
- URL url = getClass().getResource("/" + getClass().getName().replace('.', '/') + ".class");
- if (url.toString().startsWith("jar:")) {
- url = new URL(url.toString().replaceAll("^jar:", "").replaceAll("![^!]*$", ""));
- }
-
- File folder = new File(url.toURI()).getParentFile();
- for (;; folder = folder.getParentFile()) {
- if (folder == null) {
- throw new RuntimeException("Cannot find 'embulk-input-xxx' folder.");
- }
-
- if (folder.getName().startsWith("embulk-input-")) {
- return folder;
- }
- }
- } catch (MalformedURLException | URISyntaxException e) {
- throw new RuntimeException(e);
- }
- }
-
- protected void logConnectionProperties(String url, Properties props)
- {
- Properties maskedProps = new Properties();
- for(String key : props.stringPropertyNames()) {
- if (key.equals("password")) {
- maskedProps.setProperty(key, "***");
- } else {
- maskedProps.setProperty(key, props.getProperty(key));
- }
- }
- logger.info("Connecting to {} options {}", url, maskedProps);
- }
- }
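
The deleted org.embulk.input.jdbc sources above now ship inside the embulk-input-jdbc-0.9.1.jar dependency instead of being vendored. A hypothetical sketch of what the plugin side then looks like; the class body, driver class name, and connection settings are illustrative (the real AthenaInputPlugin is not shown in this diff), and only the newConnection and loadDriver signatures come from the deleted base class above:

    package org.embulk.input.athena;

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.SQLException;
    import java.util.Properties;

    import com.google.common.base.Optional;
    import org.embulk.input.jdbc.AbstractJdbcInputPlugin;
    import org.embulk.input.jdbc.JdbcInputConnection;

    public class AthenaInputPlugin
            extends AbstractJdbcInputPlugin
    {
        @Override
        protected JdbcInputConnection newConnection(PluginTask task)
                throws SQLException
        {
            // loadDriver is inherited from AbstractJdbcInputPlugin (see above);
            // the driver class name is assumed here, not taken from this diff.
            loadDriver("com.amazonaws.athena.jdbc.AthenaDriver", Optional.<String>absent());

            // Illustrative settings; a real plugin reads the region, S3 staging
            // directory, and credentials from its own PluginTask configuration.
            Properties props = new Properties();
            props.setProperty("s3_staging_dir", "s3://example-bucket/athena-results/");
            props.setProperty("user", "YOUR_ACCESS_KEY_ID");
            props.setProperty("password", "YOUR_SECRET_ACCESS_KEY");
            Connection connection = DriverManager.getConnection(
                    "jdbc:awsathena://athena.us-east-1.amazonaws.com:443", props);
            return new AthenaInputConnection(connection);
        }
    }
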