embulk-input-athena 0.1.0 → 0.1.6

Files changed (36)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -1
  3. data/README.md +21 -9
  4. data/build.gradle +22 -11
  5. data/docker-compose.yml +7 -1
  6. data/src/main/java/org/embulk/input/athena/AthenaInputConnection.java +3 -0
  7. data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java +182 -33
  8. data/src/main/resources/log4j.properties +4 -0
  9. metadata +25 -49
  10. data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1 +0 -192
  11. data/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java +0 -674
  12. data/src/main/java/org/embulk/input/jdbc/JdbcColumn.java +0 -58
  13. data/src/main/java/org/embulk/input/jdbc/JdbcColumnOption.java +0 -31
  14. data/src/main/java/org/embulk/input/jdbc/JdbcInputConnection.java +0 -397
  15. data/src/main/java/org/embulk/input/jdbc/JdbcLiteral.java +0 -38
  16. data/src/main/java/org/embulk/input/jdbc/JdbcSchema.java +0 -55
  17. data/src/main/java/org/embulk/input/jdbc/Ssl.java +0 -37
  18. data/src/main/java/org/embulk/input/jdbc/ToString.java +0 -54
  19. data/src/main/java/org/embulk/input/jdbc/ToStringMap.java +0 -35
  20. data/src/main/java/org/embulk/input/jdbc/getter/AbstractColumnGetter.java +0 -105
  21. data/src/main/java/org/embulk/input/jdbc/getter/AbstractIncrementalHandler.java +0 -45
  22. data/src/main/java/org/embulk/input/jdbc/getter/AbstractTimestampColumnGetter.java +0 -38
  23. data/src/main/java/org/embulk/input/jdbc/getter/BigDecimalColumnGetter.java +0 -59
  24. data/src/main/java/org/embulk/input/jdbc/getter/BooleanColumnGetter.java +0 -56
  25. data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetter.java +0 -21
  26. data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetterFactory.java +0 -207
  27. data/src/main/java/org/embulk/input/jdbc/getter/DateColumnGetter.java +0 -37
  28. data/src/main/java/org/embulk/input/jdbc/getter/DoubleColumnGetter.java +0 -66
  29. data/src/main/java/org/embulk/input/jdbc/getter/FloatColumnGetter.java +0 -66
  30. data/src/main/java/org/embulk/input/jdbc/getter/JsonColumnGetter.java +0 -57
  31. data/src/main/java/org/embulk/input/jdbc/getter/LongColumnGetter.java +0 -70
  32. data/src/main/java/org/embulk/input/jdbc/getter/StringColumnGetter.java +0 -96
  33. data/src/main/java/org/embulk/input/jdbc/getter/TimeColumnGetter.java +0 -37
  34. data/src/main/java/org/embulk/input/jdbc/getter/TimestampColumnGetter.java +0 -36
  35. data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithTimeZoneIncrementalHandler.java +0 -83
  36. data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithoutTimeZoneIncrementalHandler.java +0 -75
data/src/main/resources/log4j.properties ADDED
@@ -0,0 +1,4 @@
+ log4j.rootLogger=INFO, ROOT
+
+ log4j.appender.ROOT=org.apache.log4j.varia.NullAppender
+ log4j.appender.ROOT.layout=org.apache.log4j.PatternLayout
metadata CHANGED
@@ -1,19 +1,19 @@
  --- !ruby/object:Gem::Specification
  name: embulk-input-athena
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.1.6
  platform: ruby
  authors:
  - shinji19
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-04-16 00:00:00.000000000 Z
+ date: 2021-02-17 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ~>
+ - - "~>"
  - !ruby/object:Gem::Version
  version: '1.0'
  name: bundler
@@ -21,13 +21,13 @@ dependencies:
  type: :development
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ~>
+ - - "~>"
  - !ruby/object:Gem::Version
  version: '1.0'
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '10.0'
  name: rake
@@ -35,7 +35,7 @@ dependencies:
  type: :development
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '10.0'
  description: Loads records from Athena.
@@ -45,49 +45,12 @@ executables: []
  extensions: []
  extra_rdoc_files: []
  files:
- - .gitignore
+ - ".gitignore"
  - Dockerfile
  - LICENSE
  - README.md
  - build.gradle
- - config/checkstyle/checkstyle.xml
- - config/checkstyle/default.xml
- - docker-compose.yml
- - gradle/wrapper/gradle-wrapper.jar
- - gradle/wrapper/gradle-wrapper.properties
- - gradlew
- - gradlew.bat
- - lib/embulk/input/athena.rb
- - src/main/java/org/embulk/input/athena/AthenaInputConnection.java
- - src/main/java/org/embulk/input/athena/AthenaInputPlugin.java
- - src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1
- - src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java
- - src/main/java/org/embulk/input/jdbc/JdbcColumn.java
- - src/main/java/org/embulk/input/jdbc/JdbcColumnOption.java
- - src/main/java/org/embulk/input/jdbc/JdbcInputConnection.java
- - src/main/java/org/embulk/input/jdbc/JdbcLiteral.java
- - src/main/java/org/embulk/input/jdbc/JdbcSchema.java
- - src/main/java/org/embulk/input/jdbc/Ssl.java
- - src/main/java/org/embulk/input/jdbc/ToString.java
- - src/main/java/org/embulk/input/jdbc/ToStringMap.java
- - src/main/java/org/embulk/input/jdbc/getter/AbstractColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/AbstractIncrementalHandler.java
- - src/main/java/org/embulk/input/jdbc/getter/AbstractTimestampColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/BigDecimalColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/BooleanColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/ColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/ColumnGetterFactory.java
- - src/main/java/org/embulk/input/jdbc/getter/DateColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/DoubleColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/FloatColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/JsonColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/LongColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/StringColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/TimeColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/TimestampColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/TimestampWithTimeZoneIncrementalHandler.java
- - src/main/java/org/embulk/input/jdbc/getter/TimestampWithoutTimeZoneIncrementalHandler.java
- - src/test/java/org/embulk/input/athena/TestAthenaInputPlugin.java
+ - classpath/AthenaJDBC41-1.1.0.jar
  - classpath/aws-java-sdk-1.11.301.jar
  - classpath/aws-java-sdk-acm-1.11.301.jar
  - classpath/aws-java-sdk-alexaforbusiness-1.11.301.jar
@@ -217,7 +180,8 @@ files:
  - classpath/aws-java-sdk-xray-1.11.301.jar
  - classpath/commons-codec-1.10.jar
  - classpath/commons-logging-1.2.jar
- - classpath/embulk-input-athena-0.1.0.jar
+ - classpath/embulk-input-athena-0.1.6.jar
+ - classpath/embulk-input-jdbc-0.9.1.jar
  - classpath/httpclient-4.5.5.jar
  - classpath/httpcore-4.4.9.jar
  - classpath/ion-java-1.0.2.jar
@@ -231,6 +195,18 @@ files:
  - classpath/netty-handler-4.1.17.Final.jar
  - classpath/netty-resolver-4.1.17.Final.jar
  - classpath/netty-transport-4.1.17.Final.jar
+ - config/checkstyle/checkstyle.xml
+ - config/checkstyle/default.xml
+ - docker-compose.yml
+ - gradle/wrapper/gradle-wrapper.jar
+ - gradle/wrapper/gradle-wrapper.properties
+ - gradlew
+ - gradlew.bat
+ - lib/embulk/input/athena.rb
+ - src/main/java/org/embulk/input/athena/AthenaInputConnection.java
+ - src/main/java/org/embulk/input/athena/AthenaInputPlugin.java
+ - src/main/resources/log4j.properties
+ - src/test/java/org/embulk/input/athena/TestAthenaInputPlugin.java
  homepage: https://github.com/shinji19/embulk-input-athena
  licenses:
  - MIT
@@ -241,17 +217,17 @@ require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.1.9
+ rubygems_version: 2.6.8
  signing_key:
  specification_version: 4
  summary: Athena input plugin for Embulk
data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1 DELETED
@@ -1,192 +0,0 @@
- package org.embulk.input.athena;
-
- import java.sql.Connection;
- import java.sql.DriverManager;
- import java.sql.ResultSet;
- import java.sql.SQLException;
- import java.sql.Statement;
- import java.util.List;
- import java.util.Properties;
-
- import com.google.common.base.Optional;
-
- import org.embulk.config.Config;
- import org.embulk.config.ConfigDefault;
- import org.embulk.config.ConfigDiff;
- import org.embulk.config.ConfigSource;
- import org.embulk.config.Task;
- import org.embulk.config.TaskReport;
- import org.embulk.config.TaskSource;
- import org.embulk.input.jdbc.AbstractJdbcInputPlugin;
- import org.embulk.input.jdbc.JdbcInputConnection;
- import org.embulk.spi.Exec;
- import org.embulk.spi.InputPlugin;
- import org.embulk.spi.PageOutput;
- import org.embulk.spi.Schema;
- import org.embulk.spi.SchemaConfig;
-
- public class AthenaInputPlugin
-         extends AbstractJdbcInputPlugin
- {
-     public interface AthenaPluginTask
-             extends AbstractJdbcInputPlugin.PluginTask
-     {
-         @Config("driver_path")
-         @ConfigDefault("null")
-         public Optional<String> getDriverPath();
-
-         // athena_url (required string)
-         @Config("athena_url")
-         public String getAthenaUrl();
-
-         // s3_staging_dir (required string)
-         @Config("s3_staging_dir")
-         public String getS3StagingDir();
-
-         // access_key (required string)
-         @Config("access_key")
-         public String getAccessKey();
-
-         // secret_key (required string)
-         @Config("secret_key")
-         public String getSecretKey();
-
-         // configuration option 2 (optional string, null is not allowed)
-         // @Config("option2")
-         // @ConfigDefault("\"myvalue\"")
-         // public String getOption2();
-
-         // configuration option 3 (optional string, null is allowed)
-         // @Config("option3")
-         // @ConfigDefault("null")
-         // public Optional<String> getOption3();
-
-         // if you get schema from config
-         // @Config("columns")
-         // public SchemaConfig getColumns();
-     }
-
-     @Override
-     protected Class<? extends PluginTask> getTaskClass() {
-         return AthenaPluginTask.class;
-     }
-
-     @Override
-     protected AthenaInputConnection newConnection(PluginTask pluginTask) throws SQLException {
-         AthenaPluginTask task = (AthenaPluginTask) pluginTask;
-         loadDriver("com.amazonaws.athena.jdbc.AthenaDriver", task.getDriverPath());
-         //Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");
-         Properties properties = new Properties();
-         properties.put("s3_staging_dir", task.getS3StagingDir());
-         properties.put("user", task.getAccessKey());
-         properties.put("password", task.getSecretKey());
-         properties.putAll(task.getOptions());
-
-         Connection connection = DriverManager.getConnection(task.getAthenaUrl(), properties);
-         try {
-             AthenaInputConnection c = new AthenaInputConnection(connection);
-             connection = null;
-             return c;
-         } finally {
-             if (connection != null) {
-                 connection.close();
-             }
-         }
-     }
-
-     /*
-     @Override
-     public ConfigDiff transaction(ConfigSource config,
-             InputPlugin.Control control)
-     {
-         PluginTask task = config.loadConfig(PluginTask.class);
-
-         // Schema schema = task.getColumns().toSchema();
-         Schema schema = Schema.builder().build();
-         int taskCount = 1; // number of run() method calls
-
-         return resume(task.dump(), schema, taskCount, control);
-     }
-
-     @Override
-     public ConfigDiff resume(TaskSource taskSource,
-             Schema schema, int taskCount,
-             InputPlugin.Control control)
-     {
-         control.run(taskSource, schema, taskCount);
-         return Exec.newConfigDiff();
-     }
-
-     @Override
-     public void cleanup(TaskSource taskSource,
-             Schema schema, int taskCount,
-             List<TaskReport> successTaskReports)
-     {
-     }
-
-     @Override
-     public TaskReport run(TaskSource taskSource,
-             Schema schema, int taskIndex,
-             PageOutput output)
-     {
-         PluginTask task = taskSource.loadTask(PluginTask.class);
-
-         // Write your code here :)
-
-         Connection connection = null;
-         Statement statement = null;
-         try {
-             connection = getAthenaConnection(task);
-             statement = connection.createStatement();
-             ResultSet resultSet = statement.executeQuery("select * from default.sample");
-
-             while(resultSet.next()){
-                 String statusCode = resultSet.getString("created_at");
-                 System.out.println("st code" + statusCode);
-             }
-             resultSet.close();
-             connection.close();
-         } catch (Exception e){
-             e.printStackTrace();
-         } finally {
-             try {
-                 if (statement != null)
-                     statement.close();
-             } catch (Exception ex) {
-
-             }
-             try {
-                 if (connection != null)
-                     connection.close();
-             } catch (Exception ex) {
-                 ex.printStackTrace();
-             }
-         }
-
-         return Exec.newTaskReport();
-     }
-
-     @Override
-     public ConfigDiff guess(ConfigSource config)
-     {
-         return Exec.newConfigDiff();
-     }
-     */
-     /*
-     protected Connection getAthenaConnection(PluginTask task) throws ClassNotFoundException, SQLException{
-         Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");
-         Properties properties = new Properties();
-         properties.put("s3_staging_dir", task.getS3StagingDir());
-         properties.put("user", task.getAccessKey());
-         properties.put("password", task.getSecretKey());
-         if (task.getLogPath() != null){
-             properties.put("log_path", task.getLogPath());
-         }
-         if (task.getLogLevel() != null){
-             properties.put("log_level", task.getLogLevel());
-         }
-
-         return DriverManager.getConnection(task.getAthenaUrl(), properties);
-     }
-     */
- }
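
The config keys this removed draft declares (athena_url, s3_staging_dir, access_key, secret_key, driver_path) presumably carry over to the live AthenaInputPlugin.java, so a minimal Embulk config for this release might look like the sketch below. The endpoint URL and staging bucket are hypothetical placeholders; `type: athena` assumes the usual Embulk plugin naming registered by lib/embulk/input/athena.rb, and the query is the one hard-coded in the draft above.

in:
  type: athena
  athena_url: jdbc:awsathena://athena.us-east-1.amazonaws.com:443   # hypothetical endpoint
  s3_staging_dir: s3://my-athena-results/staging                    # hypothetical bucket
  access_key: YOUR_AWS_ACCESS_KEY
  secret_key: YOUR_AWS_SECRET_KEY
  query: select * from default.sample
out:
  type: stdout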
data/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java DELETED
@@ -1,674 +0,0 @@
- package org.embulk.input.jdbc;
-
- import java.io.File;
- import java.io.FileFilter;
- import java.net.MalformedURLException;
- import java.net.URISyntaxException;
- import java.net.URL;
- import java.nio.file.Path;
- import java.util.List;
- import java.util.Map;
- import java.util.Properties;
- import java.nio.file.Paths;
- import java.sql.ResultSet;
- import java.sql.SQLException;
-
- import org.slf4j.Logger;
-
- import com.fasterxml.jackson.databind.JsonNode;
- import com.google.common.base.Optional;
- import com.google.common.base.Supplier;
- import com.google.common.base.Throwables;
- import com.google.common.collect.ImmutableList;
-
- import org.embulk.config.Config;
- import org.embulk.config.ConfigException;
- import org.embulk.config.ConfigDefault;
- import org.embulk.config.ConfigDiff;
- import org.embulk.config.ConfigInject;
- import org.embulk.config.ConfigSource;
- import org.embulk.config.Task;
- import org.embulk.config.TaskReport;
- import org.embulk.config.TaskSource;
- import org.embulk.plugin.PluginClassLoader;
- import org.embulk.spi.BufferAllocator;
- import org.embulk.spi.Column;
- import org.embulk.spi.DataException;
- import org.embulk.spi.PageBuilder;
- import org.embulk.spi.InputPlugin;
- import org.embulk.spi.PageOutput;
- import org.embulk.spi.Schema;
- import org.embulk.spi.Exec;
- import org.embulk.input.jdbc.getter.ColumnGetter;
- import org.embulk.input.jdbc.getter.ColumnGetterFactory;
- import org.embulk.input.jdbc.JdbcInputConnection.BatchSelect;
- import org.embulk.input.jdbc.JdbcInputConnection.PreparedQuery;
- import org.joda.time.DateTimeZone;
-
- import static java.util.Locale.ENGLISH;
-
- public abstract class AbstractJdbcInputPlugin
-         implements InputPlugin
- {
-     protected final Logger logger = Exec.getLogger(getClass());
-
-     public interface PluginTask extends Task
-     {
-         @Config("options")
-         @ConfigDefault("{}")
-         public ToStringMap getOptions();
-
-         @Config("table")
-         @ConfigDefault("null")
-         public Optional<String> getTable();
-         public void setTable(Optional<String> normalizedTableName);
-
-         @Config("query")
-         @ConfigDefault("null")
-         public Optional<String> getQuery();
-
-         @Config("select")
-         @ConfigDefault("null")
-         public Optional<String> getSelect();
-
-         @Config("where")
-         @ConfigDefault("null")
-         public Optional<String> getWhere();
-
-         @Config("order_by")
-         @ConfigDefault("null")
-         public Optional<String> getOrderBy();
-
-         @Config("incremental")
-         @ConfigDefault("false")
-         public boolean getIncremental();
-
-         @Config("incremental_columns")
-         @ConfigDefault("[]")
-         public List<String> getIncrementalColumns();
-         public void setIncrementalColumns(List<String> indexes);
-
-         @Config("last_record")
-         @ConfigDefault("null")
-         public Optional<List<JsonNode>> getLastRecord();
-
-         // TODO limit_value is necessary to make sure repeated bulk load transactions
-         // don't a same record twice or miss records when the column
-         // specified at order_by parameter is not unique.
-         // For example, if the order_by column is "timestamp created_at"
-         // column whose precision is second, the table can include multiple
-         // records with the same created_at time. At the first bulk load
-         // transaction, it loads a record with created_at=2015-01-02 00:00:02.
-         // Then next transaction will use WHERE created_at > '2015-01-02 00:00:02'.
-         // However, if another record with created_at=2014-01-01 23:59:59 is
-         // inserted between the 2 transactions, the new record will be skipped.
-         // To prevent this scenario, we want to specify
-         // limit_value=2015-01-02 00:00:00 (exclusive). With this way, as long as
-         // a transaction runs after 2015-01-02 00:00:00 + some minutes, we don't
-         // skip records. Ideally, to automate the scheduling, we want to set
-         // limit_value="today".
-         //
-         //@Config("limit_value")
-         //@ConfigDefault("null")
-         //public Optional<String> getLimitValue();
-
-         //// TODO probably limit_rows is unnecessary as long as this has
-         // supports parallel execution (partition_by option) and resuming.
-         //@Config("limit_rows")
-         //@ConfigDefault("null")
-         //public Optional<Integer> getLimitRows();
-
-         @Config("connect_timeout")
-         @ConfigDefault("300")
-         public int getConnectTimeout();
-
-         @Config("socket_timeout")
-         @ConfigDefault("1800")
-         public int getSocketTimeout();
-
-         @Config("fetch_rows")
-         @ConfigDefault("10000")
-         // TODO set minimum number
-         public int getFetchRows();
-
-         // TODO parallel execution using "partition_by" config
-
-         @Config("column_options")
-         @ConfigDefault("{}")
-         public Map<String, JdbcColumnOption> getColumnOptions();
-
-         @Config("default_timezone")
-         @ConfigDefault("\"UTC\"")
-         public DateTimeZone getDefaultTimeZone();
-
-         @Config("default_column_options")
-         @ConfigDefault("{}")
-         public Map<String, JdbcColumnOption> getDefaultColumnOptions();
-
-         @Config("after_select")
-         @ConfigDefault("null")
-         public Optional<String> getAfterSelect();
-
-         public PreparedQuery getBuiltQuery();
-         public void setBuiltQuery(PreparedQuery query);
-
-         public JdbcSchema getQuerySchema();
-         public void setQuerySchema(JdbcSchema schema);
-
-         public List<Integer> getIncrementalColumnIndexes();
-         public void setIncrementalColumnIndexes(List<Integer> indexes);
-
-         @ConfigInject
-         public BufferAllocator getBufferAllocator();
-     }
-
-     // for subclasses to add @Config
-     protected Class<? extends PluginTask> getTaskClass()
-     {
-         return PluginTask.class;
-     }
-
-     protected abstract JdbcInputConnection newConnection(PluginTask task) throws SQLException;
-
-     @Override
-     public ConfigDiff transaction(ConfigSource config,
-             InputPlugin.Control control)
-     {
-         PluginTask task = config.loadConfig(getTaskClass());
-
-         if (task.getIncremental()) {
-             if (task.getOrderBy().isPresent()) {
-                 throw new ConfigException("order_by option must not be set if incremental is true");
-             }
-         }
-         else {
-             if (!task.getIncrementalColumns().isEmpty()) {
-                 throw new ConfigException("'incremental: true' must be set if incremental_columns is set");
-             }
-         }
-
-         Schema schema;
-         try (JdbcInputConnection con = newConnection(task)) {
-             con.showDriverVersion();
-
-             // TODO incremental_columns is not set => get primary key
-             schema = setupTask(con, task);
-         } catch (SQLException ex) {
-             throw Throwables.propagate(ex);
-         }
-
-         return buildNextConfigDiff(task, control.run(task.dump(), schema, 1));
-     }
-
-     protected Schema setupTask(JdbcInputConnection con, PluginTask task) throws SQLException
-     {
-         if (task.getTable().isPresent()) {
-             String actualTableName = normalizeTableNameCase(con, task.getTable().get());
-             task.setTable(Optional.of(actualTableName));
-         }
-
-         // build SELECT query and gets schema of its result
-         String rawQuery = getRawQuery(task, con);
-
-         JdbcSchema querySchema = con.getSchemaOfQuery(rawQuery);
-         task.setQuerySchema(querySchema);
-         // query schema should not change after incremental query
-
-         PreparedQuery preparedQuery;
-         if (task.getIncremental()) {
-             // build incremental query
-
-             List<String> incrementalColumns = task.getIncrementalColumns();
-             if (incrementalColumns.isEmpty()) {
-                 // incremental_columns is not set
-                 if (!task.getTable().isPresent()) {
-                     throw new ConfigException("incremental_columns option must be set if incremental is true and custom query option is set");
-                 }
-                 // get primary keys from the target table to use them as incremental_columns
-                 List<String> primaryKeys = con.getPrimaryKeys(task.getTable().get());
-                 if (primaryKeys.isEmpty()) {
-                     throw new ConfigException(String.format(ENGLISH,
-                             "Primary key is not available at the table '%s'. incremental_columns option must be set",
-                             task.getTable().get()));
-                 }
-                 logger.info("Using primary keys as incremental_columns: {}", primaryKeys);
-                 task.setIncrementalColumns(primaryKeys);
-                 incrementalColumns = primaryKeys;
-             }
-
-             List<Integer> incrementalColumnIndexes = findIncrementalColumnIndexes(querySchema, incrementalColumns);
-             task.setIncrementalColumnIndexes(incrementalColumnIndexes);
-
-             List<JsonNode> lastRecord;
-             if (task.getLastRecord().isPresent()) {
-                 lastRecord = task.getLastRecord().get();
-                 if (lastRecord.size() != incrementalColumnIndexes.size()) {
-                     throw new ConfigException("Number of values set at last_record must be same with number of columns set at incremental_columns");
-                 }
-             }
-             else {
-                 lastRecord = null;
-             }
-
-             if (task.getQuery().isPresent()) {
-                 preparedQuery = con.wrapIncrementalQuery(rawQuery, querySchema, incrementalColumnIndexes, lastRecord);
-             }
-             else {
-                 preparedQuery = con.rebuildIncrementalQuery(
-                         task.getTable().get(), task.getSelect(),
-                         task.getWhere(),
-                         querySchema, incrementalColumnIndexes, lastRecord);
-             }
-         }
-         else {
-             task.setIncrementalColumnIndexes(ImmutableList.<Integer>of());
-             preparedQuery = new PreparedQuery(rawQuery, ImmutableList.<JdbcLiteral>of());
-         }
-
-         task.setBuiltQuery(preparedQuery);
-
-         // validate column_options
-         newColumnGetters(con, task, querySchema, null);
-
-         ColumnGetterFactory factory = newColumnGetterFactory(null, task.getDefaultTimeZone());
-         ImmutableList.Builder<Column> columns = ImmutableList.builder();
-         for (int i = 0; i < querySchema.getCount(); i++) {
-             JdbcColumn column = querySchema.getColumn(i);
-             JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), column, factory.getJdbcType(column.getSqlType()));
-             columns.add(new Column(i,
-                     column.getName(),
-                     factory.newColumnGetter(con, task, column, columnOption).getToType()));
-         }
-         return new Schema(columns.build());
-     }
-
-     private String normalizeTableNameCase(JdbcInputConnection con, String tableName)
-             throws SQLException
-     {
-         if (con.tableExists(tableName)) {
-             return tableName;
-         } else {
-             String upperTableName = tableName.toUpperCase();
-             String lowerTableName = tableName.toLowerCase();
-             boolean upperExists = con.tableExists(upperTableName);
-             boolean lowerExists = con.tableExists(lowerTableName);
-             if (upperExists && lowerExists) {
-                 throw new ConfigException(String.format("Cannot specify table '%s' because both '%s' and '%s' exist.",
-                         tableName, upperTableName, lowerTableName));
-             } else if (upperExists) {
-                 return upperTableName;
-             } else if (lowerExists) {
-                 return lowerTableName;
-             } else {
-                 // fallback to the given table name. this may throw error later at getSchemaOfQuery
-                 return tableName;
-             }
-         }
-     }
-
-     private List<Integer> findIncrementalColumnIndexes(JdbcSchema schema, List<String> incrementalColumns)
-             throws SQLException
-     {
-         ImmutableList.Builder<Integer> builder = ImmutableList.builder();
-         for (String name : incrementalColumns) {
-             Optional<Integer> index = schema.findColumn(name);
-             if (index.isPresent()) {
-                 builder.add(index.get());
-             }
-             else {
-                 throw new ConfigException(String.format(ENGLISH,
-                         "Column name '%s' is in incremental_columns option does not exist",
-                         name));
-             }
-         }
-         return builder.build();
-     }
-
-     private String getRawQuery(PluginTask task, JdbcInputConnection con) throws SQLException
-     {
-         if (task.getQuery().isPresent()) {
-             if (task.getTable().isPresent() || task.getSelect().isPresent() ||
-                     task.getWhere().isPresent() || task.getOrderBy().isPresent()) {
-                 throw new ConfigException("'table', 'select', 'where' and 'order_by' parameters are unnecessary if 'query' parameter is set.");
-             } else if (!task.getIncrementalColumns().isEmpty() || task.getLastRecord().isPresent()) {
-                 throw new ConfigException("'incremental_columns' and 'last_record' parameters are not supported if 'query' parameter is set.");
-             }
-             return task.getQuery().get();
-         } else if (task.getTable().isPresent()) {
-             return con.buildSelectQuery(task.getTable().get(), task.getSelect(),
-                     task.getWhere(), task.getOrderBy());
-         } else {
-             throw new ConfigException("'table' or 'query' parameter is required");
-         }
-     }
-
-     @Override
-     public ConfigDiff resume(TaskSource taskSource,
-             Schema schema, int taskCount,
-             InputPlugin.Control control)
-     {
-         PluginTask task = taskSource.loadTask(getTaskClass());
-
-         // TODO when parallel execution is implemented and enabled, (maybe) order_by
-         // is necessary to resume. transaction() gets the range of order_by
-         // colum and set it to WHERE condition to make the operation deterministic
-
-         return buildNextConfigDiff(task, control.run(taskSource, schema, taskCount));
-     }
-
-     public ConfigDiff guess(ConfigSource config)
-     {
-         return Exec.newConfigDiff();
-     }
-
-     protected ConfigDiff buildNextConfigDiff(PluginTask task, List<TaskReport> reports)
-     {
-         ConfigDiff next = Exec.newConfigDiff();
-         if (reports.size() > 0 && reports.get(0).has("last_record")) {
-             next.set("last_record", reports.get(0).get(JsonNode.class, "last_record"));
-         } else if (task.getLastRecord().isPresent()) {
-             next.set("last_record", task.getLastRecord().get());
-         }
-         return next;
-     }
-
-     @Override
-     public void cleanup(TaskSource taskSource,
-             Schema schema, int taskCount,
-             List<TaskReport> successTaskReports)
-     {
-         // do nothing
-     }
-
-     private static class LastRecordStore
-     {
-         private final List<Integer> columnIndexes;
-         private final JsonNode[] lastValues;
-         private final List<String> columnNames;
-
-         public LastRecordStore(List<Integer> columnIndexes, List<String> columnNames)
-         {
-             this.columnIndexes = columnIndexes;
-             this.lastValues = new JsonNode[columnIndexes.size()];
-             this.columnNames = columnNames;
-         }
-
-         public void accept(List<ColumnGetter> getters)
-                 throws SQLException
-         {
-             for (int i = 0; i < columnIndexes.size(); i++) {
-                 lastValues[i] = getters.get(columnIndexes.get(i)).encodeToJson();
-             }
-         }
-
-         public List<JsonNode> getList()
-         {
-             ImmutableList.Builder<JsonNode> builder = ImmutableList.builder();
-             for (int i = 0; i < lastValues.length; i++) {
-                 if (lastValues[i] == null || lastValues[i].isNull()) {
-                     throw new DataException(String.format(ENGLISH,
-                             "incremental_columns can't include null values but the last row is null at column '%s'",
-                             columnNames.get(i)));
-                 }
-                 builder.add(lastValues[i]);
-             }
-             return builder.build();
-         }
-     }
-
-     @Override
-     public TaskReport run(TaskSource taskSource,
-             Schema schema, int taskIndex,
-             PageOutput output)
-     {
-         PluginTask task = taskSource.loadTask(getTaskClass());
-
-         PreparedQuery builtQuery = task.getBuiltQuery();
-         JdbcSchema querySchema = task.getQuerySchema();
-         BufferAllocator allocator = task.getBufferAllocator();
-         PageBuilder pageBuilder = new PageBuilder(allocator, schema, output);
-
-         long totalRows = 0;
-
-         LastRecordStore lastRecordStore = null;
-
-         try (JdbcInputConnection con = newConnection(task)) {
-             List<ColumnGetter> getters = newColumnGetters(con, task, querySchema, pageBuilder);
-             try (BatchSelect cursor = con.newSelectCursor(builtQuery, getters, task.getFetchRows(), task.getSocketTimeout())) {
-                 while (true) {
-                     long rows = fetch(cursor, getters, pageBuilder);
-                     if (rows <= 0L) {
-                         break;
-                     }
-                     totalRows += rows;
-                 }
-             }
-
-             if (task.getIncremental() && totalRows > 0) {
-                 lastRecordStore = new LastRecordStore(task.getIncrementalColumnIndexes(), task.getIncrementalColumns());
-                 lastRecordStore.accept(getters);
-             }
-
-             pageBuilder.finish();
-
-             // after_select runs after pageBuilder.finish because pageBuilder.finish may fail.
-             // TODO Output plugin's transaction might still fail. In that case, after_select is
-             // already done but output plugin didn't commit the data to the target storage.
-             // This means inconsistency between data source and destination. To avoid this
-             // issue, we need another option like `after_commit` that runs after output plugin's
-             // commit. after_commit can't run in the same transaction with SELECT. So,
-             // after_select gets values and store them in TaskReport, and after_commit take
-             // them as placeholder. Or, after_select puts values to an intermediate table, and
-             // after_commit moves those values to the actual table.
-             if (task.getAfterSelect().isPresent()) {
-                 con.executeUpdate(task.getAfterSelect().get());
-                 con.connection.commit();
-             }
-         } catch (SQLException ex) {
-             throw Throwables.propagate(ex);
-         }
-
-         TaskReport report = Exec.newTaskReport();
-         if (lastRecordStore != null) {
-             report.set("last_record", lastRecordStore.getList());
-         }
-
-         return report;
-     }
-
-     protected ColumnGetterFactory newColumnGetterFactory(PageBuilder pageBuilder, DateTimeZone dateTimeZone)
-     {
-         return new ColumnGetterFactory(pageBuilder, dateTimeZone);
-     }
-
-     private List<ColumnGetter> newColumnGetters(JdbcInputConnection con, PluginTask task, JdbcSchema querySchema, PageBuilder pageBuilder)
-             throws SQLException
-     {
-         ColumnGetterFactory factory = newColumnGetterFactory(pageBuilder, task.getDefaultTimeZone());
-         ImmutableList.Builder<ColumnGetter> getters = ImmutableList.builder();
-         for (JdbcColumn c : querySchema.getColumns()) {
-             JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), c, factory.getJdbcType(c.getSqlType()));
-             getters.add(factory.newColumnGetter(con, task, c, columnOption));
-         }
-         return getters.build();
-     }
-
-     private static JdbcColumnOption columnOptionOf(Map<String, JdbcColumnOption> columnOptions, Map<String, JdbcColumnOption> defaultColumnOptions, JdbcColumn targetColumn, String targetColumnSQLType)
-     {
-         JdbcColumnOption columnOption = columnOptions.get(targetColumn.getName());
-         if (columnOption == null) {
-             String foundName = null;
-             for (Map.Entry<String, JdbcColumnOption> entry : columnOptions.entrySet()) {
-                 if (entry.getKey().equalsIgnoreCase(targetColumn.getName())) {
-                     if (columnOption != null) {
-                         throw new ConfigException(String.format("Cannot specify column '%s' because both '%s' and '%s' exist in column_options.",
-                                 targetColumn.getName(), foundName, entry.getKey()));
-                     }
-                     foundName = entry.getKey();
-                     columnOption = entry.getValue();
-                 }
-             }
-         }
-
-         return Optional
-                 .fromNullable(columnOption)
-                 .or(Optional.fromNullable(defaultColumnOptions.get(targetColumnSQLType)))
-                 .or(
-                         // default column option
-                         new Supplier<JdbcColumnOption>()
-                         {
-                             public JdbcColumnOption get()
-                             {
-                                 return Exec.newConfigSource().loadConfig(JdbcColumnOption.class);
-                             }
-                         });
-     }
-
-     private long fetch(BatchSelect cursor,
-             List<ColumnGetter> getters, PageBuilder pageBuilder) throws SQLException
-     {
-         ResultSet result = cursor.fetch();
-         if (result == null || !result.next()) {
-             return 0;
-         }
-
-         List<Column> columns = pageBuilder.getSchema().getColumns();
-         long rows = 0;
-         long reportRows = 500;
-         do {
-             for (int i=0; i < getters.size(); i++) {
-                 int index = i + 1; // JDBC column index begins from 1
-                 getters.get(i).getAndSet(result, index, columns.get(i));
-             }
-             pageBuilder.addRecord();
-             rows++;
-             if (rows % reportRows == 0) {
-                 logger.info(String.format("Fetched %,d rows.", rows));
-                 reportRows *= 2;
-             }
-         } while (result.next());
-
-         return rows;
-     }
-
-     //// TODO move to embulk.spi.util?
-     //private static class ListPageOutput
-     //{
-     //    public ImmutableList.Builder<Page> pages;
-     //
-     //    public ListPageOutput()
-     //    {
-     //        reset();
-     //    }
-     //
-     //    @Override
-     //    public void add(Page page)
-     //    {
-     //        pages.add(page);
-     //    }
-     //
-     //    @Override
-     //    public void finish()
-     //    {
-     //    }
-     //
-     //    @Override
-     //    public void close()
-     //    {
-     //    }
-     //
-     //    public List<Page> getPages()
-     //    {
-     //        return pages.build();
-     //    }
-     //
-     //    public void reset()
-     //    {
-     //        pages = ImmutableList.builder();
-     //    }
-     //}
-
-     protected void loadDriver(String className, Optional<String> driverPath)
-     {
-         if (driverPath.isPresent()) {
-             addDriverJarToClasspath(driverPath.get());
-         } else {
-             try {
-                 // Gradle test task will add JDBC driver to classpath
-                 Class.forName(className);
-
-             } catch (ClassNotFoundException ex) {
-                 File root = findPluginRoot();
-                 File driverLib = new File(root, "default_jdbc_driver");
-                 File[] files = driverLib.listFiles(new FileFilter() {
-                     @Override
-                     public boolean accept(File file) {
-                         return file.isFile() && file.getName().endsWith(".jar");
-                     }
-                 });
-                 if (files == null || files.length == 0) {
-                     throw new RuntimeException("Cannot find JDBC driver in '" + root.getAbsolutePath() + "'.");
-                 } else {
-                     for (File file : files) {
-                         logger.info("JDBC Driver = " + file.getAbsolutePath());
-                         addDriverJarToClasspath(file.getAbsolutePath());
-                     }
-                 }
-             }
-         }
-
-         // Load JDBC Driver
-         try {
-             Class.forName(className);
-         } catch (ClassNotFoundException ex) {
-             throw new RuntimeException(ex);
-         }
-     }
-
-     protected void addDriverJarToClasspath(String glob)
-     {
-         // TODO match glob
-         PluginClassLoader loader = (PluginClassLoader) getClass().getClassLoader();
-         Path path = Paths.get(glob);
-         if (!path.toFile().exists()) {
-             throw new ConfigException("The specified driver jar doesn't exist: " + glob);
-         }
-         loader.addPath(Paths.get(glob));
-     }
-
-     protected File findPluginRoot()
-     {
-         try {
-             URL url = getClass().getResource("/" + getClass().getName().replace('.', '/') + ".class");
-             if (url.toString().startsWith("jar:")) {
-                 url = new URL(url.toString().replaceAll("^jar:", "").replaceAll("![^!]*$", ""));
-             }
-
-             File folder = new File(url.toURI()).getParentFile();
-             for (;; folder = folder.getParentFile()) {
-                 if (folder == null) {
-                     throw new RuntimeException("Cannot find 'embulk-input-xxx' folder.");
-                 }
-
-                 if (folder.getName().startsWith("embulk-input-")) {
-                     return folder;
-                 }
-             }
-         } catch (MalformedURLException | URISyntaxException e) {
-             throw new RuntimeException(e);
-         }
-     }
-
-     protected void logConnectionProperties(String url, Properties props)
-     {
-         Properties maskedProps = new Properties();
-         for(String key : props.stringPropertyNames()) {
-             if (key.equals("password")) {
-                 maskedProps.setProperty(key, "***");
-             } else {
-                 maskedProps.setProperty(key, props.getProperty(key));
-             }
-         }
-         logger.info("Connecting to {} options {}", url, maskedProps);
-     }
- }
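
The incremental options declared in this removed PluginTask (incremental, incremental_columns, last_record) drive the setupTask/run/buildNextConfigDiff flow above. A sketch of how they might combine in a config, assuming a hypothetical events table with a monotonically increasing id column; note the code rejects order_by when incremental: true, and falls back to the table's primary keys only when incremental_columns is omitted:

in:
  type: athena
  athena_url: jdbc:awsathena://athena.us-east-1.amazonaws.com:443  # hypothetical endpoint
  s3_staging_dir: s3://my-athena-results/staging                   # hypothetical bucket
  access_key: YOUR_AWS_ACCESS_KEY
  secret_key: YOUR_AWS_SECRET_KEY
  table: events               # hypothetical table
  incremental: true
  incremental_columns: [id]   # hypothetical column
  last_record: [10000]        # normally produced by the previous run's config diff

Each successful run stores the newest incremental_columns values in the returned config diff as last_record, so the next run's generated WHERE clause fetches only newer rows.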