embulk-input-athena 0.1.0 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -1
  3. data/README.md +21 -9
  4. data/build.gradle +22 -11
  5. data/docker-compose.yml +7 -1
  6. data/src/main/java/org/embulk/input/athena/AthenaInputConnection.java +3 -0
  7. data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java +182 -33
  8. data/src/main/resources/log4j.properties +4 -0
  9. metadata +25 -49
  10. data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1 +0 -192
  11. data/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java +0 -674
  12. data/src/main/java/org/embulk/input/jdbc/JdbcColumn.java +0 -58
  13. data/src/main/java/org/embulk/input/jdbc/JdbcColumnOption.java +0 -31
  14. data/src/main/java/org/embulk/input/jdbc/JdbcInputConnection.java +0 -397
  15. data/src/main/java/org/embulk/input/jdbc/JdbcLiteral.java +0 -38
  16. data/src/main/java/org/embulk/input/jdbc/JdbcSchema.java +0 -55
  17. data/src/main/java/org/embulk/input/jdbc/Ssl.java +0 -37
  18. data/src/main/java/org/embulk/input/jdbc/ToString.java +0 -54
  19. data/src/main/java/org/embulk/input/jdbc/ToStringMap.java +0 -35
  20. data/src/main/java/org/embulk/input/jdbc/getter/AbstractColumnGetter.java +0 -105
  21. data/src/main/java/org/embulk/input/jdbc/getter/AbstractIncrementalHandler.java +0 -45
  22. data/src/main/java/org/embulk/input/jdbc/getter/AbstractTimestampColumnGetter.java +0 -38
  23. data/src/main/java/org/embulk/input/jdbc/getter/BigDecimalColumnGetter.java +0 -59
  24. data/src/main/java/org/embulk/input/jdbc/getter/BooleanColumnGetter.java +0 -56
  25. data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetter.java +0 -21
  26. data/src/main/java/org/embulk/input/jdbc/getter/ColumnGetterFactory.java +0 -207
  27. data/src/main/java/org/embulk/input/jdbc/getter/DateColumnGetter.java +0 -37
  28. data/src/main/java/org/embulk/input/jdbc/getter/DoubleColumnGetter.java +0 -66
  29. data/src/main/java/org/embulk/input/jdbc/getter/FloatColumnGetter.java +0 -66
  30. data/src/main/java/org/embulk/input/jdbc/getter/JsonColumnGetter.java +0 -57
  31. data/src/main/java/org/embulk/input/jdbc/getter/LongColumnGetter.java +0 -70
  32. data/src/main/java/org/embulk/input/jdbc/getter/StringColumnGetter.java +0 -96
  33. data/src/main/java/org/embulk/input/jdbc/getter/TimeColumnGetter.java +0 -37
  34. data/src/main/java/org/embulk/input/jdbc/getter/TimestampColumnGetter.java +0 -36
  35. data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithTimeZoneIncrementalHandler.java +0 -83
  36. data/src/main/java/org/embulk/input/jdbc/getter/TimestampWithoutTimeZoneIncrementalHandler.java +0 -75
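Taken together, the file list tells the story of the release: 0.1.6 drops the scratch file AthenaInputPlugin.java.tmp1 and the vendored org.embulk.input.jdbc sources (items 10-36, all pure deletions) and instead ships prebuilt jars; the metadata diff below correspondingly adds classpath/embulk-input-jdbc-0.9.1.jar and the Athena JDBC driver classpath/AthenaJDBC41-1.1.0.jar.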
data/src/main/resources/log4j.properties ADDED
@@ -0,0 +1,4 @@
+ log4j.rootLogger=INFO, ROOT
+
+ log4j.appender.ROOT=org.apache.log4j.varia.NullAppender
+ log4j.appender.ROOT.layout=org.apache.log4j.PatternLayout
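The added log4j.properties points the root logger at an appender named ROOT and makes that appender a NullAppender, so all log4j 1.x output is discarded (the PatternLayout line is inert once everything is dropped) - presumably to silence the bundled Athena JDBC driver, which logs through log4j 1.x. A rough programmatic equivalent, for illustration only; the plugin itself relies on the properties file above:

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.varia.NullAppender;

public class SilenceDriverLogging {
    public static void main(String[] args) {
        Logger root = Logger.getRootLogger();
        root.setLevel(Level.INFO);             // log4j.rootLogger=INFO, ROOT
        root.removeAllAppenders();
        root.addAppender(new NullAppender()); // ROOT -> NullAppender: discard all output
    }
}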
metadata CHANGED
@@ -1,19 +1,19 @@
  --- !ruby/object:Gem::Specification
  name: embulk-input-athena
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.1.6
  platform: ruby
  authors:
  - shinji19
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-04-16 00:00:00.000000000 Z
+ date: 2021-02-17 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ~>
+ - - "~>"
  - !ruby/object:Gem::Version
  version: '1.0'
  name: bundler
@@ -21,13 +21,13 @@ dependencies:
  type: :development
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ~>
+ - - "~>"
  - !ruby/object:Gem::Version
  version: '1.0'
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '10.0'
  name: rake
@@ -35,7 +35,7 @@ dependencies:
  type: :development
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '10.0'
  description: Loads records from Athena.
@@ -45,49 +45,12 @@ executables: []
  extensions: []
  extra_rdoc_files: []
  files:
- - .gitignore
+ - ".gitignore"
  - Dockerfile
  - LICENSE
  - README.md
  - build.gradle
- - config/checkstyle/checkstyle.xml
- - config/checkstyle/default.xml
- - docker-compose.yml
- - gradle/wrapper/gradle-wrapper.jar
- - gradle/wrapper/gradle-wrapper.properties
- - gradlew
- - gradlew.bat
- - lib/embulk/input/athena.rb
- - src/main/java/org/embulk/input/athena/AthenaInputConnection.java
- - src/main/java/org/embulk/input/athena/AthenaInputPlugin.java
- - src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1
- - src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java
- - src/main/java/org/embulk/input/jdbc/JdbcColumn.java
- - src/main/java/org/embulk/input/jdbc/JdbcColumnOption.java
- - src/main/java/org/embulk/input/jdbc/JdbcInputConnection.java
- - src/main/java/org/embulk/input/jdbc/JdbcLiteral.java
- - src/main/java/org/embulk/input/jdbc/JdbcSchema.java
- - src/main/java/org/embulk/input/jdbc/Ssl.java
- - src/main/java/org/embulk/input/jdbc/ToString.java
- - src/main/java/org/embulk/input/jdbc/ToStringMap.java
- - src/main/java/org/embulk/input/jdbc/getter/AbstractColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/AbstractIncrementalHandler.java
- - src/main/java/org/embulk/input/jdbc/getter/AbstractTimestampColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/BigDecimalColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/BooleanColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/ColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/ColumnGetterFactory.java
- - src/main/java/org/embulk/input/jdbc/getter/DateColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/DoubleColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/FloatColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/JsonColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/LongColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/StringColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/TimeColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/TimestampColumnGetter.java
- - src/main/java/org/embulk/input/jdbc/getter/TimestampWithTimeZoneIncrementalHandler.java
- - src/main/java/org/embulk/input/jdbc/getter/TimestampWithoutTimeZoneIncrementalHandler.java
- - src/test/java/org/embulk/input/athena/TestAthenaInputPlugin.java
+ - classpath/AthenaJDBC41-1.1.0.jar
  - classpath/aws-java-sdk-1.11.301.jar
  - classpath/aws-java-sdk-acm-1.11.301.jar
  - classpath/aws-java-sdk-alexaforbusiness-1.11.301.jar
@@ -217,7 +180,8 @@ files:
  - classpath/aws-java-sdk-xray-1.11.301.jar
  - classpath/commons-codec-1.10.jar
  - classpath/commons-logging-1.2.jar
- - classpath/embulk-input-athena-0.1.0.jar
+ - classpath/embulk-input-athena-0.1.6.jar
+ - classpath/embulk-input-jdbc-0.9.1.jar
  - classpath/httpclient-4.5.5.jar
  - classpath/httpcore-4.4.9.jar
  - classpath/ion-java-1.0.2.jar
@@ -231,6 +195,18 @@ files:
  - classpath/netty-handler-4.1.17.Final.jar
  - classpath/netty-resolver-4.1.17.Final.jar
  - classpath/netty-transport-4.1.17.Final.jar
+ - config/checkstyle/checkstyle.xml
+ - config/checkstyle/default.xml
+ - docker-compose.yml
+ - gradle/wrapper/gradle-wrapper.jar
+ - gradle/wrapper/gradle-wrapper.properties
+ - gradlew
+ - gradlew.bat
+ - lib/embulk/input/athena.rb
+ - src/main/java/org/embulk/input/athena/AthenaInputConnection.java
+ - src/main/java/org/embulk/input/athena/AthenaInputPlugin.java
+ - src/main/resources/log4j.properties
+ - src/test/java/org/embulk/input/athena/TestAthenaInputPlugin.java
  homepage: https://github.com/shinji19/embulk-input-athena
  licenses:
  - MIT
@@ -241,17 +217,17 @@ require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.1.9
+ rubygems_version: 2.6.8
  signing_key:
  specification_version: 4
  summary: Athena input plugin for Embulk
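Most of the churn in this metadata diff is serialization noise rather than hand edits: the gem was repackaged with a newer RubyGems (rubygems_version 2.1.9 -> 2.6.8), which quotes YAML strings such as "~>", ">=", and ".gitignore" that the older version left bare. The substantive changes are the version bump, the new build date, and the reorganized files list reflecting the jar-based packaging.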
data/src/main/java/org/embulk/input/athena/AthenaInputPlugin.java.tmp1 DELETED
@@ -1,192 +0,0 @@
- package org.embulk.input.athena;
-
- import java.sql.Connection;
- import java.sql.DriverManager;
- import java.sql.ResultSet;
- import java.sql.SQLException;
- import java.sql.Statement;
- import java.util.List;
- import java.util.Properties;
-
- import com.google.common.base.Optional;
-
- import org.embulk.config.Config;
- import org.embulk.config.ConfigDefault;
- import org.embulk.config.ConfigDiff;
- import org.embulk.config.ConfigSource;
- import org.embulk.config.Task;
- import org.embulk.config.TaskReport;
- import org.embulk.config.TaskSource;
- import org.embulk.input.jdbc.AbstractJdbcInputPlugin;
- import org.embulk.input.jdbc.JdbcInputConnection;
- import org.embulk.spi.Exec;
- import org.embulk.spi.InputPlugin;
- import org.embulk.spi.PageOutput;
- import org.embulk.spi.Schema;
- import org.embulk.spi.SchemaConfig;
-
- public class AthenaInputPlugin
- extends AbstractJdbcInputPlugin
- {
- public interface AthenaPluginTask
- extends AbstractJdbcInputPlugin.PluginTask
- {
- @Config("driver_path")
- @ConfigDefault("null")
- public Optional<String> getDriverPath();
-
- // athena_url (required string)
- @Config("athena_url")
- public String getAthenaUrl();
-
- // s3_staging_dir (required string)
- @Config("s3_staging_dir")
- public String getS3StagingDir();
-
- // access_key (required string)
- @Config("access_key")
- public String getAccessKey();
-
- // secret_key (required string)
- @Config("secret_key")
- public String getSecretKey();
-
- // configuration option 2 (optional string, null is not allowed)
- // @Config("option2")
- // @ConfigDefault("\"myvalue\"")
- // public String getOption2();
-
- // configuration option 3 (optional string, null is allowed)
- // @Config("option3")
- // @ConfigDefault("null")
- // public Optional<String> getOption3();
-
- // if you get schema from config
- // @Config("columns")
- // public SchemaConfig getColumns();
- }
-
- @Override
- protected Class<? extends PluginTask> getTaskClass() {
- return AthenaPluginTask.class;
- }
-
- @Override
- protected AthenaInputConnection newConnection(PluginTask pluginTask) throws SQLException {
- AthenaPluginTask task = (AthenaPluginTask) pluginTask;
- loadDriver("com.amazonaws.athena.jdbc.AthenaDriver", task.getDriverPath());
- //Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");
- Properties properties = new Properties();
- properties.put("s3_staging_dir", task.getS3StagingDir());
- properties.put("user", task.getAccessKey());
- properties.put("password", task.getSecretKey());
- properties.putAll(task.getOptions());
-
- Connection connection = DriverManager.getConnection(task.getAthenaUrl(), properties);
- try {
- AthenaInputConnection c = new AthenaInputConnection(connection);
- connection = null;
- return c;
- } finally {
- if (connection != null) {
- connection.close();
- }
- }
- }
-
- /*
- @Override
- public ConfigDiff transaction(ConfigSource config,
- InputPlugin.Control control)
- {
- PluginTask task = config.loadConfig(PluginTask.class);
-
- // Schema schema = task.getColumns().toSchema();
- Schema schema = Schema.builder().build();
- int taskCount = 1; // number of run() method calls
-
- return resume(task.dump(), schema, taskCount, control);
- }
-
- @Override
- public ConfigDiff resume(TaskSource taskSource,
- Schema schema, int taskCount,
- InputPlugin.Control control)
- {
- control.run(taskSource, schema, taskCount);
- return Exec.newConfigDiff();
- }
-
- @Override
- public void cleanup(TaskSource taskSource,
- Schema schema, int taskCount,
- List<TaskReport> successTaskReports)
- {
- }
-
- @Override
- public TaskReport run(TaskSource taskSource,
- Schema schema, int taskIndex,
- PageOutput output)
- {
- PluginTask task = taskSource.loadTask(PluginTask.class);
-
- // Write your code here :)
-
- Connection connection = null;
- Statement statement = null;
- try {
- connection = getAthenaConnection(task);
- statement = connection.createStatement();
- ResultSet resultSet = statement.executeQuery("select * from default.sample");
-
- while(resultSet.next()){
- String statusCode = resultSet.getString("created_at");
- System.out.println("st code" + statusCode);
- }
- resultSet.close();
- connection.close();
- } catch (Exception e){
- e.printStackTrace();
- } finally {
- try {
- if (statement != null)
- statement.close();
- } catch (Exception ex) {
-
- }
- try {
- if (connection != null)
- connection.close();
- } catch (Exception ex) {
- ex.printStackTrace();
- }
- }
-
- return Exec.newTaskReport();
- }
-
- @Override
- public ConfigDiff guess(ConfigSource config)
- {
- return Exec.newConfigDiff();
- }
- */
- /*
- protected Connection getAthenaConnection(PluginTask task) throws ClassNotFoundException, SQLException{
- Class.forName("com.amazonaws.athena.jdbc.AthenaDriver");
- Properties properties = new Properties();
- properties.put("s3_staging_dir", task.getS3StagingDir());
- properties.put("user", task.getAccessKey());
- properties.put("password", task.getSecretKey());
- if (task.getLogPath() != null){
- properties.put("log_path", task.getLogPath());
- }
- if (task.getLogLevel() != null){
- properties.put("log_level", task.getLogLevel());
- }
-
- return DriverManager.getConnection(task.getAthenaUrl(), properties);
- }
- */
- }
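One detail in the removed scratch file is worth keeping in mind, since newConnection in the kept AthenaInputPlugin.java mirrors it: the local Connection reference is nulled out once the wrapper takes ownership, so the finally block closes the raw connection only when wrapping fails. A minimal self-contained sketch of that hand-off idiom, with a hypothetical Wrapper class standing in for AthenaInputConnection:

import java.sql.Connection;
import java.sql.SQLException;

final class ConnectionHandoff {
    static final class Wrapper implements AutoCloseable {
        private final Connection inner;
        Wrapper(Connection inner) { this.inner = inner; } // may throw in real code
        @Override public void close() throws SQLException { inner.close(); }
    }

    static Wrapper wrapOwning(Connection raw) throws SQLException {
        Connection connection = raw;
        try {
            Wrapper wrapped = new Wrapper(connection);
            connection = null; // success: the wrapper now owns the connection
            return wrapped;
        } finally {
            if (connection != null) {
                connection.close(); // wrapping failed: close here to avoid a leak
            }
        }
    }
}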
@@ -1,674 +0,0 @@
1
- package org.embulk.input.jdbc;
2
-
3
- import java.io.File;
4
- import java.io.FileFilter;
5
- import java.net.MalformedURLException;
6
- import java.net.URISyntaxException;
7
- import java.net.URL;
8
- import java.nio.file.Path;
9
- import java.util.List;
10
- import java.util.Map;
11
- import java.util.Properties;
12
- import java.nio.file.Paths;
13
- import java.sql.ResultSet;
14
- import java.sql.SQLException;
15
-
16
- import org.slf4j.Logger;
17
-
18
- import com.fasterxml.jackson.databind.JsonNode;
19
- import com.google.common.base.Optional;
20
- import com.google.common.base.Supplier;
21
- import com.google.common.base.Throwables;
22
- import com.google.common.collect.ImmutableList;
23
-
24
- import org.embulk.config.Config;
25
- import org.embulk.config.ConfigException;
26
- import org.embulk.config.ConfigDefault;
27
- import org.embulk.config.ConfigDiff;
28
- import org.embulk.config.ConfigInject;
29
- import org.embulk.config.ConfigSource;
30
- import org.embulk.config.Task;
31
- import org.embulk.config.TaskReport;
32
- import org.embulk.config.TaskSource;
33
- import org.embulk.plugin.PluginClassLoader;
34
- import org.embulk.spi.BufferAllocator;
35
- import org.embulk.spi.Column;
36
- import org.embulk.spi.DataException;
37
- import org.embulk.spi.PageBuilder;
38
- import org.embulk.spi.InputPlugin;
39
- import org.embulk.spi.PageOutput;
40
- import org.embulk.spi.Schema;
41
- import org.embulk.spi.Exec;
42
- import org.embulk.input.jdbc.getter.ColumnGetter;
43
- import org.embulk.input.jdbc.getter.ColumnGetterFactory;
44
- import org.embulk.input.jdbc.JdbcInputConnection.BatchSelect;
45
- import org.embulk.input.jdbc.JdbcInputConnection.PreparedQuery;
46
- import org.joda.time.DateTimeZone;
47
-
48
- import static java.util.Locale.ENGLISH;
49
-
50
- public abstract class AbstractJdbcInputPlugin
51
- implements InputPlugin
52
- {
53
- protected final Logger logger = Exec.getLogger(getClass());
54
-
55
- public interface PluginTask extends Task
56
- {
57
- @Config("options")
58
- @ConfigDefault("{}")
59
- public ToStringMap getOptions();
60
-
61
- @Config("table")
62
- @ConfigDefault("null")
63
- public Optional<String> getTable();
64
- public void setTable(Optional<String> normalizedTableName);
65
-
66
- @Config("query")
67
- @ConfigDefault("null")
68
- public Optional<String> getQuery();
69
-
70
- @Config("select")
71
- @ConfigDefault("null")
72
- public Optional<String> getSelect();
73
-
74
- @Config("where")
75
- @ConfigDefault("null")
76
- public Optional<String> getWhere();
77
-
78
- @Config("order_by")
79
- @ConfigDefault("null")
80
- public Optional<String> getOrderBy();
81
-
82
- @Config("incremental")
83
- @ConfigDefault("false")
84
- public boolean getIncremental();
85
-
86
- @Config("incremental_columns")
87
- @ConfigDefault("[]")
88
- public List<String> getIncrementalColumns();
89
- public void setIncrementalColumns(List<String> indexes);
90
-
91
- @Config("last_record")
92
- @ConfigDefault("null")
93
- public Optional<List<JsonNode>> getLastRecord();
94
-
95
- // TODO limit_value is necessary to make sure repeated bulk load transactions
96
- // don't a same record twice or miss records when the column
97
- // specified at order_by parameter is not unique.
98
- // For example, if the order_by column is "timestamp created_at"
99
- // column whose precision is second, the table can include multiple
100
- // records with the same created_at time. At the first bulk load
101
- // transaction, it loads a record with created_at=2015-01-02 00:00:02.
102
- // Then next transaction will use WHERE created_at > '2015-01-02 00:00:02'.
103
- // However, if another record with created_at=2014-01-01 23:59:59 is
104
- // inserted between the 2 transactions, the new record will be skipped.
105
- // To prevent this scenario, we want to specify
106
- // limit_value=2015-01-02 00:00:00 (exclusive). With this way, as long as
107
- // a transaction runs after 2015-01-02 00:00:00 + some minutes, we don't
108
- // skip records. Ideally, to automate the scheduling, we want to set
109
- // limit_value="today".
110
- //
111
- //@Config("limit_value")
112
- //@ConfigDefault("null")
113
- //public Optional<String> getLimitValue();
114
-
115
- //// TODO probably limit_rows is unnecessary as long as this has
116
- // supports parallel execution (partition_by option) and resuming.
117
- //@Config("limit_rows")
118
- //@ConfigDefault("null")
119
- //public Optional<Integer> getLimitRows();
120
-
121
- @Config("connect_timeout")
122
- @ConfigDefault("300")
123
- public int getConnectTimeout();
124
-
125
- @Config("socket_timeout")
126
- @ConfigDefault("1800")
127
- public int getSocketTimeout();
128
-
129
- @Config("fetch_rows")
130
- @ConfigDefault("10000")
131
- // TODO set minimum number
132
- public int getFetchRows();
133
-
134
- // TODO parallel execution using "partition_by" config
135
-
136
- @Config("column_options")
137
- @ConfigDefault("{}")
138
- public Map<String, JdbcColumnOption> getColumnOptions();
139
-
140
- @Config("default_timezone")
141
- @ConfigDefault("\"UTC\"")
142
- public DateTimeZone getDefaultTimeZone();
143
-
144
- @Config("default_column_options")
145
- @ConfigDefault("{}")
146
- public Map<String, JdbcColumnOption> getDefaultColumnOptions();
147
-
148
- @Config("after_select")
149
- @ConfigDefault("null")
150
- public Optional<String> getAfterSelect();
151
-
152
- public PreparedQuery getBuiltQuery();
153
- public void setBuiltQuery(PreparedQuery query);
154
-
155
- public JdbcSchema getQuerySchema();
156
- public void setQuerySchema(JdbcSchema schema);
157
-
158
- public List<Integer> getIncrementalColumnIndexes();
159
- public void setIncrementalColumnIndexes(List<Integer> indexes);
160
-
161
- @ConfigInject
162
- public BufferAllocator getBufferAllocator();
163
- }
164
-
165
- // for subclasses to add @Config
166
- protected Class<? extends PluginTask> getTaskClass()
167
- {
168
- return PluginTask.class;
169
- }
170
-
171
- protected abstract JdbcInputConnection newConnection(PluginTask task) throws SQLException;
172
-
173
- @Override
174
- public ConfigDiff transaction(ConfigSource config,
175
- InputPlugin.Control control)
176
- {
177
- PluginTask task = config.loadConfig(getTaskClass());
178
-
179
- if (task.getIncremental()) {
180
- if (task.getOrderBy().isPresent()) {
181
- throw new ConfigException("order_by option must not be set if incremental is true");
182
- }
183
- }
184
- else {
185
- if (!task.getIncrementalColumns().isEmpty()) {
186
- throw new ConfigException("'incremental: true' must be set if incremental_columns is set");
187
- }
188
- }
189
-
190
- Schema schema;
191
- try (JdbcInputConnection con = newConnection(task)) {
192
- con.showDriverVersion();
193
-
194
- // TODO incremental_columns is not set => get primary key
195
- schema = setupTask(con, task);
196
- } catch (SQLException ex) {
197
- throw Throwables.propagate(ex);
198
- }
199
-
200
- return buildNextConfigDiff(task, control.run(task.dump(), schema, 1));
201
- }
202
-
203
- protected Schema setupTask(JdbcInputConnection con, PluginTask task) throws SQLException
204
- {
205
- if (task.getTable().isPresent()) {
206
- String actualTableName = normalizeTableNameCase(con, task.getTable().get());
207
- task.setTable(Optional.of(actualTableName));
208
- }
209
-
210
- // build SELECT query and gets schema of its result
211
- String rawQuery = getRawQuery(task, con);
212
-
213
- JdbcSchema querySchema = con.getSchemaOfQuery(rawQuery);
214
- task.setQuerySchema(querySchema);
215
- // query schema should not change after incremental query
216
-
217
- PreparedQuery preparedQuery;
218
- if (task.getIncremental()) {
219
- // build incremental query
220
-
221
- List<String> incrementalColumns = task.getIncrementalColumns();
222
- if (incrementalColumns.isEmpty()) {
223
- // incremental_columns is not set
224
- if (!task.getTable().isPresent()) {
225
- throw new ConfigException("incremental_columns option must be set if incremental is true and custom query option is set");
226
- }
227
- // get primary keys from the target table to use them as incremental_columns
228
- List<String> primaryKeys = con.getPrimaryKeys(task.getTable().get());
229
- if (primaryKeys.isEmpty()) {
230
- throw new ConfigException(String.format(ENGLISH,
231
- "Primary key is not available at the table '%s'. incremental_columns option must be set",
232
- task.getTable().get()));
233
- }
234
- logger.info("Using primary keys as incremental_columns: {}", primaryKeys);
235
- task.setIncrementalColumns(primaryKeys);
236
- incrementalColumns = primaryKeys;
237
- }
238
-
239
- List<Integer> incrementalColumnIndexes = findIncrementalColumnIndexes(querySchema, incrementalColumns);
240
- task.setIncrementalColumnIndexes(incrementalColumnIndexes);
241
-
242
- List<JsonNode> lastRecord;
243
- if (task.getLastRecord().isPresent()) {
244
- lastRecord = task.getLastRecord().get();
245
- if (lastRecord.size() != incrementalColumnIndexes.size()) {
246
- throw new ConfigException("Number of values set at last_record must be same with number of columns set at incremental_columns");
247
- }
248
- }
249
- else {
250
- lastRecord = null;
251
- }
252
-
253
- if (task.getQuery().isPresent()) {
254
- preparedQuery = con.wrapIncrementalQuery(rawQuery, querySchema, incrementalColumnIndexes, lastRecord);
255
- }
256
- else {
257
- preparedQuery = con.rebuildIncrementalQuery(
258
- task.getTable().get(), task.getSelect(),
259
- task.getWhere(),
260
- querySchema, incrementalColumnIndexes, lastRecord);
261
- }
262
- }
263
- else {
264
- task.setIncrementalColumnIndexes(ImmutableList.<Integer>of());
265
- preparedQuery = new PreparedQuery(rawQuery, ImmutableList.<JdbcLiteral>of());
266
- }
267
-
268
- task.setBuiltQuery(preparedQuery);
269
-
270
- // validate column_options
271
- newColumnGetters(con, task, querySchema, null);
272
-
273
- ColumnGetterFactory factory = newColumnGetterFactory(null, task.getDefaultTimeZone());
274
- ImmutableList.Builder<Column> columns = ImmutableList.builder();
275
- for (int i = 0; i < querySchema.getCount(); i++) {
276
- JdbcColumn column = querySchema.getColumn(i);
277
- JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), column, factory.getJdbcType(column.getSqlType()));
278
- columns.add(new Column(i,
279
- column.getName(),
280
- factory.newColumnGetter(con, task, column, columnOption).getToType()));
281
- }
282
- return new Schema(columns.build());
283
- }
284
-
285
- private String normalizeTableNameCase(JdbcInputConnection con, String tableName)
286
- throws SQLException
287
- {
288
- if (con.tableExists(tableName)) {
289
- return tableName;
290
- } else {
291
- String upperTableName = tableName.toUpperCase();
292
- String lowerTableName = tableName.toLowerCase();
293
- boolean upperExists = con.tableExists(upperTableName);
294
- boolean lowerExists = con.tableExists(lowerTableName);
295
- if (upperExists && lowerExists) {
296
- throw new ConfigException(String.format("Cannot specify table '%s' because both '%s' and '%s' exist.",
297
- tableName, upperTableName, lowerTableName));
298
- } else if (upperExists) {
299
- return upperTableName;
300
- } else if (lowerExists) {
301
- return lowerTableName;
302
- } else {
303
- // fallback to the given table name. this may throw error later at getSchemaOfQuery
304
- return tableName;
305
- }
306
- }
307
- }
308
-
309
- private List<Integer> findIncrementalColumnIndexes(JdbcSchema schema, List<String> incrementalColumns)
310
- throws SQLException
311
- {
312
- ImmutableList.Builder<Integer> builder = ImmutableList.builder();
313
- for (String name : incrementalColumns) {
314
- Optional<Integer> index = schema.findColumn(name);
315
- if (index.isPresent()) {
316
- builder.add(index.get());
317
- }
318
- else {
319
- throw new ConfigException(String.format(ENGLISH,
320
- "Column name '%s' is in incremental_columns option does not exist",
321
- name));
322
- }
323
- }
324
- return builder.build();
325
- }
326
-
327
- private String getRawQuery(PluginTask task, JdbcInputConnection con) throws SQLException
328
- {
329
- if (task.getQuery().isPresent()) {
330
- if (task.getTable().isPresent() || task.getSelect().isPresent() ||
331
- task.getWhere().isPresent() || task.getOrderBy().isPresent()) {
332
- throw new ConfigException("'table', 'select', 'where' and 'order_by' parameters are unnecessary if 'query' parameter is set.");
333
- } else if (!task.getIncrementalColumns().isEmpty() || task.getLastRecord().isPresent()) {
334
- throw new ConfigException("'incremental_columns' and 'last_record' parameters are not supported if 'query' parameter is set.");
335
- }
336
- return task.getQuery().get();
337
- } else if (task.getTable().isPresent()) {
338
- return con.buildSelectQuery(task.getTable().get(), task.getSelect(),
339
- task.getWhere(), task.getOrderBy());
340
- } else {
341
- throw new ConfigException("'table' or 'query' parameter is required");
342
- }
343
- }
344
-
345
- @Override
346
- public ConfigDiff resume(TaskSource taskSource,
347
- Schema schema, int taskCount,
348
- InputPlugin.Control control)
349
- {
350
- PluginTask task = taskSource.loadTask(getTaskClass());
351
-
352
- // TODO when parallel execution is implemented and enabled, (maybe) order_by
353
- // is necessary to resume. transaction() gets the range of order_by
354
- // colum and set it to WHERE condition to make the operation deterministic
355
-
356
- return buildNextConfigDiff(task, control.run(taskSource, schema, taskCount));
357
- }
358
-
359
- public ConfigDiff guess(ConfigSource config)
360
- {
361
- return Exec.newConfigDiff();
362
- }
363
-
364
- protected ConfigDiff buildNextConfigDiff(PluginTask task, List<TaskReport> reports)
365
- {
366
- ConfigDiff next = Exec.newConfigDiff();
367
- if (reports.size() > 0 && reports.get(0).has("last_record")) {
368
- next.set("last_record", reports.get(0).get(JsonNode.class, "last_record"));
369
- } else if (task.getLastRecord().isPresent()) {
370
- next.set("last_record", task.getLastRecord().get());
371
- }
372
- return next;
373
- }
374
-
375
- @Override
376
- public void cleanup(TaskSource taskSource,
377
- Schema schema, int taskCount,
378
- List<TaskReport> successTaskReports)
379
- {
380
- // do nothing
381
- }
382
-
383
- private static class LastRecordStore
384
- {
385
- private final List<Integer> columnIndexes;
386
- private final JsonNode[] lastValues;
387
- private final List<String> columnNames;
388
-
389
- public LastRecordStore(List<Integer> columnIndexes, List<String> columnNames)
390
- {
391
- this.columnIndexes = columnIndexes;
392
- this.lastValues = new JsonNode[columnIndexes.size()];
393
- this.columnNames = columnNames;
394
- }
395
-
396
- public void accept(List<ColumnGetter> getters)
397
- throws SQLException
398
- {
399
- for (int i = 0; i < columnIndexes.size(); i++) {
400
- lastValues[i] = getters.get(columnIndexes.get(i)).encodeToJson();
401
- }
402
- }
403
-
404
- public List<JsonNode> getList()
405
- {
406
- ImmutableList.Builder<JsonNode> builder = ImmutableList.builder();
407
- for (int i = 0; i < lastValues.length; i++) {
408
- if (lastValues[i] == null || lastValues[i].isNull()) {
409
- throw new DataException(String.format(ENGLISH,
410
- "incremental_columns can't include null values but the last row is null at column '%s'",
411
- columnNames.get(i)));
412
- }
413
- builder.add(lastValues[i]);
414
- }
415
- return builder.build();
416
- }
417
- }
418
-
419
- @Override
420
- public TaskReport run(TaskSource taskSource,
421
- Schema schema, int taskIndex,
422
- PageOutput output)
423
- {
424
- PluginTask task = taskSource.loadTask(getTaskClass());
425
-
426
- PreparedQuery builtQuery = task.getBuiltQuery();
427
- JdbcSchema querySchema = task.getQuerySchema();
428
- BufferAllocator allocator = task.getBufferAllocator();
429
- PageBuilder pageBuilder = new PageBuilder(allocator, schema, output);
430
-
431
- long totalRows = 0;
432
-
433
- LastRecordStore lastRecordStore = null;
434
-
435
- try (JdbcInputConnection con = newConnection(task)) {
436
- List<ColumnGetter> getters = newColumnGetters(con, task, querySchema, pageBuilder);
437
- try (BatchSelect cursor = con.newSelectCursor(builtQuery, getters, task.getFetchRows(), task.getSocketTimeout())) {
438
- while (true) {
439
- long rows = fetch(cursor, getters, pageBuilder);
440
- if (rows <= 0L) {
441
- break;
442
- }
443
- totalRows += rows;
444
- }
445
- }
446
-
447
- if (task.getIncremental() && totalRows > 0) {
448
- lastRecordStore = new LastRecordStore(task.getIncrementalColumnIndexes(), task.getIncrementalColumns());
449
- lastRecordStore.accept(getters);
450
- }
451
-
452
- pageBuilder.finish();
453
-
454
- // after_select runs after pageBuilder.finish because pageBuilder.finish may fail.
455
- // TODO Output plugin's transaction might still fail. In that case, after_select is
456
- // already done but output plugin didn't commit the data to the target storage.
457
- // This means inconsistency between data source and destination. To avoid this
458
- // issue, we need another option like `after_commit` that runs after output plugin's
459
- // commit. after_commit can't run in the same transaction with SELECT. So,
460
- // after_select gets values and store them in TaskReport, and after_commit take
461
- // them as placeholder. Or, after_select puts values to an intermediate table, and
462
- // after_commit moves those values to the actual table.
463
- if (task.getAfterSelect().isPresent()) {
464
- con.executeUpdate(task.getAfterSelect().get());
465
- con.connection.commit();
466
- }
467
- } catch (SQLException ex) {
468
- throw Throwables.propagate(ex);
469
- }
470
-
471
- TaskReport report = Exec.newTaskReport();
472
- if (lastRecordStore != null) {
473
- report.set("last_record", lastRecordStore.getList());
474
- }
475
-
476
- return report;
477
- }
478
-
479
- protected ColumnGetterFactory newColumnGetterFactory(PageBuilder pageBuilder, DateTimeZone dateTimeZone)
480
- {
481
- return new ColumnGetterFactory(pageBuilder, dateTimeZone);
482
- }
483
-
484
- private List<ColumnGetter> newColumnGetters(JdbcInputConnection con, PluginTask task, JdbcSchema querySchema, PageBuilder pageBuilder)
485
- throws SQLException
486
- {
487
- ColumnGetterFactory factory = newColumnGetterFactory(pageBuilder, task.getDefaultTimeZone());
488
- ImmutableList.Builder<ColumnGetter> getters = ImmutableList.builder();
489
- for (JdbcColumn c : querySchema.getColumns()) {
490
- JdbcColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), c, factory.getJdbcType(c.getSqlType()));
491
- getters.add(factory.newColumnGetter(con, task, c, columnOption));
492
- }
493
- return getters.build();
494
- }
495
-
496
- private static JdbcColumnOption columnOptionOf(Map<String, JdbcColumnOption> columnOptions, Map<String, JdbcColumnOption> defaultColumnOptions, JdbcColumn targetColumn, String targetColumnSQLType)
497
- {
498
- JdbcColumnOption columnOption = columnOptions.get(targetColumn.getName());
499
- if (columnOption == null) {
500
- String foundName = null;
501
- for (Map.Entry<String, JdbcColumnOption> entry : columnOptions.entrySet()) {
502
- if (entry.getKey().equalsIgnoreCase(targetColumn.getName())) {
503
- if (columnOption != null) {
504
- throw new ConfigException(String.format("Cannot specify column '%s' because both '%s' and '%s' exist in column_options.",
505
- targetColumn.getName(), foundName, entry.getKey()));
506
- }
507
- foundName = entry.getKey();
508
- columnOption = entry.getValue();
509
- }
510
- }
511
- }
512
-
513
- return Optional
514
- .fromNullable(columnOption)
515
- .or(Optional.fromNullable(defaultColumnOptions.get(targetColumnSQLType)))
516
- .or(
517
- // default column option
518
- new Supplier<JdbcColumnOption>()
519
- {
520
- public JdbcColumnOption get()
521
- {
522
- return Exec.newConfigSource().loadConfig(JdbcColumnOption.class);
523
- }
524
- });
525
- }
526
-
527
- private long fetch(BatchSelect cursor,
528
- List<ColumnGetter> getters, PageBuilder pageBuilder) throws SQLException
529
- {
530
- ResultSet result = cursor.fetch();
531
- if (result == null || !result.next()) {
532
- return 0;
533
- }
534
-
535
- List<Column> columns = pageBuilder.getSchema().getColumns();
536
- long rows = 0;
537
- long reportRows = 500;
538
- do {
539
- for (int i=0; i < getters.size(); i++) {
540
- int index = i + 1; // JDBC column index begins from 1
541
- getters.get(i).getAndSet(result, index, columns.get(i));
542
- }
543
- pageBuilder.addRecord();
544
- rows++;
545
- if (rows % reportRows == 0) {
546
- logger.info(String.format("Fetched %,d rows.", rows));
547
- reportRows *= 2;
548
- }
549
- } while (result.next());
550
-
551
- return rows;
552
- }
553
-
554
- //// TODO move to embulk.spi.util?
555
- //private static class ListPageOutput
556
- //{
557
- // public ImmutableList.Builder<Page> pages;
558
- //
559
- // public ListPageOutput()
560
- // {
561
- // reset();
562
- // }
563
- //
564
- // @Override
565
- // public void add(Page page)
566
- // {
567
- // pages.add(page);
568
- // }
569
- //
570
- // @Override
571
- // public void finish()
572
- // {
573
- // }
574
- //
575
- // @Override
576
- // public void close()
577
- // {
578
- // }
579
- //
580
- // public List<Page> getPages()
581
- // {
582
- // return pages.build();
583
- // }
584
- //
585
- // public void reset()
586
- // {
587
- // pages = ImmutableList.builder();
588
- // }
589
- //}
590
-
591
- protected void loadDriver(String className, Optional<String> driverPath)
592
- {
593
- if (driverPath.isPresent()) {
594
- addDriverJarToClasspath(driverPath.get());
595
- } else {
596
- try {
597
- // Gradle test task will add JDBC driver to classpath
598
- Class.forName(className);
599
-
600
- } catch (ClassNotFoundException ex) {
601
- File root = findPluginRoot();
602
- File driverLib = new File(root, "default_jdbc_driver");
603
- File[] files = driverLib.listFiles(new FileFilter() {
604
- @Override
605
- public boolean accept(File file) {
606
- return file.isFile() && file.getName().endsWith(".jar");
607
- }
608
- });
609
- if (files == null || files.length == 0) {
610
- throw new RuntimeException("Cannot find JDBC driver in '" + root.getAbsolutePath() + "'.");
611
- } else {
612
- for (File file : files) {
613
- logger.info("JDBC Driver = " + file.getAbsolutePath());
614
- addDriverJarToClasspath(file.getAbsolutePath());
615
- }
616
- }
617
- }
618
- }
619
-
620
- // Load JDBC Driver
621
- try {
622
- Class.forName(className);
623
- } catch (ClassNotFoundException ex) {
624
- throw new RuntimeException(ex);
625
- }
626
- }
627
-
628
- protected void addDriverJarToClasspath(String glob)
629
- {
630
- // TODO match glob
631
- PluginClassLoader loader = (PluginClassLoader) getClass().getClassLoader();
632
- Path path = Paths.get(glob);
633
- if (!path.toFile().exists()) {
634
- throw new ConfigException("The specified driver jar doesn't exist: " + glob);
635
- }
636
- loader.addPath(Paths.get(glob));
637
- }
638
-
639
- protected File findPluginRoot()
640
- {
641
- try {
642
- URL url = getClass().getResource("/" + getClass().getName().replace('.', '/') + ".class");
643
- if (url.toString().startsWith("jar:")) {
644
- url = new URL(url.toString().replaceAll("^jar:", "").replaceAll("![^!]*$", ""));
645
- }
646
-
647
- File folder = new File(url.toURI()).getParentFile();
648
- for (;; folder = folder.getParentFile()) {
649
- if (folder == null) {
650
- throw new RuntimeException("Cannot find 'embulk-input-xxx' folder.");
651
- }
652
-
653
- if (folder.getName().startsWith("embulk-input-")) {
654
- return folder;
655
- }
656
- }
657
- } catch (MalformedURLException | URISyntaxException e) {
658
- throw new RuntimeException(e);
659
- }
660
- }
661
-
662
- protected void logConnectionProperties(String url, Properties props)
663
- {
664
- Properties maskedProps = new Properties();
665
- for(String key : props.stringPropertyNames()) {
666
- if (key.equals("password")) {
667
- maskedProps.setProperty(key, "***");
668
- } else {
669
- maskedProps.setProperty(key, props.getProperty(key));
670
- }
671
- }
672
- logger.info("Connecting to {} options {}", url, maskedProps);
673
- }
674
- }
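The incremental-loading machinery in this removed base class is the part most worth understanding: setupTask resolves incremental_columns (falling back to the table's primary keys), run() captures the last row's values in LastRecordStore, and buildNextConfigDiff emits them as last_record so the next run resumes after that row. The WHERE clause itself is built in JdbcInputConnection, which this diff does not show; the sketch below illustrates the general lexicographic "seek" predicate such a query needs - an assumption about the shape of the condition, not the plugin's exact SQL:

import java.util.Arrays;
import java.util.List;

final class SeekPredicate {
    // [a, b] -> "(a > ?) OR (a = ? AND b > ?)"; bound left-to-right with last_record values.
    static String of(List<String> columns) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < columns.size(); i++) {
            if (i > 0) sb.append(" OR ");
            sb.append('(');
            for (int j = 0; j < i; j++) {
                sb.append(columns.get(j)).append(" = ? AND ");
            }
            sb.append(columns.get(i)).append(" > ?)");
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        // e.g. incremental_columns: [created_at, id], last_record: ["2015-01-02 00:00:02", 4711]
        System.out.println(of(Arrays.asList("created_at", "id")));
        // -> (created_at > ?) OR (created_at = ? AND id > ?)
    }
}

This shape is also why the long limit_value TODO comment above matters: with a non-unique created_at, a row inserted later with an earlier timestamp falls behind the cursor and is never picked up.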