embulk-input-jdbc 0.7.4 → 0.8.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 0f0e8890c29d644c993bd0507a380802fb4c0e2c
- data.tar.gz: 535b176cb211f3115482534f2a6b0373923c9e15
+ metadata.gz: 121506005fab6a020ff1ab8e908f07e49dd7c5fb
+ data.tar.gz: 85081ddc42add58819ac8b4a0a9a5e8e52c1f129
  SHA512:
- metadata.gz: 4f774a9727295f35f9e16c06e9fd57d6918398f44ab070a55606b62a9402fa3857b1cb1823f67ef2388e1a016c016a443d2d40bb3b41c7fda28c0f45991378bd
- data.tar.gz: 2fe9ba6cbf6d2cc768f0d641371e0a08e464ee9cbfa91bf5016bea97ac9bf1e1a24ecb56110e9fe49c22251a9f1df56c59b9115bfae73c2622f2bcc02227a37d
+ metadata.gz: 13c70bfde5dcc988607ea3f77dc84a70402b90878c7bf536ef5e5d3a2530e370b5b75f2eac04dabaabbccbbac9180db60c9612bbebfbf0116c5761adcaae22e2
+ data.tar.gz: 41c9d0e17c2a825c740d2e0467dc3b1582b8f1d8f6649d8f42d5940ef9e138be731a679a4364c5823a13bc2b46d0949fbf623c00c383a109bd2bd110609c3283
data/README.md ADDED
@@ -0,0 +1,148 @@
+ # Generic JDBC input plugin for Embulk
+
+ The generic JDBC input plugin for Embulk loads records from a database using a JDBC driver. If the database follows the ANSI SQL and JDBC standards strictly, this plugin works with it. However, because of the many incompatibilities among databases, the use cases of this plugin are limited, and it is recommended to use the database-specific plugins where available.
+
+ ## Overview
+
+ * **Plugin type**: input
+ * **Resume supported**: yes
+
+ ## Configuration
+
+ - **driver_path**: path to the jar file of the JDBC driver (e.g. 'sqlite-jdbc-3.8.7.jar') (string, optional)
+ - **driver_class**: class name of the JDBC driver (e.g. 'org.sqlite.JDBC') (string, required)
+ - **url**: URL of the JDBC connection (e.g. 'jdbc:sqlite:mydb.sqlite3') (string, required)
+ - **user**: database login user name (string, optional)
+ - **password**: database login password (string, optional)
+ - **schema**: schema name to read from (string, default: use the default schema)
+ - **fetch_rows**: number of rows to fetch at a time (integer, default: 10000)
+ - **connect_timeout**: not supported.
+ - **socket_timeout**: timeout for executing the query. 0 means no timeout. (integer (seconds), default: 1800)
+ - **options**: extra JDBC properties (hash, default: {})
+ - If you write SQL directly,
+   - **query**: SQL to run (string)
+ - If **query** is not set,
+   - **table**: table name to read from (string, required)
+   - **select**: expression of the select column list (e.g. `id, created_at`) (string, default: "*")
+   - **where**: WHERE condition to filter the rows (string, default: no condition)
+   - **order_by**: expression of ORDER BY to sort rows (e.g. `created_at DESC, id ASC`) (string, default: not sorted)
+ - **default_timezone**: If the SQL type of a column is `date`/`time`/`datetime` and the Embulk type is `string`, column values are formatted in this default_timezone. You can override the timezone for each column using the column_options option. (string, default: `UTC`)
+ - **default_column_options**: default column_options applied per JDBC type. The key is a JDBC type (e.g. 'DATE', 'BIGINT') and the value is the same as a column_options value.
+ - **column_options**: advanced: key-value pairs where the key is a column name and the value is options for that column.
+   - **value_type**: Embulk gets values from the database as this value_type. Typically, the value_type determines the `getXXX` method of `java.sql.ResultSet`. `value_type: json` is an exception: it uses `getString` and parses the result as a JSON string (see the sketch after this list).
+     (string, default: depends on the SQL type of the column. Available values: `long`, `double`, `float`, `decimal`, `boolean`, `string`, `json`, `date`, `time`, `timestamp`)
+   - **type**: Column values are converted to this Embulk type.
+     Available values: `boolean`, `long`, `double`, `string`, `json`, `timestamp`.
+     By default, the Embulk type is determined according to the SQL type of the column (or value_type if specified).
+   - **timestamp_format**: If the SQL type of the column is `date`/`time`/`datetime` and the Embulk type is `string`, column values are formatted with this timestamp_format. If the Embulk type is `timestamp`, this timestamp_format may be used by the output plugin. For example, the stdout plugin uses the timestamp_format, but the *csv formatter plugin doesn't*. (string, default: `%Y-%m-%d` for `date`, `%H:%M:%S` for `time`, `%Y-%m-%d %H:%M:%S` for `timestamp`)
+   - **timezone**: If the SQL type of the column is `date`/`time`/`datetime` and the Embulk type is `string`, column values are formatted in this timezone.
+     (string, default: the value of the default_timezone option)
+ - **after_select**: If set, this SQL is executed after the SELECT query in the same transaction.
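+
+ For example, a minimal sketch that reads a JSON text column into an Embulk `json` column could look like the following (the `settings` column name and the SQLite connection settings are illustrative, not part of any real schema):
+
+ ```yaml
+ in:
+   type: jdbc
+   driver_class: org.sqlite.JDBC
+   url: jdbc:sqlite:mydb.sqlite3
+   table: "my_table"
+   column_options:
+     # read the column with getString, parse it as JSON, and emit it as an Embulk json column
+     settings: { value_type: json, type: json }
+ ```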
+
+
+ ## Incremental loading
+
+ Incremental loading uses monotonically increasing unique columns (such as an auto-increment id) to load only the records inserted (or updated) after the last execution.
+
+ First, if `incremental: true` is set, this plugin loads all records with an additional ORDER BY. For example, if the `incremental_columns: [updated_at, id]` option is set, the query will be as follows:
+
+ ```
+ SELECT * FROM (
+   ...original query is here...
+ )
+ ORDER BY updated_at, id
+ ```
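+
+ A minimal configuration sketch that enables this mode might look like the following (the connection settings are illustrative):
+
+ ```yaml
+ in:
+   type: jdbc
+   driver_path: sqlite-jdbc-3.8.7.jar
+   driver_class: org.sqlite.JDBC
+   url: jdbc:sqlite:mydb.sqlite3
+   table: "my_table"
+   incremental: true
+   incremental_columns: [updated_at, id]
+ ```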
+
+ When bulk data loading finishes successfully, it outputs a `last_record:` parameter as a config diff so that the next execution can use it.
+
+ At the next execution, if `last_record:` is set, this plugin generates additional WHERE conditions to load only the records newer than the last record. For example, if `last_record: ["2017-01-01 00:32:12", 5291]` is set, the query will be:
+
+ ```
+ SELECT * FROM (
+   ...original query is here...
+ )
+ WHERE updated_at > '2017-01-01 00:32:12' OR (updated_at = '2017-01-01 00:32:12' AND id > 5291)
+ ORDER BY updated_at, id
+ ```
+
+ Then, it updates `last_record:` so that the next execution uses the updated last_record.
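+
+ Concretely, if the run saves a config diff (for example with `embulk run config.yml -c diff.yml`), the saved file contains roughly the following entry, which is merged into the `in:` section on the next run:
+
+ ```yaml
+ in:
+   last_record: ["2017-01-01 00:32:12", 5291]
+ ```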
+
+ **IMPORTANT**: If you set the `incremental_columns:` option, make sure that there is an index on those columns to avoid a full table scan. For this example, the following index should be created:
+
+ ```
+ CREATE INDEX embulk_incremental_loading_index ON table (updated_at, id);
+ ```
+
+ The recommended usage is to leave `incremental_columns` unset and let this plugin automatically find an auto-increment primary key. Currently, only strings and integers are supported as incremental_columns.
+
+
+ ## Example
+
+ ```yaml
+ in:
+   type: jdbc
+   driver_path: /opt/oracle/ojdbc6.jar
+   driver_class: oracle.jdbc.driver.OracleDriver
+   url: jdbc:oracle:thin:@127.0.0.1:1521:mydb
+   user: myuser
+   password: "mypassword"
+   table: "my_table"
+   select: "col1, col2, col3"
+   where: "col4 != 'a'"
+   order_by: "col1 DESC"
+ ```
+
+ This configuration generates the following SQL:
+
+ ```
+ SELECT col1, col2, col3
+ FROM "my_table"
+ WHERE col4 != 'a'
+ ORDER BY col1 DESC
+ ```
+
+ If you need complex SQL, write the query directly:
+
+ ```yaml
+ in:
+   type: jdbc
+   driver_path: /opt/oracle/ojdbc6.jar
+   driver_class: oracle.jdbc.driver.OracleDriver
+   url: jdbc:oracle:thin:@127.0.0.1:1521:mydb
+   user: myuser
+   password: "mypassword"
+   query: |
+     SELECT t1.id, t1.name, t2.id AS t2_id, t2.name AS t2_name
+     FROM table1 AS t1
+     LEFT JOIN table2 AS t2
+       ON t1.id = t2.t1_id
+ ```
+
+ Advanced configuration:
+
+ ```yaml
+ in:
+   type: jdbc
+   driver_path: /opt/oracle/ojdbc6.jar
+   driver_class: oracle.jdbc.driver.OracleDriver
+   url: jdbc:oracle:thin:@127.0.0.1:1521:mydb
+   user: myuser
+   password: "mypassword"
+   table: "my_table"
+   select: "col1, col2, col3"
+   where: "col4 != 'a'"
+   default_column_options:
+     DATE: { type: string, timestamp_format: "%Y/%m/%d", timezone: "+0900" }
+     BIGINT: { type: string }
+   column_options:
+     col1: { type: long }
+     col3: { type: string, timestamp_format: "%Y/%m/%d", timezone: "+0900" }
+   after_select: "update my_table set col5 = '1' where col4 != 'a'"
+ ```
+
+ ## Build
+
+ ```
+ $ ./gradlew gem
+ ```
data/src/test/java/org/embulk/input/AbstractJdbcInputPluginTest.java ADDED
@@ -0,0 +1,254 @@
+ package org.embulk.input;
+
+ import static java.util.Locale.ENGLISH;
+
+ import java.io.File;
+ import java.io.FileInputStream;
+ import java.io.IOException;
+ import java.io.InputStreamReader;
+ import java.net.URISyntaxException;
+ import java.nio.charset.Charset;
+ import java.sql.Connection;
+ import java.sql.ResultSet;
+ import java.sql.SQLException;
+ import java.sql.Statement;
+ import java.util.ArrayList;
+ import java.util.Collections;
+ import java.util.Comparator;
+ import java.util.List;
+ import java.util.Map;
+ import java.util.regex.Matcher;
+ import java.util.regex.Pattern;
+
+ import org.embulk.config.ConfigException;
+ import org.embulk.input.jdbc.AbstractJdbcInputPlugin;
+ import org.embulk.input.tester.EmbulkPluginTester;
+ import org.embulk.input.tester.EmbulkPluginTester.PluginDefinition;
+ import org.yaml.snakeyaml.Yaml;
+
+ import com.google.common.io.Files;
+
+ public abstract class AbstractJdbcInputPluginTest
+ {
+     private static final String CONFIG_FILE_NAME = "tests.yml";
+
+     protected boolean enabled;
+     // TODO: destroy EmbulkPluginTester after test
+     protected EmbulkPluginTester tester = new EmbulkPluginTester();
+     private String pluginName;
+     private Map<String, ?> testConfigurations;
+
+     protected AbstractJdbcInputPluginTest()
+     {
+         try {
+             prepare();
+         } catch (SQLException e) {
+             throw new RuntimeException(e);
+         }
+     }
+
+     protected abstract void prepare() throws SQLException;
+
+
+     private Map<String, ?> getTestConfigs()
+     {
+         if (testConfigurations == null) {
+             for (PluginDefinition pluginDefinition : tester.getPlugins()) {
+                 if (AbstractJdbcInputPlugin.class.isAssignableFrom(pluginDefinition.impl)) {
+                     pluginName = pluginDefinition.name;
+                     break;
+                 }
+             }
+
+             Yaml yaml = new Yaml();
+             File configFile = new File(CONFIG_FILE_NAME);
+             if (!configFile.exists()) {
+                 configFile = new File("../" + CONFIG_FILE_NAME);
+                 if (!configFile.exists()) {
+                     throw new ConfigException(String.format(ENGLISH, "\"%s\" doesn't exist.",
+                             CONFIG_FILE_NAME));
+                 }
+             }
+
+             try {
+                 InputStreamReader reader = new InputStreamReader(new FileInputStream(configFile), Charset.forName("UTF8"));
+                 try {
+                     Map<String, ?> allTestConfigs = (Map<String, ?>)yaml.load(reader);
+                     if (!allTestConfigs.containsKey(pluginName)) {
+                         throw new ConfigException(String.format(ENGLISH, "\"%s\" doesn't contain \"%s\" element.",
+                                 CONFIG_FILE_NAME, pluginName));
+                     }
+                     testConfigurations = (Map<String, ?>)allTestConfigs.get(pluginName);
+                 } finally {
+                     reader.close();
+                 }
+             } catch (IOException e) {
+                 throw new RuntimeException(e);
+             }
+         }
+         return testConfigurations;
+     }
+
+     protected Object getTestConfig(String name, boolean required)
+     {
+         Map<String, ?> testConfigs = getTestConfigs();
+         if (!testConfigs.containsKey(name)) {
+             if (required) {
+                 throw new ConfigException(String.format(ENGLISH, "\"%s\" element in \"%s\" doesn't contain \"%s\" element.",
+                         pluginName, CONFIG_FILE_NAME, name));
+             }
+             return null;
+         }
+         return testConfigs.get(name);
+     }
+
+     protected Object getTestConfig(String name)
+     {
+         return getTestConfig(name, true);
+     }
+
+     protected String getHost()
+     {
+         return (String)getTestConfig("host");
+     }
+
+     protected int getPort()
+     {
+         return (Integer)getTestConfig("port");
+     }
+
+     protected String getUser()
+     {
+         return (String)getTestConfig("user");
+     }
+
+     protected String getPassword()
+     {
+         return (String)getTestConfig("password");
+     }
+
+     protected String getDatabase()
+     {
+         return (String)getTestConfig("database");
+     }
+
+     protected void dropTable(String table) throws SQLException
+     {
+         String sql = String.format("DROP TABLE %s", table);
+         executeSQL(sql, true);
+     }
+
+     protected List<List<Object>> select(String table) throws SQLException
+     {
+         try (Connection connection = connect()) {
+             try (Statement statement = connection.createStatement()) {
+                 List<List<Object>> rows = new ArrayList<List<Object>>();
+                 String sql = String.format("SELECT * FROM %s", table);
+                 System.out.println(sql);
+                 try (ResultSet resultSet = statement.executeQuery(sql)) {
+                     while (resultSet.next()) {
+                         List<Object> row = new ArrayList<Object>();
+                         for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) {
+                             row.add(getValue(resultSet, i));
+                         }
+                         rows.add(row);
+                     }
+                 }
+                 // cannot sort by CLOB in SQL, so sort in Java
+                 Collections.sort(rows, new Comparator<List<Object>>() {
+                     @Override
+                     public int compare(List<Object> o1, List<Object> o2) {
+                         return o1.toString().compareTo(o2.toString());
+                     }
+                 });
+                 return rows;
+             }
+         }
+
+     }
+
+     protected Object getValue(ResultSet resultSet, int index) throws SQLException
+     {
+         return resultSet.getObject(index);
+     }
+
+     protected void executeSQL(String sql) throws SQLException
+     {
+         executeSQL(sql, false);
+     }
+
+     protected void executeSQL(String sql, boolean ignoreError) throws SQLException
+     {
+         if (!enabled) {
+             return;
+         }
+
+         try (Connection connection = connect()) {
+             try {
+                 connection.setAutoCommit(true);
+
+                 try (Statement statement = connection.createStatement()) {
+                     System.out.println(String.format("Execute SQL : \"%s\".", sql));
+                     statement.execute(sql);
+                 }
+
+             } catch (SQLException e) {
+                 if (!ignoreError) {
+                     throw e;
+                 }
+             }
+         }
+     }
+
+     protected void test(String ymlPath) throws Exception
+     {
+         if (!enabled) {
+             return;
+         }
+
+         tester.run(convertYml(ymlPath));
+     }
+
+     protected String convertYml(String ymlName) throws Exception
+     {
+         StringBuilder builder = new StringBuilder();
+         Pattern pathPrefixPattern = Pattern.compile("^ *path(_prefix)?: '(.*)'$");
+         for (String line : Files.readLines(convertPath(ymlName), Charset.forName("UTF8"))) {
+             line = convertYmlLine(line);
+             Matcher matcher = pathPrefixPattern.matcher(line);
+             if (matcher.matches()) {
+                 int group = 2;
+                 builder.append(line.substring(0, matcher.start(group)));
+                 builder.append(convertPath(matcher.group(group)).getAbsolutePath());
+                 builder.append(line.substring(matcher.end(group)));
+             } else {
+                 builder.append(line);
+             }
+             builder.append(System.lineSeparator());
+         }
+         return builder.toString();
+     }
+
+     protected String convertYmlLine(String line)
+     {
+         line = line.replaceAll("#host#", getHost());
+         line = line.replaceAll("#port#", Integer.toString(getPort()));
+         line = line.replaceAll("#database#", getDatabase());
+         line = line.replaceAll("#user#", getUser());
+         line = line.replaceAll("#password#", getPassword());
+         return line;
+     }
+
+     protected File convertPath(String name) throws URISyntaxException
+     {
+         return new File(getClass().getResource(name).toURI());
+     }
+
+     protected List<String> read(String path) throws IOException
+     {
+         return Files.readLines(new File(path), Charset.forName("UTF8"));
+     }
+
+     protected abstract Connection connect() throws SQLException;
+
+ }
data/src/test/java/org/embulk/input/tester/EmbulkPluginTester.java RENAMED
@@ -1,6 +1,5 @@
- package org.embulk.input;
+ package org.embulk.input.tester;
 
- import java.io.File;
  import java.util.ArrayList;
  import java.util.List;
 
@@ -14,7 +13,7 @@ import com.google.inject.Module;
 
  public class EmbulkPluginTester
  {
-     private static class PluginDefinition
+     public static class PluginDefinition
      {
          public final Class<?> iface;
          public final String name;
@@ -48,7 +47,12 @@ public class EmbulkPluginTester
          plugins.add(new PluginDefinition(iface, name, impl));
      }
 
-     public void run(String ymlPath) throws Exception
+     public List<PluginDefinition> getPlugins()
+     {
+         return plugins;
+     }
+
+     public void run(String yml) throws Exception
      {
          if (embulk == null) {
              Bootstrap bootstrap = new EmbulkEmbed.Bootstrap();
@@ -64,8 +68,7 @@ public class EmbulkPluginTester
              });
              embulk = bootstrap.initializeCloseable();
          }
-
-         ConfigSource config = embulk.newConfigLoader().fromYamlFile(new File(ymlPath));
+         ConfigSource config = embulk.newConfigLoader().fromYamlString(yml);
          embulk.run(config);
      }
 
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: embulk-input-jdbc
  version: !ruby/object:Gem::Version
- version: 0.7.4
+ version: 0.8.0
  platform: ruby
  authors:
  - Sadayuki Furuhashi
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2016-09-28 00:00:00.000000000 Z
+ date: 2016-10-13 00:00:00.000000000 Z
  dependencies: []
  description: Selects records from a table.
  email:
@@ -17,8 +17,9 @@ executables: []
  extensions: []
  extra_rdoc_files: []
  files:
+ - README.md
  - build.gradle
- - classpath/embulk-input-jdbc-0.7.4.jar
+ - classpath/embulk-input-jdbc-0.8.0.jar
  - lib/embulk/input/jdbc.rb
  - src/main/java/org/embulk/input/JdbcInputPlugin.java
  - src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java
@@ -43,7 +44,8 @@ files:
  - src/main/java/org/embulk/input/jdbc/getter/StringColumnGetter.java
  - src/main/java/org/embulk/input/jdbc/getter/TimeColumnGetter.java
  - src/main/java/org/embulk/input/jdbc/getter/TimestampColumnGetter.java
- - src/test/java/org/embulk/input/EmbulkPluginTester.java
+ - src/test/java/org/embulk/input/AbstractJdbcInputPluginTest.java
+ - src/test/java/org/embulk/input/tester/EmbulkPluginTester.java
  homepage: https://github.com/embulk/embulk-input-jdbc
  licenses:
  - Apache 2.0