embulk-input-mysql 0.7.2 → 0.7.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ce1cc0bda9fc6b7d3274330e039a3cc8eed4bab5
4
- data.tar.gz: bbd9954bd61e347fbd46fcad186f301e43bb75e7
3
+ metadata.gz: df7e77fb9710e59a758f75ecb6a799d1c70f138a
4
+ data.tar.gz: f0ca6f0272e329eefbf63730cd70060b1aa4f6b0
5
5
  SHA512:
6
- metadata.gz: 110b37febae29f93ec4eb6e6336428356c1a2108474cb3b62b96e542596cd69fd3b6d2a7bb14c520e98c9fada48b0a23ac954e671912bffc3e403282efa4dfb5
7
- data.tar.gz: 24112e432d231826a3adb77963314af742ab7632345cfe8ea1693d04a0b9b7183e099d806176193833f222335c251201d354f9a267a3ae2ce0493d34b1aafcf2
6
+ metadata.gz: a2d992492d49c4c0bcbc58424a130e85514fc9e79cd2ac9d9180cdc0b09ef8c341cd7b1d4d57cec4af41399eec54ff39477938a7a10bcf40c499396cc081a4c7
7
+ data.tar.gz: ab3fc7378700e980c9ccf4cfcb3a907abfba3ae17873fb0c572e6682e3e807ce088e96a604caa31ef76226dbbf5512499b83ccc457833d2ac4b2230096d880f2
data/README.md CHANGED
@@ -18,9 +18,9 @@ MySQL input plugins for Embulk loads records from MySQL.
18
18
  - **query**: SQL to run (string)
19
19
  - If **query** is not set,
20
20
  - **table**: source table name to read records from (string, required)
21
- - **select**: comma-separated list of columns to select (string, default: "*")
21
+ - **select**: expression of select (e.g. `id, created_at`) (string, default: "*")
22
22
  - **where**: WHERE condition to filter the rows (string, default: no-condition)
23
- - **order_by**: name of the column that rows are sorted by (string, default: not sorted)
23
+ - **order_by**: expression of ORDER BY to sort rows (e.g. `created_at DESC, id ASC`) (string, default: not sorted)
24
24
  - **fetch_rows**: number of rows to fetch one time (integer, default: 10000)
25
25
  - If this value is set to > 1:
26
26
  - It uses a server-side prepared statement and fetches rows by chunks.
@@ -34,6 +34,9 @@ MySQL input plugins for Embulk loads records from MySQL.
34
34
  - **connect_timeout**: timeout for socket connect. 0 means no timeout. (integer (seconds), default: 300)
35
35
  - **socket_timeout**: timeout on network socket operations. 0 means no timeout. (integer (seconds), default: 1800)
36
36
  - **options**: extra JDBC properties (hash, default: {})
37
+ - **incremental**: if true, enables incremental loading. See next section for details (boolean, default: false)
38
+ - **incremental_columns**: column names for incremental loading (array of strings, default: use primary keys)
39
+ - **last_record**: values of the last record for incremental loading (array of objects, default: load all records)
37
40
  - **default_timezone**: If the sql type of a column is `date`/`time`/`datetime` and the embulk type is `string`, column values are formatted in this default_timezone. You can overwrite the timezone for each column using the column_options option. (string, default: `UTC`)
38
41
  - **column_options**: advanced: key-value pairs where the key is a column name and the value is the options for that column.
39
42
  - **value_type**: embulk gets values from the database as this value_type. Typically, the value_type determines which `getXXX` method of `java.sql.ResultSet` is used.
@@ -46,6 +49,43 @@ MySQL input plugins for Embulk loads records from MySQL.
46
49
  (string, value of default_timezone option is used by default)
47
50
  - **after_select**: if set, this SQL will be executed after the SELECT query in the same transaction.
48
51
 
52
+
53
+ ## Incremental loading
54
+
55
+ Incremental loading uses monotonically increasing unique columns (such as AUTO_INCREMENT column) to load records inserted (or updated) after last execution.
56
+
57
+ First, if `incremental: true` is set, this plugin loads all records with an additional ORDER BY. For example, if the `incremental_columns: [updated_at, id]` option is set, the query will be as follows:
58
+
59
+ ```
60
+ SELECT * FROM (
61
+ ...original query is here...
62
+ )
63
+ ORDER BY updated_at, id
64
+ ```
65
+
66
+ When bulk data loading finishes successfully, it outputs the `last_record: ` parameter as config-diff so that the next execution uses it.
67
+
68
+ At the next execution, when `last_record: ` is also set, this plugin generates additional WHERE conditions to load records larger than the last record. For example, if `last_record: ["2017-01-01 00:32:12", 5291]` is set,
69
+
70
+ ```
71
+ SELECT * FROM (
72
+ ...original query is here...
73
+ )
74
+ WHERE updated_at > '2017-01-01 00:32:12' OR (updated_at = '2017-01-01 00:32:12' AND id > 5291)
75
+ ORDER BY updated_at, id
76
+ ```
77
+
78
+ Then, it updates `last_record: ` so that next execution uses the updated last_record.
79
+
80
+ **IMPORTANT**: If you set the `incremental_columns: ` option, make sure that there is an index on the columns to avoid a full table scan. For this example, the following index should be created:
81
+
82
+ ```
83
+ CREATE INDEX embulk_incremental_loading_index ON table (updated_at, id);
84
+ ```
85
+
86
+ The recommended usage is to leave `incremental_columns` unset and let this plugin automatically find an AUTO_INCREMENT primary key. Currently, only strings and integers are supported as incremental_columns.
87
+
88
+
49
89
  ## Example
50
90
 
51
91
  ```yaml
@@ -58,6 +98,16 @@ in:
58
98
  table: my_table
59
99
  select: "col1, col2, col3"
60
100
  where: "col4 != 'a'"
101
+ order_by: "col1 DESC"
102
+ ```
103
+
104
+ This configuration will generate the following SQL:
105
+
106
+ ```
107
+ SELECT col1, col2, col3
108
+ FROM `my_table`
109
+ WHERE col4 != 'a'
110
+ ORDER BY col1 DESC
61
111
  ```
62
112
 
63
113
  If you need a complex SQL,
@@ -1,10 +1,13 @@
1
1
  package org.embulk.input.mysql;
2
2
 
3
+ import java.util.List;
3
4
  import java.sql.Connection;
4
5
  import java.sql.PreparedStatement;
5
6
  import java.sql.SQLException;
6
7
  import java.sql.ResultSet;
7
8
  import org.embulk.input.jdbc.JdbcInputConnection;
9
+ import org.embulk.input.jdbc.JdbcLiteral;
10
+ import org.embulk.input.jdbc.getter.ColumnGetter;
8
11
 
9
12
  public class MySQLInputConnection
10
13
  extends JdbcInputConnection
@@ -16,10 +19,19 @@ public class MySQLInputConnection
16
19
  }
17
20
 
18
21
  @Override
19
- protected BatchSelect newBatchSelect(String select, int fetchRows, int queryTimeout) throws SQLException
22
+ protected BatchSelect newBatchSelect(PreparedQuery preparedQuery,
23
+ List<ColumnGetter> getters,
24
+ int fetchRows, int queryTimeout) throws SQLException
20
25
  {
21
- logger.info("SQL: " + select);
22
- PreparedStatement stmt = connection.prepareStatement(select, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); // TYPE_FORWARD_ONLY and CONCUR_READ_ONLY are default
26
+ String query = preparedQuery.getQuery();
27
+ List<JdbcLiteral> params = preparedQuery.getParameters();
28
+
29
+ logger.info("SQL: " + query);
30
+ PreparedStatement stmt = connection.prepareStatement(query, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); // TYPE_FORWARD_ONLY and CONCUR_READ_ONLY are default
31
+ if (!params.isEmpty()) {
32
+ logger.info("Parameters: {}", params);
33
+ prepareParameters(stmt, getters, params);
34
+ }
23
35
  if (fetchRows == 1) {
24
36
  // See MySQLInputPlugin.newConnection doesn't set useCursorFetch=true when fetchRows=1
25
37
  // MySQL Connector/J keeps the connection opened and process rows one by one with Integer.MIN_VALUE.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-mysql
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.2
4
+ version: 0.7.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-24 00:00:00.000000000 Z
11
+ date: 2016-08-26 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Selects records from a table.
14
14
  email:
@@ -19,6 +19,9 @@ extra_rdoc_files: []
19
19
  files:
20
20
  - README.md
21
21
  - build.gradle
22
+ - classpath/embulk-input-jdbc-0.7.3.jar
23
+ - classpath/embulk-input-mysql-0.7.3.jar
24
+ - classpath/mysql-connector-java-5.1.34.jar
22
25
  - lib/embulk/input/mysql.rb
23
26
  - src/main/java/org/embulk/input/MySQLInputPlugin.java
24
27
  - src/main/java/org/embulk/input/mysql/MySQLInputConnection.java
@@ -33,9 +36,6 @@ files:
33
36
  - src/test/resources/mysql/yml/input-valuetype-decimal.yml
34
37
  - src/test/resources/mysql/yml/input-valuetype-string.yml
35
38
  - src/test/resources/mysql/yml/input.yml
36
- - classpath/embulk-input-jdbc-0.7.2.jar
37
- - classpath/embulk-input-mysql-0.7.2.jar
38
- - classpath/mysql-connector-java-5.1.34.jar
39
39
  homepage: https://github.com/embulk/embulk-input-jdbc
40
40
  licenses:
41
41
  - Apache 2.0
@@ -46,17 +46,17 @@ require_paths:
46
46
  - lib
47
47
  required_ruby_version: !ruby/object:Gem::Requirement
48
48
  requirements:
49
- - - '>='
49
+ - - ">="
50
50
  - !ruby/object:Gem::Version
51
51
  version: '0'
52
52
  required_rubygems_version: !ruby/object:Gem::Requirement
53
53
  requirements:
54
- - - '>='
54
+ - - ">="
55
55
  - !ruby/object:Gem::Version
56
56
  version: '0'
57
57
  requirements: []
58
58
  rubyforge_project:
59
- rubygems_version: 2.1.9
59
+ rubygems_version: 2.4.8
60
60
  signing_key:
61
61
  specification_version: 4
62
62
  summary: JDBC input plugin for Embulk