embulk-input-redshift 0.7.2 → 0.7.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 08e6e18498c2526a2931f5535a555f747d4bbf0e
4
- data.tar.gz: 142340e8d88ac8780107dbdf6ec16020cd4d91aa
3
+ metadata.gz: 3dcd3503c3056643c3e889768ae5a54adfd37db5
4
+ data.tar.gz: 8cfd88e81e904dee60a8dcd2f9c95dec4a299e84
5
5
  SHA512:
6
- metadata.gz: dcc6d6e8db32b1d28779e48cf0aca9b07d8c0de1477a1cf52f1db913acd30cd7b9392457e6d180b3a2736fe49d764cf8c37d6856c153947b443458a1a0422059
7
- data.tar.gz: c0d64afefba4d4883cb62d81f79a3546e525f7e0eac7cac6bfe20d522c59c817c7b18e47bdb5fee8eb9030b4522cf81ffa2f00b0f13ed2d9281beae0d319595c
6
+ metadata.gz: 7466fc607e1d42fc6bd7fc4221ebeef47796681683ff4c573db65443bc6d2b9a8ff0d6eea4fd3e100e973faa88acb0d763c1a57da617c74303d421eb1bdb1ac4
7
+ data.tar.gz: 7394c561b93a2eda161b8eddba85c20f212f836a03b2acc7d148ed148f77c0c6dd961b2cd1f690807e4ed493eabff17edf2537b39d80edf449fa94c33736b0fa
data/README.md CHANGED
@@ -19,14 +19,18 @@ Redshift input plugins for Embulk loads records from Redshift.
19
19
  - **fetch_rows**: number of rows to fetch one time (used for java.sql.Statement#setFetchSize) (integer, default: 10000)
20
20
  - **connect_timeout**: timeout for establishment of a database connection. (integer (seconds), default: 300)
21
21
  - **socket_timeout**: timeout for socket read operations. 0 means no timeout. (integer (seconds), default: 1800)
22
+ - **ssl**: enables SSL. Data will be encrypted, but the server certificate and its CA will not be verified (boolean, default: false)
22
23
  - **options**: extra JDBC properties (hash, default: {})
23
24
  - If you write SQL directly,
24
25
  - **query**: SQL to run (string)
25
26
  - If **query** is not set,
26
27
  - **table**: destination table name (string, required)
27
- - **select**: comma-separated list of columns to select (string, default: "*")
28
+ - **select**: expression of select (e.g. `id, created_at`) (string, default: "*")
28
29
  - **where**: WHERE condition to filter the rows (string, default: no-condition)
29
- - **order_by**: name of the column that rows are sorted by (string, default: not sorted)
30
+ - **order_by**: expression of ORDER BY to sort rows (e.g. `created_at DESC, id ASC`) (string, default: not sorted)
31
+ - **incremental**: if true, enables incremental loading. See next section for details (boolean, default: false)
32
+ - **incremental_columns**: column names for incremental loading (array of strings, default: use primary keys)
33
+ - **last_record**: values of the last record for incremental loading (array of objects, default: load all records)
30
34
  - **default_timezone**: If the sql type of a column is `date`/`time`/`datetime` and the embulk type is `string`, column values are formatted in this default_timezone. You can overwrite the timezone for each column using the column_options option. (string, default: `UTC`)
31
35
  - **column_options**: advanced: a key-value pairs where key is a column name and value is options for the column.
32
36
  - **value_type**: embulk get values from database as this value_type. Typically, the value_type determines `getXXX` method of `java.sql.PreparedStatement`.
@@ -39,6 +43,43 @@ Redshift input plugins for Embulk loads records from Redshift.
39
43
  (string, value of default_timezone option is used by default)
40
44
  - **after_select**: if set, this SQL will be executed after the SELECT query in the same transaction.
41
45
 
46
+
47
+ ### Incremental loading
48
+
49
+ Incremental loading uses monotonically increasing unique columns (such as auto-increment (IDENTITY) column) to load records inserted (or updated) after last execution.
50
+
51
+ First, if `incremental: true` is set, this plugin loads all records with an additional ORDER BY. For example, if the `incremental_columns: [updated_at, id]` option is set, the query will be as follows:
52
+
53
+ ```
54
+ SELECT * FROM (
55
+ ...original query is here...
56
+ )
57
+ ORDER BY updated_at, id
58
+ ```
59
+
60
+ When bulk data loading finishes successfully, it outputs the `last_record: ` parameter as config-diff so that the next execution uses it.
61
+
62
+ At the next execution, when `last_record: ` is also set, this plugin generates additional WHERE conditions to load records larger than the last record. For example, if `last_record: ["2017-01-01 00:32:12", 5291]` is set,
63
+
64
+ ```
65
+ SELECT * FROM (
66
+ ...original query is here...
67
+ )
68
+ WHERE updated_at > '2017-01-01 00:32:12' OR (updated_at = '2017-01-01 00:32:12' AND id > 5291)
69
+ ORDER BY updated_at, id
70
+ ```
71
+
72
+ Then, it updates `last_record: ` so that next execution uses the updated last_record.
73
+
74
+ **IMPORTANT**: If you set `incremental_columns: ` option, make sure that there is an index on the columns to avoid full table scan. For this example, following index should be created:
75
+
76
+ ```
77
+ CREATE INDEX embulk_incremental_loading_index ON table (updated_at, id);
78
+ ```
79
+
80
+ Recommended usage is to leave `incremental_columns` unset and let this plugin automatically find an auto-increment (IDENTITY) primary key. Currently, only strings and integers are supported as incremental_columns.
81
+
82
+
42
83
  ## Example
43
84
 
44
85
  ```yaml
@@ -51,6 +92,16 @@ in:
51
92
  table: my_table
52
93
  select: "col1, col2, col3"
53
94
  where: "col4 != 'a'"
95
+ order_by: "col1 DESC"
96
+ ```
97
+
98
+ This configuration will generate the following SQL:
99
+
100
+ ```
101
+ SELECT col1, col2, col3
102
+ FROM "my_table"
103
+ WHERE col4 != 'a'
104
+ ORDER BY col1 DESC
54
105
  ```
55
106
 
56
107
  If you need a complex SQL,
@@ -39,6 +39,10 @@ public class RedshiftInputPlugin
39
39
  @Config("schema")
40
40
  @ConfigDefault("\"public\"")
41
41
  public String getSchema();
42
+
43
+ @Config("ssl")
44
+ @ConfigDefault("false")
45
+ public boolean getSsl();
42
46
  }
43
47
 
44
48
  @Override
@@ -65,16 +69,13 @@ public class RedshiftInputPlugin
65
69
  // Socket options TCP_KEEPCNT, TCP_KEEPIDLE, and TCP_KEEPINTVL are not configurable.
66
70
  props.setProperty("tcpKeepAlive", "true");
67
71
 
68
- // TODO
69
- //switch t.getSssl() {
70
- //when "disable":
71
- // break;
72
- //when "enable":
73
- // props.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory"); // disable server-side validation
74
- //when "verify":
75
- // props.setProperty("ssl", "true");
76
- // break;
77
- //}
72
+ if (t.getSsl()) {
73
+ // TODO add ssl_verify (boolean) option to allow users to verify certification.
74
+ // see embulk-input-ftp for SSL implementation.
75
+ props.setProperty("ssl", "true");
76
+ props.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory"); // disable server-side validation
77
+ }
78
+ // Do not set ssl=false when SSL is disabled: even ssl=false enables SSL, so the property must be left unset. See org.postgresql.core.v3.openConnectionImpl.
78
79
 
79
80
  props.putAll(t.getOptions());
80
81
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-redshift
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.2
4
+ version: 0.7.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-24 00:00:00.000000000 Z
11
+ date: 2016-08-26 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Selects records from a table.
14
14
  email:
@@ -19,12 +19,12 @@ extra_rdoc_files: []
19
19
  files:
20
20
  - README.md
21
21
  - build.gradle
22
+ - classpath/embulk-input-jdbc-0.7.3.jar
23
+ - classpath/embulk-input-postgresql-0.7.3.jar
24
+ - classpath/embulk-input-redshift-0.7.3.jar
25
+ - classpath/postgresql-9.4-1205-jdbc41.jar
22
26
  - lib/embulk/input/redshift.rb
23
27
  - src/main/java/org/embulk/input/RedshiftInputPlugin.java
24
- - classpath/embulk-input-jdbc-0.7.2.jar
25
- - classpath/embulk-input-postgresql-0.7.2.jar
26
- - classpath/embulk-input-redshift-0.7.2.jar
27
- - classpath/postgresql-9.4-1205-jdbc41.jar
28
28
  homepage: https://github.com/embulk/embulk-input-jdbc
29
29
  licenses:
30
30
  - Apache 2.0
@@ -35,17 +35,17 @@ require_paths:
35
35
  - lib
36
36
  required_ruby_version: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  required_rubygems_version: !ruby/object:Gem::Requirement
42
42
  requirements:
43
- - - '>='
43
+ - - ">="
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
46
  requirements: []
47
47
  rubyforge_project:
48
- rubygems_version: 2.1.9
48
+ rubygems_version: 2.4.8
49
49
  signing_key:
50
50
  specification_version: 4
51
51
  summary: JDBC input plugin for Embulk
Binary file