embulk-output-redshift 0.4.1 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +104 -104
- data/build.gradle +9 -9
- data/classpath/{aws-java-sdk-core-1.9.17.jar → aws-java-sdk-core-1.10.33.jar} +0 -0
- data/classpath/aws-java-sdk-kms-1.10.33.jar +0 -0
- data/classpath/aws-java-sdk-s3-1.10.33.jar +0 -0
- data/classpath/aws-java-sdk-sts-1.10.33.jar +0 -0
- data/classpath/embulk-output-jdbc-0.4.2.jar +0 -0
- data/classpath/embulk-output-postgresql-0.4.2.jar +0 -0
- data/classpath/{embulk-output-redshift-0.4.1.jar → embulk-output-redshift-0.4.2.jar} +0 -0
- data/classpath/{httpclient-4.3.4.jar → httpclient-4.3.6.jar} +0 -0
- data/classpath/{httpcore-4.3.2.jar → httpcore-4.3.3.jar} +0 -0
- data/classpath/postgresql-9.4-1205-jdbc41.jar +0 -0
- data/lib/embulk/output/redshift.rb +3 -3
- data/src/main/java/org/embulk/output/RedshiftOutputPlugin.java +151 -151
- data/src/main/java/org/embulk/output/redshift/RedshiftCopyBatchInsert.java +218 -218
- data/src/main/java/org/embulk/output/redshift/RedshiftOutputConnection.java +122 -122
- data/src/main/java/org/embulk/output/redshift/RedshiftOutputConnector.java +40 -40
- metadata +12 -17
- data/classpath/aws-java-sdk-kms-1.9.17.jar +0 -0
- data/classpath/aws-java-sdk-s3-1.9.17.jar +0 -0
- data/classpath/aws-java-sdk-sts-1.9.17.jar +0 -0
- data/classpath/embulk-output-jdbc-0.4.1.jar +0 -0
- data/classpath/embulk-output-postgresql-0.4.1.jar +0 -0
- data/classpath/jna-4.1.0.jar +0 -0
- data/classpath/jna-platform-4.1.0.jar +0 -0
- data/classpath/joda-time-2.8.1.jar +0 -0
- data/classpath/postgresql-9.4-1200-jdbc41.jar +0 -0
- data/classpath/slf4j-simple-1.7.7.jar +0 -0
- data/classpath/waffle-jna-1.7.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 43952fe32c9c9396b32eaf636cf1dbac99e3f522
|
4
|
+
data.tar.gz: e161ee0aa8294d5ad6227bc26c88c1513ff23b2c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 49f5eebc0dc559434672bd3bf7d38eb05172ee306160a64d558d98333f7e2fb347e38f95e223ae47bb828a3ecbca7dd0d29a986f41279589c8da913401380cbb
|
7
|
+
data.tar.gz: 39ad87b3d9beb2de22332251128a36cf0e139113eceda458686541307ad6a540287e332b01ebaa7be7311eceb09bdb638c2d16059443748bd10e0b61b9283787
|
data/README.md
CHANGED
@@ -1,104 +1,104 @@
|
|
1
|
-
# Redshift output plugins for Embulk
|
2
|
-
|
3
|
-
Redshift output plugins for Embulk loads records to Redshift.
|
4
|
-
|
5
|
-
## Overview
|
6
|
-
|
7
|
-
* **Plugin type**: output
|
8
|
-
* **Load all or nothing**: depends on the mode. See below.
|
9
|
-
* **Resume supported**: depends on the mode. See below.
|
10
|
-
|
11
|
-
## Configuration
|
12
|
-
|
13
|
-
- **host**: database host name (string, required)
|
14
|
-
- **port**: database port number (integer, default: 5439)
|
15
|
-
- **user**: database login user name (string, required)
|
16
|
-
- **password**: database login password (string, default: "")
|
17
|
-
- **database**: destination database name (string, required)
|
18
|
-
- **schema**: destination schema name (string, default: "public")
|
19
|
-
- **table**: destination table name (string, required)
|
20
|
-
- **access_key_id**: access key id for AWS
|
21
|
-
- **secret_access_key**: secret access key for AWS
|
22
|
-
- **iam_user_name**: IAM user name for uploading temporary files to S3. The user should have permissions of `s3:GetObject`, `s3:PutObject`, `s3:ListBucket` and `sts:GetFederationToken`.
|
23
|
-
- **s3_bucket**: S3 bucket name for temporary files
|
24
|
-
- **s3_key_prefix**: S3 key prefix for temporary files (string, default: "")
|
25
|
-
- **options**: extra connection properties (hash, default: {})
|
26
|
-
- **mode**: "replace" or "insert" (string, required)
|
27
|
-
- **batch_size**: size of a single batch insert (integer, default: 16777216)
|
28
|
-
- **default_timezone**: If input column type (embulk type) is timestamp, this plugin needs to format the timestamp into a SQL string. This default_timezone option is used to control the timezone. You can overwrite timezone for each columns using column_options option. (string, default: `UTC`)
|
29
|
-
- **column_options**: advanced: a key-value pairs where key is a column name and value is options for the column.
|
30
|
-
- **type**: type of a column when this plugin creates new tables (e.g. `VARCHAR(255)`, `INTEGER NOT NULL UNIQUE`). This used when this plugin creates intermediate tables (insert, truncate_insert and merge modes), when it creates the target table (insert_direct and replace modes), and when it creates nonexistent target table automatically. (string, default: depends on input column type. `BIGINT` if input column type is long, `BOOLEAN` if boolean, `DOUBLE PRECISION` if double, `CLOB` if string, `TIMESTAMP` if timestamp)
|
31
|
-
- **value_type**: This plugin converts input column type (embulk type) into a database type to build a INSERT statement. This value_type option controls the type of the value in a INSERT statement. (string, default: depends on input column type. Available values options are: `byte`, `short`, `int`, `long`, `double`, `float`, `boolean`, `string`, `nstring`, `date`, `time`, `timestamp`, `decimal`, `null`, `pass`)
|
32
|
-
- **timestamp_format**: If input column type (embulk type) is timestamp and value_type is `string` or `nstring`, this plugin needs to format the timestamp value into a string. This timestamp_format option is used to control the format of the timestamp. (string, default: `%Y-%m-%d %H:%M:%S.%6N`)
|
33
|
-
- **timezone**: If input column type (embulk type) is timestamp, this plugin needs to format the timestamp value into a SQL string. In this cases, this timezone option is used to control the timezone. (string, value of default_timezone option is used by default)
|
34
|
-
|
35
|
-
### Modes
|
36
|
-
|
37
|
-
* **insert**:
|
38
|
-
* Behavior: This mode writes rows to some intermediate tables first. If all those tasks run correctly, runs `INSERT INTO <target_table> SELECT * FROM <intermediate_table_1> UNION ALL SELECT * FROM <intermediate_table_2> UNION ALL ...` query.
|
39
|
-
* Transactional: Yes. This mode successfully writes all rows, or fails with writing zero rows.
|
40
|
-
* Resumable: Yes.
|
41
|
-
* **insert_direct**:
|
42
|
-
* Behavior: This mode inserts rows to the target table directly.
|
43
|
-
* Transactional: No. If fails, the target table could have some rows inserted.
|
44
|
-
* Resumable: No.
|
45
|
-
* **truncate_insert**:
|
46
|
-
* Behavior: Same with `insert` mode excepting that it truncates the target table right before the last `INSERT ...` query.
|
47
|
-
* Transactional: Yes.
|
48
|
-
* Resumable: Yes.
|
49
|
-
* **merge**:
|
50
|
-
* Behavior: This mode writes rows to some intermediate tables first. If all those tasks run correctly, runs `with updated AS (UPDATE .... RETURNING ...) INSERT INTO ....` query.
|
51
|
-
* Transactional: Yes.
|
52
|
-
* Resumable: Yes.
|
53
|
-
* **replace**:
|
54
|
-
* Behavior: Same with `insert` mode excepting that it truncates the target table right before the last `INSERT ...` query.
|
55
|
-
* Transactional: Yes.
|
56
|
-
* Resumable: No.
|
57
|
-
|
58
|
-
### Example
|
59
|
-
|
60
|
-
```yaml
|
61
|
-
out:
|
62
|
-
type: redshift
|
63
|
-
host: myinstance.us-west-2.redshift.amazonaws.com
|
64
|
-
user: pg
|
65
|
-
password: ""
|
66
|
-
database: my_database
|
67
|
-
table: my_table
|
68
|
-
access_key_id: ABCXYZ123ABCXYZ123
|
69
|
-
secret_access_key: AbCxYz123aBcXyZ123
|
70
|
-
iam_user_name: my-s3-read-only
|
71
|
-
s3_bucket: my-redshift-transfer-bucket
|
72
|
-
s3_key_prefix: temp/redshift
|
73
|
-
mode: insert
|
74
|
-
```
|
75
|
-
|
76
|
-
Advanced configuration:
|
77
|
-
|
78
|
-
```yaml
|
79
|
-
out:
|
80
|
-
type: redshift
|
81
|
-
host: myinstance.us-west-2.redshift.amazonaws.com
|
82
|
-
user: pg
|
83
|
-
password: ""
|
84
|
-
database: my_database
|
85
|
-
table: my_table
|
86
|
-
access_key_id: ABCXYZ123ABCXYZ123
|
87
|
-
secret_access_key: AbCxYz123aBcXyZ123
|
88
|
-
iam_user_name: my-s3-read-only
|
89
|
-
s3_bucket: my-redshift-transfer-bucket
|
90
|
-
s3_key_prefix: temp/redshift
|
91
|
-
options: {loglevel: 2}
|
92
|
-
mode: insert_direct
|
93
|
-
column_options:
|
94
|
-
my_col_1: {type: 'VARCHAR(255)'}
|
95
|
-
my_col_3: {type: 'INT NOT NULL'}
|
96
|
-
my_col_4: {value_type: string, timestamp_format: `%Y-%m-%d %H:%M:%S %z`, timezone: '-0700'}
|
97
|
-
my_col_5: {type: 'DECIMAL(18,9)', value_type: pass}
|
98
|
-
```
|
99
|
-
|
100
|
-
### Build
|
101
|
-
|
102
|
-
```
|
103
|
-
$ ./gradlew gem
|
104
|
-
```
|
1
|
+
# Redshift output plugins for Embulk
|
2
|
+
|
3
|
+
Redshift output plugins for Embulk loads records to Redshift.
|
4
|
+
|
5
|
+
## Overview
|
6
|
+
|
7
|
+
* **Plugin type**: output
|
8
|
+
* **Load all or nothing**: depends on the mode. See below.
|
9
|
+
* **Resume supported**: depends on the mode. See below.
|
10
|
+
|
11
|
+
## Configuration
|
12
|
+
|
13
|
+
- **host**: database host name (string, required)
|
14
|
+
- **port**: database port number (integer, default: 5439)
|
15
|
+
- **user**: database login user name (string, required)
|
16
|
+
- **password**: database login password (string, default: "")
|
17
|
+
- **database**: destination database name (string, required)
|
18
|
+
- **schema**: destination schema name (string, default: "public")
|
19
|
+
- **table**: destination table name (string, required)
|
20
|
+
- **access_key_id**: access key id for AWS
|
21
|
+
- **secret_access_key**: secret access key for AWS
|
22
|
+
- **iam_user_name**: IAM user name for uploading temporary files to S3. The user should have permissions of `s3:GetObject`, `s3:PutObject`, `s3:ListBucket` and `sts:GetFederationToken`.
|
23
|
+
- **s3_bucket**: S3 bucket name for temporary files
|
24
|
+
- **s3_key_prefix**: S3 key prefix for temporary files (string, default: "")
|
25
|
+
- **options**: extra connection properties (hash, default: {})
|
26
|
+
- **mode**: "replace" or "insert" (string, required)
|
27
|
+
- **batch_size**: size of a single batch insert (integer, default: 16777216)
|
28
|
+
- **default_timezone**: If input column type (embulk type) is timestamp, this plugin needs to format the timestamp into a SQL string. This default_timezone option is used to control the timezone. You can overwrite timezone for each columns using column_options option. (string, default: `UTC`)
|
29
|
+
- **column_options**: advanced: a key-value pairs where key is a column name and value is options for the column.
|
30
|
+
- **type**: type of a column when this plugin creates new tables (e.g. `VARCHAR(255)`, `INTEGER NOT NULL UNIQUE`). This used when this plugin creates intermediate tables (insert, truncate_insert and merge modes), when it creates the target table (insert_direct and replace modes), and when it creates nonexistent target table automatically. (string, default: depends on input column type. `BIGINT` if input column type is long, `BOOLEAN` if boolean, `DOUBLE PRECISION` if double, `CLOB` if string, `TIMESTAMP` if timestamp)
|
31
|
+
- **value_type**: This plugin converts input column type (embulk type) into a database type to build a INSERT statement. This value_type option controls the type of the value in a INSERT statement. (string, default: depends on input column type. Available values options are: `byte`, `short`, `int`, `long`, `double`, `float`, `boolean`, `string`, `nstring`, `date`, `time`, `timestamp`, `decimal`, `null`, `pass`)
|
32
|
+
- **timestamp_format**: If input column type (embulk type) is timestamp and value_type is `string` or `nstring`, this plugin needs to format the timestamp value into a string. This timestamp_format option is used to control the format of the timestamp. (string, default: `%Y-%m-%d %H:%M:%S.%6N`)
|
33
|
+
- **timezone**: If input column type (embulk type) is timestamp, this plugin needs to format the timestamp value into a SQL string. In this cases, this timezone option is used to control the timezone. (string, value of default_timezone option is used by default)
|
34
|
+
|
35
|
+
### Modes
|
36
|
+
|
37
|
+
* **insert**:
|
38
|
+
* Behavior: This mode writes rows to some intermediate tables first. If all those tasks run correctly, runs `INSERT INTO <target_table> SELECT * FROM <intermediate_table_1> UNION ALL SELECT * FROM <intermediate_table_2> UNION ALL ...` query.
|
39
|
+
* Transactional: Yes. This mode successfully writes all rows, or fails with writing zero rows.
|
40
|
+
* Resumable: Yes.
|
41
|
+
* **insert_direct**:
|
42
|
+
* Behavior: This mode inserts rows to the target table directly.
|
43
|
+
* Transactional: No. If fails, the target table could have some rows inserted.
|
44
|
+
* Resumable: No.
|
45
|
+
* **truncate_insert**:
|
46
|
+
* Behavior: Same with `insert` mode excepting that it truncates the target table right before the last `INSERT ...` query.
|
47
|
+
* Transactional: Yes.
|
48
|
+
* Resumable: Yes.
|
49
|
+
* **merge**:
|
50
|
+
* Behavior: This mode writes rows to some intermediate tables first. If all those tasks run correctly, runs `with updated AS (UPDATE .... RETURNING ...) INSERT INTO ....` query.
|
51
|
+
* Transactional: Yes.
|
52
|
+
* Resumable: Yes.
|
53
|
+
* **replace**:
|
54
|
+
* Behavior: Same with `insert` mode excepting that it truncates the target table right before the last `INSERT ...` query.
|
55
|
+
* Transactional: Yes.
|
56
|
+
* Resumable: No.
|
57
|
+
|
58
|
+
### Example
|
59
|
+
|
60
|
+
```yaml
|
61
|
+
out:
|
62
|
+
type: redshift
|
63
|
+
host: myinstance.us-west-2.redshift.amazonaws.com
|
64
|
+
user: pg
|
65
|
+
password: ""
|
66
|
+
database: my_database
|
67
|
+
table: my_table
|
68
|
+
access_key_id: ABCXYZ123ABCXYZ123
|
69
|
+
secret_access_key: AbCxYz123aBcXyZ123
|
70
|
+
iam_user_name: my-s3-read-only
|
71
|
+
s3_bucket: my-redshift-transfer-bucket
|
72
|
+
s3_key_prefix: temp/redshift
|
73
|
+
mode: insert
|
74
|
+
```
|
75
|
+
|
76
|
+
Advanced configuration:
|
77
|
+
|
78
|
+
```yaml
|
79
|
+
out:
|
80
|
+
type: redshift
|
81
|
+
host: myinstance.us-west-2.redshift.amazonaws.com
|
82
|
+
user: pg
|
83
|
+
password: ""
|
84
|
+
database: my_database
|
85
|
+
table: my_table
|
86
|
+
access_key_id: ABCXYZ123ABCXYZ123
|
87
|
+
secret_access_key: AbCxYz123aBcXyZ123
|
88
|
+
iam_user_name: my-s3-read-only
|
89
|
+
s3_bucket: my-redshift-transfer-bucket
|
90
|
+
s3_key_prefix: temp/redshift
|
91
|
+
options: {loglevel: 2}
|
92
|
+
mode: insert_direct
|
93
|
+
column_options:
|
94
|
+
my_col_1: {type: 'VARCHAR(255)'}
|
95
|
+
my_col_3: {type: 'INT NOT NULL'}
|
96
|
+
my_col_4: {value_type: string, timestamp_format: `%Y-%m-%d %H:%M:%S %z`, timezone: '-0700'}
|
97
|
+
my_col_5: {type: 'DECIMAL(18,9)', value_type: pass}
|
98
|
+
```
|
99
|
+
|
100
|
+
### Build
|
101
|
+
|
102
|
+
```
|
103
|
+
$ ./gradlew gem
|
104
|
+
```
|
data/build.gradle
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
dependencies {
|
2
|
-
compile project(':embulk-output-jdbc')
|
3
|
-
compile project(':embulk-output-postgresql')
|
4
|
-
|
5
|
-
compile "com.amazonaws:aws-java-sdk-s3:1.
|
6
|
-
compile "com.amazonaws:aws-java-sdk-sts:1.
|
7
|
-
|
8
|
-
testCompile project(':embulk-output-jdbc').sourceSets.test.output
|
9
|
-
}
|
1
|
+
dependencies {
|
2
|
+
compile project(':embulk-output-jdbc')
|
3
|
+
compile project(':embulk-output-postgresql')
|
4
|
+
|
5
|
+
compile "com.amazonaws:aws-java-sdk-s3:1.10.33"
|
6
|
+
compile "com.amazonaws:aws-java-sdk-sts:1.10.33"
|
7
|
+
|
8
|
+
testCompile project(':embulk-output-jdbc').sourceSets.test.output
|
9
|
+
}
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -1,3 +1,3 @@
|
|
1
|
-
Embulk::JavaPlugin.register_output(
|
2
|
-
:redshift, "org.embulk.output.RedshiftOutputPlugin",
|
3
|
-
File.expand_path('../../../../classpath', __FILE__))
|
1
|
+
Embulk::JavaPlugin.register_output(
|
2
|
+
:redshift, "org.embulk.output.RedshiftOutputPlugin",
|
3
|
+
File.expand_path('../../../../classpath', __FILE__))
|
@@ -1,151 +1,151 @@
|
|
1
|
-
package org.embulk.output;
|
2
|
-
|
3
|
-
import java.util.List;
|
4
|
-
import java.util.Properties;
|
5
|
-
import java.io.IOException;
|
6
|
-
import java.sql.SQLException;
|
7
|
-
import org.slf4j.Logger;
|
8
|
-
import com.google.common.base.Optional;
|
9
|
-
import com.google.common.collect.ImmutableSet;
|
10
|
-
import com.amazonaws.auth.AWSCredentials;
|
11
|
-
import com.amazonaws.auth.AWSCredentialsProvider;
|
12
|
-
import com.amazonaws.auth.BasicAWSCredentials;
|
13
|
-
import org.embulk.spi.Exec;
|
14
|
-
import org.embulk.config.Config;
|
15
|
-
import org.embulk.config.ConfigDefault;
|
16
|
-
import org.embulk.output.jdbc.AbstractJdbcOutputPlugin;
|
17
|
-
import org.embulk.output.jdbc.BatchInsert;
|
18
|
-
import org.embulk.output.redshift.RedshiftOutputConnector;
|
19
|
-
import org.embulk.output.redshift.RedshiftCopyBatchInsert;
|
20
|
-
|
21
|
-
public class RedshiftOutputPlugin
|
22
|
-
extends AbstractJdbcOutputPlugin
|
23
|
-
{
|
24
|
-
private final Logger logger = Exec.getLogger(RedshiftOutputPlugin.class);
|
25
|
-
|
26
|
-
public interface RedshiftPluginTask extends PluginTask
|
27
|
-
{
|
28
|
-
@Config("host")
|
29
|
-
public String getHost();
|
30
|
-
|
31
|
-
@Config("port")
|
32
|
-
@ConfigDefault("5439")
|
33
|
-
public int getPort();
|
34
|
-
|
35
|
-
@Config("user")
|
36
|
-
public String getUser();
|
37
|
-
|
38
|
-
@Config("password")
|
39
|
-
@ConfigDefault("\"\"")
|
40
|
-
public String getPassword();
|
41
|
-
|
42
|
-
@Config("database")
|
43
|
-
public String getDatabase();
|
44
|
-
|
45
|
-
@Config("schema")
|
46
|
-
@ConfigDefault("\"public\"")
|
47
|
-
public String getSchema();
|
48
|
-
|
49
|
-
@Config("access_key_id")
|
50
|
-
public String getAccessKeyId();
|
51
|
-
|
52
|
-
@Config("secret_access_key")
|
53
|
-
public String getSecretAccessKey();
|
54
|
-
|
55
|
-
@Config("iam_user_name")
|
56
|
-
public String getIamUserName();
|
57
|
-
|
58
|
-
@Config("s3_bucket")
|
59
|
-
public String getS3Bucket();
|
60
|
-
|
61
|
-
@Config("s3_key_prefix")
|
62
|
-
@ConfigDefault("\"\"")
|
63
|
-
public String getS3KeyPrefix();
|
64
|
-
}
|
65
|
-
|
66
|
-
@Override
|
67
|
-
protected Class<? extends PluginTask> getTaskClass()
|
68
|
-
{
|
69
|
-
return RedshiftPluginTask.class;
|
70
|
-
}
|
71
|
-
|
72
|
-
@Override
|
73
|
-
protected Features getFeatures(PluginTask task)
|
74
|
-
{
|
75
|
-
return new Features()
|
76
|
-
.setMaxTableNameLength(30)
|
77
|
-
.setSupportedModes(ImmutableSet.of(Mode.INSERT, Mode.INSERT_DIRECT, Mode.MERGE, Mode.TRUNCATE_INSERT, Mode.REPLACE))
|
78
|
-
.setIgnoreMergeKeys(false);
|
79
|
-
}
|
80
|
-
|
81
|
-
@Override
|
82
|
-
protected RedshiftOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation)
|
83
|
-
{
|
84
|
-
RedshiftPluginTask t = (RedshiftPluginTask) task;
|
85
|
-
|
86
|
-
String url = String.format("jdbc:postgresql://%s:%d/%s",
|
87
|
-
t.getHost(), t.getPort(), t.getDatabase());
|
88
|
-
|
89
|
-
Properties props = new Properties();
|
90
|
-
props.setProperty("loginTimeout", "300"); // seconds
|
91
|
-
props.setProperty("socketTimeout", "1800"); // seconds
|
92
|
-
|
93
|
-
// Enable keepalive based on tcp_keepalive_time, tcp_keepalive_intvl and tcp_keepalive_probes kernel parameters.
|
94
|
-
// Socket options TCP_KEEPCNT, TCP_KEEPIDLE, and TCP_KEEPINTVL are not configurable.
|
95
|
-
props.setProperty("tcpKeepAlive", "true");
|
96
|
-
|
97
|
-
// TODO
|
98
|
-
//switch task.getSssl() {
|
99
|
-
//when "disable":
|
100
|
-
// break;
|
101
|
-
//when "enable":
|
102
|
-
// props.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory"); // disable server-side validation
|
103
|
-
//when "verify":
|
104
|
-
// props.setProperty("ssl", "true");
|
105
|
-
// break;
|
106
|
-
//}
|
107
|
-
|
108
|
-
if (!retryableMetadataOperation) {
|
109
|
-
// non-retryable batch operation uses longer timeout
|
110
|
-
props.setProperty("loginTimeout", "300"); // seconds
|
111
|
-
props.setProperty("socketTimeout", "28800"); // seconds
|
112
|
-
}
|
113
|
-
|
114
|
-
props.putAll(t.getOptions());
|
115
|
-
|
116
|
-
props.setProperty("user", t.getUser());
|
117
|
-
logger.info("Connecting to {} options {}", url, props);
|
118
|
-
props.setProperty("password", t.getPassword());
|
119
|
-
|
120
|
-
return new RedshiftOutputConnector(url, props, t.getSchema());
|
121
|
-
}
|
122
|
-
|
123
|
-
private static AWSCredentialsProvider getAWSCredentialsProvider(RedshiftPluginTask task)
|
124
|
-
{
|
125
|
-
final AWSCredentials creds = new BasicAWSCredentials(
|
126
|
-
task.getAccessKeyId(), task.getSecretAccessKey());
|
127
|
-
return new AWSCredentialsProvider() {
|
128
|
-
@Override
|
129
|
-
public AWSCredentials getCredentials()
|
130
|
-
{
|
131
|
-
return creds;
|
132
|
-
}
|
133
|
-
|
134
|
-
@Override
|
135
|
-
public void refresh()
|
136
|
-
{
|
137
|
-
}
|
138
|
-
};
|
139
|
-
}
|
140
|
-
|
141
|
-
@Override
|
142
|
-
protected BatchInsert newBatchInsert(PluginTask task, Optional<List<String>> mergeKeys) throws IOException, SQLException
|
143
|
-
{
|
144
|
-
if (mergeKeys.isPresent()) {
|
145
|
-
throw new UnsupportedOperationException("Redshift output plugin doesn't support 'merge_direct' mode. Use 'merge' mode instead.");
|
146
|
-
}
|
147
|
-
RedshiftPluginTask t = (RedshiftPluginTask) task;
|
148
|
-
return new RedshiftCopyBatchInsert(getConnector(task, true),
|
149
|
-
getAWSCredentialsProvider(t), t.getS3Bucket(), t.getS3KeyPrefix(), t.getIamUserName());
|
150
|
-
}
|
151
|
-
}
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.util.Properties;
|
5
|
+
import java.io.IOException;
|
6
|
+
import java.sql.SQLException;
|
7
|
+
import org.slf4j.Logger;
|
8
|
+
import com.google.common.base.Optional;
|
9
|
+
import com.google.common.collect.ImmutableSet;
|
10
|
+
import com.amazonaws.auth.AWSCredentials;
|
11
|
+
import com.amazonaws.auth.AWSCredentialsProvider;
|
12
|
+
import com.amazonaws.auth.BasicAWSCredentials;
|
13
|
+
import org.embulk.spi.Exec;
|
14
|
+
import org.embulk.config.Config;
|
15
|
+
import org.embulk.config.ConfigDefault;
|
16
|
+
import org.embulk.output.jdbc.AbstractJdbcOutputPlugin;
|
17
|
+
import org.embulk.output.jdbc.BatchInsert;
|
18
|
+
import org.embulk.output.redshift.RedshiftOutputConnector;
|
19
|
+
import org.embulk.output.redshift.RedshiftCopyBatchInsert;
|
20
|
+
|
21
|
+
public class RedshiftOutputPlugin
|
22
|
+
extends AbstractJdbcOutputPlugin
|
23
|
+
{
|
24
|
+
private final Logger logger = Exec.getLogger(RedshiftOutputPlugin.class);
|
25
|
+
|
26
|
+
public interface RedshiftPluginTask extends PluginTask
|
27
|
+
{
|
28
|
+
@Config("host")
|
29
|
+
public String getHost();
|
30
|
+
|
31
|
+
@Config("port")
|
32
|
+
@ConfigDefault("5439")
|
33
|
+
public int getPort();
|
34
|
+
|
35
|
+
@Config("user")
|
36
|
+
public String getUser();
|
37
|
+
|
38
|
+
@Config("password")
|
39
|
+
@ConfigDefault("\"\"")
|
40
|
+
public String getPassword();
|
41
|
+
|
42
|
+
@Config("database")
|
43
|
+
public String getDatabase();
|
44
|
+
|
45
|
+
@Config("schema")
|
46
|
+
@ConfigDefault("\"public\"")
|
47
|
+
public String getSchema();
|
48
|
+
|
49
|
+
@Config("access_key_id")
|
50
|
+
public String getAccessKeyId();
|
51
|
+
|
52
|
+
@Config("secret_access_key")
|
53
|
+
public String getSecretAccessKey();
|
54
|
+
|
55
|
+
@Config("iam_user_name")
|
56
|
+
public String getIamUserName();
|
57
|
+
|
58
|
+
@Config("s3_bucket")
|
59
|
+
public String getS3Bucket();
|
60
|
+
|
61
|
+
@Config("s3_key_prefix")
|
62
|
+
@ConfigDefault("\"\"")
|
63
|
+
public String getS3KeyPrefix();
|
64
|
+
}
|
65
|
+
|
66
|
+
@Override
|
67
|
+
protected Class<? extends PluginTask> getTaskClass()
|
68
|
+
{
|
69
|
+
return RedshiftPluginTask.class;
|
70
|
+
}
|
71
|
+
|
72
|
+
@Override
|
73
|
+
protected Features getFeatures(PluginTask task)
|
74
|
+
{
|
75
|
+
return new Features()
|
76
|
+
.setMaxTableNameLength(30)
|
77
|
+
.setSupportedModes(ImmutableSet.of(Mode.INSERT, Mode.INSERT_DIRECT, Mode.MERGE, Mode.TRUNCATE_INSERT, Mode.REPLACE))
|
78
|
+
.setIgnoreMergeKeys(false);
|
79
|
+
}
|
80
|
+
|
81
|
+
@Override
|
82
|
+
protected RedshiftOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation)
|
83
|
+
{
|
84
|
+
RedshiftPluginTask t = (RedshiftPluginTask) task;
|
85
|
+
|
86
|
+
String url = String.format("jdbc:postgresql://%s:%d/%s",
|
87
|
+
t.getHost(), t.getPort(), t.getDatabase());
|
88
|
+
|
89
|
+
Properties props = new Properties();
|
90
|
+
props.setProperty("loginTimeout", "300"); // seconds
|
91
|
+
props.setProperty("socketTimeout", "1800"); // seconds
|
92
|
+
|
93
|
+
// Enable keepalive based on tcp_keepalive_time, tcp_keepalive_intvl and tcp_keepalive_probes kernel parameters.
|
94
|
+
// Socket options TCP_KEEPCNT, TCP_KEEPIDLE, and TCP_KEEPINTVL are not configurable.
|
95
|
+
props.setProperty("tcpKeepAlive", "true");
|
96
|
+
|
97
|
+
// TODO
|
98
|
+
//switch task.getSssl() {
|
99
|
+
//when "disable":
|
100
|
+
// break;
|
101
|
+
//when "enable":
|
102
|
+
// props.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory"); // disable server-side validation
|
103
|
+
//when "verify":
|
104
|
+
// props.setProperty("ssl", "true");
|
105
|
+
// break;
|
106
|
+
//}
|
107
|
+
|
108
|
+
if (!retryableMetadataOperation) {
|
109
|
+
// non-retryable batch operation uses longer timeout
|
110
|
+
props.setProperty("loginTimeout", "300"); // seconds
|
111
|
+
props.setProperty("socketTimeout", "28800"); // seconds
|
112
|
+
}
|
113
|
+
|
114
|
+
props.putAll(t.getOptions());
|
115
|
+
|
116
|
+
props.setProperty("user", t.getUser());
|
117
|
+
logger.info("Connecting to {} options {}", url, props);
|
118
|
+
props.setProperty("password", t.getPassword());
|
119
|
+
|
120
|
+
return new RedshiftOutputConnector(url, props, t.getSchema());
|
121
|
+
}
|
122
|
+
|
123
|
+
private static AWSCredentialsProvider getAWSCredentialsProvider(RedshiftPluginTask task)
|
124
|
+
{
|
125
|
+
final AWSCredentials creds = new BasicAWSCredentials(
|
126
|
+
task.getAccessKeyId(), task.getSecretAccessKey());
|
127
|
+
return new AWSCredentialsProvider() {
|
128
|
+
@Override
|
129
|
+
public AWSCredentials getCredentials()
|
130
|
+
{
|
131
|
+
return creds;
|
132
|
+
}
|
133
|
+
|
134
|
+
@Override
|
135
|
+
public void refresh()
|
136
|
+
{
|
137
|
+
}
|
138
|
+
};
|
139
|
+
}
|
140
|
+
|
141
|
+
@Override
|
142
|
+
protected BatchInsert newBatchInsert(PluginTask task, Optional<List<String>> mergeKeys) throws IOException, SQLException
|
143
|
+
{
|
144
|
+
if (mergeKeys.isPresent()) {
|
145
|
+
throw new UnsupportedOperationException("Redshift output plugin doesn't support 'merge_direct' mode. Use 'merge' mode instead.");
|
146
|
+
}
|
147
|
+
RedshiftPluginTask t = (RedshiftPluginTask) task;
|
148
|
+
return new RedshiftCopyBatchInsert(getConnector(task, true),
|
149
|
+
getAWSCredentialsProvider(t), t.getS3Bucket(), t.getS3KeyPrefix(), t.getIamUserName());
|
150
|
+
}
|
151
|
+
}
|