embulk-output-redshift 0.4.1 → 0.4.2

Files changed (30)
  1. checksums.yaml +4 -4
  2. data/README.md +104 -104
  3. data/build.gradle +9 -9
  4. data/classpath/{aws-java-sdk-core-1.9.17.jar → aws-java-sdk-core-1.10.33.jar} +0 -0
  5. data/classpath/aws-java-sdk-kms-1.10.33.jar +0 -0
  6. data/classpath/aws-java-sdk-s3-1.10.33.jar +0 -0
  7. data/classpath/aws-java-sdk-sts-1.10.33.jar +0 -0
  8. data/classpath/embulk-output-jdbc-0.4.2.jar +0 -0
  9. data/classpath/embulk-output-postgresql-0.4.2.jar +0 -0
  10. data/classpath/{embulk-output-redshift-0.4.1.jar → embulk-output-redshift-0.4.2.jar} +0 -0
  11. data/classpath/{httpclient-4.3.4.jar → httpclient-4.3.6.jar} +0 -0
  12. data/classpath/{httpcore-4.3.2.jar → httpcore-4.3.3.jar} +0 -0
  13. data/classpath/postgresql-9.4-1205-jdbc41.jar +0 -0
  14. data/lib/embulk/output/redshift.rb +3 -3
  15. data/src/main/java/org/embulk/output/RedshiftOutputPlugin.java +151 -151
  16. data/src/main/java/org/embulk/output/redshift/RedshiftCopyBatchInsert.java +218 -218
  17. data/src/main/java/org/embulk/output/redshift/RedshiftOutputConnection.java +122 -122
  18. data/src/main/java/org/embulk/output/redshift/RedshiftOutputConnector.java +40 -40
  19. metadata +12 -17
  20. data/classpath/aws-java-sdk-kms-1.9.17.jar +0 -0
  21. data/classpath/aws-java-sdk-s3-1.9.17.jar +0 -0
  22. data/classpath/aws-java-sdk-sts-1.9.17.jar +0 -0
  23. data/classpath/embulk-output-jdbc-0.4.1.jar +0 -0
  24. data/classpath/embulk-output-postgresql-0.4.1.jar +0 -0
  25. data/classpath/jna-4.1.0.jar +0 -0
  26. data/classpath/jna-platform-4.1.0.jar +0 -0
  27. data/classpath/joda-time-2.8.1.jar +0 -0
  28. data/classpath/postgresql-9.4-1200-jdbc41.jar +0 -0
  29. data/classpath/slf4j-simple-1.7.7.jar +0 -0
  30. data/classpath/waffle-jna-1.7.jar +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 6fbe390d4e95de66a23275061c7841365a9d4f28
-   data.tar.gz: 2685fb58fccbbc8d26b1824a6d57cb9b1cba5d45
+   metadata.gz: 43952fe32c9c9396b32eaf636cf1dbac99e3f522
+   data.tar.gz: e161ee0aa8294d5ad6227bc26c88c1513ff23b2c
  SHA512:
-   metadata.gz: 37171adae4aee6a663efdce2663ae087faa6549acb3f4f33ba2cad1cd1d871fd6e6934a84e1878699d82f7a9b84ccde74b8210548b929a3f4f08e19664908879
-   data.tar.gz: 924cba2f34150a1e87112f9e8277e796f70fb595418adfc8f8c654f71a1a58c9ae048c4ab2b616eb549bc86836f64ed9b7605a7e0b49c1997091d694c193d39c
+   metadata.gz: 49f5eebc0dc559434672bd3bf7d38eb05172ee306160a64d558d98333f7e2fb347e38f95e223ae47bb828a3ecbca7dd0d29a986f41279589c8da913401380cbb
+   data.tar.gz: 39ad87b3d9beb2de22332251128a36cf0e139113eceda458686541307ad6a540287e332b01ebaa7be7311eceb09bdb638c2d16059443748bd10e0b61b9283787
data/README.md CHANGED
@@ -1,104 +1,104 @@
(Every line is removed and re-added with identical content, most likely a line-ending change; the file content is shown once below.)

# Redshift output plugins for Embulk

Redshift output plugins for Embulk load records to Redshift.

## Overview

* **Plugin type**: output
* **Load all or nothing**: depends on the mode. See below.
* **Resume supported**: depends on the mode. See below.

## Configuration

- **host**: database host name (string, required)
- **port**: database port number (integer, default: 5439)
- **user**: database login user name (string, required)
- **password**: database login password (string, default: "")
- **database**: destination database name (string, required)
- **schema**: destination schema name (string, default: "public")
- **table**: destination table name (string, required)
- **access_key_id**: access key ID for AWS
- **secret_access_key**: secret access key for AWS
- **iam_user_name**: IAM user name for uploading temporary files to S3. The user needs the `s3:GetObject`, `s3:PutObject`, `s3:ListBucket` and `sts:GetFederationToken` permissions.
- **s3_bucket**: S3 bucket name for temporary files
- **s3_key_prefix**: S3 key prefix for temporary files (string, default: "")
- **options**: extra connection properties (hash, default: {})
- **mode**: "insert", "insert_direct", "truncate_insert", "merge" or "replace" (string, required; see Modes below)
- **batch_size**: size of a single batch insert (integer, default: 16777216)
- **default_timezone**: If an input column type (embulk type) is timestamp, this plugin needs to format the timestamp into a SQL string. This default_timezone option controls the timezone used. You can override the timezone per column using the column_options option. (string, default: `UTC`)
- **column_options**: advanced: key-value pairs where the key is a column name and the value is options for that column.
  - **type**: type of the column when this plugin creates new tables (e.g. `VARCHAR(255)`, `INTEGER NOT NULL UNIQUE`). This is used when the plugin creates intermediate tables (insert, truncate_insert and merge modes), when it creates the target table (insert_direct and replace modes), and when it automatically creates a nonexistent target table. (string, default: depends on the input column type. `BIGINT` if the input column type is long, `BOOLEAN` if boolean, `DOUBLE PRECISION` if double, `CLOB` if string, `TIMESTAMP` if timestamp)
  - **value_type**: This plugin converts the input column type (embulk type) into a database type to build an INSERT statement. This value_type option controls the type of the value in the INSERT statement. (string, default: depends on the input column type. Available values are: `byte`, `short`, `int`, `long`, `double`, `float`, `boolean`, `string`, `nstring`, `date`, `time`, `timestamp`, `decimal`, `null`, `pass`)
  - **timestamp_format**: If the input column type (embulk type) is timestamp and value_type is `string` or `nstring`, this plugin needs to format the timestamp value into a string. This timestamp_format option controls the format. (string, default: `%Y-%m-%d %H:%M:%S.%6N`)
  - **timezone**: If the input column type (embulk type) is timestamp, this plugin needs to format the timestamp value into a SQL string. In this case, this timezone option controls the timezone used. (string, defaults to the value of the default_timezone option)

### Modes

* **insert**:
  * Behavior: This mode writes rows to intermediate tables first. If all those tasks run correctly, it runs an `INSERT INTO <target_table> SELECT * FROM <intermediate_table_1> UNION ALL SELECT * FROM <intermediate_table_2> UNION ALL ...` query.
  * Transactional: Yes. This mode either writes all rows successfully or fails having written zero rows.
  * Resumable: Yes.
* **insert_direct**:
  * Behavior: This mode inserts rows directly into the target table.
  * Transactional: No. If it fails, the target table may be left with some rows already inserted.
  * Resumable: No.
* **truncate_insert**:
  * Behavior: Same as `insert` mode, except that it truncates the target table right before the final `INSERT ...` query.
  * Transactional: Yes.
  * Resumable: Yes.
* **merge**:
  * Behavior: This mode writes rows to intermediate tables first. If all those tasks run correctly, it runs a `WITH updated AS (UPDATE ... RETURNING ...) INSERT INTO ...` query (a minimal sketch of this query's shape follows this list).
  * Transactional: Yes.
  * Resumable: Yes.
* **replace**:
  * Behavior: Same as `insert` mode, except that it truncates the target table right before the final `INSERT ...` query.
  * Transactional: Yes.
  * Resumable: No.
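
To make the merge behavior concrete, here is a minimal sketch of the shape of that final query, assuming a hypothetical target table `my_table` with merge key `id` and value column `val`, and a single intermediate table `my_table_tmp0`. All three names are illustrative, not the plugin's actual generated names, and the real query is built internally and may differ:

```sql
-- Sketch of the WITH updated AS (UPDATE ... RETURNING ...) INSERT INTO ... pattern:
-- rows whose merge key already exists in the target are updated in place,
-- and the remaining rows are inserted.
WITH updated AS (
    UPDATE my_table
    SET val = t.val
    FROM my_table_tmp0 t        -- illustrative intermediate table name
    WHERE my_table.id = t.id
    RETURNING t.id
)
INSERT INTO my_table (id, val)
SELECT id, val
FROM my_table_tmp0
WHERE id NOT IN (SELECT id FROM updated);
```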

### Example

```yaml
out:
  type: redshift
  host: myinstance.us-west-2.redshift.amazonaws.com
  user: pg
  password: ""
  database: my_database
  table: my_table
  access_key_id: ABCXYZ123ABCXYZ123
  secret_access_key: AbCxYz123aBcXyZ123
  iam_user_name: my-s3-read-only
  s3_bucket: my-redshift-transfer-bucket
  s3_key_prefix: temp/redshift
  mode: insert
```
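
Assuming this `out` section is saved into a complete Embulk configuration file together with an `in` section (the file name `config.yml` below is just an example), the load runs with Embulk's standard run command:

```
$ embulk run config.yml
```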

Advanced configuration:

```yaml
out:
  type: redshift
  host: myinstance.us-west-2.redshift.amazonaws.com
  user: pg
  password: ""
  database: my_database
  table: my_table
  access_key_id: ABCXYZ123ABCXYZ123
  secret_access_key: AbCxYz123aBcXyZ123
  iam_user_name: my-s3-read-only
  s3_bucket: my-redshift-transfer-bucket
  s3_key_prefix: temp/redshift
  options: {loglevel: 2}
  mode: insert_direct
  column_options:
    my_col_1: {type: 'VARCHAR(255)'}
    my_col_3: {type: 'INT NOT NULL'}
    my_col_4: {value_type: string, timestamp_format: '%Y-%m-%d %H:%M:%S %z', timezone: '-0700'}
    my_col_5: {type: 'DECIMAL(18,9)', value_type: pass}
```

### Build

```
$ ./gradlew gem
```
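
The built gem can then be installed into Embulk with its gem command; the `pkg/` output path and the exact file name below are assumptions about the local build, not taken from this changeset:

```
$ embulk gem install pkg/embulk-output-redshift-0.4.2.gem
```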
data/build.gradle CHANGED
@@ -1,9 +1,9 @@
  dependencies {
      compile project(':embulk-output-jdbc')
      compile project(':embulk-output-postgresql')

-     compile "com.amazonaws:aws-java-sdk-s3:1.9.17"
-     compile "com.amazonaws:aws-java-sdk-sts:1.9.17"
+     compile "com.amazonaws:aws-java-sdk-s3:1.10.33"
+     compile "com.amazonaws:aws-java-sdk-sts:1.10.33"

      testCompile project(':embulk-output-jdbc').sourceSets.test.output
  }
data/lib/embulk/output/redshift.rb CHANGED
@@ -1,3 +1,3 @@
(Every line is removed and re-added with identical content; the file content is shown once below.)

Embulk::JavaPlugin.register_output(
  :redshift, "org.embulk.output.RedshiftOutputPlugin",
  File.expand_path('../../../../classpath', __FILE__))
data/src/main/java/org/embulk/output/RedshiftOutputPlugin.java CHANGED
@@ -1,151 +1,151 @@
(Every line is removed and re-added with identical content; the file content is shown once below.)

package org.embulk.output;

import java.util.List;
import java.util.Properties;
import java.io.IOException;
import java.sql.SQLException;
import org.slf4j.Logger;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableSet;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import org.embulk.spi.Exec;
import org.embulk.config.Config;
import org.embulk.config.ConfigDefault;
import org.embulk.output.jdbc.AbstractJdbcOutputPlugin;
import org.embulk.output.jdbc.BatchInsert;
import org.embulk.output.redshift.RedshiftOutputConnector;
import org.embulk.output.redshift.RedshiftCopyBatchInsert;

public class RedshiftOutputPlugin
        extends AbstractJdbcOutputPlugin
{
    private final Logger logger = Exec.getLogger(RedshiftOutputPlugin.class);

    public interface RedshiftPluginTask extends PluginTask
    {
        @Config("host")
        public String getHost();

        @Config("port")
        @ConfigDefault("5439")
        public int getPort();

        @Config("user")
        public String getUser();

        @Config("password")
        @ConfigDefault("\"\"")
        public String getPassword();

        @Config("database")
        public String getDatabase();

        @Config("schema")
        @ConfigDefault("\"public\"")
        public String getSchema();

        @Config("access_key_id")
        public String getAccessKeyId();

        @Config("secret_access_key")
        public String getSecretAccessKey();

        @Config("iam_user_name")
        public String getIamUserName();

        @Config("s3_bucket")
        public String getS3Bucket();

        @Config("s3_key_prefix")
        @ConfigDefault("\"\"")
        public String getS3KeyPrefix();
    }

    @Override
    protected Class<? extends PluginTask> getTaskClass()
    {
        return RedshiftPluginTask.class;
    }

    @Override
    protected Features getFeatures(PluginTask task)
    {
        return new Features()
            .setMaxTableNameLength(30)
            .setSupportedModes(ImmutableSet.of(Mode.INSERT, Mode.INSERT_DIRECT, Mode.MERGE, Mode.TRUNCATE_INSERT, Mode.REPLACE))
            .setIgnoreMergeKeys(false);
    }

    @Override
    protected RedshiftOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation)
    {
        RedshiftPluginTask t = (RedshiftPluginTask) task;

        String url = String.format("jdbc:postgresql://%s:%d/%s",
                t.getHost(), t.getPort(), t.getDatabase());

        Properties props = new Properties();
        props.setProperty("loginTimeout", "300"); // seconds
        props.setProperty("socketTimeout", "1800"); // seconds

        // Enable keepalive based on tcp_keepalive_time, tcp_keepalive_intvl and tcp_keepalive_probes kernel parameters.
        // Socket options TCP_KEEPCNT, TCP_KEEPIDLE, and TCP_KEEPINTVL are not configurable.
        props.setProperty("tcpKeepAlive", "true");

        // TODO
        //switch task.getSssl() {
        //when "disable":
        //    break;
        //when "enable":
        //    props.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory"); // disable server-side validation
        //when "verify":
        //    props.setProperty("ssl", "true");
        //    break;
        //}

        if (!retryableMetadataOperation) {
            // non-retryable batch operation uses longer timeout
            props.setProperty("loginTimeout", "300"); // seconds
            props.setProperty("socketTimeout", "28800"); // seconds
        }

        props.putAll(t.getOptions());

        props.setProperty("user", t.getUser());
        logger.info("Connecting to {} options {}", url, props); // logged before the password is added, so it is not printed
        props.setProperty("password", t.getPassword());

        return new RedshiftOutputConnector(url, props, t.getSchema());
    }

    private static AWSCredentialsProvider getAWSCredentialsProvider(RedshiftPluginTask task)
    {
        final AWSCredentials creds = new BasicAWSCredentials(
                task.getAccessKeyId(), task.getSecretAccessKey());
        return new AWSCredentialsProvider() {
            @Override
            public AWSCredentials getCredentials()
            {
                return creds;
            }

            @Override
            public void refresh()
            {
            }
        };
    }

    @Override
    protected BatchInsert newBatchInsert(PluginTask task, Optional<List<String>> mergeKeys) throws IOException, SQLException
    {
        if (mergeKeys.isPresent()) {
            throw new UnsupportedOperationException("Redshift output plugin doesn't support 'merge_direct' mode. Use 'merge' mode instead.");
        }
        RedshiftPluginTask t = (RedshiftPluginTask) task;
        return new RedshiftCopyBatchInsert(getConnector(task, true),
                getAWSCredentialsProvider(t), t.getS3Bucket(), t.getS3KeyPrefix(), t.getIamUserName());
    }
}