embulk-output-postgresql 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +89 -89
- data/build.gradle +7 -7
- data/classpath/{embulk-output-jdbc-0.4.0.jar → embulk-output-jdbc-0.4.1.jar} +0 -0
- data/classpath/{embulk-output-postgresql-0.4.0.jar → embulk-output-postgresql-0.4.1.jar} +0 -0
- data/lib/embulk/output/postgresql.rb +3 -3
- data/src/main/java/org/embulk/output/PostgreSQLOutputPlugin.java +165 -165
- data/src/main/java/org/embulk/output/postgresql/AbstractPostgreSQLCopyBatchInsert.java +245 -245
- data/src/main/java/org/embulk/output/postgresql/PostgreSQLCopyBatchInsert.java +73 -73
- data/src/main/java/org/embulk/output/postgresql/PostgreSQLOutputConnection.java +157 -157
- data/src/main/java/org/embulk/output/postgresql/PostgreSQLOutputConnector.java +40 -40
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 250c4ddbe6b7dcd4b48ac024df131182c5ca5c1c
|
4
|
+
data.tar.gz: ff78eb2ec44bf83d95f85cb0ebb5986ee734b59a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 51126373caeb6aae0f6973ee58fd51fabbbea117de3eb9a8325865f4212140c7705bb0a2101142ffdbe5c3e9dd3846c0f2d5a9e16e805c1aa9bf862320d820cf
|
7
|
+
data.tar.gz: f43c2719c43b3d6f8d7f212aea42629be582786b9cafb19c19122459ffe4aa5caf96d497ac538f0890d404e311749f55100f9a784ee7e6a55ad20db3f6c6fd22
|
data/README.md
CHANGED
@@ -1,89 +1,89 @@
|
|
1
|
-
# PostgreSQL output plugins for Embulk
|
2
|
-
|
3
|
-
PostgreSQL output plugins for Embulk loads records to PostgreSQL.
|
4
|
-
|
5
|
-
## Overview
|
6
|
-
|
7
|
-
* **Plugin type**: output
|
8
|
-
* **Load all or nothing**: depends on the mode. See below.
|
9
|
-
* **Resume supported**: depends on the mode. See below.
|
10
|
-
|
11
|
-
## Configuration
|
12
|
-
|
13
|
-
- **host**: database host name (string, required)
|
14
|
-
- **port**: database port number (integer, default: 5432)
|
15
|
-
- **user**: database login user name (string, required)
|
16
|
-
- **password**: database login password (string, default: "")
|
17
|
-
- **database**: destination database name (string, required)
|
18
|
-
- **schema**: destination schema name (string, default: "public")
|
19
|
-
- **table**: destination table name (string, required)
|
20
|
-
- **options**: extra connection properties (hash, default: {})
|
21
|
-
- **mode**: "insert", "insert_direct", "truncate_insert", "merge" or "replace" (string, required)
|
22
|
-
- **batch_size**: size of a single batch insert (integer, default: 16777216)
|
23
|
-
- **default_timezone**: If the input column type (embulk type) is timestamp, this plugin needs to format the timestamp into a SQL string. This default_timezone option is used to control the timezone. You can override the timezone for each column using the column_options option. (string, default: `UTC`)
|
24
|
-
- **column_options**: advanced: key-value pairs where the key is a column name and the value is the options for that column.
|
25
|
-
- **type**: type of a column when this plugin creates new tables (e.g. `VARCHAR(255)`, `INTEGER NOT NULL UNIQUE`). This is used when this plugin creates intermediate tables (insert, truncate_insert and merge modes), when it creates the target table (insert_direct and replace modes), and when it creates a nonexistent target table automatically. (string, default: depends on input column type. `BIGINT` if input column type is long, `BOOLEAN` if boolean, `DOUBLE PRECISION` if double, `CLOB` if string, `TIMESTAMP WITH TIME ZONE` if timestamp)
|
26
|
-
- **value_type**: This plugin converts the input column type (embulk type) into a database type to build an INSERT statement. This value_type option controls the type of the value in an INSERT statement. (string, default: depends on input column type. Available values are: `byte`, `short`, `int`, `long`, `double`, `float`, `boolean`, `string`, `nstring`, `date`, `time`, `timestamp`, `decimal`, `null`, `pass`)
|
27
|
-
- **timestamp_format**: If input column type (embulk type) is timestamp and value_type is `string` or `nstring`, this plugin needs to format the timestamp value into a string. This timestamp_format option is used to control the format of the timestamp. (string, default: `%Y-%m-%d %H:%M:%S.%6N`)
|
28
|
-
- **timezone**: If input column type (embulk type) is timestamp, this plugin needs to format the timestamp value into a SQL string. In this case, this timezone option is used to control the timezone. (string, value of default_timezone option is used by default)
|
29
|
-
|
30
|
-
### Modes
|
31
|
-
|
32
|
-
* **insert**:
|
33
|
-
* Behavior: This mode writes rows to some intermediate tables first. If all those tasks run correctly, runs `INSERT INTO <target_table> SELECT * FROM <intermediate_table_1> UNION ALL SELECT * FROM <intermediate_table_2> UNION ALL ...` query.
|
34
|
-
* Transactional: Yes. This mode successfully writes all rows, or fails with writing zero rows.
|
35
|
-
* Resumable: Yes.
|
36
|
-
* **insert_direct**:
|
37
|
-
* Behavior: This mode inserts rows to the target table directly.
|
38
|
-
* Transactional: No. If fails, the target table could have some rows inserted.
|
39
|
-
* Resumable: No.
|
40
|
-
* **truncate_insert**:
|
41
|
-
* Behavior: Same as `insert` mode, except that it truncates the target table right before the last `INSERT ...` query.
|
42
|
-
* Transactional: Yes.
|
43
|
-
* Resumable: Yes.
|
44
|
-
* **merge**:
|
45
|
-
* Behavior: This mode writes rows to some intermediate tables first. If all those tasks run correctly, runs `with updated AS (UPDATE .... RETURNING ...) INSERT INTO ....` query.
|
46
|
-
* Transactional: Yes.
|
47
|
-
* Resumable: Yes.
|
48
|
-
* **replace**:
|
49
|
-
* Behavior: Same with `insert` mode excepting that it truncates the target table right before the last `INSERT ...` query.
|
50
|
-
* Transactional: Yes.
|
51
|
-
* Resumable: No.
|
52
|
-
|
53
|
-
### Example
|
54
|
-
|
55
|
-
```yaml
|
56
|
-
out:
|
57
|
-
type: postgresql
|
58
|
-
host: localhost
|
59
|
-
user: pg
|
60
|
-
password: ""
|
61
|
-
database: my_database
|
62
|
-
table: my_table
|
63
|
-
mode: insert
|
64
|
-
```
|
65
|
-
|
66
|
-
Advanced configuration:
|
67
|
-
|
68
|
-
```yaml
|
69
|
-
out:
|
70
|
-
type: postgresql
|
71
|
-
host: localhost
|
72
|
-
user: pg
|
73
|
-
password: ""
|
74
|
-
database: my_database
|
75
|
-
table: my_table
|
76
|
-
options: {loglevel: 2}
|
77
|
-
mode: insert_direct
|
78
|
-
column_options:
|
79
|
-
my_col_1: {type: 'BIGSERIAL'}
|
80
|
-
my_col_3: {type: 'INT NOT NULL'}
|
81
|
-
my_col_4: {value_type: string, timestamp_format: '%Y-%m-%d %H:%M:%S %z', timezone: '-0700'}
|
82
|
-
my_col_5: {type: 'DECIMAL(18,9)', value_type: pass}
|
83
|
-
```
|
84
|
-
|
85
|
-
### Build
|
86
|
-
|
87
|
-
```
|
88
|
-
$ ./gradlew gem
|
89
|
-
```
|
1
|
+
# PostgreSQL output plugins for Embulk
|
2
|
+
|
3
|
+
PostgreSQL output plugins for Embulk loads records to PostgreSQL.
|
4
|
+
|
5
|
+
## Overview
|
6
|
+
|
7
|
+
* **Plugin type**: output
|
8
|
+
* **Load all or nothing**: depends on the mode. See below.
|
9
|
+
* **Resume supported**: depends on the mode. See below.
|
10
|
+
|
11
|
+
## Configuration
|
12
|
+
|
13
|
+
- **host**: database host name (string, required)
|
14
|
+
- **port**: database port number (integer, default: 5432)
|
15
|
+
- **user**: database login user name (string, required)
|
16
|
+
- **password**: database login password (string, default: "")
|
17
|
+
- **database**: destination database name (string, required)
|
18
|
+
- **schema**: destination schema name (string, default: "public")
|
19
|
+
- **table**: destination table name (string, required)
|
20
|
+
- **options**: extra connection properties (hash, default: {})
|
21
|
+
- **mode**: "insert", "insert_direct", "truncate_insert", "merge" or "replace" (string, required)
|
22
|
+
- **batch_size**: size of a single batch insert (integer, default: 16777216)
|
23
|
+
- **default_timezone**: If the input column type (embulk type) is timestamp, this plugin needs to format the timestamp into a SQL string. This default_timezone option is used to control the timezone. You can override the timezone for each column using the column_options option. (string, default: `UTC`)
|
24
|
+
- **column_options**: advanced: key-value pairs where the key is a column name and the value is the options for that column.
|
25
|
+
- **type**: type of a column when this plugin creates new tables (e.g. `VARCHAR(255)`, `INTEGER NOT NULL UNIQUE`). This is used when this plugin creates intermediate tables (insert, truncate_insert and merge modes), when it creates the target table (insert_direct and replace modes), and when it creates a nonexistent target table automatically. (string, default: depends on input column type. `BIGINT` if input column type is long, `BOOLEAN` if boolean, `DOUBLE PRECISION` if double, `CLOB` if string, `TIMESTAMP WITH TIME ZONE` if timestamp)
|
26
|
+
- **value_type**: This plugin converts the input column type (embulk type) into a database type to build an INSERT statement. This value_type option controls the type of the value in an INSERT statement. (string, default: depends on input column type. Available values are: `byte`, `short`, `int`, `long`, `double`, `float`, `boolean`, `string`, `nstring`, `date`, `time`, `timestamp`, `decimal`, `null`, `pass`)
|
27
|
+
- **timestamp_format**: If input column type (embulk type) is timestamp and value_type is `string` or `nstring`, this plugin needs to format the timestamp value into a string. This timestamp_format option is used to control the format of the timestamp. (string, default: `%Y-%m-%d %H:%M:%S.%6N`)
|
28
|
+
- **timezone**: If input column type (embulk type) is timestamp, this plugin needs to format the timestamp value into a SQL string. In this case, this timezone option is used to control the timezone. (string, value of default_timezone option is used by default)
|
29
|
+
|
30
|
+
### Modes
|
31
|
+
|
32
|
+
* **insert**:
|
33
|
+
* Behavior: This mode writes rows to some intermediate tables first. If all those tasks run correctly, runs `INSERT INTO <target_table> SELECT * FROM <intermediate_table_1> UNION ALL SELECT * FROM <intermediate_table_2> UNION ALL ...` query.
|
34
|
+
* Transactional: Yes. This mode successfully writes all rows, or fails with writing zero rows.
|
35
|
+
* Resumable: Yes.
|
36
|
+
* **insert_direct**:
|
37
|
+
* Behavior: This mode inserts rows to the target table directly.
|
38
|
+
* Transactional: No. If fails, the target table could have some rows inserted.
|
39
|
+
* Resumable: No.
|
40
|
+
* **truncate_insert**:
|
41
|
+
* Behavior: Same as `insert` mode, except that it truncates the target table right before the last `INSERT ...` query.
|
42
|
+
* Transactional: Yes.
|
43
|
+
* Resumable: Yes.
|
44
|
+
* **merge**:
|
45
|
+
* Behavior: This mode writes rows to some intermediate tables first. If all those tasks run correctly, runs `with updated AS (UPDATE .... RETURNING ...) INSERT INTO ....` query.
|
46
|
+
* Transactional: Yes.
|
47
|
+
* Resumable: Yes.
|
48
|
+
* **replace**:
|
49
|
+
* Behavior: Same with `insert` mode excepting that it truncates the target table right before the last `INSERT ...` query.
|
50
|
+
* Transactional: Yes.
|
51
|
+
* Resumable: No.
|
52
|
+
|
53
|
+
### Example
|
54
|
+
|
55
|
+
```yaml
|
56
|
+
out:
|
57
|
+
type: postgresql
|
58
|
+
host: localhost
|
59
|
+
user: pg
|
60
|
+
password: ""
|
61
|
+
database: my_database
|
62
|
+
table: my_table
|
63
|
+
mode: insert
|
64
|
+
```
|
65
|
+
|
66
|
+
Advanced configuration:
|
67
|
+
|
68
|
+
```yaml
|
69
|
+
out:
|
70
|
+
type: postgresql
|
71
|
+
host: localhost
|
72
|
+
user: pg
|
73
|
+
password: ""
|
74
|
+
database: my_database
|
75
|
+
table: my_table
|
76
|
+
options: {loglevel: 2}
|
77
|
+
mode: insert_direct
|
78
|
+
column_options:
|
79
|
+
my_col_1: {type: 'BIGSERIAL'}
|
80
|
+
my_col_3: {type: 'INT NOT NULL'}
|
81
|
+
my_col_4: {value_type: string, timestamp_format: '%Y-%m-%d %H:%M:%S %z', timezone: '-0700'}
|
82
|
+
my_col_5: {type: 'DECIMAL(18,9)', value_type: pass}
|
83
|
+
```
|
84
|
+
|
85
|
+
### Build
|
86
|
+
|
87
|
+
```
|
88
|
+
$ ./gradlew gem
|
89
|
+
```
|
data/build.gradle
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
dependencies {
|
2
|
-
compile project(':embulk-output-jdbc')
|
3
|
-
|
4
|
-
compile 'org.postgresql:postgresql:9.4-1200-jdbc41'
|
5
|
-
|
6
|
-
testCompile project(':embulk-output-jdbc').sourceSets.test.output
|
7
|
-
}
|
1
|
+
dependencies {
|
2
|
+
compile project(':embulk-output-jdbc')
|
3
|
+
|
4
|
+
compile 'org.postgresql:postgresql:9.4-1200-jdbc41'
|
5
|
+
|
6
|
+
testCompile project(':embulk-output-jdbc').sourceSets.test.output
|
7
|
+
}
|
Binary file
|
Binary file
|
@@ -1,3 +1,3 @@
|
|
1
|
-
Embulk::JavaPlugin.register_output(
|
2
|
-
:postgresql, "org.embulk.output.PostgreSQLOutputPlugin",
|
3
|
-
File.expand_path('../../../../classpath', __FILE__))
|
1
|
+
Embulk::JavaPlugin.register_output(
|
2
|
+
:postgresql, "org.embulk.output.PostgreSQLOutputPlugin",
|
3
|
+
File.expand_path('../../../../classpath', __FILE__))
|
@@ -1,165 +1,165 @@
|
|
1
|
-
package org.embulk.output;
|
2
|
-
|
3
|
-
import java.util.List;
|
4
|
-
import java.util.Properties;
|
5
|
-
import java.io.IOException;
|
6
|
-
import java.sql.SQLException;
|
7
|
-
import com.google.common.base.Optional;
|
8
|
-
import com.google.common.collect.ImmutableSet;
|
9
|
-
import org.embulk.config.Config;
|
10
|
-
import org.embulk.config.ConfigDefault;
|
11
|
-
import org.embulk.output.jdbc.AbstractJdbcOutputPlugin;
|
12
|
-
import org.embulk.output.jdbc.BatchInsert;
|
13
|
-
import org.embulk.output.postgresql.PostgreSQLOutputConnector;
|
14
|
-
import org.embulk.output.postgresql.PostgreSQLCopyBatchInsert;
|
15
|
-
|
16
|
-
import com.google.common.collect.ImmutableList;
|
17
|
-
import java.sql.Types;
|
18
|
-
import org.embulk.spi.Schema;
|
19
|
-
import org.embulk.spi.ColumnVisitor;
|
20
|
-
import org.embulk.spi.Column;
|
21
|
-
import org.embulk.output.jdbc.JdbcColumn;
|
22
|
-
import org.embulk.output.jdbc.JdbcSchema;
|
23
|
-
|
24
|
-
public class PostgreSQLOutputPlugin
|
25
|
-
extends AbstractJdbcOutputPlugin
|
26
|
-
{
|
27
|
-
public interface PostgreSQLPluginTask
|
28
|
-
extends PluginTask
|
29
|
-
{
|
30
|
-
@Config("host")
|
31
|
-
public String getHost();
|
32
|
-
|
33
|
-
@Config("port")
|
34
|
-
@ConfigDefault("5432")
|
35
|
-
public int getPort();
|
36
|
-
|
37
|
-
@Config("user")
|
38
|
-
public String getUser();
|
39
|
-
|
40
|
-
@Config("password")
|
41
|
-
@ConfigDefault("\"\"")
|
42
|
-
public String getPassword();
|
43
|
-
|
44
|
-
@Config("database")
|
45
|
-
public String getDatabase();
|
46
|
-
|
47
|
-
@Config("schema")
|
48
|
-
@ConfigDefault("\"public\"")
|
49
|
-
public String getSchema();
|
50
|
-
}
|
51
|
-
|
52
|
-
@Override
|
53
|
-
protected Class<? extends PluginTask> getTaskClass()
|
54
|
-
{
|
55
|
-
return PostgreSQLPluginTask.class;
|
56
|
-
}
|
57
|
-
|
58
|
-
@Override
|
59
|
-
protected Features getFeatures(PluginTask task)
|
60
|
-
{
|
61
|
-
return new Features()
|
62
|
-
.setMaxTableNameLength(30)
|
63
|
-
.setSupportedModes(ImmutableSet.of(Mode.INSERT, Mode.INSERT_DIRECT, Mode.MERGE, Mode.TRUNCATE_INSERT, Mode.REPLACE))
|
64
|
-
.setIgnoreMergeKeys(false);
|
65
|
-
}
|
66
|
-
|
67
|
-
@Override
|
68
|
-
protected PostgreSQLOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation)
|
69
|
-
{
|
70
|
-
PostgreSQLPluginTask t = (PostgreSQLPluginTask) task;
|
71
|
-
|
72
|
-
String url = String.format("jdbc:postgresql://%s:%d/%s",
|
73
|
-
t.getHost(), t.getPort(), t.getDatabase());
|
74
|
-
|
75
|
-
Properties props = new Properties();
|
76
|
-
props.setProperty("loginTimeout", "300"); // seconds
|
77
|
-
props.setProperty("socketTimeout", "1800"); // seconds
|
78
|
-
|
79
|
-
// Enable keepalive based on tcp_keepalive_time, tcp_keepalive_intvl and tcp_keepalive_probes kernel parameters.
|
80
|
-
// Socket options TCP_KEEPCNT, TCP_KEEPIDLE, and TCP_KEEPINTVL are not configurable.
|
81
|
-
props.setProperty("tcpKeepAlive", "true");
|
82
|
-
|
83
|
-
// TODO
|
84
|
-
//switch t.getSssl() {
|
85
|
-
//when "disable":
|
86
|
-
// break;
|
87
|
-
//when "enable":
|
88
|
-
// props.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory"); // disable server-side validation
|
89
|
-
//when "verify":
|
90
|
-
// props.setProperty("ssl", "true");
|
91
|
-
// break;
|
92
|
-
//}
|
93
|
-
|
94
|
-
if (!retryableMetadataOperation) {
|
95
|
-
// non-retryable batch operation uses longer timeout
|
96
|
-
props.setProperty("loginTimeout", "300"); // seconds
|
97
|
-
props.setProperty("socketTimeout", "28800"); // seconds
|
98
|
-
}
|
99
|
-
|
100
|
-
props.putAll(t.getOptions());
|
101
|
-
|
102
|
-
props.setProperty("user", t.getUser());
|
103
|
-
logger.info("Connecting to {} options {}", url, props);
|
104
|
-
props.setProperty("password", t.getPassword());
|
105
|
-
|
106
|
-
return new PostgreSQLOutputConnector(url, props, t.getSchema());
|
107
|
-
}
|
108
|
-
|
109
|
-
@Override
|
110
|
-
protected BatchInsert newBatchInsert(PluginTask task, Optional<List<String>> mergeKeys) throws IOException, SQLException
|
111
|
-
{
|
112
|
-
if (mergeKeys.isPresent()) {
|
113
|
-
throw new UnsupportedOperationException("PostgreSQL output plugin doesn't support 'merge_direct' mode. Use 'merge' mode instead.");
|
114
|
-
}
|
115
|
-
return new PostgreSQLCopyBatchInsert(getConnector(task, true));
|
116
|
-
}
|
117
|
-
|
118
|
-
// TODO This is almost copy from AbstractJdbcOutputPlugin excepting type of TIMESTAMP -> TIMESTAMP WITH TIME ZONE.
|
119
|
-
// AbstractJdbcOutputPlugin should have better extensibility.
|
120
|
-
@Override
|
121
|
-
protected JdbcSchema newJdbcSchemaForNewTable(Schema schema)
|
122
|
-
{
|
123
|
-
final ImmutableList.Builder<JdbcColumn> columns = ImmutableList.builder();
|
124
|
-
for (Column c : schema.getColumns()) {
|
125
|
-
final String columnName = c.getName();
|
126
|
-
c.visit(new ColumnVisitor() {
|
127
|
-
public void booleanColumn(Column column)
|
128
|
-
{
|
129
|
-
columns.add(JdbcColumn.newGenericTypeColumn(
|
130
|
-
columnName, Types.BOOLEAN, "BOOLEAN",
|
131
|
-
1, 0, false, false));
|
132
|
-
}
|
133
|
-
|
134
|
-
public void longColumn(Column column)
|
135
|
-
{
|
136
|
-
columns.add(JdbcColumn.newGenericTypeColumn(
|
137
|
-
columnName, Types.BIGINT, "BIGINT",
|
138
|
-
22, 0, false, false));
|
139
|
-
}
|
140
|
-
|
141
|
-
public void doubleColumn(Column column)
|
142
|
-
{
|
143
|
-
columns.add(JdbcColumn.newGenericTypeColumn(
|
144
|
-
columnName, Types.FLOAT, "DOUBLE PRECISION",
|
145
|
-
24, 0, false, false));
|
146
|
-
}
|
147
|
-
|
148
|
-
public void stringColumn(Column column)
|
149
|
-
{
|
150
|
-
columns.add(JdbcColumn.newGenericTypeColumn(
|
151
|
-
columnName, Types.CLOB, "CLOB",
|
152
|
-
4000, 0, false, false)); // TODO size type param
|
153
|
-
}
|
154
|
-
|
155
|
-
public void timestampColumn(Column column)
|
156
|
-
{
|
157
|
-
columns.add(JdbcColumn.newGenericTypeColumn(
|
158
|
-
columnName, Types.TIMESTAMP, "TIMESTAMP WITH TIME ZONE",
|
159
|
-
26, 0, false, false)); // size type param is from postgresql
|
160
|
-
}
|
161
|
-
});
|
162
|
-
}
|
163
|
-
return new JdbcSchema(columns.build());
|
164
|
-
}
|
165
|
-
}
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.util.Properties;
|
5
|
+
import java.io.IOException;
|
6
|
+
import java.sql.SQLException;
|
7
|
+
import com.google.common.base.Optional;
|
8
|
+
import com.google.common.collect.ImmutableSet;
|
9
|
+
import org.embulk.config.Config;
|
10
|
+
import org.embulk.config.ConfigDefault;
|
11
|
+
import org.embulk.output.jdbc.AbstractJdbcOutputPlugin;
|
12
|
+
import org.embulk.output.jdbc.BatchInsert;
|
13
|
+
import org.embulk.output.postgresql.PostgreSQLOutputConnector;
|
14
|
+
import org.embulk.output.postgresql.PostgreSQLCopyBatchInsert;
|
15
|
+
|
16
|
+
import com.google.common.collect.ImmutableList;
|
17
|
+
import java.sql.Types;
|
18
|
+
import org.embulk.spi.Schema;
|
19
|
+
import org.embulk.spi.ColumnVisitor;
|
20
|
+
import org.embulk.spi.Column;
|
21
|
+
import org.embulk.output.jdbc.JdbcColumn;
|
22
|
+
import org.embulk.output.jdbc.JdbcSchema;
|
23
|
+
|
24
|
+
public class PostgreSQLOutputPlugin
|
25
|
+
extends AbstractJdbcOutputPlugin
|
26
|
+
{
|
27
|
+
public interface PostgreSQLPluginTask
|
28
|
+
extends PluginTask
|
29
|
+
{
|
30
|
+
@Config("host")
|
31
|
+
public String getHost();
|
32
|
+
|
33
|
+
@Config("port")
|
34
|
+
@ConfigDefault("5432")
|
35
|
+
public int getPort();
|
36
|
+
|
37
|
+
@Config("user")
|
38
|
+
public String getUser();
|
39
|
+
|
40
|
+
@Config("password")
|
41
|
+
@ConfigDefault("\"\"")
|
42
|
+
public String getPassword();
|
43
|
+
|
44
|
+
@Config("database")
|
45
|
+
public String getDatabase();
|
46
|
+
|
47
|
+
@Config("schema")
|
48
|
+
@ConfigDefault("\"public\"")
|
49
|
+
public String getSchema();
|
50
|
+
}
|
51
|
+
|
52
|
+
@Override
|
53
|
+
protected Class<? extends PluginTask> getTaskClass()
|
54
|
+
{
|
55
|
+
return PostgreSQLPluginTask.class;
|
56
|
+
}
|
57
|
+
|
58
|
+
@Override
|
59
|
+
protected Features getFeatures(PluginTask task)
|
60
|
+
{
|
61
|
+
return new Features()
|
62
|
+
.setMaxTableNameLength(30)
|
63
|
+
.setSupportedModes(ImmutableSet.of(Mode.INSERT, Mode.INSERT_DIRECT, Mode.MERGE, Mode.TRUNCATE_INSERT, Mode.REPLACE))
|
64
|
+
.setIgnoreMergeKeys(false);
|
65
|
+
}
|
66
|
+
|
67
|
+
@Override
|
68
|
+
protected PostgreSQLOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation)
|
69
|
+
{
|
70
|
+
PostgreSQLPluginTask t = (PostgreSQLPluginTask) task;
|
71
|
+
|
72
|
+
String url = String.format("jdbc:postgresql://%s:%d/%s",
|
73
|
+
t.getHost(), t.getPort(), t.getDatabase());
|
74
|
+
|
75
|
+
Properties props = new Properties();
|
76
|
+
props.setProperty("loginTimeout", "300"); // seconds
|
77
|
+
props.setProperty("socketTimeout", "1800"); // seconds
|
78
|
+
|
79
|
+
// Enable keepalive based on tcp_keepalive_time, tcp_keepalive_intvl and tcp_keepalive_probes kernel parameters.
|
80
|
+
// Socket options TCP_KEEPCNT, TCP_KEEPIDLE, and TCP_KEEPINTVL are not configurable.
|
81
|
+
props.setProperty("tcpKeepAlive", "true");
|
82
|
+
|
83
|
+
// TODO
|
84
|
+
//switch t.getSssl() {
|
85
|
+
//when "disable":
|
86
|
+
// break;
|
87
|
+
//when "enable":
|
88
|
+
// props.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory"); // disable server-side validation
|
89
|
+
//when "verify":
|
90
|
+
// props.setProperty("ssl", "true");
|
91
|
+
// break;
|
92
|
+
//}
|
93
|
+
|
94
|
+
if (!retryableMetadataOperation) {
|
95
|
+
// non-retryable batch operation uses longer timeout
|
96
|
+
props.setProperty("loginTimeout", "300"); // seconds
|
97
|
+
props.setProperty("socketTimeout", "28800"); // seconds
|
98
|
+
}
|
99
|
+
|
100
|
+
props.putAll(t.getOptions());
|
101
|
+
|
102
|
+
props.setProperty("user", t.getUser());
|
103
|
+
logger.info("Connecting to {} options {}", url, props);
|
104
|
+
props.setProperty("password", t.getPassword());
|
105
|
+
|
106
|
+
return new PostgreSQLOutputConnector(url, props, t.getSchema());
|
107
|
+
}
|
108
|
+
|
109
|
+
@Override
|
110
|
+
protected BatchInsert newBatchInsert(PluginTask task, Optional<List<String>> mergeKeys) throws IOException, SQLException
|
111
|
+
{
|
112
|
+
if (mergeKeys.isPresent()) {
|
113
|
+
throw new UnsupportedOperationException("PostgreSQL output plugin doesn't support 'merge_direct' mode. Use 'merge' mode instead.");
|
114
|
+
}
|
115
|
+
return new PostgreSQLCopyBatchInsert(getConnector(task, true));
|
116
|
+
}
|
117
|
+
|
118
|
+
// TODO This is almost copy from AbstractJdbcOutputPlugin excepting type of TIMESTAMP -> TIMESTAMP WITH TIME ZONE.
|
119
|
+
// AbstractJdbcOutputPlugin should have better extensibility.
|
120
|
+
@Override
|
121
|
+
protected JdbcSchema newJdbcSchemaForNewTable(Schema schema)
|
122
|
+
{
|
123
|
+
final ImmutableList.Builder<JdbcColumn> columns = ImmutableList.builder();
|
124
|
+
for (Column c : schema.getColumns()) {
|
125
|
+
final String columnName = c.getName();
|
126
|
+
c.visit(new ColumnVisitor() {
|
127
|
+
public void booleanColumn(Column column)
|
128
|
+
{
|
129
|
+
columns.add(JdbcColumn.newGenericTypeColumn(
|
130
|
+
columnName, Types.BOOLEAN, "BOOLEAN",
|
131
|
+
1, 0, false, false));
|
132
|
+
}
|
133
|
+
|
134
|
+
public void longColumn(Column column)
|
135
|
+
{
|
136
|
+
columns.add(JdbcColumn.newGenericTypeColumn(
|
137
|
+
columnName, Types.BIGINT, "BIGINT",
|
138
|
+
22, 0, false, false));
|
139
|
+
}
|
140
|
+
|
141
|
+
public void doubleColumn(Column column)
|
142
|
+
{
|
143
|
+
columns.add(JdbcColumn.newGenericTypeColumn(
|
144
|
+
columnName, Types.FLOAT, "DOUBLE PRECISION",
|
145
|
+
24, 0, false, false));
|
146
|
+
}
|
147
|
+
|
148
|
+
public void stringColumn(Column column)
|
149
|
+
{
|
150
|
+
columns.add(JdbcColumn.newGenericTypeColumn(
|
151
|
+
columnName, Types.CLOB, "CLOB",
|
152
|
+
4000, 0, false, false)); // TODO size type param
|
153
|
+
}
|
154
|
+
|
155
|
+
public void timestampColumn(Column column)
|
156
|
+
{
|
157
|
+
columns.add(JdbcColumn.newGenericTypeColumn(
|
158
|
+
columnName, Types.TIMESTAMP, "TIMESTAMP WITH TIME ZONE",
|
159
|
+
26, 0, false, false)); // size type param is from postgresql
|
160
|
+
}
|
161
|
+
});
|
162
|
+
}
|
163
|
+
return new JdbcSchema(columns.build());
|
164
|
+
}
|
165
|
+
}
|