embulk-output-postgresql 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +89 -43
- data/build.gradle +7 -7
- data/classpath/embulk-output-jdbc-0.3.0.jar +0 -0
- data/classpath/embulk-output-postgresql-0.3.0.jar +0 -0
- data/lib/embulk/output/postgresql.rb +3 -3
- data/src/main/java/org/embulk/output/PostgreSQLOutputPlugin.java +109 -142
- data/src/main/java/org/embulk/output/postgresql/AbstractPostgreSQLCopyBatchInsert.java +215 -217
- data/src/main/java/org/embulk/output/postgresql/PostgreSQLCopyBatchInsert.java +73 -73
- data/src/main/java/org/embulk/output/postgresql/PostgreSQLOutputConnection.java +98 -49
- data/src/main/java/org/embulk/output/postgresql/PostgreSQLOutputConnector.java +40 -40
- metadata +4 -5
- data/classpath/embulk-output-jdbc-0.2.4.jar +0 -0
- data/classpath/embulk-output-postgresql-0.2.4.jar +0 -0
- data/src/main/java/org/embulk/output/postgresql/PostgresqlBatchUpsert.java +0 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 20e8259bf0d8db2e0a6486cf6d6e8122a35eac4e
|
4
|
+
data.tar.gz: c51d55c5847e97b55862f85bc7d7c36fbfd825be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 77bddc402cbe94140137f501359842fd101206b26602908aba64f632c6b56a0e02c73e4bbc83c6a20b1a4047034fd95d5667a1ee0487c4792e1805724339ab1a
|
7
|
+
data.tar.gz: b8cc2770741d36bb12fb02868b164833f35d57f9d6015115af25f972b2a791e84cc86c4701521547d0004d377e39e4c171a89bcd5b30a6c1885231f85e7c2f77
|
data/README.md
CHANGED
@@ -1,43 +1,89 @@
|
|
1
|
-
# PostgreSQL output plugins for Embulk
|
2
|
-
|
3
|
-
PostgreSQL output plugins for Embulk loads records to PostgreSQL.
|
4
|
-
|
5
|
-
## Overview
|
6
|
-
|
7
|
-
* **Plugin type**: output
|
8
|
-
* **Load all or nothing**: depnds on the mode
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
- **
|
16
|
-
- **
|
17
|
-
- **
|
18
|
-
- **
|
19
|
-
- **
|
20
|
-
- **
|
21
|
-
- **
|
22
|
-
- **
|
23
|
-
- **
|
24
|
-
- **
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
1
|
+
# PostgreSQL output plugins for Embulk
|
2
|
+
|
3
|
+
The PostgreSQL output plugin for Embulk loads records into PostgreSQL.
|
4
|
+
|
5
|
+
## Overview
|
6
|
+
|
7
|
+
* **Plugin type**: output
|
8
|
+
* **Load all or nothing**: depends on the mode. See below.
|
9
|
+
* **Resume supported**: depends on the mode. See below.
|
10
|
+
|
11
|
+
## Configuration
|
12
|
+
|
13
|
+
- **host**: database host name (string, required)
|
14
|
+
- **port**: database port number (integer, default: 5432)
|
15
|
+
- **user**: database login user name (string, required)
|
16
|
+
- **password**: database login password (string, default: "")
|
17
|
+
- **database**: destination database name (string, required)
|
18
|
+
- **schema**: destination schema name (string, default: "public")
|
19
|
+
- **table**: destination table name (string, required)
|
20
|
+
- **options**: extra connection properties (hash, default: {})
|
21
|
+
- **mode**: "insert", "insert_direct", "truncate_insert", "merge", or "replace". See the Modes section below. (string, required)
|
22
|
+
- **batch_size**: size of a single batch insert (integer, default: 16777216)
|
23
|
+
- **default_timezone**: If input column type (embulk type) is timestamp and destination column type is `string` or `nstring`, this plugin needs to format the timestamp into a string. This default_timezone option is used to control the timezone. You can override the timezone for each column using the column_options option. (string, default: `UTC`)
|
24
|
+
- **column_options**: advanced: key-value pairs where each key is a column name and each value is the options for that column.
|
25
|
+
- **type**: type of a column when this plugin creates new tables (e.g. `VARCHAR(255)`, `INTEGER NOT NULL UNIQUE`). This is used when this plugin creates intermediate tables (insert, truncate_insert and merge modes), when it creates the target table (insert_direct and replace modes), and when it creates a nonexistent target table automatically. (string, default: depends on input column type. `BIGINT` if input column type is long, `BOOLEAN` if boolean, `DOUBLE PRECISION` if double, `CLOB` if string, `TIMESTAMP` if timestamp)
|
26
|
+
- **value_type**: This plugin converts input column type (embulk type) into a database type to build an INSERT statement. This value_type option controls the type of the value in an INSERT statement. (string, default: depends on input column type. Available values are: `byte`, `short`, `int`, `long`, `double`, `float`, `boolean`, `string`, `nstring`, `date`, `time`, `timestamp`, `decimal`, `null`, `pass`)
|
27
|
+
- **timestamp_format**: If input column type (embulk type) is timestamp and value_type is `string` or `nstring`, this plugin needs to format the timestamp value into a string. This timestamp_format option is used to control the format of the timestamp. (string, default: `%Y-%m-%d %H:%M:%S.%6N`)
|
28
|
+
- **timezone**: If input column type (embulk type) is timestamp and value_type is `string` or `nstring`, this plugin needs to format the timestamp value into a string. And if the input column type is timestamp and value_type is `date`, this plugin needs to consider timezone. In those cases, this timezone option is used to control the timezone. (string, value of default_timezone option is used by default)
|
29
|
+
|
30
|
+
### Modes
|
31
|
+
|
32
|
+
* **insert**:
|
33
|
+
* Behavior: This mode writes rows to some intermediate tables first. If all those tasks run correctly, runs `INSERT INTO <target_table> SELECT * FROM <intermediate_table_1> UNION ALL SELECT * FROM <intermediate_table_2> UNION ALL ...` query.
|
34
|
+
* Transactional: Yes. This mode either writes all rows successfully, or fails without writing any rows.
|
35
|
+
* Resumable: Yes.
|
36
|
+
* **insert_direct**:
|
37
|
+
* Behavior: This mode inserts rows to the target table directly.
|
38
|
+
* Transactional: No. If fails, the target table could have some rows inserted.
|
39
|
+
* Resumable: No.
|
40
|
+
* **truncate_insert**:
|
41
|
+
* Behavior: Same as `insert` mode, except that it truncates the target table right before the last `INSERT ...` query.
|
42
|
+
* Transactional: Yes.
|
43
|
+
* Resumable: Yes.
|
44
|
+
* **merge**:
|
45
|
+
* Behavior: This mode writes rows to some intermediate tables first. If all those tasks run correctly, runs `INSERT INTO <target_table> SELECT * FROM <intermediate_table_1> UNION ALL SELECT * FROM <intermediate_table_2> UNION ALL ... ON DUPLICATE KEY UPDATE ...` query.
|
46
|
+
* Transactional: Yes.
|
47
|
+
* Resumable: Yes.
|
48
|
+
* **replace**:
|
49
|
+
* Behavior: Same as `insert` mode, except that it truncates the target table right before the last `INSERT ...` query.
|
50
|
+
* Transactional: Yes.
|
51
|
+
* Resumable: No.
|
52
|
+
|
53
|
+
### Example
|
54
|
+
|
55
|
+
```yaml
|
56
|
+
out:
|
57
|
+
type: postgresql
|
58
|
+
host: localhost
|
59
|
+
user: pg
|
60
|
+
password: ""
|
61
|
+
database: my_database
|
62
|
+
table: my_table
|
63
|
+
mode: insert
|
64
|
+
```
|
65
|
+
|
66
|
+
Advanced configuration:
|
67
|
+
|
68
|
+
```yaml
|
69
|
+
out:
|
70
|
+
type: postgresql
|
71
|
+
host: localhost
|
72
|
+
user: pg
|
73
|
+
password: ""
|
74
|
+
database: my_database
|
75
|
+
table: my_table
|
76
|
+
options: {loglevel: 2}
|
77
|
+
mode: insert_direct
|
78
|
+
column_options:
|
79
|
+
my_col_1: {type: 'BIGSERIAL'}
|
80
|
+
my_col_3: {type: 'INT NOT NULL'}
|
81
|
+
my_col_4: {value_type: string, timestamp_format: '%Y-%m-%d %H:%M:%S %z', timezone: '-0700'}
|
82
|
+
my_col_5: {type: 'DECIMAL(18,9)', value_type: pass}
|
83
|
+
```
|
84
|
+
|
85
|
+
### Build
|
86
|
+
|
87
|
+
```
|
88
|
+
$ ./gradlew gem
|
89
|
+
```
|
data/build.gradle
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
dependencies {
|
2
|
-
compile project(':embulk-output-jdbc')
|
3
|
-
|
4
|
-
compile 'org.postgresql:postgresql:9.4-1200-jdbc41'
|
5
|
-
|
6
|
-
testCompile project(':embulk-output-jdbc').sourceSets.test.output
|
7
|
-
}
|
1
|
+
dependencies {
|
2
|
+
compile project(':embulk-output-jdbc')
|
3
|
+
|
4
|
+
compile 'org.postgresql:postgresql:9.4-1200-jdbc41'
|
5
|
+
|
6
|
+
testCompile project(':embulk-output-jdbc').sourceSets.test.output
|
7
|
+
}
|
Binary file
|
Binary file
|
@@ -1,3 +1,3 @@
|
|
1
|
-
Embulk::JavaPlugin.register_output(
|
2
|
-
:postgresql, "org.embulk.output.PostgreSQLOutputPlugin",
|
3
|
-
File.expand_path('../../../../classpath', __FILE__))
|
1
|
+
Embulk::JavaPlugin.register_output(
|
2
|
+
:postgresql, "org.embulk.output.PostgreSQLOutputPlugin",
|
3
|
+
File.expand_path('../../../../classpath', __FILE__))
|
@@ -1,142 +1,109 @@
|
|
1
|
-
package org.embulk.output;
|
2
|
-
|
3
|
-
import java.util.List;
|
4
|
-
import java.util.Properties;
|
5
|
-
import java.io.IOException;
|
6
|
-
import java.sql.SQLException;
|
7
|
-
|
8
|
-
import
|
9
|
-
import org.embulk.
|
10
|
-
import org.embulk.
|
11
|
-
import org.embulk.
|
12
|
-
import org.embulk.
|
13
|
-
import org.embulk.output.
|
14
|
-
import org.embulk.output.
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
public
|
20
|
-
|
21
|
-
{
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
@Config("
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
@
|
30
|
-
public
|
31
|
-
|
32
|
-
@Config("
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
@
|
37
|
-
public String
|
38
|
-
|
39
|
-
@Config("
|
40
|
-
public
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
props.setProperty("
|
70
|
-
|
71
|
-
//
|
72
|
-
//
|
73
|
-
|
74
|
-
|
75
|
-
//
|
76
|
-
//
|
77
|
-
//when "
|
78
|
-
//
|
79
|
-
//
|
80
|
-
//
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
}
|
111
|
-
|
112
|
-
@Override
|
113
|
-
protected void handleColumnsSetters()
|
114
|
-
{
|
115
|
-
int size = columnSetters.size();
|
116
|
-
for (int i=0; i < size; i++) {
|
117
|
-
ColumnSetter columnSetter = columnSetters.get(i);
|
118
|
-
if (!columnSetter.getColumn().isPrimaryKey()) {
|
119
|
-
columns.get(i).visit(columnSetter);
|
120
|
-
}
|
121
|
-
}
|
122
|
-
for (int i=0; i < size; i++) {
|
123
|
-
ColumnSetter columnSetter = columnSetters.get(i);
|
124
|
-
if (columnSetter.getColumn().isPrimaryKey()) {
|
125
|
-
columns.get(i).visit(columnSetter);
|
126
|
-
}
|
127
|
-
}
|
128
|
-
for (int i=0; i < size; i++) {
|
129
|
-
columns.get(i).visit(columnSetters.get(i));
|
130
|
-
}
|
131
|
-
}
|
132
|
-
|
133
|
-
}
|
134
|
-
|
135
|
-
@Override
|
136
|
-
protected BatchInsert newBatchInsert(PluginTask task) throws IOException, SQLException
|
137
|
-
{
|
138
|
-
PostgreSQLOutputConnector connector = getConnector(task, true);
|
139
|
-
return task.getMode().isMerge() ? new PostgresqlBatchUpsert(connector) :
|
140
|
-
new PostgreSQLCopyBatchInsert(getConnector(task, true));
|
141
|
-
}
|
142
|
-
}
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.util.Properties;
|
5
|
+
import java.io.IOException;
|
6
|
+
import java.sql.SQLException;
|
7
|
+
import com.google.common.base.Optional;
|
8
|
+
import com.google.common.collect.ImmutableSet;
|
9
|
+
import org.embulk.config.Config;
|
10
|
+
import org.embulk.config.ConfigDefault;
|
11
|
+
import org.embulk.output.jdbc.AbstractJdbcOutputPlugin;
|
12
|
+
import org.embulk.output.jdbc.BatchInsert;
|
13
|
+
import org.embulk.output.postgresql.PostgreSQLOutputConnector;
|
14
|
+
import org.embulk.output.postgresql.PostgreSQLCopyBatchInsert;
|
15
|
+
|
16
|
+
public class PostgreSQLOutputPlugin
|
17
|
+
extends AbstractJdbcOutputPlugin
|
18
|
+
{
|
19
|
+
public interface PostgreSQLPluginTask
|
20
|
+
extends PluginTask
|
21
|
+
{
|
22
|
+
@Config("host")
|
23
|
+
public String getHost();
|
24
|
+
|
25
|
+
@Config("port")
|
26
|
+
@ConfigDefault("5432")
|
27
|
+
public int getPort();
|
28
|
+
|
29
|
+
@Config("user")
|
30
|
+
public String getUser();
|
31
|
+
|
32
|
+
@Config("password")
|
33
|
+
@ConfigDefault("\"\"")
|
34
|
+
public String getPassword();
|
35
|
+
|
36
|
+
@Config("database")
|
37
|
+
public String getDatabase();
|
38
|
+
|
39
|
+
@Config("schema")
|
40
|
+
@ConfigDefault("\"public\"")
|
41
|
+
public String getSchema();
|
42
|
+
}
|
43
|
+
|
44
|
+
@Override
|
45
|
+
protected Class<? extends PluginTask> getTaskClass()
|
46
|
+
{
|
47
|
+
return PostgreSQLPluginTask.class;
|
48
|
+
}
|
49
|
+
|
50
|
+
@Override
|
51
|
+
protected Features getFeatures(PluginTask task)
|
52
|
+
{
|
53
|
+
return new Features()
|
54
|
+
.setMaxTableNameLength(30)
|
55
|
+
.setSupportedModes(ImmutableSet.of(Mode.INSERT, Mode.INSERT_DIRECT, Mode.MERGE, Mode.TRUNCATE_INSERT, Mode.REPLACE))
|
56
|
+
.setIgnoreMergeKeys(false);
|
57
|
+
}
|
58
|
+
|
59
|
+
@Override
|
60
|
+
protected PostgreSQLOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation)
|
61
|
+
{
|
62
|
+
PostgreSQLPluginTask t = (PostgreSQLPluginTask) task;
|
63
|
+
|
64
|
+
String url = String.format("jdbc:postgresql://%s:%d/%s",
|
65
|
+
t.getHost(), t.getPort(), t.getDatabase());
|
66
|
+
|
67
|
+
Properties props = new Properties();
|
68
|
+
props.setProperty("loginTimeout", "300"); // seconds
|
69
|
+
props.setProperty("socketTimeout", "1800"); // seconds
|
70
|
+
|
71
|
+
// Enable keepalive based on tcp_keepalive_time, tcp_keepalive_intvl and tcp_keepalive_probes kernel parameters.
|
72
|
+
// Socket options TCP_KEEPCNT, TCP_KEEPIDLE, and TCP_KEEPINTVL are not configurable.
|
73
|
+
props.setProperty("tcpKeepAlive", "true");
|
74
|
+
|
75
|
+
// TODO
|
76
|
+
//switch t.getSssl() {
|
77
|
+
//when "disable":
|
78
|
+
// break;
|
79
|
+
//when "enable":
|
80
|
+
// props.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory"); // disable server-side validation
|
81
|
+
//when "verify":
|
82
|
+
// props.setProperty("ssl", "true");
|
83
|
+
// break;
|
84
|
+
//}
|
85
|
+
|
86
|
+
if (!retryableMetadataOperation) {
|
87
|
+
// non-retryable batch operation uses longer timeout
|
88
|
+
props.setProperty("loginTimeout", "300"); // seconds
|
89
|
+
props.setProperty("socketTimeout", "28800"); // seconds
|
90
|
+
}
|
91
|
+
|
92
|
+
props.putAll(t.getOptions());
|
93
|
+
|
94
|
+
props.setProperty("user", t.getUser());
|
95
|
+
logger.info("Connecting to {} options {}", url, props);
|
96
|
+
props.setProperty("password", t.getPassword());
|
97
|
+
|
98
|
+
return new PostgreSQLOutputConnector(url, props, t.getSchema());
|
99
|
+
}
|
100
|
+
|
101
|
+
@Override
|
102
|
+
protected BatchInsert newBatchInsert(PluginTask task, Optional<List<String>> mergeKeys) throws IOException, SQLException
|
103
|
+
{
|
104
|
+
if (mergeKeys.isPresent()) {
|
105
|
+
throw new UnsupportedOperationException("PostgreSQL output plugin doesn't support 'merge_direct' mode. Use 'merge' mode instead.");
|
106
|
+
}
|
107
|
+
return new PostgreSQLCopyBatchInsert(getConnector(task, true));
|
108
|
+
}
|
109
|
+
}
|
@@ -1,217 +1,215 @@
|
|
1
|
-
package org.embulk.output.postgresql;
|
2
|
-
|
3
|
-
import java.io.File;
|
4
|
-
import java.io.FileOutputStream;
|
5
|
-
import java.io.Writer;
|
6
|
-
import java.io.BufferedWriter;
|
7
|
-
import java.io.OutputStreamWriter;
|
8
|
-
import java.io.IOException;
|
9
|
-
import java.nio.charset.Charset;
|
10
|
-
import java.math.BigDecimal;
|
11
|
-
import java.sql.Date;
|
12
|
-
import java.sql.Time;
|
13
|
-
import java.sql.Timestamp;
|
14
|
-
import java.sql.SQLException;
|
15
|
-
import org.embulk.
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
protected static final
|
23
|
-
|
24
|
-
protected static final String
|
25
|
-
|
26
|
-
protected
|
27
|
-
|
28
|
-
protected
|
29
|
-
protected
|
30
|
-
|
31
|
-
protected
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
return
|
72
|
-
}
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
}
|
217
|
-
}
|
1
|
+
package org.embulk.output.postgresql;
|
2
|
+
|
3
|
+
import java.io.File;
|
4
|
+
import java.io.FileOutputStream;
|
5
|
+
import java.io.Writer;
|
6
|
+
import java.io.BufferedWriter;
|
7
|
+
import java.io.OutputStreamWriter;
|
8
|
+
import java.io.IOException;
|
9
|
+
import java.nio.charset.Charset;
|
10
|
+
import java.math.BigDecimal;
|
11
|
+
import java.sql.Date;
|
12
|
+
import java.sql.Time;
|
13
|
+
import java.sql.Timestamp;
|
14
|
+
import java.sql.SQLException;
|
15
|
+
import org.embulk.output.jdbc.BatchInsert;
|
16
|
+
|
17
|
+
public abstract class AbstractPostgreSQLCopyBatchInsert
|
18
|
+
implements BatchInsert
|
19
|
+
{
|
20
|
+
protected static final Charset FILE_CHARSET = Charset.forName("UTF-8");
|
21
|
+
|
22
|
+
protected static final String nullString = "\\N";
|
23
|
+
protected static final String newLineString = "\n";
|
24
|
+
protected static final String delimiterString = "\t";
|
25
|
+
|
26
|
+
protected File currentFile;
|
27
|
+
protected BufferedWriter writer;
|
28
|
+
protected int index;
|
29
|
+
protected int batchRows;
|
30
|
+
|
31
|
+
protected AbstractPostgreSQLCopyBatchInsert() throws IOException
|
32
|
+
{
|
33
|
+
this.index = 0;
|
34
|
+
openNewFile();
|
35
|
+
}
|
36
|
+
|
37
|
+
private File createTempFile() throws IOException
|
38
|
+
{
|
39
|
+
return File.createTempFile("embulk-output-postgres-copy-", ".tsv.tmp"); // TODO configurable temporary file path
|
40
|
+
}
|
41
|
+
|
42
|
+
protected File openNewFile() throws IOException
|
43
|
+
{
|
44
|
+
File newFile = createTempFile();
|
45
|
+
File oldFile = closeCurrentFile();
|
46
|
+
this.writer = openWriter(newFile);
|
47
|
+
currentFile = newFile;
|
48
|
+
return oldFile;
|
49
|
+
}
|
50
|
+
|
51
|
+
protected File closeCurrentFile() throws IOException
|
52
|
+
{
|
53
|
+
if(writer != null) {
|
54
|
+
writer.close();
|
55
|
+
writer = null;
|
56
|
+
}
|
57
|
+
return currentFile;
|
58
|
+
}
|
59
|
+
|
60
|
+
protected BufferedWriter openWriter(File newFile) throws IOException
|
61
|
+
{
|
62
|
+
return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(newFile), FILE_CHARSET));
|
63
|
+
}
|
64
|
+
|
65
|
+
public int getBatchWeight()
|
66
|
+
{
|
67
|
+
long fsize = currentFile.length();
|
68
|
+
if (fsize > Integer.MAX_VALUE) {
|
69
|
+
return Integer.MAX_VALUE;
|
70
|
+
} else {
|
71
|
+
return (int) fsize;
|
72
|
+
}
|
73
|
+
}
|
74
|
+
|
75
|
+
public void finish() throws IOException, SQLException
|
76
|
+
{
|
77
|
+
closeCurrentFile(); // this is necessary to make getBatchWeight() work
|
78
|
+
if (getBatchWeight() != 0) {
|
79
|
+
flush();
|
80
|
+
}
|
81
|
+
}
|
82
|
+
|
83
|
+
public void add() throws IOException
|
84
|
+
{
|
85
|
+
writer.write(newLineString);
|
86
|
+
batchRows++;
|
87
|
+
index = 0;
|
88
|
+
}
|
89
|
+
|
90
|
+
private void appendDelimiter() throws IOException
|
91
|
+
{
|
92
|
+
if(index != 0) {
|
93
|
+
writer.write(delimiterString);
|
94
|
+
}
|
95
|
+
index++;
|
96
|
+
}
|
97
|
+
|
98
|
+
public void setNull(int sqlType) throws IOException
|
99
|
+
{
|
100
|
+
appendDelimiter();
|
101
|
+
writer.write(nullString);
|
102
|
+
}
|
103
|
+
|
104
|
+
public void setBoolean(boolean v) throws IOException
|
105
|
+
{
|
106
|
+
appendDelimiter();
|
107
|
+
writer.write(String.valueOf(v));
|
108
|
+
}
|
109
|
+
|
110
|
+
public void setByte(byte v) throws IOException
|
111
|
+
{
|
112
|
+
appendDelimiter();
|
113
|
+
setEscapedString(String.valueOf(v));
|
114
|
+
}
|
115
|
+
|
116
|
+
public void setShort(short v) throws IOException
|
117
|
+
{
|
118
|
+
appendDelimiter();
|
119
|
+
writer.write(String.valueOf(v));
|
120
|
+
}
|
121
|
+
|
122
|
+
public void setInt(int v) throws IOException
|
123
|
+
{
|
124
|
+
appendDelimiter();
|
125
|
+
writer.write(String.valueOf(v));
|
126
|
+
}
|
127
|
+
|
128
|
+
public void setLong(long v) throws IOException
|
129
|
+
{
|
130
|
+
appendDelimiter();
|
131
|
+
writer.write(String.valueOf(v));
|
132
|
+
}
|
133
|
+
|
134
|
+
public void setFloat(float v) throws IOException
|
135
|
+
{
|
136
|
+
appendDelimiter();
|
137
|
+
writer.write(String.valueOf(v));
|
138
|
+
}
|
139
|
+
|
140
|
+
public void setDouble(double v) throws IOException
|
141
|
+
{
|
142
|
+
appendDelimiter();
|
143
|
+
writer.write(String.valueOf(v));
|
144
|
+
}
|
145
|
+
|
146
|
+
public void setBigDecimal(BigDecimal v) throws IOException
|
147
|
+
{
|
148
|
+
appendDelimiter();
|
149
|
+
writer.write(String.valueOf(v));
|
150
|
+
}
|
151
|
+
|
152
|
+
public void setString(String v) throws IOException
|
153
|
+
{
|
154
|
+
appendDelimiter();
|
155
|
+
setEscapedString(v);
|
156
|
+
}
|
157
|
+
|
158
|
+
public void setNString(String v) throws IOException
|
159
|
+
{
|
160
|
+
appendDelimiter();
|
161
|
+
setEscapedString(v);
|
162
|
+
}
|
163
|
+
|
164
|
+
public void setBytes(byte[] v) throws IOException
|
165
|
+
{
|
166
|
+
appendDelimiter();
|
167
|
+
setEscapedString(String.valueOf(v));
|
168
|
+
}
|
169
|
+
|
170
|
+
public void setSqlDate(Date v, int sqlType) throws IOException
|
171
|
+
{
|
172
|
+
appendDelimiter();
|
173
|
+
writer.write(v.toString());
|
174
|
+
}
|
175
|
+
|
176
|
+
public void setSqlTime(Time v, int sqlType) throws IOException
|
177
|
+
{
|
178
|
+
appendDelimiter();
|
179
|
+
writer.write(v.toString());
|
180
|
+
}
|
181
|
+
|
182
|
+
public void setSqlTimestamp(Timestamp v, int sqlType) throws IOException
|
183
|
+
{
|
184
|
+
appendDelimiter();
|
185
|
+
writer.write(v.toString());
|
186
|
+
}
|
187
|
+
|
188
|
+
// Escape \, \n, \t, \r
|
189
|
+
// Remove \0
|
190
|
+
private void setEscapedString(String v) throws IOException{
|
191
|
+
for (char c : v.toCharArray()) {
|
192
|
+
String s;
|
193
|
+
switch (c) {
|
194
|
+
case '\\':
|
195
|
+
s = "\\\\";
|
196
|
+
break;
|
197
|
+
case '\n':
|
198
|
+
s = "\\n";
|
199
|
+
break;
|
200
|
+
case '\t':
|
201
|
+
s = "\\t";
|
202
|
+
break;
|
203
|
+
case '\r':
|
204
|
+
s = "\\r";
|
205
|
+
break;
|
206
|
+
case 0:
|
207
|
+
s = "";
|
208
|
+
break;
|
209
|
+
default:
|
210
|
+
s = String.valueOf(c);
|
211
|
+
}
|
212
|
+
writer.write(s);
|
213
|
+
}
|
214
|
+
}
|
215
|
+
}
|