embulk-output-postgresql 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +89 -43
- data/build.gradle +7 -7
- data/classpath/embulk-output-jdbc-0.3.0.jar +0 -0
- data/classpath/embulk-output-postgresql-0.3.0.jar +0 -0
- data/lib/embulk/output/postgresql.rb +3 -3
- data/src/main/java/org/embulk/output/PostgreSQLOutputPlugin.java +109 -142
- data/src/main/java/org/embulk/output/postgresql/AbstractPostgreSQLCopyBatchInsert.java +215 -217
- data/src/main/java/org/embulk/output/postgresql/PostgreSQLCopyBatchInsert.java +73 -73
- data/src/main/java/org/embulk/output/postgresql/PostgreSQLOutputConnection.java +98 -49
- data/src/main/java/org/embulk/output/postgresql/PostgreSQLOutputConnector.java +40 -40
- metadata +4 -5
- data/classpath/embulk-output-jdbc-0.2.4.jar +0 -0
- data/classpath/embulk-output-postgresql-0.2.4.jar +0 -0
- data/src/main/java/org/embulk/output/postgresql/PostgresqlBatchUpsert.java +0 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 20e8259bf0d8db2e0a6486cf6d6e8122a35eac4e
|
4
|
+
data.tar.gz: c51d55c5847e97b55862f85bc7d7c36fbfd825be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 77bddc402cbe94140137f501359842fd101206b26602908aba64f632c6b56a0e02c73e4bbc83c6a20b1a4047034fd95d5667a1ee0487c4792e1805724339ab1a
|
7
|
+
data.tar.gz: b8cc2770741d36bb12fb02868b164833f35d57f9d6015115af25f972b2a791e84cc86c4701521547d0004d377e39e4c171a89bcd5b30a6c1885231f85e7c2f77
|
data/README.md
CHANGED
@@ -1,43 +1,89 @@
|
|
1
|
-
# PostgreSQL output plugins for Embulk
|
2
|
-
|
3
|
-
PostgreSQL output plugins for Embulk loads records to PostgreSQL.
|
4
|
-
|
5
|
-
## Overview
|
6
|
-
|
7
|
-
* **Plugin type**: output
|
8
|
-
* **Load all or nothing**: depnds on the mode
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
- **
|
16
|
-
- **
|
17
|
-
- **
|
18
|
-
- **
|
19
|
-
- **
|
20
|
-
- **
|
21
|
-
- **
|
22
|
-
- **
|
23
|
-
- **
|
24
|
-
- **
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
1
|
+
# PostgreSQL output plugins for Embulk
|
2
|
+
|
3
|
+
PostgreSQL output plugins for Embulk loads records to PostgreSQL.
|
4
|
+
|
5
|
+
## Overview
|
6
|
+
|
7
|
+
* **Plugin type**: output
|
8
|
+
* **Load all or nothing**: depends on the mode. See below.
|
9
|
+
* **Resume supported**: depends on the mode. See below.
|
10
|
+
|
11
|
+
## Configuration
|
12
|
+
|
13
|
+
- **host**: database host name (string, required)
|
14
|
+
- **port**: database port number (integer, default: 5432)
|
15
|
+
- **user**: database login user name (string, required)
|
16
|
+
- **password**: database login password (string, default: "")
|
17
|
+
- **database**: destination database name (string, required)
|
18
|
+
- **schema**: destination schema name (string, default: "public")
|
19
|
+
- **table**: destination table name (string, required)
|
20
|
+
- **options**: extra connection properties (hash, default: {})
|
21
|
+
- **mode**: "insert", "insert_direct", "truncate_insert", "merge" or "replace". See the Modes section below. (string, required)
|
22
|
+
- **batch_size**: size of a single batch insert (integer, default: 16777216)
|
23
|
+
- **default_timezone**: If the input column type (embulk type) is timestamp and the destination column type is `string` or `nstring`, this plugin needs to format the timestamp into a string. This default_timezone option controls the timezone used for that. You can override the timezone for each column using the column_options option. (string, default: `UTC`)
|
24
|
+
- **column_options**: advanced: key-value pairs where the key is a column name and the value is the options for that column.
|
25
|
+
- **type**: type of a column when this plugin creates new tables (e.g. `VARCHAR(255)`, `INTEGER NOT NULL UNIQUE`). This is used when this plugin creates intermediate tables (insert, truncate_insert and merge modes), when it creates the target table (insert_direct and replace modes), and when it automatically creates a nonexistent target table. (string, default: depends on input column type. `BIGINT` if input column type is long, `BOOLEAN` if boolean, `DOUBLE PRECISION` if double, `CLOB` if string, `TIMESTAMP` if timestamp)
|
26
|
+
- **value_type**: This plugin converts the input column type (embulk type) into a database type to build an INSERT statement. This value_type option controls the type of the value in the INSERT statement. (string, default: depends on input column type. Available values are: `byte`, `short`, `int`, `long`, `double`, `float`, `boolean`, `string`, `nstring`, `date`, `time`, `timestamp`, `decimal`, `null`, `pass`)
|
27
|
+
- **timestamp_format**: If input column type (embulk type) is timestamp and value_type is `string` or `nstring`, this plugin needs to format the timestamp value into a string. This timestamp_format option is used to control the format of the timestamp. (string, default: `%Y-%m-%d %H:%M:%S.%6N`)
|
28
|
+
- **timezone**: If input column type (embulk type) is timestamp and value_type is `string` or `nstring`, this plugin needs to format the timestamp value into a string. And if the input column type is timestamp and value_type is `date`, this plugin needs to consider timezone. In those cases, this timezone option is used to control the timezone. (string, value of default_timezone option is used by default)
|
29
|
+
|
30
|
+
### Modes
|
31
|
+
|
32
|
+
* **insert**:
|
33
|
+
* Behavior: This mode writes rows to some intermediate tables first. If all those tasks run correctly, runs `INSERT INTO <target_table> SELECT * FROM <intermediate_table_1> UNION ALL SELECT * FROM <intermediate_table_2> UNION ALL ...` query.
|
34
|
+
* Transactional: Yes. This mode successfully writes all rows, or fails with writing zero rows.
|
35
|
+
* Resumable: Yes.
|
36
|
+
* **insert_direct**:
|
37
|
+
* Behavior: This mode inserts rows to the target table directly.
|
38
|
+
* Transactional: No. If it fails, the target table could have some rows inserted.
|
39
|
+
* Resumable: No.
|
40
|
+
* **truncate_insert**:
|
41
|
+
* Behavior: Same as `insert` mode except that it truncates the target table right before the last `INSERT ...` query.
|
42
|
+
* Transactional: Yes.
|
43
|
+
* Resumable: Yes.
|
44
|
+
* **merge**:
|
45
|
+
* Behavior: This mode writes rows to some intermediate tables first. If all those tasks run correctly, runs `INSERT INTO <target_table> SELECT * FROM <intermediate_table_1> UNION ALL SELECT * FROM <intermediate_table_2> UNION ALL ... ON DUPLICATE KEY UPDATE ...` query.
|
46
|
+
* Transactional: Yes.
|
47
|
+
* Resumable: Yes.
|
48
|
+
* **replace**:
|
49
|
+
* Behavior: Same as `insert` mode except that it truncates the target table right before the last `INSERT ...` query.
|
50
|
+
* Transactional: Yes.
|
51
|
+
* Resumable: No.
|
52
|
+
|
53
|
+
### Example
|
54
|
+
|
55
|
+
```yaml
|
56
|
+
out:
|
57
|
+
type: postgresql
|
58
|
+
host: localhost
|
59
|
+
user: pg
|
60
|
+
password: ""
|
61
|
+
database: my_database
|
62
|
+
table: my_table
|
63
|
+
mode: insert
|
64
|
+
```
|
65
|
+
|
66
|
+
Advanced configuration:
|
67
|
+
|
68
|
+
```yaml
|
69
|
+
out:
|
70
|
+
type: postgresql
|
71
|
+
host: localhost
|
72
|
+
user: pg
|
73
|
+
password: ""
|
74
|
+
database: my_database
|
75
|
+
table: my_table
|
76
|
+
options: {loglevel: 2}
|
77
|
+
mode: insert_direct
|
78
|
+
column_options:
|
79
|
+
my_col_1: {type: 'BIGSERIAL'}
|
80
|
+
my_col_3: {type: 'INT NOT NULL'}
|
81
|
+
my_col_4: {value_type: string, timestamp_format: '%Y-%m-%d %H:%M:%S %z', timezone: '-0700'}
|
82
|
+
my_col_5: {type: 'DECIMAL(18,9)', value_type: pass}
|
83
|
+
```
|
84
|
+
|
85
|
+
### Build
|
86
|
+
|
87
|
+
```
|
88
|
+
$ ./gradlew gem
|
89
|
+
```
|
data/build.gradle
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
dependencies {
|
2
|
-
compile project(':embulk-output-jdbc')
|
3
|
-
|
4
|
-
compile 'org.postgresql:postgresql:9.4-1200-jdbc41'
|
5
|
-
|
6
|
-
testCompile project(':embulk-output-jdbc').sourceSets.test.output
|
7
|
-
}
|
1
|
+
dependencies {
|
2
|
+
compile project(':embulk-output-jdbc')
|
3
|
+
|
4
|
+
compile 'org.postgresql:postgresql:9.4-1200-jdbc41'
|
5
|
+
|
6
|
+
testCompile project(':embulk-output-jdbc').sourceSets.test.output
|
7
|
+
}
|
Binary file
|
Binary file
|
@@ -1,3 +1,3 @@
|
|
1
|
-
Embulk::JavaPlugin.register_output(
|
2
|
-
:postgresql, "org.embulk.output.PostgreSQLOutputPlugin",
|
3
|
-
File.expand_path('../../../../classpath', __FILE__))
|
1
|
+
Embulk::JavaPlugin.register_output(
|
2
|
+
:postgresql, "org.embulk.output.PostgreSQLOutputPlugin",
|
3
|
+
File.expand_path('../../../../classpath', __FILE__))
|
@@ -1,142 +1,109 @@
|
|
1
|
-
package org.embulk.output;
|
2
|
-
|
3
|
-
import java.util.List;
|
4
|
-
import java.util.Properties;
|
5
|
-
import java.io.IOException;
|
6
|
-
import java.sql.SQLException;
|
7
|
-
|
8
|
-
import
|
9
|
-
import org.embulk.
|
10
|
-
import org.embulk.
|
11
|
-
import org.embulk.
|
12
|
-
import org.embulk.
|
13
|
-
import org.embulk.output.
|
14
|
-
import org.embulk.output.
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
public
|
20
|
-
|
21
|
-
{
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
@Config("
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
@
|
30
|
-
public
|
31
|
-
|
32
|
-
@Config("
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
@
|
37
|
-
public String
|
38
|
-
|
39
|
-
@Config("
|
40
|
-
public
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
props.setProperty("
|
70
|
-
|
71
|
-
//
|
72
|
-
//
|
73
|
-
|
74
|
-
|
75
|
-
//
|
76
|
-
//
|
77
|
-
//when "
|
78
|
-
//
|
79
|
-
//
|
80
|
-
//
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
}
|
111
|
-
|
112
|
-
@Override
|
113
|
-
protected void handleColumnsSetters()
|
114
|
-
{
|
115
|
-
int size = columnSetters.size();
|
116
|
-
for (int i=0; i < size; i++) {
|
117
|
-
ColumnSetter columnSetter = columnSetters.get(i);
|
118
|
-
if (!columnSetter.getColumn().isPrimaryKey()) {
|
119
|
-
columns.get(i).visit(columnSetter);
|
120
|
-
}
|
121
|
-
}
|
122
|
-
for (int i=0; i < size; i++) {
|
123
|
-
ColumnSetter columnSetter = columnSetters.get(i);
|
124
|
-
if (columnSetter.getColumn().isPrimaryKey()) {
|
125
|
-
columns.get(i).visit(columnSetter);
|
126
|
-
}
|
127
|
-
}
|
128
|
-
for (int i=0; i < size; i++) {
|
129
|
-
columns.get(i).visit(columnSetters.get(i));
|
130
|
-
}
|
131
|
-
}
|
132
|
-
|
133
|
-
}
|
134
|
-
|
135
|
-
@Override
|
136
|
-
protected BatchInsert newBatchInsert(PluginTask task) throws IOException, SQLException
|
137
|
-
{
|
138
|
-
PostgreSQLOutputConnector connector = getConnector(task, true);
|
139
|
-
return task.getMode().isMerge() ? new PostgresqlBatchUpsert(connector) :
|
140
|
-
new PostgreSQLCopyBatchInsert(getConnector(task, true));
|
141
|
-
}
|
142
|
-
}
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.util.Properties;
|
5
|
+
import java.io.IOException;
|
6
|
+
import java.sql.SQLException;
|
7
|
+
import com.google.common.base.Optional;
|
8
|
+
import com.google.common.collect.ImmutableSet;
|
9
|
+
import org.embulk.config.Config;
|
10
|
+
import org.embulk.config.ConfigDefault;
|
11
|
+
import org.embulk.output.jdbc.AbstractJdbcOutputPlugin;
|
12
|
+
import org.embulk.output.jdbc.BatchInsert;
|
13
|
+
import org.embulk.output.postgresql.PostgreSQLOutputConnector;
|
14
|
+
import org.embulk.output.postgresql.PostgreSQLCopyBatchInsert;
|
15
|
+
|
16
|
+
public class PostgreSQLOutputPlugin
|
17
|
+
extends AbstractJdbcOutputPlugin
|
18
|
+
{
|
19
|
+
public interface PostgreSQLPluginTask
|
20
|
+
extends PluginTask
|
21
|
+
{
|
22
|
+
@Config("host")
|
23
|
+
public String getHost();
|
24
|
+
|
25
|
+
@Config("port")
|
26
|
+
@ConfigDefault("5432")
|
27
|
+
public int getPort();
|
28
|
+
|
29
|
+
@Config("user")
|
30
|
+
public String getUser();
|
31
|
+
|
32
|
+
@Config("password")
|
33
|
+
@ConfigDefault("\"\"")
|
34
|
+
public String getPassword();
|
35
|
+
|
36
|
+
@Config("database")
|
37
|
+
public String getDatabase();
|
38
|
+
|
39
|
+
@Config("schema")
|
40
|
+
@ConfigDefault("\"public\"")
|
41
|
+
public String getSchema();
|
42
|
+
}
|
43
|
+
|
44
|
+
@Override
|
45
|
+
protected Class<? extends PluginTask> getTaskClass()
|
46
|
+
{
|
47
|
+
return PostgreSQLPluginTask.class;
|
48
|
+
}
|
49
|
+
|
50
|
+
@Override
|
51
|
+
protected Features getFeatures(PluginTask task)
|
52
|
+
{
|
53
|
+
return new Features()
|
54
|
+
.setMaxTableNameLength(30)
|
55
|
+
.setSupportedModes(ImmutableSet.of(Mode.INSERT, Mode.INSERT_DIRECT, Mode.MERGE, Mode.TRUNCATE_INSERT, Mode.REPLACE))
|
56
|
+
.setIgnoreMergeKeys(false);
|
57
|
+
}
|
58
|
+
|
59
|
+
@Override
|
60
|
+
protected PostgreSQLOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation)
|
61
|
+
{
|
62
|
+
PostgreSQLPluginTask t = (PostgreSQLPluginTask) task;
|
63
|
+
|
64
|
+
String url = String.format("jdbc:postgresql://%s:%d/%s",
|
65
|
+
t.getHost(), t.getPort(), t.getDatabase());
|
66
|
+
|
67
|
+
Properties props = new Properties();
|
68
|
+
props.setProperty("loginTimeout", "300"); // seconds
|
69
|
+
props.setProperty("socketTimeout", "1800"); // seconds
|
70
|
+
|
71
|
+
// Enable keepalive based on tcp_keepalive_time, tcp_keepalive_intvl and tcp_keepalive_probes kernel parameters.
|
72
|
+
// Socket options TCP_KEEPCNT, TCP_KEEPIDLE, and TCP_KEEPINTVL are not configurable.
|
73
|
+
props.setProperty("tcpKeepAlive", "true");
|
74
|
+
|
75
|
+
// TODO
|
76
|
+
//switch t.getSssl() {
|
77
|
+
//when "disable":
|
78
|
+
// break;
|
79
|
+
//when "enable":
|
80
|
+
// props.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory"); // disable server-side validation
|
81
|
+
//when "verify":
|
82
|
+
// props.setProperty("ssl", "true");
|
83
|
+
// break;
|
84
|
+
//}
|
85
|
+
|
86
|
+
if (!retryableMetadataOperation) {
|
87
|
+
// non-retryable batch operation uses longer timeout
|
88
|
+
props.setProperty("loginTimeout", "300"); // seconds
|
89
|
+
props.setProperty("socketTimeout", "28800"); // seconds
|
90
|
+
}
|
91
|
+
|
92
|
+
props.putAll(t.getOptions());
|
93
|
+
|
94
|
+
props.setProperty("user", t.getUser());
|
95
|
+
logger.info("Connecting to {} options {}", url, props);
|
96
|
+
props.setProperty("password", t.getPassword());
|
97
|
+
|
98
|
+
return new PostgreSQLOutputConnector(url, props, t.getSchema());
|
99
|
+
}
|
100
|
+
|
101
|
+
@Override
|
102
|
+
protected BatchInsert newBatchInsert(PluginTask task, Optional<List<String>> mergeKeys) throws IOException, SQLException
|
103
|
+
{
|
104
|
+
if (mergeKeys.isPresent()) {
|
105
|
+
throw new UnsupportedOperationException("PostgreSQL output plugin doesn't support 'merge_direct' mode. Use 'merge' mode instead.");
|
106
|
+
}
|
107
|
+
return new PostgreSQLCopyBatchInsert(getConnector(task, true));
|
108
|
+
}
|
109
|
+
}
|
@@ -1,217 +1,215 @@
|
|
1
|
-
package org.embulk.output.postgresql;
|
2
|
-
|
3
|
-
import java.io.File;
|
4
|
-
import java.io.FileOutputStream;
|
5
|
-
import java.io.Writer;
|
6
|
-
import java.io.BufferedWriter;
|
7
|
-
import java.io.OutputStreamWriter;
|
8
|
-
import java.io.IOException;
|
9
|
-
import java.nio.charset.Charset;
|
10
|
-
import java.math.BigDecimal;
|
11
|
-
import java.sql.Date;
|
12
|
-
import java.sql.Time;
|
13
|
-
import java.sql.Timestamp;
|
14
|
-
import java.sql.SQLException;
|
15
|
-
import org.embulk.
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
protected static final
|
23
|
-
|
24
|
-
protected static final String
|
25
|
-
|
26
|
-
protected
|
27
|
-
|
28
|
-
protected
|
29
|
-
protected
|
30
|
-
|
31
|
-
protected
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
return
|
72
|
-
}
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
}
|
217
|
-
}
|
1
|
+
package org.embulk.output.postgresql;
|
2
|
+
|
3
|
+
import java.io.File;
|
4
|
+
import java.io.FileOutputStream;
|
5
|
+
import java.io.Writer;
|
6
|
+
import java.io.BufferedWriter;
|
7
|
+
import java.io.OutputStreamWriter;
|
8
|
+
import java.io.IOException;
|
9
|
+
import java.nio.charset.Charset;
|
10
|
+
import java.math.BigDecimal;
|
11
|
+
import java.sql.Date;
|
12
|
+
import java.sql.Time;
|
13
|
+
import java.sql.Timestamp;
|
14
|
+
import java.sql.SQLException;
|
15
|
+
import org.embulk.output.jdbc.BatchInsert;
|
16
|
+
|
17
|
+
public abstract class AbstractPostgreSQLCopyBatchInsert
|
18
|
+
implements BatchInsert
|
19
|
+
{
|
20
|
+
protected static final Charset FILE_CHARSET = Charset.forName("UTF-8");
|
21
|
+
|
22
|
+
protected static final String nullString = "\\N";
|
23
|
+
protected static final String newLineString = "\n";
|
24
|
+
protected static final String delimiterString = "\t";
|
25
|
+
|
26
|
+
protected File currentFile;
|
27
|
+
protected BufferedWriter writer;
|
28
|
+
protected int index;
|
29
|
+
protected int batchRows;
|
30
|
+
|
31
|
+
protected AbstractPostgreSQLCopyBatchInsert() throws IOException
|
32
|
+
{
|
33
|
+
this.index = 0;
|
34
|
+
openNewFile();
|
35
|
+
}
|
36
|
+
|
37
|
+
private File createTempFile() throws IOException
|
38
|
+
{
|
39
|
+
return File.createTempFile("embulk-output-postgres-copy-", ".tsv.tmp"); // TODO configurable temporary file path
|
40
|
+
}
|
41
|
+
|
42
|
+
protected File openNewFile() throws IOException
|
43
|
+
{
|
44
|
+
File newFile = createTempFile();
|
45
|
+
File oldFile = closeCurrentFile();
|
46
|
+
this.writer = openWriter(newFile);
|
47
|
+
currentFile = newFile;
|
48
|
+
return oldFile;
|
49
|
+
}
|
50
|
+
|
51
|
+
protected File closeCurrentFile() throws IOException
|
52
|
+
{
|
53
|
+
if(writer != null) {
|
54
|
+
writer.close();
|
55
|
+
writer = null;
|
56
|
+
}
|
57
|
+
return currentFile;
|
58
|
+
}
|
59
|
+
|
60
|
+
protected BufferedWriter openWriter(File newFile) throws IOException
|
61
|
+
{
|
62
|
+
return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(newFile), FILE_CHARSET));
|
63
|
+
}
|
64
|
+
|
65
|
+
public int getBatchWeight()
|
66
|
+
{
|
67
|
+
long fsize = currentFile.length();
|
68
|
+
if (fsize > Integer.MAX_VALUE) {
|
69
|
+
return Integer.MAX_VALUE;
|
70
|
+
} else {
|
71
|
+
return (int) fsize;
|
72
|
+
}
|
73
|
+
}
|
74
|
+
|
75
|
+
public void finish() throws IOException, SQLException
|
76
|
+
{
|
77
|
+
closeCurrentFile(); // this is necessary to make getBatchWeight() work
|
78
|
+
if (getBatchWeight() != 0) {
|
79
|
+
flush();
|
80
|
+
}
|
81
|
+
}
|
82
|
+
|
83
|
+
public void add() throws IOException
|
84
|
+
{
|
85
|
+
writer.write(newLineString);
|
86
|
+
batchRows++;
|
87
|
+
index = 0;
|
88
|
+
}
|
89
|
+
|
90
|
+
private void appendDelimiter() throws IOException
|
91
|
+
{
|
92
|
+
if(index != 0) {
|
93
|
+
writer.write(delimiterString);
|
94
|
+
}
|
95
|
+
index++;
|
96
|
+
}
|
97
|
+
|
98
|
+
public void setNull(int sqlType) throws IOException
|
99
|
+
{
|
100
|
+
appendDelimiter();
|
101
|
+
writer.write(nullString);
|
102
|
+
}
|
103
|
+
|
104
|
+
public void setBoolean(boolean v) throws IOException
|
105
|
+
{
|
106
|
+
appendDelimiter();
|
107
|
+
writer.write(String.valueOf(v));
|
108
|
+
}
|
109
|
+
|
110
|
+
public void setByte(byte v) throws IOException
|
111
|
+
{
|
112
|
+
appendDelimiter();
|
113
|
+
setEscapedString(String.valueOf(v));
|
114
|
+
}
|
115
|
+
|
116
|
+
public void setShort(short v) throws IOException
|
117
|
+
{
|
118
|
+
appendDelimiter();
|
119
|
+
writer.write(String.valueOf(v));
|
120
|
+
}
|
121
|
+
|
122
|
+
public void setInt(int v) throws IOException
|
123
|
+
{
|
124
|
+
appendDelimiter();
|
125
|
+
writer.write(String.valueOf(v));
|
126
|
+
}
|
127
|
+
|
128
|
+
public void setLong(long v) throws IOException
|
129
|
+
{
|
130
|
+
appendDelimiter();
|
131
|
+
writer.write(String.valueOf(v));
|
132
|
+
}
|
133
|
+
|
134
|
+
public void setFloat(float v) throws IOException
|
135
|
+
{
|
136
|
+
appendDelimiter();
|
137
|
+
writer.write(String.valueOf(v));
|
138
|
+
}
|
139
|
+
|
140
|
+
public void setDouble(double v) throws IOException
|
141
|
+
{
|
142
|
+
appendDelimiter();
|
143
|
+
writer.write(String.valueOf(v));
|
144
|
+
}
|
145
|
+
|
146
|
+
public void setBigDecimal(BigDecimal v) throws IOException
|
147
|
+
{
|
148
|
+
appendDelimiter();
|
149
|
+
writer.write(String.valueOf(v));
|
150
|
+
}
|
151
|
+
|
152
|
+
public void setString(String v) throws IOException
|
153
|
+
{
|
154
|
+
appendDelimiter();
|
155
|
+
setEscapedString(v);
|
156
|
+
}
|
157
|
+
|
158
|
+
public void setNString(String v) throws IOException
|
159
|
+
{
|
160
|
+
appendDelimiter();
|
161
|
+
setEscapedString(v);
|
162
|
+
}
|
163
|
+
|
164
|
+
public void setBytes(byte[] v) throws IOException
|
165
|
+
{
|
166
|
+
appendDelimiter();
|
167
|
+
setEscapedString(String.valueOf(v));
|
168
|
+
}
|
169
|
+
|
170
|
+
public void setSqlDate(Date v, int sqlType) throws IOException
|
171
|
+
{
|
172
|
+
appendDelimiter();
|
173
|
+
writer.write(v.toString());
|
174
|
+
}
|
175
|
+
|
176
|
+
public void setSqlTime(Time v, int sqlType) throws IOException
|
177
|
+
{
|
178
|
+
appendDelimiter();
|
179
|
+
writer.write(v.toString());
|
180
|
+
}
|
181
|
+
|
182
|
+
public void setSqlTimestamp(Timestamp v, int sqlType) throws IOException
|
183
|
+
{
|
184
|
+
appendDelimiter();
|
185
|
+
writer.write(v.toString());
|
186
|
+
}
|
187
|
+
|
188
|
+
// Escape \, \n, \t, \r
|
189
|
+
// Remove \0
|
190
|
+
private void setEscapedString(String v) throws IOException{
|
191
|
+
for (char c : v.toCharArray()) {
|
192
|
+
String s;
|
193
|
+
switch (c) {
|
194
|
+
case '\\':
|
195
|
+
s = "\\\\";
|
196
|
+
break;
|
197
|
+
case '\n':
|
198
|
+
s = "\\n";
|
199
|
+
break;
|
200
|
+
case '\t':
|
201
|
+
s = "\\t";
|
202
|
+
break;
|
203
|
+
case '\r':
|
204
|
+
s = "\\r";
|
205
|
+
break;
|
206
|
+
case 0:
|
207
|
+
s = "";
|
208
|
+
break;
|
209
|
+
default:
|
210
|
+
s = String.valueOf(c);
|
211
|
+
}
|
212
|
+
writer.write(s);
|
213
|
+
}
|
214
|
+
}
|
215
|
+
}
|