embulk-output-postgresql 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7401277e85bce3f1f2370d0aaa62d9811d8f4173
4
- data.tar.gz: 2b308d4f179ad2191d1f2cd9ca4c0e506bdc7c1c
3
+ metadata.gz: 20e8259bf0d8db2e0a6486cf6d6e8122a35eac4e
4
+ data.tar.gz: c51d55c5847e97b55862f85bc7d7c36fbfd825be
5
5
  SHA512:
6
- metadata.gz: 6fb3a523e6717894d079460c8a2c850ada1ed48f0fa484e38d7a7d6164a9a6600991a9b904273743ce084eee9fa219d8014954f81ed2bf9b0fd20df95cb3a06d
7
- data.tar.gz: ad686f50fa358911d4a86fb0de4433cb75723e656ae71cbb598e43e4aefd074fa8e64319f24adc3bec17a2d90accf7d561099ae5309af6243803341492d8fa74
6
+ metadata.gz: 77bddc402cbe94140137f501359842fd101206b26602908aba64f632c6b56a0e02c73e4bbc83c6a20b1a4047034fd95d5667a1ee0487c4792e1805724339ab1a
7
+ data.tar.gz: b8cc2770741d36bb12fb02868b164833f35d57f9d6015115af25f972b2a791e84cc86c4701521547d0004d377e39e4c171a89bcd5b30a6c1885231f85e7c2f77
data/README.md CHANGED
@@ -1,43 +1,89 @@
1
- # PostgreSQL output plugins for Embulk
2
-
3
- PostgreSQL output plugins for Embulk loads records to PostgreSQL.
4
-
5
- ## Overview
6
-
7
- * **Plugin type**: output
8
- * **Load all or nothing**: depnds on the mode:
9
- * **insert**: no
10
- * **replace**: yes
11
- * **Resume supported**: no
12
-
13
- ## Configuration
14
-
15
- - **host**: database host name (string, required)
16
- - **port**: database port number (integer, default: 5432)
17
- - **user**: database login user name (string, required)
18
- - **password**: database login password (string, default: "")
19
- - **database**: destination database name (string, required)
20
- - **schema**: destination schema name (string, default: "public")
21
- - **table**: destination table name (string, required)
22
- - **mode**: "replace", "merge" or "insert" (string, required)
23
- - **batch_size**: size of a single batch insert (integer, default: 16777216)
24
- - **options**: extra connection properties (hash, default: {})
25
-
26
- ### Example
27
-
28
- ```yaml
29
- out:
30
- type: postgresql
31
- host: localhost
32
- user: pg
33
- password: ""
34
- database: my_database
35
- table: my_table
36
- mode: insert
37
- ```
38
-
39
- ### Build
40
-
41
- ```
42
- $ ./gradlew gem
43
- ```
1
+ # PostgreSQL output plugins for Embulk
2
+
3
+ PostgreSQL output plugins for Embulk loads records to PostgreSQL.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: output
8
+ * **Load all or nothing**: depends on the mode. See below.
9
+ * **Resume supported**: depends on the mode. See below.
10
+
11
+ ## Configuration
12
+
13
+ - **host**: database host name (string, required)
14
+ - **port**: database port number (integer, default: 5432)
15
+ - **user**: database login user name (string, required)
16
+ - **password**: database login password (string, default: "")
17
+ - **database**: destination database name (string, required)
18
+ - **schema**: destination schema name (string, default: "public")
19
+ - **table**: destination table name (string, required)
20
+ - **options**: extra connection properties (hash, default: {})
21
+ - **mode**: "replace", "merge" or "insert" (string, required)
22
+ - **batch_size**: size of a single batch insert (integer, default: 16777216)
23
+ - **default_timezone**: If input column type (embulk type) is timestamp and destination column type is `string` or `nstring`, this plugin needs to format the timestamp into a string. This default_timezone option is used to control the timezone. You can override the timezone for each column using the column_options option. (string, default: `UTC`)
24
+ - **column_options**: advanced: key-value pairs where the key is a column name and the value is the options for that column.
25
+ - **type**: type of a column when this plugin creates new tables (e.g. `VARCHAR(255)`, `INTEGER NOT NULL UNIQUE`). This is used when this plugin creates intermediate tables (insert, truncate_insert and merge modes), when it creates the target table (insert_direct and replace modes), and when it creates a nonexistent target table automatically. (string, default: depends on input column type. `BIGINT` if input column type is long, `BOOLEAN` if boolean, `DOUBLE PRECISION` if double, `CLOB` if string, `TIMESTAMP` if timestamp)
26
+ - **value_type**: This plugin converts input column type (embulk type) into a database type to build an INSERT statement. This value_type option controls the type of the value in an INSERT statement. (string, default: depends on input column type. Available values are: `byte`, `short`, `int`, `long`, `double`, `float`, `boolean`, `string`, `nstring`, `date`, `time`, `timestamp`, `decimal`, `null`, `pass`)
27
+ - **timestamp_format**: If input column type (embulk type) is timestamp and value_type is `string` or `nstring`, this plugin needs to format the timestamp value into a string. This timestamp_format option is used to control the format of the timestamp. (string, default: `%Y-%m-%d %H:%M:%S.%6N`)
28
+ - **timezone**: If input column type (embulk type) is timestamp and value_type is `string` or `nstring`, this plugin needs to format the timestamp value into a string. And if the input column type is timestamp and value_type is `date`, this plugin needs to consider timezone. In those cases, this timezone option is used to control the timezone. (string, value of default_timezone option is used by default)
29
+
30
+ ### Modes
31
+
32
+ * **insert**:
33
+ * Behavior: This mode writes rows to some intermediate tables first. If all those tasks run correctly, runs `INSERT INTO <target_table> SELECT * FROM <intermediate_table_1> UNION ALL SELECT * FROM <intermediate_table_2> UNION ALL ...` query.
34
+ * Transactional: Yes. This mode successfully writes all rows, or fails with writing zero rows.
35
+ * Resumable: Yes.
36
+ * **insert_direct**:
37
+ * Behavior: This mode inserts rows to the target table directly.
38
+ * Transactional: No. If it fails, the target table may be left with some rows inserted.
39
+ * Resumable: No.
40
+ * **truncate_insert**:
41
+ * Behavior: Same as `insert` mode except that it truncates the target table right before the last `INSERT ...` query.
42
+ * Transactional: Yes.
43
+ * Resumable: Yes.
44
+ * **merge**:
45
+ * Behavior: This mode writes rows to some intermediate tables first. If all those tasks run correctly, runs `INSERT INTO <target_table> SELECT * FROM <intermediate_table_1> UNION ALL SELECT * FROM <intermediate_table_2> UNION ALL ... ON DUPLICATE KEY UPDATE ...` query.
46
+ * Transactional: Yes.
47
+ * Resumable: Yes.
48
+ * **replace**:
49
+ * Behavior: Same as `insert` mode except that it truncates the target table right before the last `INSERT ...` query.
50
+ * Transactional: Yes.
51
+ * Resumable: No.
52
+
53
+ ### Example
54
+
55
+ ```yaml
56
+ out:
57
+ type: postgresql
58
+ host: localhost
59
+ user: pg
60
+ password: ""
61
+ database: my_database
62
+ table: my_table
63
+ mode: insert
64
+ ```
65
+
66
+ Advanced configuration:
67
+
68
+ ```yaml
69
+ out:
70
+ type: postgresql
71
+ host: localhost
72
+ user: pg
73
+ password: ""
74
+ database: my_database
75
+ table: my_table
76
+ options: {loglevel: 2}
77
+ mode: insert_direct
78
+ column_options:
79
+ my_col_1: {type: 'BIGSERIAL'}
80
+ my_col_3: {type: 'INT NOT NULL'}
81
+ my_col_4: {value_type: string, timestamp_format: '%Y-%m-%d %H:%M:%S %z', timezone: '-0700'}
82
+ my_col_5: {type: 'DECIMAL(18,9)', value_type: pass}
83
+ ```
84
+
85
+ ### Build
86
+
87
+ ```
88
+ $ ./gradlew gem
89
+ ```
data/build.gradle CHANGED
@@ -1,7 +1,7 @@
1
- dependencies {
2
- compile project(':embulk-output-jdbc')
3
-
4
- compile 'org.postgresql:postgresql:9.4-1200-jdbc41'
5
-
6
- testCompile project(':embulk-output-jdbc').sourceSets.test.output
7
- }
1
+ dependencies {
2
+ compile project(':embulk-output-jdbc')
3
+
4
+ compile 'org.postgresql:postgresql:9.4-1200-jdbc41'
5
+
6
+ testCompile project(':embulk-output-jdbc').sourceSets.test.output
7
+ }
@@ -1,3 +1,3 @@
1
- Embulk::JavaPlugin.register_output(
2
- :postgresql, "org.embulk.output.PostgreSQLOutputPlugin",
3
- File.expand_path('../../../../classpath', __FILE__))
1
+ Embulk::JavaPlugin.register_output(
2
+ :postgresql, "org.embulk.output.PostgreSQLOutputPlugin",
3
+ File.expand_path('../../../../classpath', __FILE__))
@@ -1,142 +1,109 @@
1
- package org.embulk.output;
2
-
3
- import java.util.List;
4
- import java.util.Properties;
5
- import java.io.IOException;
6
- import java.sql.SQLException;
7
-
8
- import org.embulk.output.jdbc.setter.ColumnSetter;
9
- import org.embulk.output.postgresql.PostgresqlBatchUpsert;
10
- import org.embulk.spi.Exec;
11
- import org.embulk.config.Config;
12
- import org.embulk.config.ConfigDefault;
13
- import org.embulk.output.jdbc.AbstractJdbcOutputPlugin;
14
- import org.embulk.output.jdbc.BatchInsert;
15
- import org.embulk.output.postgresql.PostgreSQLOutputConnector;
16
- import org.embulk.output.postgresql.PostgreSQLCopyBatchInsert;
17
- import org.embulk.spi.PageReader;
18
-
19
- public class PostgreSQLOutputPlugin
20
- extends AbstractJdbcOutputPlugin
21
- {
22
- public interface PostgreSQLPluginTask
23
- extends PluginTask
24
- {
25
- @Config("host")
26
- public String getHost();
27
-
28
- @Config("port")
29
- @ConfigDefault("5432")
30
- public int getPort();
31
-
32
- @Config("user")
33
- public String getUser();
34
-
35
- @Config("password")
36
- @ConfigDefault("\"\"")
37
- public String getPassword();
38
-
39
- @Config("database")
40
- public String getDatabase();
41
-
42
- @Config("schema")
43
- @ConfigDefault("\"public\"")
44
- public String getSchema();
45
- }
46
-
47
- @Override
48
- protected Class<? extends PluginTask> getTaskClass()
49
- {
50
- return PostgreSQLPluginTask.class;
51
- }
52
-
53
- @Override
54
- protected PostgreSQLOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation)
55
- {
56
- PostgreSQLPluginTask t = (PostgreSQLPluginTask) task;
57
-
58
- String url = String.format("jdbc:postgresql://%s:%d/%s",
59
- t.getHost(), t.getPort(), t.getDatabase());
60
-
61
- Properties props = new Properties();
62
- props.setProperty("user", t.getUser());
63
- props.setProperty("password", t.getPassword());
64
- props.setProperty("loginTimeout", "300"); // seconds
65
- props.setProperty("socketTimeout", "1800"); // seconds
66
-
67
- // Enable keepalive based on tcp_keepalive_time, tcp_keepalive_intvl and tcp_keepalive_probes kernel parameters.
68
- // Socket options TCP_KEEPCNT, TCP_KEEPIDLE, and TCP_KEEPINTVL are not configurable.
69
- props.setProperty("tcpKeepAlive", "true");
70
-
71
- // TODO
72
- //switch t.getSssl() {
73
- //when "disable":
74
- // break;
75
- //when "enable":
76
- // props.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory"); // disable server-side validation
77
- //when "verify":
78
- // props.setProperty("ssl", "true");
79
- // break;
80
- //}
81
-
82
- if (!retryableMetadataOperation) {
83
- // non-retryable batch operation uses longer timeout
84
- props.setProperty("loginTimeout", "300"); // seconds
85
- props.setProperty("socketTimeout", "28800"); // seconds
86
- }
87
-
88
- props.putAll(t.getOptions());
89
-
90
- return new PostgreSQLOutputConnector(url, props, t.getSchema());
91
- }
92
-
93
- @Override
94
- protected PluginPageOutput newPluginPageOutput(PageReader reader,
95
- BatchInsert batch, List<ColumnSetter> columnSetters,
96
- PluginTask task)
97
- {
98
- if (task.getMode().isMerge()) {
99
- return new PostgresPluginPageOutput(reader, batch, columnSetters, task.getBatchSize());
100
- }
101
- return super.newPluginPageOutput(reader, batch, columnSetters, task);
102
- }
103
-
104
- public static class PostgresPluginPageOutput extends PluginPageOutput
105
- {
106
-
107
- public PostgresPluginPageOutput(PageReader pageReader, BatchInsert batch, List<ColumnSetter> columnSetters, int batchSize)
108
- {
109
- super(pageReader, batch, columnSetters, batchSize);
110
- }
111
-
112
- @Override
113
- protected void handleColumnsSetters()
114
- {
115
- int size = columnSetters.size();
116
- for (int i=0; i < size; i++) {
117
- ColumnSetter columnSetter = columnSetters.get(i);
118
- if (!columnSetter.getColumn().isPrimaryKey()) {
119
- columns.get(i).visit(columnSetter);
120
- }
121
- }
122
- for (int i=0; i < size; i++) {
123
- ColumnSetter columnSetter = columnSetters.get(i);
124
- if (columnSetter.getColumn().isPrimaryKey()) {
125
- columns.get(i).visit(columnSetter);
126
- }
127
- }
128
- for (int i=0; i < size; i++) {
129
- columns.get(i).visit(columnSetters.get(i));
130
- }
131
- }
132
-
133
- }
134
-
135
- @Override
136
- protected BatchInsert newBatchInsert(PluginTask task) throws IOException, SQLException
137
- {
138
- PostgreSQLOutputConnector connector = getConnector(task, true);
139
- return task.getMode().isMerge() ? new PostgresqlBatchUpsert(connector) :
140
- new PostgreSQLCopyBatchInsert(getConnector(task, true));
141
- }
142
- }
1
+ package org.embulk.output;
2
+
3
+ import java.util.List;
4
+ import java.util.Properties;
5
+ import java.io.IOException;
6
+ import java.sql.SQLException;
7
+ import com.google.common.base.Optional;
8
+ import com.google.common.collect.ImmutableSet;
9
+ import org.embulk.config.Config;
10
+ import org.embulk.config.ConfigDefault;
11
+ import org.embulk.output.jdbc.AbstractJdbcOutputPlugin;
12
+ import org.embulk.output.jdbc.BatchInsert;
13
+ import org.embulk.output.postgresql.PostgreSQLOutputConnector;
14
+ import org.embulk.output.postgresql.PostgreSQLCopyBatchInsert;
15
+
16
+ public class PostgreSQLOutputPlugin
17
+ extends AbstractJdbcOutputPlugin
18
+ {
19
+ public interface PostgreSQLPluginTask
20
+ extends PluginTask
21
+ {
22
+ @Config("host")
23
+ public String getHost();
24
+
25
+ @Config("port")
26
+ @ConfigDefault("5432")
27
+ public int getPort();
28
+
29
+ @Config("user")
30
+ public String getUser();
31
+
32
+ @Config("password")
33
+ @ConfigDefault("\"\"")
34
+ public String getPassword();
35
+
36
+ @Config("database")
37
+ public String getDatabase();
38
+
39
+ @Config("schema")
40
+ @ConfigDefault("\"public\"")
41
+ public String getSchema();
42
+ }
43
+
44
+ @Override
45
+ protected Class<? extends PluginTask> getTaskClass()
46
+ {
47
+ return PostgreSQLPluginTask.class;
48
+ }
49
+
50
+ @Override
51
+ protected Features getFeatures(PluginTask task)
52
+ {
53
+ return new Features()
54
+ .setMaxTableNameLength(30)
55
+ .setSupportedModes(ImmutableSet.of(Mode.INSERT, Mode.INSERT_DIRECT, Mode.MERGE, Mode.TRUNCATE_INSERT, Mode.REPLACE))
56
+ .setIgnoreMergeKeys(false);
57
+ }
58
+
59
+ @Override
60
+ protected PostgreSQLOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation)
61
+ {
62
+ PostgreSQLPluginTask t = (PostgreSQLPluginTask) task;
63
+
64
+ String url = String.format("jdbc:postgresql://%s:%d/%s",
65
+ t.getHost(), t.getPort(), t.getDatabase());
66
+
67
+ Properties props = new Properties();
68
+ props.setProperty("loginTimeout", "300"); // seconds
69
+ props.setProperty("socketTimeout", "1800"); // seconds
70
+
71
+ // Enable keepalive based on tcp_keepalive_time, tcp_keepalive_intvl and tcp_keepalive_probes kernel parameters.
72
+ // Socket options TCP_KEEPCNT, TCP_KEEPIDLE, and TCP_KEEPINTVL are not configurable.
73
+ props.setProperty("tcpKeepAlive", "true");
74
+
75
+ // TODO
76
+ //switch t.getSssl() {
77
+ //when "disable":
78
+ // break;
79
+ //when "enable":
80
+ // props.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory"); // disable server-side validation
81
+ //when "verify":
82
+ // props.setProperty("ssl", "true");
83
+ // break;
84
+ //}
85
+
86
+ if (!retryableMetadataOperation) {
87
+ // non-retryable batch operation uses longer timeout
88
+ props.setProperty("loginTimeout", "300"); // seconds
89
+ props.setProperty("socketTimeout", "28800"); // seconds
90
+ }
91
+
92
+ props.putAll(t.getOptions());
93
+
94
+ props.setProperty("user", t.getUser());
95
+ logger.info("Connecting to {} options {}", url, props);
96
+ props.setProperty("password", t.getPassword());
97
+
98
+ return new PostgreSQLOutputConnector(url, props, t.getSchema());
99
+ }
100
+
101
+ @Override
102
+ protected BatchInsert newBatchInsert(PluginTask task, Optional<List<String>> mergeKeys) throws IOException, SQLException
103
+ {
104
+ if (mergeKeys.isPresent()) {
105
+ throw new UnsupportedOperationException("PostgreSQL output plugin doesn't support 'merge_direct' mode. Use 'merge' mode instead.");
106
+ }
107
+ return new PostgreSQLCopyBatchInsert(getConnector(task, true));
108
+ }
109
+ }
@@ -1,217 +1,215 @@
1
- package org.embulk.output.postgresql;
2
-
3
- import java.io.File;
4
- import java.io.FileOutputStream;
5
- import java.io.Writer;
6
- import java.io.BufferedWriter;
7
- import java.io.OutputStreamWriter;
8
- import java.io.IOException;
9
- import java.nio.charset.Charset;
10
- import java.math.BigDecimal;
11
- import java.sql.Date;
12
- import java.sql.Time;
13
- import java.sql.Timestamp;
14
- import java.sql.SQLException;
15
- import org.embulk.spi.Exec;
16
- import org.embulk.output.jdbc.JdbcSchema;
17
- import org.embulk.output.jdbc.BatchInsert;
18
-
19
- public abstract class AbstractPostgreSQLCopyBatchInsert
20
- implements BatchInsert
21
- {
22
- protected static final Charset FILE_CHARSET = Charset.forName("UTF-8");
23
-
24
- protected static final String nullString = "\\N";
25
- protected static final String newLineString = "\n";
26
- protected static final String delimiterString = "\t";
27
-
28
- protected File currentFile;
29
- protected BufferedWriter writer;
30
- protected int index;
31
- protected int batchRows;
32
-
33
- protected AbstractPostgreSQLCopyBatchInsert() throws IOException
34
- {
35
- this.index = 0;
36
- openNewFile();
37
- }
38
-
39
- private File createTempFile() throws IOException
40
- {
41
- return File.createTempFile("embulk-output-postgres-copy-", ".tsv.tmp"); // TODO configurable temporary file path
42
- }
43
-
44
- protected File openNewFile() throws IOException
45
- {
46
- File newFile = createTempFile();
47
- File oldFile = closeCurrentFile();
48
- this.writer = openWriter(newFile);
49
- currentFile = newFile;
50
- return oldFile;
51
- }
52
-
53
- protected File closeCurrentFile() throws IOException
54
- {
55
- if(writer != null) {
56
- writer.close();
57
- writer = null;
58
- }
59
- return currentFile;
60
- }
61
-
62
- protected BufferedWriter openWriter(File newFile) throws IOException
63
- {
64
- return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(newFile), FILE_CHARSET));
65
- }
66
-
67
- public int getBatchWeight()
68
- {
69
- long fsize = currentFile.length();
70
- if (fsize > Integer.MAX_VALUE) {
71
- return Integer.MAX_VALUE;
72
- } else {
73
- return (int) fsize;
74
- }
75
- }
76
-
77
- public void finish() throws IOException, SQLException
78
- {
79
- closeCurrentFile(); // this is necessary to make getBatchWeight() work
80
- if (getBatchWeight() != 0) {
81
- flush();
82
- }
83
- }
84
-
85
- public void add() throws IOException
86
- {
87
- writer.write(newLineString);
88
- batchRows++;
89
- index = 0;
90
- }
91
-
92
- private void appendDelimiter() throws IOException
93
- {
94
- if(index != 0) {
95
- writer.write(delimiterString);
96
- }
97
- index++;
98
- }
99
-
100
- public void setNull(int sqlType) throws IOException
101
- {
102
- appendDelimiter();
103
- writer.write(nullString);
104
- }
105
-
106
- public void setBoolean(boolean v) throws IOException
107
- {
108
- appendDelimiter();
109
- writer.write(String.valueOf(v));
110
- }
111
-
112
- public void setByte(byte v) throws IOException
113
- {
114
- appendDelimiter();
115
- setEscapedString(String.valueOf(v));
116
- }
117
-
118
- public void setShort(short v) throws IOException
119
- {
120
- appendDelimiter();
121
- writer.write(String.valueOf(v));
122
- }
123
-
124
- public void setInt(int v) throws IOException
125
- {
126
- appendDelimiter();
127
- writer.write(String.valueOf(v));
128
- }
129
-
130
- public void setLong(long v) throws IOException
131
- {
132
- appendDelimiter();
133
- writer.write(String.valueOf(v));
134
- }
135
-
136
- public void setFloat(float v) throws IOException
137
- {
138
- appendDelimiter();
139
- writer.write(String.valueOf(v));
140
- }
141
-
142
- public void setDouble(double v) throws IOException
143
- {
144
- appendDelimiter();
145
- writer.write(String.valueOf(v));
146
- }
147
-
148
- public void setBigDecimal(BigDecimal v) throws IOException
149
- {
150
- appendDelimiter();
151
- writer.write(String.valueOf(v));
152
- }
153
-
154
- public void setString(String v) throws IOException
155
- {
156
- appendDelimiter();
157
- setEscapedString(v);
158
- }
159
-
160
- public void setNString(String v) throws IOException
161
- {
162
- appendDelimiter();
163
- setEscapedString(v);
164
- }
165
-
166
- public void setBytes(byte[] v) throws IOException
167
- {
168
- appendDelimiter();
169
- setEscapedString(String.valueOf(v));
170
- }
171
-
172
- public void setSqlDate(Date v, int sqlType) throws IOException
173
- {
174
- appendDelimiter();
175
- writer.write(v.toString());
176
- }
177
-
178
- public void setSqlTime(Time v, int sqlType) throws IOException
179
- {
180
- appendDelimiter();
181
- writer.write(v.toString());
182
- }
183
-
184
- public void setSqlTimestamp(Timestamp v, int sqlType) throws IOException
185
- {
186
- appendDelimiter();
187
- writer.write(v.toString());
188
- }
189
-
190
- // Escape \, \n, \t, \r
191
- // Remove \0
192
- private void setEscapedString(String v) throws IOException{
193
- for (char c : v.toCharArray()) {
194
- String s;
195
- switch (c) {
196
- case '\\':
197
- s = "\\\\";
198
- break;
199
- case '\n':
200
- s = "\\n";
201
- break;
202
- case '\t':
203
- s = "\\t";
204
- break;
205
- case '\r':
206
- s = "\\r";
207
- break;
208
- case 0:
209
- s = "";
210
- break;
211
- default:
212
- s = String.valueOf(c);
213
- }
214
- writer.write(s);
215
- }
216
- }
217
- }
1
+ package org.embulk.output.postgresql;
2
+
3
+ import java.io.File;
4
+ import java.io.FileOutputStream;
5
+ import java.io.Writer;
6
+ import java.io.BufferedWriter;
7
+ import java.io.OutputStreamWriter;
8
+ import java.io.IOException;
9
+ import java.nio.charset.Charset;
10
+ import java.math.BigDecimal;
11
+ import java.sql.Date;
12
+ import java.sql.Time;
13
+ import java.sql.Timestamp;
14
+ import java.sql.SQLException;
15
+ import org.embulk.output.jdbc.BatchInsert;
16
+
17
+ public abstract class AbstractPostgreSQLCopyBatchInsert
18
+ implements BatchInsert
19
+ {
20
+ protected static final Charset FILE_CHARSET = Charset.forName("UTF-8");
21
+
22
+ protected static final String nullString = "\\N";
23
+ protected static final String newLineString = "\n";
24
+ protected static final String delimiterString = "\t";
25
+
26
+ protected File currentFile;
27
+ protected BufferedWriter writer;
28
+ protected int index;
29
+ protected int batchRows;
30
+
31
+ protected AbstractPostgreSQLCopyBatchInsert() throws IOException
32
+ {
33
+ this.index = 0;
34
+ openNewFile();
35
+ }
36
+
37
+ private File createTempFile() throws IOException
38
+ {
39
+ return File.createTempFile("embulk-output-postgres-copy-", ".tsv.tmp"); // TODO configurable temporary file path
40
+ }
41
+
42
+ protected File openNewFile() throws IOException
43
+ {
44
+ File newFile = createTempFile();
45
+ File oldFile = closeCurrentFile();
46
+ this.writer = openWriter(newFile);
47
+ currentFile = newFile;
48
+ return oldFile;
49
+ }
50
+
51
+ protected File closeCurrentFile() throws IOException
52
+ {
53
+ if(writer != null) {
54
+ writer.close();
55
+ writer = null;
56
+ }
57
+ return currentFile;
58
+ }
59
+
60
+ protected BufferedWriter openWriter(File newFile) throws IOException
61
+ {
62
+ return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(newFile), FILE_CHARSET));
63
+ }
64
+
65
+ public int getBatchWeight()
66
+ {
67
+ long fsize = currentFile.length();
68
+ if (fsize > Integer.MAX_VALUE) {
69
+ return Integer.MAX_VALUE;
70
+ } else {
71
+ return (int) fsize;
72
+ }
73
+ }
74
+
75
+ public void finish() throws IOException, SQLException
76
+ {
77
+ closeCurrentFile(); // this is necessary to make getBatchWeight() work
78
+ if (getBatchWeight() != 0) {
79
+ flush();
80
+ }
81
+ }
82
+
83
+ public void add() throws IOException
84
+ {
85
+ writer.write(newLineString);
86
+ batchRows++;
87
+ index = 0;
88
+ }
89
+
90
+ private void appendDelimiter() throws IOException
91
+ {
92
+ if(index != 0) {
93
+ writer.write(delimiterString);
94
+ }
95
+ index++;
96
+ }
97
+
98
+ public void setNull(int sqlType) throws IOException
99
+ {
100
+ appendDelimiter();
101
+ writer.write(nullString);
102
+ }
103
+
104
+ public void setBoolean(boolean v) throws IOException
105
+ {
106
+ appendDelimiter();
107
+ writer.write(String.valueOf(v));
108
+ }
109
+
110
+ public void setByte(byte v) throws IOException
111
+ {
112
+ appendDelimiter();
113
+ setEscapedString(String.valueOf(v));
114
+ }
115
+
116
+ public void setShort(short v) throws IOException
117
+ {
118
+ appendDelimiter();
119
+ writer.write(String.valueOf(v));
120
+ }
121
+
122
+ public void setInt(int v) throws IOException
123
+ {
124
+ appendDelimiter();
125
+ writer.write(String.valueOf(v));
126
+ }
127
+
128
+ public void setLong(long v) throws IOException
129
+ {
130
+ appendDelimiter();
131
+ writer.write(String.valueOf(v));
132
+ }
133
+
134
+ public void setFloat(float v) throws IOException
135
+ {
136
+ appendDelimiter();
137
+ writer.write(String.valueOf(v));
138
+ }
139
+
140
+ public void setDouble(double v) throws IOException
141
+ {
142
+ appendDelimiter();
143
+ writer.write(String.valueOf(v));
144
+ }
145
+
146
+ public void setBigDecimal(BigDecimal v) throws IOException
147
+ {
148
+ appendDelimiter();
149
+ writer.write(String.valueOf(v));
150
+ }
151
+
152
+ public void setString(String v) throws IOException
153
+ {
154
+ appendDelimiter();
155
+ setEscapedString(v);
156
+ }
157
+
158
+ public void setNString(String v) throws IOException
159
+ {
160
+ appendDelimiter();
161
+ setEscapedString(v);
162
+ }
163
+
164
+ public void setBytes(byte[] v) throws IOException
165
+ {
166
+ appendDelimiter();
167
+ setEscapedString(String.valueOf(v));
168
+ }
169
+
170
+ public void setSqlDate(Date v, int sqlType) throws IOException
171
+ {
172
+ appendDelimiter();
173
+ writer.write(v.toString());
174
+ }
175
+
176
+ public void setSqlTime(Time v, int sqlType) throws IOException
177
+ {
178
+ appendDelimiter();
179
+ writer.write(v.toString());
180
+ }
181
+
182
+ public void setSqlTimestamp(Timestamp v, int sqlType) throws IOException
183
+ {
184
+ appendDelimiter();
185
+ writer.write(v.toString());
186
+ }
187
+
188
+ // Escape \, \n, \t, \r
189
+ // Remove \0
190
+ private void setEscapedString(String v) throws IOException{
191
+ for (char c : v.toCharArray()) {
192
+ String s;
193
+ switch (c) {
194
+ case '\\':
195
+ s = "\\\\";
196
+ break;
197
+ case '\n':
198
+ s = "\\n";
199
+ break;
200
+ case '\t':
201
+ s = "\\t";
202
+ break;
203
+ case '\r':
204
+ s = "\\r";
205
+ break;
206
+ case 0:
207
+ s = "";
208
+ break;
209
+ default:
210
+ s = String.valueOf(c);
211
+ }
212
+ writer.write(s);
213
+ }
214
+ }
215
+ }