embulk-output-postgresql 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d0c60c7936524e61c263938180308a38347e99ff
4
+ data.tar.gz: b9e0957dabaf0373af15303df9f20000185533af
5
+ SHA512:
6
+ metadata.gz: 6325e1a100edae2e0ff2aa6123e01ac658d79f654b4aea880bde69a01602250808971d95621d7a79e9b4a70ffe7b2f7f3aeeef57cf7ea4c9209ad6b2215903d0
7
+ data.tar.gz: 282c82aac27df9ffaf9298f4f4fac3f31c2a6c67bbb3eb27a2d7bfede1f87613472348dfdbf46cf20241c7048d5617052d8b24f3ff0aa518996a4944ed1a18b2
data/README.md ADDED
@@ -0,0 +1,43 @@
1
+ # PostgreSQL output plugins for Embulk
2
+
3
+ The PostgreSQL output plugin for Embulk loads records into PostgreSQL.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: output
8
+ * **Load all or nothing**: depends on the mode:
9
+ * **insert**: no
10
+ * **replace**: yes
11
+ * **Resume supported**: no
12
+
13
+ ## Configuration
14
+
15
+ - **host**: database host name (string, required)
16
+ - **port**: database port number (integer, default: 5432)
17
+ - **user**: database login user name (string, required)
18
+ - **password**: database login password (string, default: "")
19
+ - **database**: destination database name (string, required)
20
+ - **schema**: destination schema name (string, default: "public")
21
+ - **table**: destination table name (string, required)
22
+ - **mode**: "replace" or "insert" (string, required)
23
+ - **batch_size**: size of a single batch insert (integer, default: 16777216)
24
+ - **options**: extra connection properties (hash, default: {})
25
+
26
+ ### Example
27
+
28
+ ```yaml
29
+ out:
30
+ type: postgresql
31
+ host: localhost
32
+ user: pg
33
+ password: ""
34
+ database: my_database
35
+ table: my_table
36
+ mode: insert
37
+ ```
38
+
39
+ ### Build
40
+
41
+ ```
42
+ $ ./gradlew gem
43
+ ```
data/build.gradle ADDED
@@ -0,0 +1,7 @@
1
+ dependencies {
2
+ compile project(':embulk-output-jdbc')
3
+
4
+ compile 'org.postgresql:postgresql:9.4-1200-jdbc41'
5
+
6
+ testCompile project(':embulk-output-jdbc').sourceSets.test.output
7
+ }
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,3 @@
1
+ Embulk::JavaPlugin.register_output(
2
+ :postgresql, "org.embulk.output.PostgreSQLOutputPlugin",
3
+ File.expand_path('../../../../classpath', __FILE__))
@@ -0,0 +1,61 @@
1
+ package org.embulk.output;
2
+
3
+ import java.util.Properties;
4
+ import java.io.IOException;
5
+ import java.sql.SQLException;
6
+ import org.embulk.spi.Exec;
7
+ import org.embulk.output.jdbc.AbstractJdbcOutputPlugin;
8
+ import org.embulk.output.jdbc.BatchInsert;
9
+ import org.embulk.output.postgresql.PostgreSQLOutputConnector;
10
+ import org.embulk.output.postgresql.PostgreSQLCopyBatchInsert;
11
+
12
+ public class PostgreSQLOutputPlugin
13
+ extends AbstractJdbcOutputPlugin
14
+ {
15
+ private static final String DEFAULT_SCHEMA = "public";
16
+ private static final int DEFAULT_PORT = 5432;
17
+
18
+ @Override
19
+ protected PostgreSQLOutputConnector getConnector(PluginTask task, boolean retryableMetadataOperation)
20
+ {
21
+ String url = String.format("jdbc:postgresql://%s:%d/%s",
22
+ task.getHost(), task.getPort().or(DEFAULT_PORT), task.getDatabase());
23
+
24
+ Properties props = new Properties();
25
+ props.setProperty("user", task.getUser());
26
+ props.setProperty("password", task.getPassword());
27
+ props.setProperty("loginTimeout", "300"); // seconds
28
+ props.setProperty("socketTimeout", "1800"); // seconds
29
+
30
+ // Enable keepalive based on tcp_keepalive_time, tcp_keepalive_intvl and tcp_keepalive_probes kernel parameters.
31
+ // Socket options TCP_KEEPCNT, TCP_KEEPIDLE, and TCP_KEEPINTVL are not configurable.
32
+ props.setProperty("tcpKeepAlive", "true");
33
+
34
+ // TODO
35
+ //switch task.getSssl() {
36
+ //when "disable":
37
+ // break;
38
+ //when "enable":
39
+ // props.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory"); // disable server-side validation
40
+ //when "verify":
41
+ // props.setProperty("ssl", "true");
42
+ // break;
43
+ //}
44
+
45
+ if (!retryableMetadataOperation) {
46
+ // non-retryable batch operation uses longer timeout
47
+ props.setProperty("loginTimeout", "300"); // seconds
48
+ props.setProperty("socketTimeout", "28800"); // seconds
49
+ }
50
+
51
+ props.putAll(task.getOptions());
52
+
53
+ return new PostgreSQLOutputConnector(url, props, task.getSchema().or(DEFAULT_SCHEMA));
54
+ }
55
+
56
+ @Override
57
+ protected BatchInsert newBatchInsert(PluginTask task) throws IOException, SQLException
58
+ {
59
+ return new PostgreSQLCopyBatchInsert(getConnector(task, true));
60
+ }
61
+ }
@@ -0,0 +1,217 @@
1
+ package org.embulk.output.postgresql;
2
+
3
+ import java.io.File;
4
+ import java.io.FileOutputStream;
5
+ import java.io.Writer;
6
+ import java.io.BufferedWriter;
7
+ import java.io.OutputStreamWriter;
8
+ import java.io.IOException;
9
+ import java.nio.charset.Charset;
10
+ import java.math.BigDecimal;
11
+ import java.sql.Date;
12
+ import java.sql.Time;
13
+ import java.sql.Timestamp;
14
+ import java.sql.SQLException;
15
+ import org.embulk.spi.Exec;
16
+ import org.embulk.output.jdbc.JdbcSchema;
17
+ import org.embulk.output.jdbc.BatchInsert;
18
+
19
+ public abstract class AbstractPostgreSQLCopyBatchInsert
20
+ implements BatchInsert
21
+ {
22
+ protected static final Charset FILE_CHARSET = Charset.forName("UTF-8");
23
+
24
+ protected static final String nullString = "\\N";
25
+ protected static final String newLineString = "\n";
26
+ protected static final String delimiterString = "\t";
27
+
28
+ protected File currentFile;
29
+ protected BufferedWriter writer;
30
+ protected int index;
31
+ protected int batchRows;
32
+
33
+ protected AbstractPostgreSQLCopyBatchInsert() throws IOException
34
+ {
35
+ this.index = 0;
36
+ openNewFile();
37
+ }
38
+
39
+ private File createTempFile() throws IOException
40
+ {
41
+ return File.createTempFile("embulk-output-postgres-copy-", ".tsv.tmp"); // TODO configurable temporary file path
42
+ }
43
+
44
+ protected File openNewFile() throws IOException
45
+ {
46
+ File newFile = createTempFile();
47
+ File oldFile = closeCurrentFile();
48
+ this.writer = openWriter(newFile);
49
+ currentFile = newFile;
50
+ return oldFile;
51
+ }
52
+
53
+ protected File closeCurrentFile() throws IOException
54
+ {
55
+ if(writer != null) {
56
+ writer.close();
57
+ writer = null;
58
+ }
59
+ return currentFile;
60
+ }
61
+
62
+ protected BufferedWriter openWriter(File newFile) throws IOException
63
+ {
64
+ return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(newFile), FILE_CHARSET));
65
+ }
66
+
67
+ public int getBatchWeight()
68
+ {
69
+ long fsize = currentFile.length();
70
+ if (fsize > Integer.MAX_VALUE) {
71
+ return Integer.MAX_VALUE;
72
+ } else {
73
+ return (int) fsize;
74
+ }
75
+ }
76
+
77
+ public void finish() throws IOException, SQLException
78
+ {
79
+ closeCurrentFile(); // this is necessary to make getBatchWeight() work
80
+ if (getBatchWeight() != 0) {
81
+ flush();
82
+ }
83
+ }
84
+
85
+ public void add() throws IOException
86
+ {
87
+ writer.write(newLineString);
88
+ batchRows++;
89
+ index = 0;
90
+ }
91
+
92
+ private void appendDelimiter() throws IOException
93
+ {
94
+ if(index != 0) {
95
+ writer.write(delimiterString);
96
+ }
97
+ index++;
98
+ }
99
+
100
+ public void setNull(int sqlType) throws IOException
101
+ {
102
+ appendDelimiter();
103
+ writer.write(nullString);
104
+ }
105
+
106
+ public void setBoolean(boolean v) throws IOException
107
+ {
108
+ appendDelimiter();
109
+ writer.write(String.valueOf(v));
110
+ }
111
+
112
+ public void setByte(byte v) throws IOException
113
+ {
114
+ appendDelimiter();
115
+ setEscapedString(String.valueOf(v));
116
+ }
117
+
118
+ public void setShort(short v) throws IOException
119
+ {
120
+ appendDelimiter();
121
+ writer.write(String.valueOf(v));
122
+ }
123
+
124
+ public void setInt(int v) throws IOException
125
+ {
126
+ appendDelimiter();
127
+ writer.write(String.valueOf(v));
128
+ }
129
+
130
+ public void setLong(long v) throws IOException
131
+ {
132
+ appendDelimiter();
133
+ writer.write(String.valueOf(v));
134
+ }
135
+
136
+ public void setFloat(float v) throws IOException
137
+ {
138
+ appendDelimiter();
139
+ writer.write(String.valueOf(v));
140
+ }
141
+
142
+ public void setDouble(double v) throws IOException
143
+ {
144
+ appendDelimiter();
145
+ writer.write(String.valueOf(v));
146
+ }
147
+
148
+ public void setBigDecimal(BigDecimal v) throws IOException
149
+ {
150
+ appendDelimiter();
151
+ writer.write(String.valueOf(v));
152
+ }
153
+
154
+ public void setString(String v) throws IOException
155
+ {
156
+ appendDelimiter();
157
+ setEscapedString(v);
158
+ }
159
+
160
+ public void setNString(String v) throws IOException
161
+ {
162
+ appendDelimiter();
163
+ setEscapedString(v);
164
+ }
165
+
166
+ public void setBytes(byte[] v) throws IOException
167
+ {
168
+ appendDelimiter();
169
+ setEscapedString(String.valueOf(v));
170
+ }
171
+
172
+ public void setSqlDate(Date v, int sqlType) throws IOException
173
+ {
174
+ appendDelimiter();
175
+ writer.write(v.toString());
176
+ }
177
+
178
+ public void setSqlTime(Time v, int sqlType) throws IOException
179
+ {
180
+ appendDelimiter();
181
+ writer.write(v.toString());
182
+ }
183
+
184
+ public void setSqlTimestamp(Timestamp v, int sqlType) throws IOException
185
+ {
186
+ appendDelimiter();
187
+ writer.write(v.toString());
188
+ }
189
+
190
+ // Escape \, \n, \t, \r
191
+ // Remove \0
192
+ private void setEscapedString(String v) throws IOException{
193
+ for (char c : v.toCharArray()) {
194
+ String s;
195
+ switch (c) {
196
+ case '\\':
197
+ s = "\\\\";
198
+ break;
199
+ case '\n':
200
+ s = "\\n";
201
+ break;
202
+ case '\t':
203
+ s = "\\t";
204
+ break;
205
+ case '\r':
206
+ s = "\\r";
207
+ break;
208
+ case 0:
209
+ s = "";
210
+ break;
211
+ default:
212
+ s = String.valueOf(c);
213
+ }
214
+ writer.write(s);
215
+ }
216
+ }
217
+ }
@@ -0,0 +1,73 @@
1
+ package org.embulk.output.postgresql;
2
+
3
+ import java.io.File;
4
+ import java.io.IOException;
5
+ import java.io.FileInputStream;
6
+ import java.sql.Connection;
7
+ import java.sql.SQLException;
8
+ import org.slf4j.Logger;
9
+ import org.postgresql.copy.CopyManager;
10
+ import org.postgresql.core.BaseConnection;
11
+ import org.embulk.spi.Exec;
12
+ import org.embulk.output.jdbc.JdbcSchema;
13
+
14
+ public class PostgreSQLCopyBatchInsert
15
+ extends AbstractPostgreSQLCopyBatchInsert
16
+ {
17
+ private final Logger logger = Exec.getLogger(PostgreSQLCopyBatchInsert.class);
18
+ private final PostgreSQLOutputConnector connector;
19
+
20
+ private PostgreSQLOutputConnection connection = null;
21
+ private CopyManager copyManager = null;
22
+ private String copySql = null;
23
+ private long totalRows;
24
+
25
+ public PostgreSQLCopyBatchInsert(PostgreSQLOutputConnector connector) throws IOException, SQLException
26
+ {
27
+ super();
28
+ this.connector = connector;
29
+ }
30
+
31
+ @Override
32
+ public void prepare(String loadTable, JdbcSchema insertSchema) throws SQLException
33
+ {
34
+ this.connection = connector.connect(true);
35
+ this.copySql = connection.buildCopySql(loadTable, insertSchema);
36
+ this.copyManager = connection.newCopyManager();
37
+ logger.info("Copy SQL: "+copySql);
38
+ }
39
+
40
+ @Override
41
+ public void flush() throws IOException, SQLException
42
+ {
43
+ File file = closeCurrentFile(); // flush buffered data in writer
44
+
45
+ logger.info(String.format("Loading %,d rows (%,d bytes)", batchRows, file.length()));
46
+ long startTime = System.currentTimeMillis();
47
+ FileInputStream in = new FileInputStream(file);
48
+ try {
49
+ // TODO check age of connection and call isValid if it's old and reconnect if it's invalid
50
+ copyManager.copyIn(copySql, in);
51
+ } finally {
52
+ in.close();
53
+ }
54
+ double seconds = (System.currentTimeMillis() - startTime) / 1000.0;
55
+
56
+ totalRows += batchRows;
57
+ batchRows = 0;
58
+ logger.info(String.format("> %.2f seconds (loaded %,d rows in total)", seconds, totalRows));
59
+
60
+ openNewFile();
61
+ file.delete();
62
+ }
63
+
64
+ @Override
65
+ public void close() throws IOException, SQLException
66
+ {
67
+ closeCurrentFile().delete();
68
+ if (connection != null) {
69
+ connection.close();
70
+ connection = null;
71
+ }
72
+ }
73
+ }
@@ -0,0 +1,57 @@
1
+ package org.embulk.output.postgresql;
2
+
3
+ import java.sql.Connection;
4
+ import java.sql.SQLException;
5
+ import java.sql.Statement;
6
+ import org.postgresql.copy.CopyManager;
7
+ import org.postgresql.core.BaseConnection;
8
+ import org.embulk.spi.Exec;
9
+ import org.embulk.output.jdbc.JdbcOutputConnection;
10
+ import org.embulk.output.jdbc.JdbcColumn;
11
+ import org.embulk.output.jdbc.JdbcSchema;
12
+
13
+ public class PostgreSQLOutputConnection
14
+ extends JdbcOutputConnection
15
+ {
16
+ public PostgreSQLOutputConnection(Connection connection, String schemaName, boolean autoCommit)
17
+ throws SQLException
18
+ {
19
+ super(connection, schemaName);
20
+ connection.setAutoCommit(autoCommit);
21
+ }
22
+
23
+ public String buildCopySql(String toTable, JdbcSchema toTableSchema)
24
+ {
25
+ StringBuilder sb = new StringBuilder();
26
+
27
+ sb.append("COPY ");
28
+ quoteIdentifierString(sb, toTable);
29
+ sb.append(" (");
30
+ for (int i=0; i < toTableSchema.getCount(); i++) {
31
+ if(i != 0) { sb.append(", "); }
32
+ quoteIdentifierString(sb, toTableSchema.getColumnName(i));
33
+ }
34
+ sb.append(") ");
35
+ sb.append("FROM STDIN");
36
+
37
+ return sb.toString();
38
+ }
39
+
40
+ public CopyManager newCopyManager() throws SQLException
41
+ {
42
+ return new CopyManager((BaseConnection) connection);
43
+ }
44
+
45
+ @Override
46
+ protected String convertTypeName(String typeName)
47
+ {
48
+ switch(typeName) {
49
+ case "CLOB":
50
+ return "TEXT";
51
+ case "BLOB":
52
+ return "BYTEA";
53
+ default:
54
+ return typeName;
55
+ }
56
+ }
57
+ }
@@ -0,0 +1,40 @@
1
+ package org.embulk.output.postgresql;
2
+
3
+ import java.util.Properties;
4
+ import java.sql.Driver;
5
+ import java.sql.Connection;
6
+ import java.sql.SQLException;
7
+ import org.embulk.output.jdbc.JdbcOutputConnector;
8
+ import org.embulk.output.jdbc.JdbcOutputConnection;
9
+
10
+ public class PostgreSQLOutputConnector
11
+ implements JdbcOutputConnector
12
+ {
13
+ private static final Driver driver = new org.postgresql.Driver();
14
+
15
+ private final String url;
16
+ private final Properties properties;
17
+ private final String schemaName;
18
+
19
+ public PostgreSQLOutputConnector(String url, Properties properties, String schemaName)
20
+ {
21
+ this.url = url;
22
+ this.properties = properties;
23
+ this.schemaName = schemaName;
24
+ }
25
+
26
+ @Override
27
+ public PostgreSQLOutputConnection connect(boolean autoCommit) throws SQLException
28
+ {
29
+ Connection c = driver.connect(url, properties);
30
+ try {
31
+ PostgreSQLOutputConnection con = new PostgreSQLOutputConnection(c, schemaName, autoCommit);
32
+ c = null;
33
+ return con;
34
+ } finally {
35
+ if (c != null) {
36
+ c.close();
37
+ }
38
+ }
39
+ }
40
+ }
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-output-postgresql
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - FURUHASHI Sadayuki
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-16 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: JDBC output plugin is an Embulk plugin that loads records to JDBC read by any input plugins. Search the input plugins by "embulk-input" keyword.
14
+ email:
15
+ - frsyuki@users.sourceforge.jp
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - README.md
21
+ - build.gradle
22
+ - lib/embulk/output/postgresql.rb
23
+ - src/main/java/org/embulk/output/PostgreSQLOutputPlugin.java
24
+ - src/main/java/org/embulk/output/postgresql/AbstractPostgreSQLCopyBatchInsert.java
25
+ - src/main/java/org/embulk/output/postgresql/PostgreSQLCopyBatchInsert.java
26
+ - src/main/java/org/embulk/output/postgresql/PostgreSQLOutputConnection.java
27
+ - src/main/java/org/embulk/output/postgresql/PostgreSQLOutputConnector.java
28
+ - classpath/embulk-output-jdbc-0.1.0.jar
29
+ - classpath/embulk-output-postgresql-0.1.0.jar
30
+ - classpath/jna-4.1.0.jar
31
+ - classpath/jna-platform-4.1.0.jar
32
+ - classpath/postgresql-9.4-1200-jdbc41.jar
33
+ - classpath/slf4j-simple-1.7.7.jar
34
+ - classpath/waffle-jna-1.7.jar
35
+ homepage: https://github.com/embulk/embulk-output-jdbc
36
+ licenses:
37
+ - Apache 2.0
38
+ metadata: {}
39
+ post_install_message:
40
+ rdoc_options: []
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - '>='
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ requirements: []
54
+ rubyforge_project:
55
+ rubygems_version: 2.1.9
56
+ signing_key:
57
+ specification_version: 4
58
+ summary: JDBC output plugin for Embulk
59
+ test_files: []