embulk-output-clickhouse 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/LICENSE +21 -0
- data/LICENSE.txt +21 -0
- data/README.md +67 -0
- data/build.gradle +95 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/default_jdbc_driver/clickhouse-jdbc-0.2.4.jar +0 -0
- data/default_jdbc_driver/commons-codec-1.9.jar +0 -0
- data/default_jdbc_driver/commons-logging-1.2.jar +0 -0
- data/default_jdbc_driver/guava-19.0.jar +0 -0
- data/default_jdbc_driver/httpclient-4.5.2.jar +0 -0
- data/default_jdbc_driver/httpcore-4.4.4.jar +0 -0
- data/default_jdbc_driver/httpmime-4.5.2.jar +0 -0
- data/default_jdbc_driver/jackson-annotations-2.7.0.jar +0 -0
- data/default_jdbc_driver/jackson-core-2.7.3.jar +0 -0
- data/default_jdbc_driver/jackson-databind-2.7.3.jar +0 -0
- data/default_jdbc_driver/jaxb-api-2.3.0.jar +0 -0
- data/default_jdbc_driver/lz4-1.3.0.jar +0 -0
- data/default_jdbc_driver/slf4j-api-1.7.21.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +172 -0
- data/gradlew.bat +84 -0
- data/lib/embulk/output/clickhouse.rb +3 -0
- data/src/main/java/org/embulk/output/ClickhouseOutputPlugin.java +151 -0
- data/src/main/java/org/embulk/output/clickhouse/ClickhouseBatchInsert.java +223 -0
- data/src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnection.java +40 -0
- data/src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnector.java +44 -0
- data/src/main/java/org/embulk/output/clickhouse/setter/ClickhouseColumnSetterFactory.java +31 -0
- data/src/main/java/org/embulk/output/clickhouse/setter/ClickhouseSqlTimeColumnSetter.java +30 -0
- metadata +77 -0
@@ -0,0 +1,223 @@
|
|
1
|
+
package org.embulk.output.clickhouse;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
import java.math.BigDecimal;
|
5
|
+
import java.sql.*;
|
6
|
+
import java.util.Calendar;
|
7
|
+
import java.util.Locale;
|
8
|
+
|
9
|
+
import org.embulk.output.jdbc.*;
|
10
|
+
|
11
|
+
import com.google.common.base.Optional;
|
12
|
+
import org.embulk.spi.Exec;
|
13
|
+
import org.embulk.spi.time.Timestamp;
|
14
|
+
import org.slf4j.Logger;
|
15
|
+
|
16
|
+
public class ClickhouseBatchInsert
|
17
|
+
implements BatchInsert {
|
18
|
+
|
19
|
+
private final Logger logger = Exec.getLogger(StandardBatchInsert.class);
|
20
|
+
|
21
|
+
private final JdbcOutputConnector connector;
|
22
|
+
private final Optional<MergeConfig> mergeConfig;
|
23
|
+
|
24
|
+
private JdbcOutputConnection connection;
|
25
|
+
private PreparedStatement batch;
|
26
|
+
private int index;
|
27
|
+
private int batchWeight;
|
28
|
+
private int batchRows;
|
29
|
+
private long totalRows;
|
30
|
+
private int[] lastUpdateCounts;
|
31
|
+
|
32
|
+
public ClickhouseBatchInsert(JdbcOutputConnector connector, Optional <MergeConfig> mergeConfig) throws
|
33
|
+
IOException, SQLException
|
34
|
+
{
|
35
|
+
this.connector = connector;
|
36
|
+
this.mergeConfig = mergeConfig;
|
37
|
+
}
|
38
|
+
|
39
|
+
public void prepare(TableIdentifier loadTable, JdbcSchema insertSchema) throws SQLException
|
40
|
+
{
|
41
|
+
this.connection = connector.connect(true);
|
42
|
+
this.index = 1; // PreparedStatement index begings from 1
|
43
|
+
this.batchRows = 0;
|
44
|
+
this.totalRows = 0;
|
45
|
+
this.batch = prepareStatement(loadTable, insertSchema);
|
46
|
+
batch.clearBatch();
|
47
|
+
}
|
48
|
+
|
49
|
+
protected PreparedStatement prepareStatement(TableIdentifier loadTable, JdbcSchema insertSchema) throws
|
50
|
+
SQLException
|
51
|
+
{
|
52
|
+
return connection.prepareBatchInsertStatement(loadTable, insertSchema, mergeConfig);
|
53
|
+
}
|
54
|
+
|
55
|
+
public int getBatchWeight()
|
56
|
+
{
|
57
|
+
return batchWeight;
|
58
|
+
}
|
59
|
+
|
60
|
+
public void add() throws IOException, SQLException
|
61
|
+
{
|
62
|
+
batch.addBatch();
|
63
|
+
index = 1; // PreparedStatement index begins from 1
|
64
|
+
batchRows++;
|
65
|
+
batchWeight += 32; // add weight as overhead of each rows
|
66
|
+
}
|
67
|
+
|
68
|
+
public void close() throws IOException, SQLException
|
69
|
+
{
|
70
|
+
if (connection != null) {
|
71
|
+
connection.close();
|
72
|
+
}
|
73
|
+
}
|
74
|
+
|
75
|
+
public void flush() throws IOException, SQLException
|
76
|
+
{
|
77
|
+
lastUpdateCounts = new int[]{};
|
78
|
+
|
79
|
+
if (batchWeight == 0) return;
|
80
|
+
|
81
|
+
logger.info(String.format("Loading %,d rows", batchRows));
|
82
|
+
long startTime = System.currentTimeMillis();
|
83
|
+
try {
|
84
|
+
lastUpdateCounts = batch.executeBatch(); // here can't use returned value because MySQL Connector/J returns SUCCESS_NO_INFO as a batch result
|
85
|
+
double seconds = (System.currentTimeMillis() - startTime) / 1000.0;
|
86
|
+
|
87
|
+
totalRows += batchRows;
|
88
|
+
logger.info(String.format("> %.2f seconds (loaded %,d rows in total)", seconds, totalRows));
|
89
|
+
|
90
|
+
} catch (BatchUpdateException e) {
|
91
|
+
// will be used for retry
|
92
|
+
lastUpdateCounts = e.getUpdateCounts();
|
93
|
+
throw e;
|
94
|
+
|
95
|
+
} finally {
|
96
|
+
// clear for retry
|
97
|
+
batch.clearBatch();
|
98
|
+
batchRows = 0;
|
99
|
+
batchWeight = 0;
|
100
|
+
}
|
101
|
+
}
|
102
|
+
|
103
|
+
@Override
|
104
|
+
public int[] getLastUpdateCounts()
|
105
|
+
{
|
106
|
+
return lastUpdateCounts;
|
107
|
+
}
|
108
|
+
|
109
|
+
public void finish() throws IOException, SQLException
|
110
|
+
{
|
111
|
+
}
|
112
|
+
|
113
|
+
public void setNull(int sqlType) throws IOException, SQLException
|
114
|
+
{
|
115
|
+
batch.setNull(index, sqlType);
|
116
|
+
nextColumn(0);
|
117
|
+
}
|
118
|
+
|
119
|
+
public void setBoolean(boolean v) throws IOException, SQLException
|
120
|
+
{
|
121
|
+
batch.setBoolean(index, v);
|
122
|
+
nextColumn(1);
|
123
|
+
}
|
124
|
+
|
125
|
+
public void setByte(byte v) throws IOException, SQLException
|
126
|
+
{
|
127
|
+
batch.setByte(index, v);
|
128
|
+
nextColumn(1);
|
129
|
+
}
|
130
|
+
|
131
|
+
public void setShort(short v) throws IOException, SQLException
|
132
|
+
{
|
133
|
+
batch.setShort(index, v);
|
134
|
+
nextColumn(2);
|
135
|
+
}
|
136
|
+
|
137
|
+
public void setInt(int v) throws IOException, SQLException
|
138
|
+
{
|
139
|
+
batch.setInt(index, v);
|
140
|
+
nextColumn(4);
|
141
|
+
}
|
142
|
+
|
143
|
+
public void setLong(long v) throws IOException, SQLException
|
144
|
+
{
|
145
|
+
batch.setLong(index, v);
|
146
|
+
nextColumn(8);
|
147
|
+
}
|
148
|
+
|
149
|
+
public void setFloat(float v) throws IOException, SQLException
|
150
|
+
{
|
151
|
+
batch.setFloat(index, v);
|
152
|
+
nextColumn(4);
|
153
|
+
}
|
154
|
+
|
155
|
+
public void setDouble(double v) throws IOException, SQLException
|
156
|
+
{
|
157
|
+
batch.setDouble(index, v);
|
158
|
+
nextColumn(8);
|
159
|
+
}
|
160
|
+
|
161
|
+
public void setBigDecimal(BigDecimal v) throws IOException, SQLException
|
162
|
+
{
|
163
|
+
// use estimated number of necessary bytes + 8 byte for the weight
|
164
|
+
// assuming one place needs 4 bits. ceil(v.precision() / 2.0) + 8
|
165
|
+
batch.setBigDecimal(index, v);
|
166
|
+
nextColumn((v.precision() & ~2) / 2 + 8);
|
167
|
+
}
|
168
|
+
|
169
|
+
public void setString(String v) throws IOException, SQLException
|
170
|
+
{
|
171
|
+
batch.setString(index, v);
|
172
|
+
// estimate all chracters use 2 bytes; almost enough for the worst case
|
173
|
+
nextColumn(v.length() * 2 + 4);
|
174
|
+
}
|
175
|
+
|
176
|
+
public void setNString(String v) throws IOException, SQLException
|
177
|
+
{
|
178
|
+
batch.setNString(index, v);
|
179
|
+
// estimate all chracters use 2 bytes; almost enough for the worst case
|
180
|
+
nextColumn(v.length() * 2 + 4);
|
181
|
+
}
|
182
|
+
|
183
|
+
public void setBytes(byte[] v) throws IOException, SQLException
|
184
|
+
{
|
185
|
+
batch.setBytes(index, v);
|
186
|
+
nextColumn(v.length + 4);
|
187
|
+
}
|
188
|
+
|
189
|
+
public void setSqlDate(Timestamp v, Calendar cal) throws IOException, SQLException
|
190
|
+
{
|
191
|
+
// JavaDoc of java.sql.Time says:
|
192
|
+
// >> To conform with the definition of SQL DATE, the millisecond values wrapped by a java.sql.Date instance must be 'normalized' by setting the hours, minutes, seconds, and milliseconds to zero in the particular time zone with which the instance is associated.
|
193
|
+
cal.setTimeInMillis(v.getEpochSecond() * 1000);
|
194
|
+
cal.set(Calendar.SECOND, 0);
|
195
|
+
cal.set(Calendar.MINUTE, 0);
|
196
|
+
cal.set(Calendar.HOUR_OF_DAY, 0);
|
197
|
+
Date normalized = new Date(cal.getTimeInMillis());
|
198
|
+
batch.setDate(index, normalized, cal);
|
199
|
+
nextColumn(32);
|
200
|
+
}
|
201
|
+
|
202
|
+
public void setSqlTime(Timestamp v, Calendar cal) throws IOException, SQLException
|
203
|
+
{
|
204
|
+
Time t = new Time(v.toEpochMilli());
|
205
|
+
batch.setTime(index, t, cal);
|
206
|
+
nextColumn(32);
|
207
|
+
}
|
208
|
+
|
209
|
+
public void setSqlTimestamp(Timestamp v, Calendar cal) throws IOException, SQLException
|
210
|
+
{
|
211
|
+
java.sql.Timestamp t = new java.sql.Timestamp(v.getEpochSecond() * 1000);
|
212
|
+
t.setNanos(v.getNano());
|
213
|
+
batch.setTimestamp(index, t);
|
214
|
+
nextColumn(32);
|
215
|
+
}
|
216
|
+
|
217
|
+
private void nextColumn(int weight)
|
218
|
+
{
|
219
|
+
index++;
|
220
|
+
batchWeight += weight + 4; // add weight as overhead of each columns
|
221
|
+
|
222
|
+
}
|
223
|
+
}
|
@@ -0,0 +1,40 @@
|
|
1
|
+
package org.embulk.output.clickhouse;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.sql.Connection;
|
5
|
+
import java.sql.SQLException;
|
6
|
+
|
7
|
+
import com.google.common.base.Optional;
|
8
|
+
import org.embulk.output.jdbc.JdbcColumn;
|
9
|
+
import org.embulk.output.jdbc.JdbcSchema;
|
10
|
+
import org.embulk.output.jdbc.JdbcOutputConnection;
|
11
|
+
import org.embulk.output.jdbc.MergeConfig;
|
12
|
+
import org.embulk.output.jdbc.TableIdentifier;
|
13
|
+
import ru.yandex.clickhouse.domain.ClickHouseDataType;
|
14
|
+
|
15
|
+
public class ClickhouseOutputConnection
|
16
|
+
extends JdbcOutputConnection
|
17
|
+
{
|
18
|
+
public ClickhouseOutputConnection(Connection connection)
|
19
|
+
throws SQLException
|
20
|
+
{
|
21
|
+
super(connection, null);
|
22
|
+
}
|
23
|
+
|
24
|
+
@Override
|
25
|
+
protected String buildColumnTypeName(JdbcColumn c)
|
26
|
+
{
|
27
|
+
switch(c.getSimpleTypeName()) {
|
28
|
+
case "CLOB":
|
29
|
+
return "String";
|
30
|
+
case "DOUBLE PRECISION":
|
31
|
+
return "Float64";
|
32
|
+
case "DATETIME64(3)":
|
33
|
+
return "DateTime64";
|
34
|
+
|
35
|
+
default:
|
36
|
+
return ClickHouseDataType.fromTypeString(c.getSimpleTypeName()).toString();
|
37
|
+
}
|
38
|
+
}
|
39
|
+
|
40
|
+
}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
package org.embulk.output.clickhouse;
|
2
|
+
|
3
|
+
import java.util.Properties;
|
4
|
+
import java.sql.Connection;
|
5
|
+
import java.sql.DriverManager;
|
6
|
+
import java.sql.SQLException;
|
7
|
+
|
8
|
+
import org.embulk.output.jdbc.AbstractJdbcOutputConnector;
|
9
|
+
import org.embulk.output.jdbc.JdbcOutputConnection;
|
10
|
+
import org.embulk.output.jdbc.TransactionIsolation;
|
11
|
+
|
12
|
+
import com.google.common.base.Optional;
|
13
|
+
|
14
|
+
public class ClickhouseOutputConnector
|
15
|
+
extends AbstractJdbcOutputConnector
|
16
|
+
{
|
17
|
+
|
18
|
+
private final String url;
|
19
|
+
private final Properties properties;
|
20
|
+
|
21
|
+
public ClickhouseOutputConnector(String url, Properties properties,
|
22
|
+
Optional<TransactionIsolation> transactionIsolation)
|
23
|
+
{
|
24
|
+
super(transactionIsolation);
|
25
|
+
|
26
|
+
this.url = url;
|
27
|
+
this.properties = properties;
|
28
|
+
}
|
29
|
+
|
30
|
+
@Override
|
31
|
+
protected JdbcOutputConnection connect() throws SQLException
|
32
|
+
{
|
33
|
+
Connection c = DriverManager.getConnection(url, properties);
|
34
|
+
try {
|
35
|
+
ClickhouseOutputConnection con = new ClickhouseOutputConnection(c);
|
36
|
+
c = null;
|
37
|
+
return con;
|
38
|
+
} finally {
|
39
|
+
if (c != null) {
|
40
|
+
c.close();
|
41
|
+
}
|
42
|
+
}
|
43
|
+
}
|
44
|
+
}
|
@@ -0,0 +1,31 @@
|
|
1
|
+
package org.embulk.output.clickhouse.setter;
|
2
|
+
|
3
|
+
import org.embulk.output.jdbc.BatchInsert;
|
4
|
+
import org.embulk.output.jdbc.JdbcColumn;
|
5
|
+
import org.embulk.output.jdbc.JdbcColumnOption;
|
6
|
+
import org.embulk.output.jdbc.setter.ColumnSetter;
|
7
|
+
import org.embulk.output.jdbc.setter.ColumnSetterFactory;
|
8
|
+
import org.embulk.output.jdbc.setter.SqlTimestampColumnSetter;
|
9
|
+
import org.embulk.output.jdbc.setter.StringColumnSetter;
|
10
|
+
import org.joda.time.DateTimeZone;
|
11
|
+
|
12
|
+
public class ClickhouseColumnSetterFactory
|
13
|
+
extends ColumnSetterFactory
|
14
|
+
{
|
15
|
+
public ClickhouseColumnSetterFactory(BatchInsert batch, DateTimeZone defaultTimeZone)
|
16
|
+
{
|
17
|
+
super(batch, defaultTimeZone);
|
18
|
+
}
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public ColumnSetter newCoalesceColumnSetter(JdbcColumn column, JdbcColumnOption option)
|
22
|
+
{
|
23
|
+
if (column.getSimpleTypeName().equalsIgnoreCase("datetime64(3)")) {
|
24
|
+
// actually "timestamp"
|
25
|
+
return new ClickhouseSqlTimeColumnSetter(batch, column, newDefaultValueSetter(column, option), newCalendar(option));
|
26
|
+
} else {
|
27
|
+
return super.newCoalesceColumnSetter(column, option);
|
28
|
+
}
|
29
|
+
}
|
30
|
+
|
31
|
+
}
|
@@ -0,0 +1,30 @@
|
|
1
|
+
package org.embulk.output.clickhouse.setter;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
import java.sql.SQLException;
|
5
|
+
import java.util.Calendar;
|
6
|
+
|
7
|
+
import org.embulk.output.jdbc.BatchInsert;
|
8
|
+
import org.embulk.output.jdbc.JdbcColumn;
|
9
|
+
import org.embulk.output.jdbc.setter.DefaultValueSetter;
|
10
|
+
import org.embulk.output.jdbc.setter.SqlTimeColumnSetter;
|
11
|
+
import org.embulk.spi.time.Timestamp;
|
12
|
+
|
13
|
+
public class ClickhouseSqlTimeColumnSetter
|
14
|
+
extends SqlTimeColumnSetter
|
15
|
+
{
|
16
|
+
|
17
|
+
public ClickhouseSqlTimeColumnSetter(BatchInsert batch, JdbcColumn column,
|
18
|
+
DefaultValueSetter defaultValue,
|
19
|
+
Calendar calendar)
|
20
|
+
{
|
21
|
+
super(batch, column, defaultValue, calendar);
|
22
|
+
}
|
23
|
+
|
24
|
+
@Override
|
25
|
+
public void timestampValue(Timestamp v) throws IOException, SQLException
|
26
|
+
{
|
27
|
+
batch.setSqlTimestamp(v, calendar);
|
28
|
+
}
|
29
|
+
|
30
|
+
}
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: embulk-output-clickhouse
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.4
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Karri Niemelä
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-02-10 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Dumps records to Clickhouse.
|
14
|
+
email:
|
15
|
+
- karri.niemela@beans.fi
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- ".gitignore"
|
21
|
+
- LICENSE
|
22
|
+
- LICENSE.txt
|
23
|
+
- README.md
|
24
|
+
- build.gradle
|
25
|
+
- classpath/embulk-output-clickhouse-0.1.4.jar
|
26
|
+
- classpath/embulk-output-jdbc-0.8.7.jar
|
27
|
+
- config/checkstyle/checkstyle.xml
|
28
|
+
- config/checkstyle/default.xml
|
29
|
+
- default_jdbc_driver/clickhouse-jdbc-0.2.4.jar
|
30
|
+
- default_jdbc_driver/commons-codec-1.9.jar
|
31
|
+
- default_jdbc_driver/commons-logging-1.2.jar
|
32
|
+
- default_jdbc_driver/guava-19.0.jar
|
33
|
+
- default_jdbc_driver/httpclient-4.5.2.jar
|
34
|
+
- default_jdbc_driver/httpcore-4.4.4.jar
|
35
|
+
- default_jdbc_driver/httpmime-4.5.2.jar
|
36
|
+
- default_jdbc_driver/jackson-annotations-2.7.0.jar
|
37
|
+
- default_jdbc_driver/jackson-core-2.7.3.jar
|
38
|
+
- default_jdbc_driver/jackson-databind-2.7.3.jar
|
39
|
+
- default_jdbc_driver/jaxb-api-2.3.0.jar
|
40
|
+
- default_jdbc_driver/lz4-1.3.0.jar
|
41
|
+
- default_jdbc_driver/slf4j-api-1.7.21.jar
|
42
|
+
- gradle/wrapper/gradle-wrapper.jar
|
43
|
+
- gradle/wrapper/gradle-wrapper.properties
|
44
|
+
- gradlew
|
45
|
+
- gradlew.bat
|
46
|
+
- lib/embulk/output/clickhouse.rb
|
47
|
+
- src/main/java/org/embulk/output/ClickhouseOutputPlugin.java
|
48
|
+
- src/main/java/org/embulk/output/clickhouse/ClickhouseBatchInsert.java
|
49
|
+
- src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnection.java
|
50
|
+
- src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnector.java
|
51
|
+
- src/main/java/org/embulk/output/clickhouse/setter/ClickhouseColumnSetterFactory.java
|
52
|
+
- src/main/java/org/embulk/output/clickhouse/setter/ClickhouseSqlTimeColumnSetter.java
|
53
|
+
homepage: https://github.com/kakoni/embulk-output-clickhouse
|
54
|
+
licenses:
|
55
|
+
- MIT
|
56
|
+
metadata: {}
|
57
|
+
post_install_message:
|
58
|
+
rdoc_options: []
|
59
|
+
require_paths:
|
60
|
+
- lib
|
61
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
71
|
+
requirements: []
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 2.4.8
|
74
|
+
signing_key:
|
75
|
+
specification_version: 4
|
76
|
+
summary: Clickhouse output plugin for Embulk
|
77
|
+
test_files: []
|