embulk-output-clickhouse 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/LICENSE +21 -0
- data/LICENSE.txt +21 -0
- data/README.md +67 -0
- data/build.gradle +95 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/default_jdbc_driver/clickhouse-jdbc-0.2.4.jar +0 -0
- data/default_jdbc_driver/commons-codec-1.9.jar +0 -0
- data/default_jdbc_driver/commons-logging-1.2.jar +0 -0
- data/default_jdbc_driver/guava-19.0.jar +0 -0
- data/default_jdbc_driver/httpclient-4.5.2.jar +0 -0
- data/default_jdbc_driver/httpcore-4.4.4.jar +0 -0
- data/default_jdbc_driver/httpmime-4.5.2.jar +0 -0
- data/default_jdbc_driver/jackson-annotations-2.7.0.jar +0 -0
- data/default_jdbc_driver/jackson-core-2.7.3.jar +0 -0
- data/default_jdbc_driver/jackson-databind-2.7.3.jar +0 -0
- data/default_jdbc_driver/jaxb-api-2.3.0.jar +0 -0
- data/default_jdbc_driver/lz4-1.3.0.jar +0 -0
- data/default_jdbc_driver/slf4j-api-1.7.21.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +172 -0
- data/gradlew.bat +84 -0
- data/lib/embulk/output/clickhouse.rb +3 -0
- data/src/main/java/org/embulk/output/ClickhouseOutputPlugin.java +151 -0
- data/src/main/java/org/embulk/output/clickhouse/ClickhouseBatchInsert.java +223 -0
- data/src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnection.java +40 -0
- data/src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnector.java +44 -0
- data/src/main/java/org/embulk/output/clickhouse/setter/ClickhouseColumnSetterFactory.java +31 -0
- data/src/main/java/org/embulk/output/clickhouse/setter/ClickhouseSqlTimeColumnSetter.java +30 -0
- metadata +77 -0
@@ -0,0 +1,223 @@
|
|
1
|
+
package org.embulk.output.clickhouse;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
import java.math.BigDecimal;
|
5
|
+
import java.sql.*;
|
6
|
+
import java.util.Calendar;
|
7
|
+
import java.util.Locale;
|
8
|
+
|
9
|
+
import org.embulk.output.jdbc.*;
|
10
|
+
|
11
|
+
import com.google.common.base.Optional;
|
12
|
+
import org.embulk.spi.Exec;
|
13
|
+
import org.embulk.spi.time.Timestamp;
|
14
|
+
import org.slf4j.Logger;
|
15
|
+
|
16
|
+
public class ClickhouseBatchInsert
|
17
|
+
implements BatchInsert {
|
18
|
+
|
19
|
+
private final Logger logger = Exec.getLogger(StandardBatchInsert.class);
|
20
|
+
|
21
|
+
private final JdbcOutputConnector connector;
|
22
|
+
private final Optional<MergeConfig> mergeConfig;
|
23
|
+
|
24
|
+
private JdbcOutputConnection connection;
|
25
|
+
private PreparedStatement batch;
|
26
|
+
private int index;
|
27
|
+
private int batchWeight;
|
28
|
+
private int batchRows;
|
29
|
+
private long totalRows;
|
30
|
+
private int[] lastUpdateCounts;
|
31
|
+
|
32
|
+
public ClickhouseBatchInsert(JdbcOutputConnector connector, Optional <MergeConfig> mergeConfig) throws
|
33
|
+
IOException, SQLException
|
34
|
+
{
|
35
|
+
this.connector = connector;
|
36
|
+
this.mergeConfig = mergeConfig;
|
37
|
+
}
|
38
|
+
|
39
|
+
public void prepare(TableIdentifier loadTable, JdbcSchema insertSchema) throws SQLException
|
40
|
+
{
|
41
|
+
this.connection = connector.connect(true);
|
42
|
+
this.index = 1; // PreparedStatement index begings from 1
|
43
|
+
this.batchRows = 0;
|
44
|
+
this.totalRows = 0;
|
45
|
+
this.batch = prepareStatement(loadTable, insertSchema);
|
46
|
+
batch.clearBatch();
|
47
|
+
}
|
48
|
+
|
49
|
+
protected PreparedStatement prepareStatement(TableIdentifier loadTable, JdbcSchema insertSchema) throws
|
50
|
+
SQLException
|
51
|
+
{
|
52
|
+
return connection.prepareBatchInsertStatement(loadTable, insertSchema, mergeConfig);
|
53
|
+
}
|
54
|
+
|
55
|
+
public int getBatchWeight()
|
56
|
+
{
|
57
|
+
return batchWeight;
|
58
|
+
}
|
59
|
+
|
60
|
+
public void add() throws IOException, SQLException
|
61
|
+
{
|
62
|
+
batch.addBatch();
|
63
|
+
index = 1; // PreparedStatement index begins from 1
|
64
|
+
batchRows++;
|
65
|
+
batchWeight += 32; // add weight as overhead of each rows
|
66
|
+
}
|
67
|
+
|
68
|
+
public void close() throws IOException, SQLException
|
69
|
+
{
|
70
|
+
if (connection != null) {
|
71
|
+
connection.close();
|
72
|
+
}
|
73
|
+
}
|
74
|
+
|
75
|
+
public void flush() throws IOException, SQLException
|
76
|
+
{
|
77
|
+
lastUpdateCounts = new int[]{};
|
78
|
+
|
79
|
+
if (batchWeight == 0) return;
|
80
|
+
|
81
|
+
logger.info(String.format("Loading %,d rows", batchRows));
|
82
|
+
long startTime = System.currentTimeMillis();
|
83
|
+
try {
|
84
|
+
lastUpdateCounts = batch.executeBatch(); // here can't use returned value because MySQL Connector/J returns SUCCESS_NO_INFO as a batch result
|
85
|
+
double seconds = (System.currentTimeMillis() - startTime) / 1000.0;
|
86
|
+
|
87
|
+
totalRows += batchRows;
|
88
|
+
logger.info(String.format("> %.2f seconds (loaded %,d rows in total)", seconds, totalRows));
|
89
|
+
|
90
|
+
} catch (BatchUpdateException e) {
|
91
|
+
// will be used for retry
|
92
|
+
lastUpdateCounts = e.getUpdateCounts();
|
93
|
+
throw e;
|
94
|
+
|
95
|
+
} finally {
|
96
|
+
// clear for retry
|
97
|
+
batch.clearBatch();
|
98
|
+
batchRows = 0;
|
99
|
+
batchWeight = 0;
|
100
|
+
}
|
101
|
+
}
|
102
|
+
|
103
|
+
@Override
|
104
|
+
public int[] getLastUpdateCounts()
|
105
|
+
{
|
106
|
+
return lastUpdateCounts;
|
107
|
+
}
|
108
|
+
|
109
|
+
public void finish() throws IOException, SQLException
|
110
|
+
{
|
111
|
+
}
|
112
|
+
|
113
|
+
public void setNull(int sqlType) throws IOException, SQLException
|
114
|
+
{
|
115
|
+
batch.setNull(index, sqlType);
|
116
|
+
nextColumn(0);
|
117
|
+
}
|
118
|
+
|
119
|
+
public void setBoolean(boolean v) throws IOException, SQLException
|
120
|
+
{
|
121
|
+
batch.setBoolean(index, v);
|
122
|
+
nextColumn(1);
|
123
|
+
}
|
124
|
+
|
125
|
+
public void setByte(byte v) throws IOException, SQLException
|
126
|
+
{
|
127
|
+
batch.setByte(index, v);
|
128
|
+
nextColumn(1);
|
129
|
+
}
|
130
|
+
|
131
|
+
public void setShort(short v) throws IOException, SQLException
|
132
|
+
{
|
133
|
+
batch.setShort(index, v);
|
134
|
+
nextColumn(2);
|
135
|
+
}
|
136
|
+
|
137
|
+
public void setInt(int v) throws IOException, SQLException
|
138
|
+
{
|
139
|
+
batch.setInt(index, v);
|
140
|
+
nextColumn(4);
|
141
|
+
}
|
142
|
+
|
143
|
+
public void setLong(long v) throws IOException, SQLException
|
144
|
+
{
|
145
|
+
batch.setLong(index, v);
|
146
|
+
nextColumn(8);
|
147
|
+
}
|
148
|
+
|
149
|
+
public void setFloat(float v) throws IOException, SQLException
|
150
|
+
{
|
151
|
+
batch.setFloat(index, v);
|
152
|
+
nextColumn(4);
|
153
|
+
}
|
154
|
+
|
155
|
+
public void setDouble(double v) throws IOException, SQLException
|
156
|
+
{
|
157
|
+
batch.setDouble(index, v);
|
158
|
+
nextColumn(8);
|
159
|
+
}
|
160
|
+
|
161
|
+
public void setBigDecimal(BigDecimal v) throws IOException, SQLException
|
162
|
+
{
|
163
|
+
// use estimated number of necessary bytes + 8 byte for the weight
|
164
|
+
// assuming one place needs 4 bits. ceil(v.precision() / 2.0) + 8
|
165
|
+
batch.setBigDecimal(index, v);
|
166
|
+
nextColumn((v.precision() & ~2) / 2 + 8);
|
167
|
+
}
|
168
|
+
|
169
|
+
public void setString(String v) throws IOException, SQLException
|
170
|
+
{
|
171
|
+
batch.setString(index, v);
|
172
|
+
// estimate all chracters use 2 bytes; almost enough for the worst case
|
173
|
+
nextColumn(v.length() * 2 + 4);
|
174
|
+
}
|
175
|
+
|
176
|
+
public void setNString(String v) throws IOException, SQLException
|
177
|
+
{
|
178
|
+
batch.setNString(index, v);
|
179
|
+
// estimate all chracters use 2 bytes; almost enough for the worst case
|
180
|
+
nextColumn(v.length() * 2 + 4);
|
181
|
+
}
|
182
|
+
|
183
|
+
public void setBytes(byte[] v) throws IOException, SQLException
|
184
|
+
{
|
185
|
+
batch.setBytes(index, v);
|
186
|
+
nextColumn(v.length + 4);
|
187
|
+
}
|
188
|
+
|
189
|
+
public void setSqlDate(Timestamp v, Calendar cal) throws IOException, SQLException
|
190
|
+
{
|
191
|
+
// JavaDoc of java.sql.Time says:
|
192
|
+
// >> To conform with the definition of SQL DATE, the millisecond values wrapped by a java.sql.Date instance must be 'normalized' by setting the hours, minutes, seconds, and milliseconds to zero in the particular time zone with which the instance is associated.
|
193
|
+
cal.setTimeInMillis(v.getEpochSecond() * 1000);
|
194
|
+
cal.set(Calendar.SECOND, 0);
|
195
|
+
cal.set(Calendar.MINUTE, 0);
|
196
|
+
cal.set(Calendar.HOUR_OF_DAY, 0);
|
197
|
+
Date normalized = new Date(cal.getTimeInMillis());
|
198
|
+
batch.setDate(index, normalized, cal);
|
199
|
+
nextColumn(32);
|
200
|
+
}
|
201
|
+
|
202
|
+
public void setSqlTime(Timestamp v, Calendar cal) throws IOException, SQLException
|
203
|
+
{
|
204
|
+
Time t = new Time(v.toEpochMilli());
|
205
|
+
batch.setTime(index, t, cal);
|
206
|
+
nextColumn(32);
|
207
|
+
}
|
208
|
+
|
209
|
+
public void setSqlTimestamp(Timestamp v, Calendar cal) throws IOException, SQLException
|
210
|
+
{
|
211
|
+
java.sql.Timestamp t = new java.sql.Timestamp(v.getEpochSecond() * 1000);
|
212
|
+
t.setNanos(v.getNano());
|
213
|
+
batch.setTimestamp(index, t);
|
214
|
+
nextColumn(32);
|
215
|
+
}
|
216
|
+
|
217
|
+
private void nextColumn(int weight)
|
218
|
+
{
|
219
|
+
index++;
|
220
|
+
batchWeight += weight + 4; // add weight as overhead of each columns
|
221
|
+
|
222
|
+
}
|
223
|
+
}
|
@@ -0,0 +1,40 @@
|
|
1
|
+
package org.embulk.output.clickhouse;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.sql.Connection;
|
5
|
+
import java.sql.SQLException;
|
6
|
+
|
7
|
+
import com.google.common.base.Optional;
|
8
|
+
import org.embulk.output.jdbc.JdbcColumn;
|
9
|
+
import org.embulk.output.jdbc.JdbcSchema;
|
10
|
+
import org.embulk.output.jdbc.JdbcOutputConnection;
|
11
|
+
import org.embulk.output.jdbc.MergeConfig;
|
12
|
+
import org.embulk.output.jdbc.TableIdentifier;
|
13
|
+
import ru.yandex.clickhouse.domain.ClickHouseDataType;
|
14
|
+
|
15
|
+
public class ClickhouseOutputConnection
|
16
|
+
extends JdbcOutputConnection
|
17
|
+
{
|
18
|
+
public ClickhouseOutputConnection(Connection connection)
|
19
|
+
throws SQLException
|
20
|
+
{
|
21
|
+
super(connection, null);
|
22
|
+
}
|
23
|
+
|
24
|
+
@Override
|
25
|
+
protected String buildColumnTypeName(JdbcColumn c)
|
26
|
+
{
|
27
|
+
switch(c.getSimpleTypeName()) {
|
28
|
+
case "CLOB":
|
29
|
+
return "String";
|
30
|
+
case "DOUBLE PRECISION":
|
31
|
+
return "Float64";
|
32
|
+
case "DATETIME64(3)":
|
33
|
+
return "DateTime64";
|
34
|
+
|
35
|
+
default:
|
36
|
+
return ClickHouseDataType.fromTypeString(c.getSimpleTypeName()).toString();
|
37
|
+
}
|
38
|
+
}
|
39
|
+
|
40
|
+
}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
package org.embulk.output.clickhouse;
|
2
|
+
|
3
|
+
import java.util.Properties;
|
4
|
+
import java.sql.Connection;
|
5
|
+
import java.sql.DriverManager;
|
6
|
+
import java.sql.SQLException;
|
7
|
+
|
8
|
+
import org.embulk.output.jdbc.AbstractJdbcOutputConnector;
|
9
|
+
import org.embulk.output.jdbc.JdbcOutputConnection;
|
10
|
+
import org.embulk.output.jdbc.TransactionIsolation;
|
11
|
+
|
12
|
+
import com.google.common.base.Optional;
|
13
|
+
|
14
|
+
public class ClickhouseOutputConnector
|
15
|
+
extends AbstractJdbcOutputConnector
|
16
|
+
{
|
17
|
+
|
18
|
+
private final String url;
|
19
|
+
private final Properties properties;
|
20
|
+
|
21
|
+
public ClickhouseOutputConnector(String url, Properties properties,
|
22
|
+
Optional<TransactionIsolation> transactionIsolation)
|
23
|
+
{
|
24
|
+
super(transactionIsolation);
|
25
|
+
|
26
|
+
this.url = url;
|
27
|
+
this.properties = properties;
|
28
|
+
}
|
29
|
+
|
30
|
+
@Override
|
31
|
+
protected JdbcOutputConnection connect() throws SQLException
|
32
|
+
{
|
33
|
+
Connection c = DriverManager.getConnection(url, properties);
|
34
|
+
try {
|
35
|
+
ClickhouseOutputConnection con = new ClickhouseOutputConnection(c);
|
36
|
+
c = null;
|
37
|
+
return con;
|
38
|
+
} finally {
|
39
|
+
if (c != null) {
|
40
|
+
c.close();
|
41
|
+
}
|
42
|
+
}
|
43
|
+
}
|
44
|
+
}
|
@@ -0,0 +1,31 @@
|
|
1
|
+
package org.embulk.output.clickhouse.setter;
|
2
|
+
|
3
|
+
import org.embulk.output.jdbc.BatchInsert;
|
4
|
+
import org.embulk.output.jdbc.JdbcColumn;
|
5
|
+
import org.embulk.output.jdbc.JdbcColumnOption;
|
6
|
+
import org.embulk.output.jdbc.setter.ColumnSetter;
|
7
|
+
import org.embulk.output.jdbc.setter.ColumnSetterFactory;
|
8
|
+
import org.embulk.output.jdbc.setter.SqlTimestampColumnSetter;
|
9
|
+
import org.embulk.output.jdbc.setter.StringColumnSetter;
|
10
|
+
import org.joda.time.DateTimeZone;
|
11
|
+
|
12
|
+
public class ClickhouseColumnSetterFactory
|
13
|
+
extends ColumnSetterFactory
|
14
|
+
{
|
15
|
+
public ClickhouseColumnSetterFactory(BatchInsert batch, DateTimeZone defaultTimeZone)
|
16
|
+
{
|
17
|
+
super(batch, defaultTimeZone);
|
18
|
+
}
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public ColumnSetter newCoalesceColumnSetter(JdbcColumn column, JdbcColumnOption option)
|
22
|
+
{
|
23
|
+
if (column.getSimpleTypeName().equalsIgnoreCase("datetime64(3)")) {
|
24
|
+
// actually "timestamp"
|
25
|
+
return new ClickhouseSqlTimeColumnSetter(batch, column, newDefaultValueSetter(column, option), newCalendar(option));
|
26
|
+
} else {
|
27
|
+
return super.newCoalesceColumnSetter(column, option);
|
28
|
+
}
|
29
|
+
}
|
30
|
+
|
31
|
+
}
|
@@ -0,0 +1,30 @@
|
|
1
|
+
package org.embulk.output.clickhouse.setter;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
import java.sql.SQLException;
|
5
|
+
import java.util.Calendar;
|
6
|
+
|
7
|
+
import org.embulk.output.jdbc.BatchInsert;
|
8
|
+
import org.embulk.output.jdbc.JdbcColumn;
|
9
|
+
import org.embulk.output.jdbc.setter.DefaultValueSetter;
|
10
|
+
import org.embulk.output.jdbc.setter.SqlTimeColumnSetter;
|
11
|
+
import org.embulk.spi.time.Timestamp;
|
12
|
+
|
13
|
+
public class ClickhouseSqlTimeColumnSetter
|
14
|
+
extends SqlTimeColumnSetter
|
15
|
+
{
|
16
|
+
|
17
|
+
public ClickhouseSqlTimeColumnSetter(BatchInsert batch, JdbcColumn column,
|
18
|
+
DefaultValueSetter defaultValue,
|
19
|
+
Calendar calendar)
|
20
|
+
{
|
21
|
+
super(batch, column, defaultValue, calendar);
|
22
|
+
}
|
23
|
+
|
24
|
+
@Override
|
25
|
+
public void timestampValue(Timestamp v) throws IOException, SQLException
|
26
|
+
{
|
27
|
+
batch.setSqlTimestamp(v, calendar);
|
28
|
+
}
|
29
|
+
|
30
|
+
}
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: embulk-output-clickhouse
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.4
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Karri Niemelä
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-02-10 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Dumps records to Clickhouse.
|
14
|
+
email:
|
15
|
+
- karri.niemela@beans.fi
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- ".gitignore"
|
21
|
+
- LICENSE
|
22
|
+
- LICENSE.txt
|
23
|
+
- README.md
|
24
|
+
- build.gradle
|
25
|
+
- classpath/embulk-output-clickhouse-0.1.4.jar
|
26
|
+
- classpath/embulk-output-jdbc-0.8.7.jar
|
27
|
+
- config/checkstyle/checkstyle.xml
|
28
|
+
- config/checkstyle/default.xml
|
29
|
+
- default_jdbc_driver/clickhouse-jdbc-0.2.4.jar
|
30
|
+
- default_jdbc_driver/commons-codec-1.9.jar
|
31
|
+
- default_jdbc_driver/commons-logging-1.2.jar
|
32
|
+
- default_jdbc_driver/guava-19.0.jar
|
33
|
+
- default_jdbc_driver/httpclient-4.5.2.jar
|
34
|
+
- default_jdbc_driver/httpcore-4.4.4.jar
|
35
|
+
- default_jdbc_driver/httpmime-4.5.2.jar
|
36
|
+
- default_jdbc_driver/jackson-annotations-2.7.0.jar
|
37
|
+
- default_jdbc_driver/jackson-core-2.7.3.jar
|
38
|
+
- default_jdbc_driver/jackson-databind-2.7.3.jar
|
39
|
+
- default_jdbc_driver/jaxb-api-2.3.0.jar
|
40
|
+
- default_jdbc_driver/lz4-1.3.0.jar
|
41
|
+
- default_jdbc_driver/slf4j-api-1.7.21.jar
|
42
|
+
- gradle/wrapper/gradle-wrapper.jar
|
43
|
+
- gradle/wrapper/gradle-wrapper.properties
|
44
|
+
- gradlew
|
45
|
+
- gradlew.bat
|
46
|
+
- lib/embulk/output/clickhouse.rb
|
47
|
+
- src/main/java/org/embulk/output/ClickhouseOutputPlugin.java
|
48
|
+
- src/main/java/org/embulk/output/clickhouse/ClickhouseBatchInsert.java
|
49
|
+
- src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnection.java
|
50
|
+
- src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnector.java
|
51
|
+
- src/main/java/org/embulk/output/clickhouse/setter/ClickhouseColumnSetterFactory.java
|
52
|
+
- src/main/java/org/embulk/output/clickhouse/setter/ClickhouseSqlTimeColumnSetter.java
|
53
|
+
homepage: https://github.com/kakoni/embulk-output-clickhouse
|
54
|
+
licenses:
|
55
|
+
- MIT
|
56
|
+
metadata: {}
|
57
|
+
post_install_message:
|
58
|
+
rdoc_options: []
|
59
|
+
require_paths:
|
60
|
+
- lib
|
61
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
71
|
+
requirements: []
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 2.4.8
|
74
|
+
signing_key:
|
75
|
+
specification_version: 4
|
76
|
+
summary: Clickhouse output plugin for Embulk
|
77
|
+
test_files: []
|