embulk-output-clickhouse 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/LICENSE +21 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +67 -0
  6. data/build.gradle +95 -0
  7. data/config/checkstyle/checkstyle.xml +128 -0
  8. data/config/checkstyle/default.xml +108 -0
  9. data/default_jdbc_driver/clickhouse-jdbc-0.2.4.jar +0 -0
  10. data/default_jdbc_driver/commons-codec-1.9.jar +0 -0
  11. data/default_jdbc_driver/commons-logging-1.2.jar +0 -0
  12. data/default_jdbc_driver/guava-19.0.jar +0 -0
  13. data/default_jdbc_driver/httpclient-4.5.2.jar +0 -0
  14. data/default_jdbc_driver/httpcore-4.4.4.jar +0 -0
  15. data/default_jdbc_driver/httpmime-4.5.2.jar +0 -0
  16. data/default_jdbc_driver/jackson-annotations-2.7.0.jar +0 -0
  17. data/default_jdbc_driver/jackson-core-2.7.3.jar +0 -0
  18. data/default_jdbc_driver/jackson-databind-2.7.3.jar +0 -0
  19. data/default_jdbc_driver/jaxb-api-2.3.0.jar +0 -0
  20. data/default_jdbc_driver/lz4-1.3.0.jar +0 -0
  21. data/default_jdbc_driver/slf4j-api-1.7.21.jar +0 -0
  22. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  23. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  24. data/gradlew +172 -0
  25. data/gradlew.bat +84 -0
  26. data/lib/embulk/output/clickhouse.rb +3 -0
  27. data/src/main/java/org/embulk/output/ClickhouseOutputPlugin.java +151 -0
  28. data/src/main/java/org/embulk/output/clickhouse/ClickhouseBatchInsert.java +223 -0
  29. data/src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnection.java +40 -0
  30. data/src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnector.java +44 -0
  31. data/src/main/java/org/embulk/output/clickhouse/setter/ClickhouseColumnSetterFactory.java +31 -0
  32. data/src/main/java/org/embulk/output/clickhouse/setter/ClickhouseSqlTimeColumnSetter.java +30 -0
  33. metadata +77 -0
@@ -0,0 +1,223 @@
1
+ package org.embulk.output.clickhouse;
2
+
3
+ import java.io.IOException;
4
+ import java.math.BigDecimal;
5
+ import java.sql.*;
6
+ import java.util.Calendar;
7
+ import java.util.Locale;
8
+
9
+ import org.embulk.output.jdbc.*;
10
+
11
+ import com.google.common.base.Optional;
12
+ import org.embulk.spi.Exec;
13
+ import org.embulk.spi.time.Timestamp;
14
+ import org.slf4j.Logger;
15
+
16
+ public class ClickhouseBatchInsert
17
+ implements BatchInsert {
18
+
19
+ private final Logger logger = Exec.getLogger(StandardBatchInsert.class);
20
+
21
+ private final JdbcOutputConnector connector;
22
+ private final Optional<MergeConfig> mergeConfig;
23
+
24
+ private JdbcOutputConnection connection;
25
+ private PreparedStatement batch;
26
+ private int index;
27
+ private int batchWeight;
28
+ private int batchRows;
29
+ private long totalRows;
30
+ private int[] lastUpdateCounts;
31
+
32
+ public ClickhouseBatchInsert(JdbcOutputConnector connector, Optional <MergeConfig> mergeConfig) throws
33
+ IOException, SQLException
34
+ {
35
+ this.connector = connector;
36
+ this.mergeConfig = mergeConfig;
37
+ }
38
+
39
+ public void prepare(TableIdentifier loadTable, JdbcSchema insertSchema) throws SQLException
40
+ {
41
+ this.connection = connector.connect(true);
42
+ this.index = 1; // PreparedStatement index begings from 1
43
+ this.batchRows = 0;
44
+ this.totalRows = 0;
45
+ this.batch = prepareStatement(loadTable, insertSchema);
46
+ batch.clearBatch();
47
+ }
48
+
49
+ protected PreparedStatement prepareStatement(TableIdentifier loadTable, JdbcSchema insertSchema) throws
50
+ SQLException
51
+ {
52
+ return connection.prepareBatchInsertStatement(loadTable, insertSchema, mergeConfig);
53
+ }
54
+
55
+ public int getBatchWeight()
56
+ {
57
+ return batchWeight;
58
+ }
59
+
60
+ public void add() throws IOException, SQLException
61
+ {
62
+ batch.addBatch();
63
+ index = 1; // PreparedStatement index begins from 1
64
+ batchRows++;
65
+ batchWeight += 32; // add weight as overhead of each rows
66
+ }
67
+
68
+ public void close() throws IOException, SQLException
69
+ {
70
+ if (connection != null) {
71
+ connection.close();
72
+ }
73
+ }
74
+
75
+ public void flush() throws IOException, SQLException
76
+ {
77
+ lastUpdateCounts = new int[]{};
78
+
79
+ if (batchWeight == 0) return;
80
+
81
+ logger.info(String.format("Loading %,d rows", batchRows));
82
+ long startTime = System.currentTimeMillis();
83
+ try {
84
+ lastUpdateCounts = batch.executeBatch(); // here can't use returned value because MySQL Connector/J returns SUCCESS_NO_INFO as a batch result
85
+ double seconds = (System.currentTimeMillis() - startTime) / 1000.0;
86
+
87
+ totalRows += batchRows;
88
+ logger.info(String.format("> %.2f seconds (loaded %,d rows in total)", seconds, totalRows));
89
+
90
+ } catch (BatchUpdateException e) {
91
+ // will be used for retry
92
+ lastUpdateCounts = e.getUpdateCounts();
93
+ throw e;
94
+
95
+ } finally {
96
+ // clear for retry
97
+ batch.clearBatch();
98
+ batchRows = 0;
99
+ batchWeight = 0;
100
+ }
101
+ }
102
+
103
+ @Override
104
+ public int[] getLastUpdateCounts()
105
+ {
106
+ return lastUpdateCounts;
107
+ }
108
+
109
+ public void finish() throws IOException, SQLException
110
+ {
111
+ }
112
+
113
+ public void setNull(int sqlType) throws IOException, SQLException
114
+ {
115
+ batch.setNull(index, sqlType);
116
+ nextColumn(0);
117
+ }
118
+
119
+ public void setBoolean(boolean v) throws IOException, SQLException
120
+ {
121
+ batch.setBoolean(index, v);
122
+ nextColumn(1);
123
+ }
124
+
125
+ public void setByte(byte v) throws IOException, SQLException
126
+ {
127
+ batch.setByte(index, v);
128
+ nextColumn(1);
129
+ }
130
+
131
+ public void setShort(short v) throws IOException, SQLException
132
+ {
133
+ batch.setShort(index, v);
134
+ nextColumn(2);
135
+ }
136
+
137
+ public void setInt(int v) throws IOException, SQLException
138
+ {
139
+ batch.setInt(index, v);
140
+ nextColumn(4);
141
+ }
142
+
143
+ public void setLong(long v) throws IOException, SQLException
144
+ {
145
+ batch.setLong(index, v);
146
+ nextColumn(8);
147
+ }
148
+
149
+ public void setFloat(float v) throws IOException, SQLException
150
+ {
151
+ batch.setFloat(index, v);
152
+ nextColumn(4);
153
+ }
154
+
155
+ public void setDouble(double v) throws IOException, SQLException
156
+ {
157
+ batch.setDouble(index, v);
158
+ nextColumn(8);
159
+ }
160
+
161
+ public void setBigDecimal(BigDecimal v) throws IOException, SQLException
162
+ {
163
+ // use estimated number of necessary bytes + 8 byte for the weight
164
+ // assuming one place needs 4 bits. ceil(v.precision() / 2.0) + 8
165
+ batch.setBigDecimal(index, v);
166
+ nextColumn((v.precision() & ~2) / 2 + 8);
167
+ }
168
+
169
+ public void setString(String v) throws IOException, SQLException
170
+ {
171
+ batch.setString(index, v);
172
+ // estimate all chracters use 2 bytes; almost enough for the worst case
173
+ nextColumn(v.length() * 2 + 4);
174
+ }
175
+
176
+ public void setNString(String v) throws IOException, SQLException
177
+ {
178
+ batch.setNString(index, v);
179
+ // estimate all chracters use 2 bytes; almost enough for the worst case
180
+ nextColumn(v.length() * 2 + 4);
181
+ }
182
+
183
+ public void setBytes(byte[] v) throws IOException, SQLException
184
+ {
185
+ batch.setBytes(index, v);
186
+ nextColumn(v.length + 4);
187
+ }
188
+
189
+ public void setSqlDate(Timestamp v, Calendar cal) throws IOException, SQLException
190
+ {
191
+ // JavaDoc of java.sql.Time says:
192
+ // >> To conform with the definition of SQL DATE, the millisecond values wrapped by a java.sql.Date instance must be 'normalized' by setting the hours, minutes, seconds, and milliseconds to zero in the particular time zone with which the instance is associated.
193
+ cal.setTimeInMillis(v.getEpochSecond() * 1000);
194
+ cal.set(Calendar.SECOND, 0);
195
+ cal.set(Calendar.MINUTE, 0);
196
+ cal.set(Calendar.HOUR_OF_DAY, 0);
197
+ Date normalized = new Date(cal.getTimeInMillis());
198
+ batch.setDate(index, normalized, cal);
199
+ nextColumn(32);
200
+ }
201
+
202
+ public void setSqlTime(Timestamp v, Calendar cal) throws IOException, SQLException
203
+ {
204
+ Time t = new Time(v.toEpochMilli());
205
+ batch.setTime(index, t, cal);
206
+ nextColumn(32);
207
+ }
208
+
209
+ public void setSqlTimestamp(Timestamp v, Calendar cal) throws IOException, SQLException
210
+ {
211
+ java.sql.Timestamp t = new java.sql.Timestamp(v.getEpochSecond() * 1000);
212
+ t.setNanos(v.getNano());
213
+ batch.setTimestamp(index, t);
214
+ nextColumn(32);
215
+ }
216
+
217
+ private void nextColumn(int weight)
218
+ {
219
+ index++;
220
+ batchWeight += weight + 4; // add weight as overhead of each columns
221
+
222
+ }
223
+ }
@@ -0,0 +1,40 @@
1
+ package org.embulk.output.clickhouse;
2
+
3
+ import java.util.List;
4
+ import java.sql.Connection;
5
+ import java.sql.SQLException;
6
+
7
+ import com.google.common.base.Optional;
8
+ import org.embulk.output.jdbc.JdbcColumn;
9
+ import org.embulk.output.jdbc.JdbcSchema;
10
+ import org.embulk.output.jdbc.JdbcOutputConnection;
11
+ import org.embulk.output.jdbc.MergeConfig;
12
+ import org.embulk.output.jdbc.TableIdentifier;
13
+ import ru.yandex.clickhouse.domain.ClickHouseDataType;
14
+
15
+ public class ClickhouseOutputConnection
16
+ extends JdbcOutputConnection
17
+ {
18
+ public ClickhouseOutputConnection(Connection connection)
19
+ throws SQLException
20
+ {
21
+ super(connection, null);
22
+ }
23
+
24
+ @Override
25
+ protected String buildColumnTypeName(JdbcColumn c)
26
+ {
27
+ switch(c.getSimpleTypeName()) {
28
+ case "CLOB":
29
+ return "String";
30
+ case "DOUBLE PRECISION":
31
+ return "Float64";
32
+ case "DATETIME64(3)":
33
+ return "DateTime64";
34
+
35
+ default:
36
+ return ClickHouseDataType.fromTypeString(c.getSimpleTypeName()).toString();
37
+ }
38
+ }
39
+
40
+ }
@@ -0,0 +1,44 @@
1
+ package org.embulk.output.clickhouse;
2
+
3
+ import java.util.Properties;
4
+ import java.sql.Connection;
5
+ import java.sql.DriverManager;
6
+ import java.sql.SQLException;
7
+
8
+ import org.embulk.output.jdbc.AbstractJdbcOutputConnector;
9
+ import org.embulk.output.jdbc.JdbcOutputConnection;
10
+ import org.embulk.output.jdbc.TransactionIsolation;
11
+
12
+ import com.google.common.base.Optional;
13
+
14
+ public class ClickhouseOutputConnector
15
+ extends AbstractJdbcOutputConnector
16
+ {
17
+
18
+ private final String url;
19
+ private final Properties properties;
20
+
21
+ public ClickhouseOutputConnector(String url, Properties properties,
22
+ Optional<TransactionIsolation> transactionIsolation)
23
+ {
24
+ super(transactionIsolation);
25
+
26
+ this.url = url;
27
+ this.properties = properties;
28
+ }
29
+
30
+ @Override
31
+ protected JdbcOutputConnection connect() throws SQLException
32
+ {
33
+ Connection c = DriverManager.getConnection(url, properties);
34
+ try {
35
+ ClickhouseOutputConnection con = new ClickhouseOutputConnection(c);
36
+ c = null;
37
+ return con;
38
+ } finally {
39
+ if (c != null) {
40
+ c.close();
41
+ }
42
+ }
43
+ }
44
+ }
@@ -0,0 +1,31 @@
1
+ package org.embulk.output.clickhouse.setter;
2
+
3
+ import org.embulk.output.jdbc.BatchInsert;
4
+ import org.embulk.output.jdbc.JdbcColumn;
5
+ import org.embulk.output.jdbc.JdbcColumnOption;
6
+ import org.embulk.output.jdbc.setter.ColumnSetter;
7
+ import org.embulk.output.jdbc.setter.ColumnSetterFactory;
8
+ import org.embulk.output.jdbc.setter.SqlTimestampColumnSetter;
9
+ import org.embulk.output.jdbc.setter.StringColumnSetter;
10
+ import org.joda.time.DateTimeZone;
11
+
12
+ public class ClickhouseColumnSetterFactory
13
+ extends ColumnSetterFactory
14
+ {
15
+ public ClickhouseColumnSetterFactory(BatchInsert batch, DateTimeZone defaultTimeZone)
16
+ {
17
+ super(batch, defaultTimeZone);
18
+ }
19
+
20
+ @Override
21
+ public ColumnSetter newCoalesceColumnSetter(JdbcColumn column, JdbcColumnOption option)
22
+ {
23
+ if (column.getSimpleTypeName().equalsIgnoreCase("datetime64(3)")) {
24
+ // actually "timestamp"
25
+ return new ClickhouseSqlTimeColumnSetter(batch, column, newDefaultValueSetter(column, option), newCalendar(option));
26
+ } else {
27
+ return super.newCoalesceColumnSetter(column, option);
28
+ }
29
+ }
30
+
31
+ }
@@ -0,0 +1,30 @@
1
+ package org.embulk.output.clickhouse.setter;
2
+
3
+ import java.io.IOException;
4
+ import java.sql.SQLException;
5
+ import java.util.Calendar;
6
+
7
+ import org.embulk.output.jdbc.BatchInsert;
8
+ import org.embulk.output.jdbc.JdbcColumn;
9
+ import org.embulk.output.jdbc.setter.DefaultValueSetter;
10
+ import org.embulk.output.jdbc.setter.SqlTimeColumnSetter;
11
+ import org.embulk.spi.time.Timestamp;
12
+
13
+ public class ClickhouseSqlTimeColumnSetter
14
+ extends SqlTimeColumnSetter
15
+ {
16
+
17
+ public ClickhouseSqlTimeColumnSetter(BatchInsert batch, JdbcColumn column,
18
+ DefaultValueSetter defaultValue,
19
+ Calendar calendar)
20
+ {
21
+ super(batch, column, defaultValue, calendar);
22
+ }
23
+
24
+ @Override
25
+ public void timestampValue(Timestamp v) throws IOException, SQLException
26
+ {
27
+ batch.setSqlTimestamp(v, calendar);
28
+ }
29
+
30
+ }
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-output-clickhouse
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.4
5
+ platform: ruby
6
+ authors:
7
+ - Karri Niemelä
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-02-10 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Dumps records to Clickhouse.
14
+ email:
15
+ - karri.niemela@beans.fi
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - ".gitignore"
21
+ - LICENSE
22
+ - LICENSE.txt
23
+ - README.md
24
+ - build.gradle
25
+ - classpath/embulk-output-clickhouse-0.1.4.jar
26
+ - classpath/embulk-output-jdbc-0.8.7.jar
27
+ - config/checkstyle/checkstyle.xml
28
+ - config/checkstyle/default.xml
29
+ - default_jdbc_driver/clickhouse-jdbc-0.2.4.jar
30
+ - default_jdbc_driver/commons-codec-1.9.jar
31
+ - default_jdbc_driver/commons-logging-1.2.jar
32
+ - default_jdbc_driver/guava-19.0.jar
33
+ - default_jdbc_driver/httpclient-4.5.2.jar
34
+ - default_jdbc_driver/httpcore-4.4.4.jar
35
+ - default_jdbc_driver/httpmime-4.5.2.jar
36
+ - default_jdbc_driver/jackson-annotations-2.7.0.jar
37
+ - default_jdbc_driver/jackson-core-2.7.3.jar
38
+ - default_jdbc_driver/jackson-databind-2.7.3.jar
39
+ - default_jdbc_driver/jaxb-api-2.3.0.jar
40
+ - default_jdbc_driver/lz4-1.3.0.jar
41
+ - default_jdbc_driver/slf4j-api-1.7.21.jar
42
+ - gradle/wrapper/gradle-wrapper.jar
43
+ - gradle/wrapper/gradle-wrapper.properties
44
+ - gradlew
45
+ - gradlew.bat
46
+ - lib/embulk/output/clickhouse.rb
47
+ - src/main/java/org/embulk/output/ClickhouseOutputPlugin.java
48
+ - src/main/java/org/embulk/output/clickhouse/ClickhouseBatchInsert.java
49
+ - src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnection.java
50
+ - src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnector.java
51
+ - src/main/java/org/embulk/output/clickhouse/setter/ClickhouseColumnSetterFactory.java
52
+ - src/main/java/org/embulk/output/clickhouse/setter/ClickhouseSqlTimeColumnSetter.java
53
+ homepage: https://github.com/kakoni/embulk-output-clickhouse
54
+ licenses:
55
+ - MIT
56
+ metadata: {}
57
+ post_install_message:
58
+ rdoc_options: []
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ requirements: []
72
+ rubyforge_project:
73
+ rubygems_version: 2.4.8
74
+ signing_key:
75
+ specification_version: 4
76
+ summary: Clickhouse output plugin for Embulk
77
+ test_files: []