embulk-output-clickhouse 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/LICENSE +21 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +67 -0
  6. data/build.gradle +95 -0
  7. data/config/checkstyle/checkstyle.xml +128 -0
  8. data/config/checkstyle/default.xml +108 -0
  9. data/default_jdbc_driver/clickhouse-jdbc-0.2.4.jar +0 -0
  10. data/default_jdbc_driver/commons-codec-1.9.jar +0 -0
  11. data/default_jdbc_driver/commons-logging-1.2.jar +0 -0
  12. data/default_jdbc_driver/guava-19.0.jar +0 -0
  13. data/default_jdbc_driver/httpclient-4.5.2.jar +0 -0
  14. data/default_jdbc_driver/httpcore-4.4.4.jar +0 -0
  15. data/default_jdbc_driver/httpmime-4.5.2.jar +0 -0
  16. data/default_jdbc_driver/jackson-annotations-2.7.0.jar +0 -0
  17. data/default_jdbc_driver/jackson-core-2.7.3.jar +0 -0
  18. data/default_jdbc_driver/jackson-databind-2.7.3.jar +0 -0
  19. data/default_jdbc_driver/jaxb-api-2.3.0.jar +0 -0
  20. data/default_jdbc_driver/lz4-1.3.0.jar +0 -0
  21. data/default_jdbc_driver/slf4j-api-1.7.21.jar +0 -0
  22. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  23. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  24. data/gradlew +172 -0
  25. data/gradlew.bat +84 -0
  26. data/lib/embulk/output/clickhouse.rb +3 -0
  27. data/src/main/java/org/embulk/output/ClickhouseOutputPlugin.java +151 -0
  28. data/src/main/java/org/embulk/output/clickhouse/ClickhouseBatchInsert.java +223 -0
  29. data/src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnection.java +40 -0
  30. data/src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnector.java +44 -0
  31. data/src/main/java/org/embulk/output/clickhouse/setter/ClickhouseColumnSetterFactory.java +31 -0
  32. data/src/main/java/org/embulk/output/clickhouse/setter/ClickhouseSqlTimeColumnSetter.java +30 -0
  33. metadata +77 -0
@@ -0,0 +1,223 @@
1
+ package org.embulk.output.clickhouse;
2
+
3
+ import java.io.IOException;
4
+ import java.math.BigDecimal;
5
+ import java.sql.*;
6
+ import java.util.Calendar;
7
+ import java.util.Locale;
8
+
9
+ import org.embulk.output.jdbc.*;
10
+
11
+ import com.google.common.base.Optional;
12
+ import org.embulk.spi.Exec;
13
+ import org.embulk.spi.time.Timestamp;
14
+ import org.slf4j.Logger;
15
+
16
+ public class ClickhouseBatchInsert
17
+ implements BatchInsert {
18
+
19
+ private final Logger logger = Exec.getLogger(StandardBatchInsert.class);
20
+
21
+ private final JdbcOutputConnector connector;
22
+ private final Optional<MergeConfig> mergeConfig;
23
+
24
+ private JdbcOutputConnection connection;
25
+ private PreparedStatement batch;
26
+ private int index;
27
+ private int batchWeight;
28
+ private int batchRows;
29
+ private long totalRows;
30
+ private int[] lastUpdateCounts;
31
+
32
+ public ClickhouseBatchInsert(JdbcOutputConnector connector, Optional <MergeConfig> mergeConfig) throws
33
+ IOException, SQLException
34
+ {
35
+ this.connector = connector;
36
+ this.mergeConfig = mergeConfig;
37
+ }
38
+
39
+ public void prepare(TableIdentifier loadTable, JdbcSchema insertSchema) throws SQLException
40
+ {
41
+ this.connection = connector.connect(true);
42
+ this.index = 1; // PreparedStatement index begings from 1
43
+ this.batchRows = 0;
44
+ this.totalRows = 0;
45
+ this.batch = prepareStatement(loadTable, insertSchema);
46
+ batch.clearBatch();
47
+ }
48
+
49
+ protected PreparedStatement prepareStatement(TableIdentifier loadTable, JdbcSchema insertSchema) throws
50
+ SQLException
51
+ {
52
+ return connection.prepareBatchInsertStatement(loadTable, insertSchema, mergeConfig);
53
+ }
54
+
55
+ public int getBatchWeight()
56
+ {
57
+ return batchWeight;
58
+ }
59
+
60
+ public void add() throws IOException, SQLException
61
+ {
62
+ batch.addBatch();
63
+ index = 1; // PreparedStatement index begins from 1
64
+ batchRows++;
65
+ batchWeight += 32; // add weight as overhead of each rows
66
+ }
67
+
68
+ public void close() throws IOException, SQLException
69
+ {
70
+ if (connection != null) {
71
+ connection.close();
72
+ }
73
+ }
74
+
75
+ public void flush() throws IOException, SQLException
76
+ {
77
+ lastUpdateCounts = new int[]{};
78
+
79
+ if (batchWeight == 0) return;
80
+
81
+ logger.info(String.format("Loading %,d rows", batchRows));
82
+ long startTime = System.currentTimeMillis();
83
+ try {
84
+ lastUpdateCounts = batch.executeBatch(); // here can't use returned value because MySQL Connector/J returns SUCCESS_NO_INFO as a batch result
85
+ double seconds = (System.currentTimeMillis() - startTime) / 1000.0;
86
+
87
+ totalRows += batchRows;
88
+ logger.info(String.format("> %.2f seconds (loaded %,d rows in total)", seconds, totalRows));
89
+
90
+ } catch (BatchUpdateException e) {
91
+ // will be used for retry
92
+ lastUpdateCounts = e.getUpdateCounts();
93
+ throw e;
94
+
95
+ } finally {
96
+ // clear for retry
97
+ batch.clearBatch();
98
+ batchRows = 0;
99
+ batchWeight = 0;
100
+ }
101
+ }
102
+
103
+ @Override
104
+ public int[] getLastUpdateCounts()
105
+ {
106
+ return lastUpdateCounts;
107
+ }
108
+
109
+ public void finish() throws IOException, SQLException
110
+ {
111
+ }
112
+
113
+ public void setNull(int sqlType) throws IOException, SQLException
114
+ {
115
+ batch.setNull(index, sqlType);
116
+ nextColumn(0);
117
+ }
118
+
119
+ public void setBoolean(boolean v) throws IOException, SQLException
120
+ {
121
+ batch.setBoolean(index, v);
122
+ nextColumn(1);
123
+ }
124
+
125
+ public void setByte(byte v) throws IOException, SQLException
126
+ {
127
+ batch.setByte(index, v);
128
+ nextColumn(1);
129
+ }
130
+
131
+ public void setShort(short v) throws IOException, SQLException
132
+ {
133
+ batch.setShort(index, v);
134
+ nextColumn(2);
135
+ }
136
+
137
+ public void setInt(int v) throws IOException, SQLException
138
+ {
139
+ batch.setInt(index, v);
140
+ nextColumn(4);
141
+ }
142
+
143
+ public void setLong(long v) throws IOException, SQLException
144
+ {
145
+ batch.setLong(index, v);
146
+ nextColumn(8);
147
+ }
148
+
149
+ public void setFloat(float v) throws IOException, SQLException
150
+ {
151
+ batch.setFloat(index, v);
152
+ nextColumn(4);
153
+ }
154
+
155
+ public void setDouble(double v) throws IOException, SQLException
156
+ {
157
+ batch.setDouble(index, v);
158
+ nextColumn(8);
159
+ }
160
+
161
+ public void setBigDecimal(BigDecimal v) throws IOException, SQLException
162
+ {
163
+ // use estimated number of necessary bytes + 8 byte for the weight
164
+ // assuming one place needs 4 bits. ceil(v.precision() / 2.0) + 8
165
+ batch.setBigDecimal(index, v);
166
+ nextColumn((v.precision() & ~2) / 2 + 8);
167
+ }
168
+
169
+ public void setString(String v) throws IOException, SQLException
170
+ {
171
+ batch.setString(index, v);
172
+ // estimate all chracters use 2 bytes; almost enough for the worst case
173
+ nextColumn(v.length() * 2 + 4);
174
+ }
175
+
176
+ public void setNString(String v) throws IOException, SQLException
177
+ {
178
+ batch.setNString(index, v);
179
+ // estimate all chracters use 2 bytes; almost enough for the worst case
180
+ nextColumn(v.length() * 2 + 4);
181
+ }
182
+
183
+ public void setBytes(byte[] v) throws IOException, SQLException
184
+ {
185
+ batch.setBytes(index, v);
186
+ nextColumn(v.length + 4);
187
+ }
188
+
189
+ public void setSqlDate(Timestamp v, Calendar cal) throws IOException, SQLException
190
+ {
191
+ // JavaDoc of java.sql.Time says:
192
+ // >> To conform with the definition of SQL DATE, the millisecond values wrapped by a java.sql.Date instance must be 'normalized' by setting the hours, minutes, seconds, and milliseconds to zero in the particular time zone with which the instance is associated.
193
+ cal.setTimeInMillis(v.getEpochSecond() * 1000);
194
+ cal.set(Calendar.SECOND, 0);
195
+ cal.set(Calendar.MINUTE, 0);
196
+ cal.set(Calendar.HOUR_OF_DAY, 0);
197
+ Date normalized = new Date(cal.getTimeInMillis());
198
+ batch.setDate(index, normalized, cal);
199
+ nextColumn(32);
200
+ }
201
+
202
+ public void setSqlTime(Timestamp v, Calendar cal) throws IOException, SQLException
203
+ {
204
+ Time t = new Time(v.toEpochMilli());
205
+ batch.setTime(index, t, cal);
206
+ nextColumn(32);
207
+ }
208
+
209
+ public void setSqlTimestamp(Timestamp v, Calendar cal) throws IOException, SQLException
210
+ {
211
+ java.sql.Timestamp t = new java.sql.Timestamp(v.getEpochSecond() * 1000);
212
+ t.setNanos(v.getNano());
213
+ batch.setTimestamp(index, t);
214
+ nextColumn(32);
215
+ }
216
+
217
+ private void nextColumn(int weight)
218
+ {
219
+ index++;
220
+ batchWeight += weight + 4; // add weight as overhead of each columns
221
+
222
+ }
223
+ }
@@ -0,0 +1,40 @@
1
+ package org.embulk.output.clickhouse;
2
+
3
+ import java.util.List;
4
+ import java.sql.Connection;
5
+ import java.sql.SQLException;
6
+
7
+ import com.google.common.base.Optional;
8
+ import org.embulk.output.jdbc.JdbcColumn;
9
+ import org.embulk.output.jdbc.JdbcSchema;
10
+ import org.embulk.output.jdbc.JdbcOutputConnection;
11
+ import org.embulk.output.jdbc.MergeConfig;
12
+ import org.embulk.output.jdbc.TableIdentifier;
13
+ import ru.yandex.clickhouse.domain.ClickHouseDataType;
14
+
15
+ public class ClickhouseOutputConnection
16
+ extends JdbcOutputConnection
17
+ {
18
+ public ClickhouseOutputConnection(Connection connection)
19
+ throws SQLException
20
+ {
21
+ super(connection, null);
22
+ }
23
+
24
+ @Override
25
+ protected String buildColumnTypeName(JdbcColumn c)
26
+ {
27
+ switch(c.getSimpleTypeName()) {
28
+ case "CLOB":
29
+ return "String";
30
+ case "DOUBLE PRECISION":
31
+ return "Float64";
32
+ case "DATETIME64(3)":
33
+ return "DateTime64";
34
+
35
+ default:
36
+ return ClickHouseDataType.fromTypeString(c.getSimpleTypeName()).toString();
37
+ }
38
+ }
39
+
40
+ }
@@ -0,0 +1,44 @@
1
+ package org.embulk.output.clickhouse;
2
+
3
+ import java.util.Properties;
4
+ import java.sql.Connection;
5
+ import java.sql.DriverManager;
6
+ import java.sql.SQLException;
7
+
8
+ import org.embulk.output.jdbc.AbstractJdbcOutputConnector;
9
+ import org.embulk.output.jdbc.JdbcOutputConnection;
10
+ import org.embulk.output.jdbc.TransactionIsolation;
11
+
12
+ import com.google.common.base.Optional;
13
+
14
+ public class ClickhouseOutputConnector
15
+ extends AbstractJdbcOutputConnector
16
+ {
17
+
18
+ private final String url;
19
+ private final Properties properties;
20
+
21
+ public ClickhouseOutputConnector(String url, Properties properties,
22
+ Optional<TransactionIsolation> transactionIsolation)
23
+ {
24
+ super(transactionIsolation);
25
+
26
+ this.url = url;
27
+ this.properties = properties;
28
+ }
29
+
30
+ @Override
31
+ protected JdbcOutputConnection connect() throws SQLException
32
+ {
33
+ Connection c = DriverManager.getConnection(url, properties);
34
+ try {
35
+ ClickhouseOutputConnection con = new ClickhouseOutputConnection(c);
36
+ c = null;
37
+ return con;
38
+ } finally {
39
+ if (c != null) {
40
+ c.close();
41
+ }
42
+ }
43
+ }
44
+ }
@@ -0,0 +1,31 @@
1
+ package org.embulk.output.clickhouse.setter;
2
+
3
+ import org.embulk.output.jdbc.BatchInsert;
4
+ import org.embulk.output.jdbc.JdbcColumn;
5
+ import org.embulk.output.jdbc.JdbcColumnOption;
6
+ import org.embulk.output.jdbc.setter.ColumnSetter;
7
+ import org.embulk.output.jdbc.setter.ColumnSetterFactory;
8
+ import org.embulk.output.jdbc.setter.SqlTimestampColumnSetter;
9
+ import org.embulk.output.jdbc.setter.StringColumnSetter;
10
+ import org.joda.time.DateTimeZone;
11
+
12
+ public class ClickhouseColumnSetterFactory
13
+ extends ColumnSetterFactory
14
+ {
15
+ public ClickhouseColumnSetterFactory(BatchInsert batch, DateTimeZone defaultTimeZone)
16
+ {
17
+ super(batch, defaultTimeZone);
18
+ }
19
+
20
+ @Override
21
+ public ColumnSetter newCoalesceColumnSetter(JdbcColumn column, JdbcColumnOption option)
22
+ {
23
+ if (column.getSimpleTypeName().equalsIgnoreCase("datetime64(3)")) {
24
+ // actually "timestamp"
25
+ return new ClickhouseSqlTimeColumnSetter(batch, column, newDefaultValueSetter(column, option), newCalendar(option));
26
+ } else {
27
+ return super.newCoalesceColumnSetter(column, option);
28
+ }
29
+ }
30
+
31
+ }
@@ -0,0 +1,30 @@
1
+ package org.embulk.output.clickhouse.setter;
2
+
3
+ import java.io.IOException;
4
+ import java.sql.SQLException;
5
+ import java.util.Calendar;
6
+
7
+ import org.embulk.output.jdbc.BatchInsert;
8
+ import org.embulk.output.jdbc.JdbcColumn;
9
+ import org.embulk.output.jdbc.setter.DefaultValueSetter;
10
+ import org.embulk.output.jdbc.setter.SqlTimeColumnSetter;
11
+ import org.embulk.spi.time.Timestamp;
12
+
13
+ public class ClickhouseSqlTimeColumnSetter
14
+ extends SqlTimeColumnSetter
15
+ {
16
+
17
+ public ClickhouseSqlTimeColumnSetter(BatchInsert batch, JdbcColumn column,
18
+ DefaultValueSetter defaultValue,
19
+ Calendar calendar)
20
+ {
21
+ super(batch, column, defaultValue, calendar);
22
+ }
23
+
24
+ @Override
25
+ public void timestampValue(Timestamp v) throws IOException, SQLException
26
+ {
27
+ batch.setSqlTimestamp(v, calendar);
28
+ }
29
+
30
+ }
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-output-clickhouse
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.4
5
+ platform: ruby
6
+ authors:
7
+ - Karri Niemelä
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-02-10 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Dumps records to Clickhouse.
14
+ email:
15
+ - karri.niemela@beans.fi
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - ".gitignore"
21
+ - LICENSE
22
+ - LICENSE.txt
23
+ - README.md
24
+ - build.gradle
25
+ - classpath/embulk-output-clickhouse-0.1.4.jar
26
+ - classpath/embulk-output-jdbc-0.8.7.jar
27
+ - config/checkstyle/checkstyle.xml
28
+ - config/checkstyle/default.xml
29
+ - default_jdbc_driver/clickhouse-jdbc-0.2.4.jar
30
+ - default_jdbc_driver/commons-codec-1.9.jar
31
+ - default_jdbc_driver/commons-logging-1.2.jar
32
+ - default_jdbc_driver/guava-19.0.jar
33
+ - default_jdbc_driver/httpclient-4.5.2.jar
34
+ - default_jdbc_driver/httpcore-4.4.4.jar
35
+ - default_jdbc_driver/httpmime-4.5.2.jar
36
+ - default_jdbc_driver/jackson-annotations-2.7.0.jar
37
+ - default_jdbc_driver/jackson-core-2.7.3.jar
38
+ - default_jdbc_driver/jackson-databind-2.7.3.jar
39
+ - default_jdbc_driver/jaxb-api-2.3.0.jar
40
+ - default_jdbc_driver/lz4-1.3.0.jar
41
+ - default_jdbc_driver/slf4j-api-1.7.21.jar
42
+ - gradle/wrapper/gradle-wrapper.jar
43
+ - gradle/wrapper/gradle-wrapper.properties
44
+ - gradlew
45
+ - gradlew.bat
46
+ - lib/embulk/output/clickhouse.rb
47
+ - src/main/java/org/embulk/output/ClickhouseOutputPlugin.java
48
+ - src/main/java/org/embulk/output/clickhouse/ClickhouseBatchInsert.java
49
+ - src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnection.java
50
+ - src/main/java/org/embulk/output/clickhouse/ClickhouseOutputConnector.java
51
+ - src/main/java/org/embulk/output/clickhouse/setter/ClickhouseColumnSetterFactory.java
52
+ - src/main/java/org/embulk/output/clickhouse/setter/ClickhouseSqlTimeColumnSetter.java
53
+ homepage: https://github.com/kakoni/embulk-output-clickhouse
54
+ licenses:
55
+ - MIT
56
+ metadata: {}
57
+ post_install_message:
58
+ rdoc_options: []
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ requirements: []
72
+ rubyforge_project:
73
+ rubygems_version: 2.4.8
74
+ signing_key:
75
+ specification_version: 4
76
+ summary: Clickhouse output plugin for Embulk
77
+ test_files: []