embulk-output-cassandra 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +43 -0
- data/build.gradle +1 -1
- data/docker-compose.yml +6 -0
- data/embulk-output-cassandra.iml +1 -1
- data/example.yml +23 -0
- data/src/main/java/org/embulk/output/cassandra/CassandraOutputPlugin.java +39 -8
- data/src/main/java/org/embulk/output/cassandra/setter/CassandraColumnSetterFactory.java +1 -0
- data/src/test/java/org/embulk/output/cassandra/TestCassandraOutputPlugin.java +42 -0
- data/src/test/resources/org/embulk/output/cassandra/create_table_test_counter.cql +11 -0
- data/src/test/resources/org/embulk/output/cassandra/test1.csv +4 -4
- data/src/test/resources/org/embulk/output/cassandra/test_counter.yaml +3 -0
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ab4a3fa7b0b477c2c6ebed457fa48f248ce7a6ae
|
4
|
+
data.tar.gz: a7355ea7423d856a3533e2b3d4dbbb50d05c60be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a8b7043a53d758515a06274ffd00955013d8533d447e512a01228f17e63e378e9d70a5f149f54e9451b52a74718951261e3e6a6493889f8d6213b06db480ea3
|
7
|
+
data.tar.gz: f51f4f392c31bb8928e97b14ef4059a523578883517ea390e3beac76ffd84dc1f30b464cac999d05e478fc6ae4ad102739a26590b219dd5437901c00debc1181
|
data/README.md
CHANGED
@@ -11,10 +11,53 @@ Apache Cassandra output plugin for Embulk.
|
|
11
11
|
|
12
12
|
## Caution
|
13
13
|
Currently, the version of the netty components conflicts with the one that is used by embulk-core.
|
14
|
+
|
14
15
|
This problem is very severe.
|
16
|
+
|
15
17
|
I tested this plugin on embulk-0.9.7.
|
16
18
|
But future embulk versions may break this plugin.
|
17
19
|
|
20
|
+
## Supported Data Types
|
21
|
+
|
22
|
+
| CQL Type | Embulk Type | Description |
|
23
|
+
| -------- | ----------- | -------------- |
|
24
|
+
| ascii | string, boolean, long, double, timestamp, json | use `toString` or `toJson` |
|
25
|
+
| bigint | string, boolean(as 0 or 1), long, double | |
|
26
|
+
| blob | unsupported | |
|
27
|
+
| boolean | boolean, long, double | 0 == false, 1 == true |
|
28
|
+
| counter | unsupported | |
|
29
|
+
| date | string, timestamp | timestamp use `toEpochMilli` |
|
30
|
+
| decimal | string, boolean(as 0 or 1), long, double | |
|
31
|
+
| double | string, boolean(as 0 or 1), long, double | |
|
32
|
+
| float | string, boolean(as 0 or 1), long, double | |
|
33
|
+
| inet | string | |
|
34
|
+
| int | string, boolean(as 0 or 1), long, double | overflowed value is reset to 0 |
|
35
|
+
| list | json | |
|
36
|
+
| map (support only text key) | json | |
|
37
|
+
| set | json | |
|
38
|
+
| smallint | string, boolean(as 0 or 1), long, double | overflowed value is reset to 0 |
|
39
|
+
| text | string, boolean, long, double, timestamp, json | use `toString` or `toJson` |
|
40
|
+
| time | string, long, double, timestamp | long and double as nano seconds of day,<br>timestamp use `toEpochMilli` |
|
41
|
+
| timestamp | long, double, timestamp | long and double as epoch second |
|
42
|
+
| timeuuid | null |
|
43
|
+
| uuid | null |
|
44
|
+
| varchar | string, boolean, long, double, timestamp, json | use `toString` or `toJson` |
|
45
|
+
| varint | string, boolean(as 0 or 1), long, double | |
|
46
|
+
| UDT | unsupported | |
|
47
|
+
|
48
|
+
## Insert Behavior
|
49
|
+
If an embulk record does not have a column, that column is treated as `unset`.
|
50
|
+
If a record with the same key already exists, that column is not touched.
|
51
|
+
|
52
|
+
### Counter table
|
53
|
+
This plugin supports counter tables.
|
54
|
+
|
55
|
+
But a counter table supports only increment/decrement updates.
|
56
|
+
|
57
|
+
Because of this, this plugin uses the input value as the increment value.
|
58
|
+
|
59
|
+
For example, if the input data is {id: 1, count: 5}, the executed statement is `UPDATE tablename SET count = count + 5 WHERE id = 1`.
|
60
|
+
|
18
61
|
## Configuration
|
19
62
|
|
20
63
|
- **hosts**: list of seed hosts (list<string>, required)
|
data/build.gradle
CHANGED
data/docker-compose.yml
ADDED
data/embulk-output-cassandra.iml
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<module external.linked.project.id="embulk-output-cassandra" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="" external.system.module.version="0.1.
|
2
|
+
<module external.linked.project.id="embulk-output-cassandra" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="" external.system.module.version="0.1.1" type="JAVA_MODULE" version="4">
|
3
3
|
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
4
4
|
<exclude-output />
|
5
5
|
<content url="file://$MODULE_DIR$">
|
data/example.yml
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: ./src/test/resources/org/embulk/output/cassandra/test1.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
skip_header_lines: 1
|
7
|
+
columns:
|
8
|
+
- {name: id, type: string}
|
9
|
+
- {name: int_item, type: long}
|
10
|
+
- {name: int32_item, type: long}
|
11
|
+
- {name: smallint_item, type: long}
|
12
|
+
- {name: tinyint_item, type: long}
|
13
|
+
- {name: boolean_item, type: boolean}
|
14
|
+
- {name: varchar_item, type: string}
|
15
|
+
- {name: timestamp_item, type: timestamp}
|
16
|
+
|
17
|
+
out:
|
18
|
+
type: cassandra
|
19
|
+
hosts:
|
20
|
+
- 127.0.0.1
|
21
|
+
keyspace: embulk_test
|
22
|
+
table: test_basic
|
23
|
+
idempotent: true
|
@@ -3,13 +3,16 @@ package org.embulk.output.cassandra;
|
|
3
3
|
import com.datastax.driver.core.BoundStatement;
|
4
4
|
import com.datastax.driver.core.Cluster;
|
5
5
|
import com.datastax.driver.core.ColumnMetadata;
|
6
|
+
import com.datastax.driver.core.DataType;
|
6
7
|
import com.datastax.driver.core.KeyspaceMetadata;
|
7
8
|
import com.datastax.driver.core.PreparedStatement;
|
8
9
|
import com.datastax.driver.core.Session;
|
9
10
|
import com.datastax.driver.core.SocketOptions;
|
10
11
|
import com.datastax.driver.core.TableMetadata;
|
12
|
+
import com.datastax.driver.core.querybuilder.BuiltStatement;
|
11
13
|
import com.datastax.driver.core.querybuilder.Insert;
|
12
14
|
import com.datastax.driver.core.querybuilder.QueryBuilder;
|
15
|
+
import com.datastax.driver.core.querybuilder.Update;
|
13
16
|
import com.google.common.base.Optional;
|
14
17
|
import com.google.common.collect.ImmutableList;
|
15
18
|
import com.google.common.collect.ImmutableMap;
|
@@ -86,6 +89,10 @@ public class CassandraOutputPlugin
|
|
86
89
|
@Config("request_timeout")
|
87
90
|
@ConfigDefault("12000")
|
88
91
|
public int getRequestTimeout();
|
92
|
+
|
93
|
+
@Config("counter_columnName")
|
94
|
+
@ConfigDefault("null")
|
95
|
+
public Optional<String> getCounterColumnName();
|
89
96
|
}
|
90
97
|
|
91
98
|
private final Logger logger = Exec.getLogger(CassandraOutputPlugin.class);
|
@@ -135,18 +142,42 @@ public class CassandraOutputPlugin
|
|
135
142
|
throw new RuntimeException("table `" + task.getTable() + "` is not found");
|
136
143
|
}
|
137
144
|
|
138
|
-
|
139
|
-
|
140
|
-
|
145
|
+
List<ColumnMetadata> columns = tableMetadata.getColumns();
|
146
|
+
boolean isCounterTable = columns.stream().anyMatch(col -> col.getType().getName() == DataType.Name.COUNTER);
|
147
|
+
|
148
|
+
BuiltStatement query;
|
149
|
+
if (isCounterTable) {
|
150
|
+
Update update = QueryBuilder.update(task.getKeyspace(), task.getTable());
|
151
|
+
if (task.getTtl().isPresent()) {
|
152
|
+
update.using(QueryBuilder.ttl(task.getTtl().get()));
|
153
|
+
}
|
154
|
+
for (ColumnMetadata column : tableMetadata.getColumns()) {
|
155
|
+
if (column.getType().getName() == DataType.Name.COUNTER) {
|
156
|
+
update.with(QueryBuilder.incr(column.getName(), QueryBuilder.bindMarker(column.getName())));
|
157
|
+
}
|
158
|
+
else{
|
159
|
+
update.where(QueryBuilder.eq(column.getName(), QueryBuilder.bindMarker(column.getName())));
|
160
|
+
}
|
161
|
+
}
|
162
|
+
query = update;
|
141
163
|
}
|
142
|
-
|
143
|
-
insert
|
164
|
+
else {
|
165
|
+
Insert insert = QueryBuilder.insertInto(task.getKeyspace(), task.getTable());
|
166
|
+
if (task.getIfNotExists()) {
|
167
|
+
insert.ifNotExists();
|
168
|
+
}
|
169
|
+
if (task.getTtl().isPresent()) {
|
170
|
+
insert.using(QueryBuilder.ttl(task.getTtl().get()));
|
171
|
+
}
|
172
|
+
for (ColumnMetadata column : tableMetadata.getColumns()) {
|
173
|
+
insert.value(column.getName(), QueryBuilder.bindMarker(column.getName()));
|
174
|
+
}
|
175
|
+
query = insert;
|
144
176
|
}
|
145
177
|
|
146
178
|
ImmutableMap.Builder<String, CassandraColumnSetter> columnSettersBuilder = ImmutableMap.builder();
|
147
179
|
ImmutableList.Builder<String> uuidColumnsBuilder = ImmutableList.builder();
|
148
180
|
for (ColumnMetadata column : tableMetadata.getColumns()) {
|
149
|
-
insert.value(column.getName(), QueryBuilder.bindMarker(column.getName()));
|
150
181
|
columnSettersBuilder.put(column.getName(), CassandraColumnSetterFactory.createColumnSetter(column, cluster));
|
151
182
|
switch (column.getType().getName()) {
|
152
183
|
case UUID:
|
@@ -159,9 +190,9 @@ public class CassandraOutputPlugin
|
|
159
190
|
List<ColumnSetterVisitor> columnVisitors = Lists.transform(schema.getColumns(), (column) ->
|
160
191
|
new ColumnSetterVisitor(pageReader, columnSetters.get(column.getName())));
|
161
192
|
|
162
|
-
logger.info("Insert Query: {}",
|
193
|
+
logger.info("Insert Query: {}", query.getQueryString());
|
163
194
|
|
164
|
-
PreparedStatement prepared = session.prepare(
|
195
|
+
PreparedStatement prepared = session.prepare(query);
|
165
196
|
if (task.getIdempotent()) {
|
166
197
|
prepared.setIdempotent(task.getIdempotent());
|
167
198
|
}
|
@@ -3,6 +3,7 @@ package org.embulk.output.cassandra;
|
|
3
3
|
import com.datastax.driver.core.Cluster;
|
4
4
|
import com.datastax.driver.core.DataType;
|
5
5
|
import com.datastax.driver.core.LocalDate;
|
6
|
+
import com.datastax.driver.core.ResultSet;
|
6
7
|
import com.datastax.driver.core.Row;
|
7
8
|
import com.datastax.driver.core.Session;
|
8
9
|
import com.datastax.driver.core.TupleType;
|
@@ -26,6 +27,7 @@ import java.nio.file.Path;
|
|
26
27
|
import java.time.ZoneId;
|
27
28
|
import java.time.ZonedDateTime;
|
28
29
|
import java.util.Arrays;
|
30
|
+
import java.util.Comparator;
|
29
31
|
import java.util.Date;
|
30
32
|
import java.util.List;
|
31
33
|
import java.util.Map;
|
@@ -83,13 +85,16 @@ public class TestCassandraOutputPlugin
|
|
83
85
|
String createTableBasic = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_basic.cql");
|
84
86
|
String createTableUuid = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_uuid.cql");
|
85
87
|
String createTableComplex = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_complex.cql");
|
88
|
+
String createTableCounter = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_counter.cql");
|
86
89
|
session.execute(createKeyspace);
|
87
90
|
session.execute(createTableBasic);
|
88
91
|
session.execute(createTableUuid);
|
89
92
|
session.execute(createTableComplex);
|
93
|
+
session.execute(createTableCounter);
|
90
94
|
session.execute("TRUNCATE embulk_test.test_basic");
|
91
95
|
session.execute("TRUNCATE embulk_test.test_uuid");
|
92
96
|
session.execute("TRUNCATE embulk_test.test_complex");
|
97
|
+
session.execute("TRUNCATE embulk_test.test_counter");
|
93
98
|
}
|
94
99
|
|
95
100
|
@After
|
@@ -139,6 +144,43 @@ public class TestCassandraOutputPlugin
|
|
139
144
|
assertEquals(createDate(2018, 7, 1, 10, 0, 2, 0), row3.getTimestamp("timestamp_item"));
|
140
145
|
}
|
141
146
|
|
147
|
+
@Test
|
148
|
+
public void testCounter() throws IOException
|
149
|
+
{
|
150
|
+
Path input = getInputPath("test1.csv");
|
151
|
+
ConfigSource config = loadYamlResource("test_counter.yaml");
|
152
|
+
config.set("hosts", getCassandraHostAsList());
|
153
|
+
|
154
|
+
assertEquals(0, session.execute("SELECT * FROM embulk_test.test_counter").all().size());
|
155
|
+
|
156
|
+
TestingEmbulk.RunResult result = embulk.runOutput(config, input);
|
157
|
+
assertEquals(3, result.getOutputTaskReports().get(0).get(Long.class, "inserted_record_count").longValue());
|
158
|
+
|
159
|
+
List<Row> rows = session.execute("SELECT * FROM embulk_test.test_counter").all();
|
160
|
+
rows.sort(Comparator.comparing(row -> row.getString("id")));
|
161
|
+
Row row1 = rows.get(0);
|
162
|
+
Row row2 = rows.get(1);
|
163
|
+
Row row3 = rows.get(2);
|
164
|
+
assertEquals("A001", row1.getString("id"));
|
165
|
+
assertEquals(9, row1.getLong("int_item"));
|
166
|
+
assertEquals(1, row1.getInt("int32_item"));
|
167
|
+
assertEquals(2, row1.getShort("smallint_item"));
|
168
|
+
assertTrue(row1.getBool("boolean_item"));
|
169
|
+
assertEquals(createDate(2018, 7, 1, 10, 0, 0, 0), row1.getTimestamp("timestamp_item"));
|
170
|
+
assertEquals("A002", row2.getString("id"));
|
171
|
+
assertEquals(0, row2.getLong("int_item"));
|
172
|
+
assertEquals(0, row2.getInt("int32_item"));
|
173
|
+
assertEquals(4, row2.getShort("smallint_item"));
|
174
|
+
assertTrue(row2.getBool("boolean_item"));
|
175
|
+
assertEquals(createDate(2018, 7, 1, 10, 0, 1, 0), row2.getTimestamp("timestamp_item"));
|
176
|
+
assertEquals("A003", row3.getString("id"));
|
177
|
+
assertEquals(9, row3.getLong("int_item"));
|
178
|
+
assertEquals(0, row3.getInt("int32_item"));
|
179
|
+
assertEquals(8, row3.getShort("smallint_item"));
|
180
|
+
assertFalse(row3.getBool("boolean_item"));
|
181
|
+
assertEquals(createDate(2018, 7, 1, 10, 0, 2, 0), row3.getTimestamp("timestamp_item"));
|
182
|
+
}
|
183
|
+
|
142
184
|
@Test
|
143
185
|
public void testBasicWithTtl() throws IOException
|
144
186
|
{
|
@@ -0,0 +1,11 @@
|
|
1
|
+
CREATE TABLE IF NOT EXISTS embulk_test.test_counter (
|
2
|
+
id text,
|
3
|
+
int_item counter,
|
4
|
+
int32_item int,
|
5
|
+
smallint_item smallint,
|
6
|
+
tinyint_item tinyint,
|
7
|
+
boolean_item boolean,
|
8
|
+
double_item double,
|
9
|
+
timestamp_item timestamp,
|
10
|
+
primary key (id, int32_item, smallint_item, tinyint_item, boolean_item, double_item, timestamp_item)
|
11
|
+
) WITH compaction = { 'class' : 'SizeTieredCompactionStrategy' };
|
@@ -1,4 +1,4 @@
|
|
1
|
-
id:string,int_item:long,int32_item:long,smallint_item:long,tinyint_item:long,boolean_item:boolean,varchar_item:string,timestamp_item:timestamp
|
2
|
-
A001,9,4294967297,2,0,true,a,2018-07-01 10:00:00.000 UTC
|
3
|
-
A002,0,0,4,0,true,b,2018-07-01 10:00:01.000 UTC
|
4
|
-
A003,9,0,8,1,false,c,2018-07-01 10:00:02.000 UTC
|
1
|
+
id:string,int_item:long,int32_item:long,smallint_item:long,tinyint_item:long,boolean_item:boolean,varchar_item:string,timestamp_item:timestamp,double_item:double
|
2
|
+
A001,9,4294967297,2,0,true,a,2018-07-01 10:00:00.000 UTC,1.1
|
3
|
+
A002,0,0,4,0,true,b,2018-07-01 10:00:01.000 UTC,1.2
|
4
|
+
A003,9,0,8,1,false,c,2018-07-01 10:00:02.000 UTC,1.3
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-cassandra
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -55,7 +55,7 @@ files:
|
|
55
55
|
- classpath/asm-tree-5.0.3.jar
|
56
56
|
- classpath/asm-util-5.0.3.jar
|
57
57
|
- classpath/cassandra-driver-core-3.5.0.jar
|
58
|
-
- classpath/embulk-output-cassandra-0.1.1.jar
|
58
|
+
- classpath/embulk-output-cassandra-0.2.0.jar
|
59
59
|
- classpath/guava-19.0.jar
|
60
60
|
- classpath/jffi-1.2.16-native.jar
|
61
61
|
- classpath/jffi-1.2.16.jar
|
@@ -70,7 +70,9 @@ files:
|
|
70
70
|
- classpath/slf4j-api-1.7.25.jar
|
71
71
|
- config/checkstyle/checkstyle.xml
|
72
72
|
- config/checkstyle/default.xml
|
73
|
+
- docker-compose.yml
|
73
74
|
- embulk-output-cassandra.iml
|
75
|
+
- example.yml
|
74
76
|
- gradle/wrapper/gradle-wrapper.jar
|
75
77
|
- gradle/wrapper/gradle-wrapper.properties
|
76
78
|
- gradlew
|
@@ -105,12 +107,14 @@ files:
|
|
105
107
|
- src/test/resources/org/embulk/output/cassandra/create_keyspace.cql
|
106
108
|
- src/test/resources/org/embulk/output/cassandra/create_table_test_basic.cql
|
107
109
|
- src/test/resources/org/embulk/output/cassandra/create_table_test_complex.cql
|
110
|
+
- src/test/resources/org/embulk/output/cassandra/create_table_test_counter.cql
|
108
111
|
- src/test/resources/org/embulk/output/cassandra/create_table_test_uuid.cql
|
109
112
|
- src/test/resources/org/embulk/output/cassandra/test1.csv
|
110
113
|
- src/test/resources/org/embulk/output/cassandra/test2.csv
|
111
114
|
- src/test/resources/org/embulk/output/cassandra/test3.csv
|
112
115
|
- src/test/resources/org/embulk/output/cassandra/test_basic.yaml
|
113
116
|
- src/test/resources/org/embulk/output/cassandra/test_complex.yaml
|
117
|
+
- src/test/resources/org/embulk/output/cassandra/test_counter.yaml
|
114
118
|
- src/test/resources/org/embulk/output/cassandra/test_uuid.yaml
|
115
119
|
homepage: https://github.com/joker1007/embulk-output-cassandra
|
116
120
|
licenses:
|