embulk-output-cassandra 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +43 -0
- data/build.gradle +1 -1
- data/docker-compose.yml +6 -0
- data/embulk-output-cassandra.iml +1 -1
- data/example.yml +23 -0
- data/src/main/java/org/embulk/output/cassandra/CassandraOutputPlugin.java +39 -8
- data/src/main/java/org/embulk/output/cassandra/setter/CassandraColumnSetterFactory.java +1 -0
- data/src/test/java/org/embulk/output/cassandra/TestCassandraOutputPlugin.java +42 -0
- data/src/test/resources/org/embulk/output/cassandra/create_table_test_counter.cql +11 -0
- data/src/test/resources/org/embulk/output/cassandra/test1.csv +4 -4
- data/src/test/resources/org/embulk/output/cassandra/test_counter.yaml +3 -0
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ab4a3fa7b0b477c2c6ebed457fa48f248ce7a6ae
|
4
|
+
data.tar.gz: a7355ea7423d856a3533e2b3d4dbbb50d05c60be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a8b7043a53d758515a06274ffd00955013d8533d447e512a01228f17e63e378e9d70a5f149f54e9451b52a74718951261e3e6a6493889f8d6213b06db480ea3
|
7
|
+
data.tar.gz: f51f4f392c31bb8928e97b14ef4059a523578883517ea390e3beac76ffd84dc1f30b464cac999d05e478fc6ae4ad102739a26590b219dd5437901c00debc1181
|
data/README.md
CHANGED
@@ -11,10 +11,53 @@ Apache Cassandra output plugin for Embulk.
|
|
11
11
|
|
12
12
|
## Caution
|
13
13
|
Currently, the version of the netty components conflicts with the one that is used by embulk-core.
|
14
|
+
|
14
15
|
This problem is very severe.
|
16
|
+
|
15
17
|
I tested this plugin on embulk-0.9.7.
|
16
18
|
But future embulk versions may break this plugin.
|
17
19
|
|
20
|
+
## Supported Data Types
|
21
|
+
|
22
|
+
| CQL Type | Embulk Type | Description |
|
23
|
+
| -------- | ----------- | -------------- |
|
24
|
+
| ascii | string, boolean, long, double, timestamp, json | use `toString` or `toJson` |
|
25
|
+
| bigint | string, boolean(as 0 or 1), long, double | |
|
26
|
+
| blob | unsupported | |
|
27
|
+
| boolean | boolean, long, double | 0 == false, 1 == true |
|
28
|
+
| counter | unsupported | |
|
29
|
+
| date | string, timestamp | timestamp use `toEpochMilli` |
|
30
|
+
| decimal | string, boolean(as 0 or 1), long, double | |
|
31
|
+
| double | string, boolean(as 0 or 1), long, double | |
|
32
|
+
| float | string, boolean(as 0 or 1), long, double | |
|
33
|
+
| inet | string | |
|
34
|
+
| int | string, boolean(as 0 or 1), long, double | overflowed value is reset to 0 |
|
35
|
+
| list | json | |
|
36
|
+
| map (support only text key) | json | |
|
37
|
+
| set | json | |
|
38
|
+
| smallint | string, boolean(as 0 or 1), long, double | overflowed value is reset to 0 |
|
39
|
+
| text | string, boolean, long, double, timestamp, json | use `toString` or `toJson` |
|
40
|
+
| time | string, long, double, timestamp | long and double as nano seconds of day,<br>timestamp use `toEpochMilli` |
|
41
|
+
| timestamp | long, double, timestamp | long and double as epoch second |
|
42
|
+
| timeuuid | null | |
|
43
|
+
| uuid | null | |
|
44
|
+
| varchar | string, boolean, long, double, timestamp, json | use `toString` or `toJson` |
|
45
|
+
| varint | string, boolean(as 0 or 1), long, double | |
|
46
|
+
| UDT | unsupported | |
|
47
|
+
|
48
|
+
## Insert Behavior
|
49
|
+
If embulk record does not have a column, it is treated as `unset`.
|
50
|
+
If a record with the same key already exists, the column is not touched.
|
51
|
+
|
52
|
+
### Counter table
|
53
|
+
This plugin supports counter tables.
|
54
|
+
|
55
|
+
However, counter tables support only increment/decrement updates.
|
56
|
+
|
57
|
+
Because of this, the plugin uses the input value as the increment value.
|
58
|
+
|
59
|
+
For example, if the input data is {id: 1, count: 5}, the executed statement is `UPDATE tablename SET count = count + 5 WHERE id = 1`.
|
60
|
+
|
18
61
|
## Configuration
|
19
62
|
|
20
63
|
- **hosts**: list of seed hosts (list<string>, required)
|
data/build.gradle
CHANGED
data/docker-compose.yml
ADDED
data/embulk-output-cassandra.iml
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<module external.linked.project.id="embulk-output-cassandra" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="" external.system.module.version="0.1.
|
2
|
+
<module external.linked.project.id="embulk-output-cassandra" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="" external.system.module.version="0.1.1" type="JAVA_MODULE" version="4">
|
3
3
|
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
4
4
|
<exclude-output />
|
5
5
|
<content url="file://$MODULE_DIR$">
|
data/example.yml
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: ./src/test/resources/org/embulk/output/cassandra/test1.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
skip_header_lines: 1
|
7
|
+
columns:
|
8
|
+
- {name: id, type: string}
|
9
|
+
- {name: int_item, type: long}
|
10
|
+
- {name: int32_item, type: long}
|
11
|
+
- {name: smallint_item, type: long}
|
12
|
+
- {name: tinyint_item, type: long}
|
13
|
+
- {name: boolean_item, type: boolean}
|
14
|
+
- {name: varchar_item, type: string}
|
15
|
+
- {name: timestamp_item, type: timestamp}
|
16
|
+
|
17
|
+
out:
|
18
|
+
type: cassandra
|
19
|
+
hosts:
|
20
|
+
- 127.0.0.1
|
21
|
+
keyspace: embulk_test
|
22
|
+
table: test_basic
|
23
|
+
idempotent: true
|
@@ -3,13 +3,16 @@ package org.embulk.output.cassandra;
|
|
3
3
|
import com.datastax.driver.core.BoundStatement;
|
4
4
|
import com.datastax.driver.core.Cluster;
|
5
5
|
import com.datastax.driver.core.ColumnMetadata;
|
6
|
+
import com.datastax.driver.core.DataType;
|
6
7
|
import com.datastax.driver.core.KeyspaceMetadata;
|
7
8
|
import com.datastax.driver.core.PreparedStatement;
|
8
9
|
import com.datastax.driver.core.Session;
|
9
10
|
import com.datastax.driver.core.SocketOptions;
|
10
11
|
import com.datastax.driver.core.TableMetadata;
|
12
|
+
import com.datastax.driver.core.querybuilder.BuiltStatement;
|
11
13
|
import com.datastax.driver.core.querybuilder.Insert;
|
12
14
|
import com.datastax.driver.core.querybuilder.QueryBuilder;
|
15
|
+
import com.datastax.driver.core.querybuilder.Update;
|
13
16
|
import com.google.common.base.Optional;
|
14
17
|
import com.google.common.collect.ImmutableList;
|
15
18
|
import com.google.common.collect.ImmutableMap;
|
@@ -86,6 +89,10 @@ public class CassandraOutputPlugin
|
|
86
89
|
@Config("request_timeout")
|
87
90
|
@ConfigDefault("12000")
|
88
91
|
public int getRequestTimeout();
|
92
|
+
|
93
|
+
@Config("counter_columnName")
|
94
|
+
@ConfigDefault("null")
|
95
|
+
public Optional<String> getCounterColumnName();
|
89
96
|
}
|
90
97
|
|
91
98
|
private final Logger logger = Exec.getLogger(CassandraOutputPlugin.class);
|
@@ -135,18 +142,42 @@ public class CassandraOutputPlugin
|
|
135
142
|
throw new RuntimeException("table `" + task.getTable() + "` is not found");
|
136
143
|
}
|
137
144
|
|
138
|
-
|
139
|
-
|
140
|
-
|
145
|
+
List<ColumnMetadata> columns = tableMetadata.getColumns();
|
146
|
+
boolean isCounterTable = columns.stream().anyMatch(col -> col.getType().getName() == DataType.Name.COUNTER);
|
147
|
+
|
148
|
+
BuiltStatement query;
|
149
|
+
if (isCounterTable) {
|
150
|
+
Update update = QueryBuilder.update(task.getKeyspace(), task.getTable());
|
151
|
+
if (task.getTtl().isPresent()) {
|
152
|
+
update.using(QueryBuilder.ttl(task.getTtl().get()));
|
153
|
+
}
|
154
|
+
for (ColumnMetadata column : tableMetadata.getColumns()) {
|
155
|
+
if (column.getType().getName() == DataType.Name.COUNTER) {
|
156
|
+
update.with(QueryBuilder.incr(column.getName(), QueryBuilder.bindMarker(column.getName())));
|
157
|
+
}
|
158
|
+
else{
|
159
|
+
update.where(QueryBuilder.eq(column.getName(), QueryBuilder.bindMarker(column.getName())));
|
160
|
+
}
|
161
|
+
}
|
162
|
+
query = update;
|
141
163
|
}
|
142
|
-
|
143
|
-
insert
|
164
|
+
else {
|
165
|
+
Insert insert = QueryBuilder.insertInto(task.getKeyspace(), task.getTable());
|
166
|
+
if (task.getIfNotExists()) {
|
167
|
+
insert.ifNotExists();
|
168
|
+
}
|
169
|
+
if (task.getTtl().isPresent()) {
|
170
|
+
insert.using(QueryBuilder.ttl(task.getTtl().get()));
|
171
|
+
}
|
172
|
+
for (ColumnMetadata column : tableMetadata.getColumns()) {
|
173
|
+
insert.value(column.getName(), QueryBuilder.bindMarker(column.getName()));
|
174
|
+
}
|
175
|
+
query = insert;
|
144
176
|
}
|
145
177
|
|
146
178
|
ImmutableMap.Builder<String, CassandraColumnSetter> columnSettersBuilder = ImmutableMap.builder();
|
147
179
|
ImmutableList.Builder<String> uuidColumnsBuilder = ImmutableList.builder();
|
148
180
|
for (ColumnMetadata column : tableMetadata.getColumns()) {
|
149
|
-
insert.value(column.getName(), QueryBuilder.bindMarker(column.getName()));
|
150
181
|
columnSettersBuilder.put(column.getName(), CassandraColumnSetterFactory.createColumnSetter(column, cluster));
|
151
182
|
switch (column.getType().getName()) {
|
152
183
|
case UUID:
|
@@ -159,9 +190,9 @@ public class CassandraOutputPlugin
|
|
159
190
|
List<ColumnSetterVisitor> columnVisitors = Lists.transform(schema.getColumns(), (column) ->
|
160
191
|
new ColumnSetterVisitor(pageReader, columnSetters.get(column.getName())));
|
161
192
|
|
162
|
-
logger.info("Insert Query: {}",
|
193
|
+
logger.info("Insert Query: {}", query.getQueryString());
|
163
194
|
|
164
|
-
PreparedStatement prepared = session.prepare(
|
195
|
+
PreparedStatement prepared = session.prepare(query);
|
165
196
|
if (task.getIdempotent()) {
|
166
197
|
prepared.setIdempotent(task.getIdempotent());
|
167
198
|
}
|
@@ -3,6 +3,7 @@ package org.embulk.output.cassandra;
|
|
3
3
|
import com.datastax.driver.core.Cluster;
|
4
4
|
import com.datastax.driver.core.DataType;
|
5
5
|
import com.datastax.driver.core.LocalDate;
|
6
|
+
import com.datastax.driver.core.ResultSet;
|
6
7
|
import com.datastax.driver.core.Row;
|
7
8
|
import com.datastax.driver.core.Session;
|
8
9
|
import com.datastax.driver.core.TupleType;
|
@@ -26,6 +27,7 @@ import java.nio.file.Path;
|
|
26
27
|
import java.time.ZoneId;
|
27
28
|
import java.time.ZonedDateTime;
|
28
29
|
import java.util.Arrays;
|
30
|
+
import java.util.Comparator;
|
29
31
|
import java.util.Date;
|
30
32
|
import java.util.List;
|
31
33
|
import java.util.Map;
|
@@ -83,13 +85,16 @@ public class TestCassandraOutputPlugin
|
|
83
85
|
String createTableBasic = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_basic.cql");
|
84
86
|
String createTableUuid = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_uuid.cql");
|
85
87
|
String createTableComplex = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_complex.cql");
|
88
|
+
String createTableCounter = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_counter.cql");
|
86
89
|
session.execute(createKeyspace);
|
87
90
|
session.execute(createTableBasic);
|
88
91
|
session.execute(createTableUuid);
|
89
92
|
session.execute(createTableComplex);
|
93
|
+
session.execute(createTableCounter);
|
90
94
|
session.execute("TRUNCATE embulk_test.test_basic");
|
91
95
|
session.execute("TRUNCATE embulk_test.test_uuid");
|
92
96
|
session.execute("TRUNCATE embulk_test.test_complex");
|
97
|
+
session.execute("TRUNCATE embulk_test.test_counter");
|
93
98
|
}
|
94
99
|
|
95
100
|
@After
|
@@ -139,6 +144,43 @@ public class TestCassandraOutputPlugin
|
|
139
144
|
assertEquals(createDate(2018, 7, 1, 10, 0, 2, 0), row3.getTimestamp("timestamp_item"));
|
140
145
|
}
|
141
146
|
|
147
|
+
@Test
|
148
|
+
public void testCounter() throws IOException
|
149
|
+
{
|
150
|
+
Path input = getInputPath("test1.csv");
|
151
|
+
ConfigSource config = loadYamlResource("test_counter.yaml");
|
152
|
+
config.set("hosts", getCassandraHostAsList());
|
153
|
+
|
154
|
+
assertEquals(0, session.execute("SELECT * FROM embulk_test.test_counter").all().size());
|
155
|
+
|
156
|
+
TestingEmbulk.RunResult result = embulk.runOutput(config, input);
|
157
|
+
assertEquals(3, result.getOutputTaskReports().get(0).get(Long.class, "inserted_record_count").longValue());
|
158
|
+
|
159
|
+
List<Row> rows = session.execute("SELECT * FROM embulk_test.test_counter").all();
|
160
|
+
rows.sort(Comparator.comparing(row -> row.getString("id")));
|
161
|
+
Row row1 = rows.get(0);
|
162
|
+
Row row2 = rows.get(1);
|
163
|
+
Row row3 = rows.get(2);
|
164
|
+
assertEquals("A001", row1.getString("id"));
|
165
|
+
assertEquals(9, row1.getLong("int_item"));
|
166
|
+
assertEquals(1, row1.getInt("int32_item"));
|
167
|
+
assertEquals(2, row1.getShort("smallint_item"));
|
168
|
+
assertTrue(row1.getBool("boolean_item"));
|
169
|
+
assertEquals(createDate(2018, 7, 1, 10, 0, 0, 0), row1.getTimestamp("timestamp_item"));
|
170
|
+
assertEquals("A002", row2.getString("id"));
|
171
|
+
assertEquals(0, row2.getLong("int_item"));
|
172
|
+
assertEquals(0, row2.getInt("int32_item"));
|
173
|
+
assertEquals(4, row2.getShort("smallint_item"));
|
174
|
+
assertTrue(row2.getBool("boolean_item"));
|
175
|
+
assertEquals(createDate(2018, 7, 1, 10, 0, 1, 0), row2.getTimestamp("timestamp_item"));
|
176
|
+
assertEquals("A003", row3.getString("id"));
|
177
|
+
assertEquals(9, row3.getLong("int_item"));
|
178
|
+
assertEquals(0, row3.getInt("int32_item"));
|
179
|
+
assertEquals(8, row3.getShort("smallint_item"));
|
180
|
+
assertFalse(row3.getBool("boolean_item"));
|
181
|
+
assertEquals(createDate(2018, 7, 1, 10, 0, 2, 0), row3.getTimestamp("timestamp_item"));
|
182
|
+
}
|
183
|
+
|
142
184
|
@Test
|
143
185
|
public void testBasicWithTtl() throws IOException
|
144
186
|
{
|
@@ -0,0 +1,11 @@
|
|
1
|
+
CREATE TABLE IF NOT EXISTS embulk_test.test_counter (
|
2
|
+
id text,
|
3
|
+
int_item counter,
|
4
|
+
int32_item int,
|
5
|
+
smallint_item smallint,
|
6
|
+
tinyint_item tinyint,
|
7
|
+
boolean_item boolean,
|
8
|
+
double_item double,
|
9
|
+
timestamp_item timestamp,
|
10
|
+
primary key (id, int32_item, smallint_item, tinyint_item, boolean_item, double_item, timestamp_item)
|
11
|
+
) WITH compaction = { 'class' : 'SizeTieredCompactionStrategy' };
|
@@ -1,4 +1,4 @@
|
|
1
|
-
id:string,int_item:long,int32_item:long,smallint_item:long,tinyint_item:long,boolean_item:boolean,varchar_item:string,timestamp_item:timestamp
|
2
|
-
A001,9,4294967297,2,0,true,a,2018-07-01 10:00:00.000 UTC
|
3
|
-
A002,0,0,4,0,true,b,2018-07-01 10:00:01.000 UTC
|
4
|
-
A003,9,0,8,1,false,c,2018-07-01 10:00:02.000 UTC
|
1
|
+
id:string,int_item:long,int32_item:long,smallint_item:long,tinyint_item:long,boolean_item:boolean,varchar_item:string,timestamp_item:timestamp,double_item:double
|
2
|
+
A001,9,4294967297,2,0,true,a,2018-07-01 10:00:00.000 UTC,1.1
|
3
|
+
A002,0,0,4,0,true,b,2018-07-01 10:00:01.000 UTC,1.2
|
4
|
+
A003,9,0,8,1,false,c,2018-07-01 10:00:02.000 UTC,1.3
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-cassandra
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -55,7 +55,7 @@ files:
|
|
55
55
|
- classpath/asm-tree-5.0.3.jar
|
56
56
|
- classpath/asm-util-5.0.3.jar
|
57
57
|
- classpath/cassandra-driver-core-3.5.0.jar
|
58
|
-
- classpath/embulk-output-cassandra-0.
|
58
|
+
- classpath/embulk-output-cassandra-0.2.0.jar
|
59
59
|
- classpath/guava-19.0.jar
|
60
60
|
- classpath/jffi-1.2.16-native.jar
|
61
61
|
- classpath/jffi-1.2.16.jar
|
@@ -70,7 +70,9 @@ files:
|
|
70
70
|
- classpath/slf4j-api-1.7.25.jar
|
71
71
|
- config/checkstyle/checkstyle.xml
|
72
72
|
- config/checkstyle/default.xml
|
73
|
+
- docker-compose.yml
|
73
74
|
- embulk-output-cassandra.iml
|
75
|
+
- example.yml
|
74
76
|
- gradle/wrapper/gradle-wrapper.jar
|
75
77
|
- gradle/wrapper/gradle-wrapper.properties
|
76
78
|
- gradlew
|
@@ -105,12 +107,14 @@ files:
|
|
105
107
|
- src/test/resources/org/embulk/output/cassandra/create_keyspace.cql
|
106
108
|
- src/test/resources/org/embulk/output/cassandra/create_table_test_basic.cql
|
107
109
|
- src/test/resources/org/embulk/output/cassandra/create_table_test_complex.cql
|
110
|
+
- src/test/resources/org/embulk/output/cassandra/create_table_test_counter.cql
|
108
111
|
- src/test/resources/org/embulk/output/cassandra/create_table_test_uuid.cql
|
109
112
|
- src/test/resources/org/embulk/output/cassandra/test1.csv
|
110
113
|
- src/test/resources/org/embulk/output/cassandra/test2.csv
|
111
114
|
- src/test/resources/org/embulk/output/cassandra/test3.csv
|
112
115
|
- src/test/resources/org/embulk/output/cassandra/test_basic.yaml
|
113
116
|
- src/test/resources/org/embulk/output/cassandra/test_complex.yaml
|
117
|
+
- src/test/resources/org/embulk/output/cassandra/test_counter.yaml
|
114
118
|
- src/test/resources/org/embulk/output/cassandra/test_uuid.yaml
|
115
119
|
homepage: https://github.com/joker1007/embulk-output-cassandra
|
116
120
|
licenses:
|