embulk-output-cassandra 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e128e8bd86c8b5573a718700e449160a47e9fa8d
4
- data.tar.gz: 5bf5acfb7cc3d84360a5f7889f60f5575608000f
3
+ metadata.gz: ab4a3fa7b0b477c2c6ebed457fa48f248ce7a6ae
4
+ data.tar.gz: a7355ea7423d856a3533e2b3d4dbbb50d05c60be
5
5
  SHA512:
6
- metadata.gz: 623b2b2b23945c8ebd2519b41e580636581b4ac4fcd305c1f60d84ccb73f7bad89ae0abfbfa20d35cb5598b507b161ead4ae0b8c9235650de538611a8a763d4c
7
- data.tar.gz: 7c9a6cfd7e008a4621b866b516057bb0d6277a3c875a6a8f95eeb735b977ae7b21e8325cf40d1f5e53c7da76004e45d54ec942e47b8958d24b7441a47e54aa53
6
+ metadata.gz: 0a8b7043a53d758515a06274ffd00955013d8533d447e512a01228f17e63e378e9d70a5f149f54e9451b52a74718951261e3e6a6493889f8d6213b06db480ea3
7
+ data.tar.gz: f51f4f392c31bb8928e97b14ef4059a523578883517ea390e3beac76ffd84dc1f30b464cac999d05e478fc6ae4ad102739a26590b219dd5437901c00debc1181
data/README.md CHANGED
@@ -11,10 +11,53 @@ Apache Cassandra output plugin for Embulk.
11
11
 
12
12
  ## Caution
13
13
  Currently, the version of the netty components conflicts with the one used by embulk-core.
14
+
14
15
  This problem is very severe.
16
+
15
17
  I tested this plugin on embulk-0.9.7.
16
18
  But future embulk versions may break this plugin.
17
19
 
20
+ ## Supported Data Types
21
+
22
+ | CQL Type | Embulk Type | Description |
23
+ | -------- | ----------- | -------------- |
24
+ | ascii | string, boolean, long, double, timestamp, json | use `toString` or `toJson` |
25
+ | bigint | string, boolean(as 0 or 1), long, double | |
26
+ | blob | unsupported | |
27
+ | boolean | boolean, long, double | 0 == false, 1 == true |
28
+ | counter | unsupported | |
29
+ | date | string, timestamp | timestamp use `toEpochMilli` |
30
+ | decimal | string, boolean(as 0 or 1), long, double | |
31
+ | double | string, boolean(as 0 or 1), long, double | |
32
+ | float | string, boolean(as 0 or 1), long, double | |
33
+ | inet | string | |
34
+ | int | string, boolean(as 0 or 1), long, double | overflowed value is reset to 0 |
35
+ | list | json | |
36
+ | map (support only text key) | json | |
37
+ | set | json | |
38
+ | smallint | string, boolean(as 0 or 1), long, double | overflowed value is reset to 0 |
39
+ | text | string, boolean, long, double, timestamp, json | use `toString` or `toJson` |
40
+ | time | string, long, double, timestamp | long and double as nano seconds of day,<br>timestamp use `toEpochMilli` |
41
+ | timestamp | long, double, timestamp | long and double as epoch second |
42
+ | timeuuid | null | |
43
+ | uuid | null | |
44
+ | varchar | string, boolean, long, double, timestamp, json | use `toString` or `toJson` |
45
+ | varint | string, boolean(as 0 or 1), long, double | |
46
+ | UDT | unsupported | |
47
+
48
+ ## Insert Behavior
49
+ If embulk record does not have a column, it is treated as `unset`.
50
+ If a record with the same key already exists, the column is not touched.
51
+
52
+ ### Counter table
53
+ This plugin supports counter tables.
54
+
55
+ However, counter tables support only increment/decrement updates.
56
+
57
+ Because of this, the plugin uses the input value as the increment value.
58
+
59
+ For example, if the input data is {id: 1, count: 5}, the executed statement is `UPDATE tablename SET count = count + 5 WHERE id = 1`.
60
+
18
61
  ## Configuration
19
62
 
20
63
  - **hosts**: list of seed hosts (list<string>, required)
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.1"
16
+ version = "0.2.0"
17
17
 
18
18
  sourceCompatibility = 1.8
19
19
  targetCompatibility = 1.8
@@ -0,0 +1,6 @@
1
+ version: "3"
2
+ services:
3
+ cassandra:
4
+ image: cassandra:latest
5
+ ports:
6
+ - '9042:9042'
@@ -1,5 +1,5 @@
1
1
  <?xml version="1.0" encoding="UTF-8"?>
2
- <module external.linked.project.id="embulk-output-cassandra" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="" external.system.module.version="0.1.0" type="JAVA_MODULE" version="4">
2
+ <module external.linked.project.id="embulk-output-cassandra" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="" external.system.module.version="0.1.1" type="JAVA_MODULE" version="4">
3
3
  <component name="NewModuleRootManager" inherit-compiler-output="true">
4
4
  <exclude-output />
5
5
  <content url="file://$MODULE_DIR$">
data/example.yml ADDED
@@ -0,0 +1,23 @@
1
+ in:
2
+ type: file
3
+ path_prefix: ./src/test/resources/org/embulk/output/cassandra/test1.csv
4
+ parser:
5
+ type: csv
6
+ skip_header_lines: 1
7
+ columns:
8
+ - {name: id, type: string}
9
+ - {name: int_item, type: long}
10
+ - {name: int32_item, type: long}
11
+ - {name: smallint_item, type: long}
12
+ - {name: tinyint_item, type: long}
13
+ - {name: boolean_item, type: boolean}
14
+ - {name: varchar_item, type: string}
15
+ - {name: timestamp_item, type: timestamp}
16
+
17
+ out:
18
+ type: cassandra
19
+ hosts:
20
+ - 127.0.0.1
21
+ keyspace: embulk_test
22
+ table: test_basic
23
+ idempotent: true
@@ -3,13 +3,16 @@ package org.embulk.output.cassandra;
3
3
  import com.datastax.driver.core.BoundStatement;
4
4
  import com.datastax.driver.core.Cluster;
5
5
  import com.datastax.driver.core.ColumnMetadata;
6
+ import com.datastax.driver.core.DataType;
6
7
  import com.datastax.driver.core.KeyspaceMetadata;
7
8
  import com.datastax.driver.core.PreparedStatement;
8
9
  import com.datastax.driver.core.Session;
9
10
  import com.datastax.driver.core.SocketOptions;
10
11
  import com.datastax.driver.core.TableMetadata;
12
+ import com.datastax.driver.core.querybuilder.BuiltStatement;
11
13
  import com.datastax.driver.core.querybuilder.Insert;
12
14
  import com.datastax.driver.core.querybuilder.QueryBuilder;
15
+ import com.datastax.driver.core.querybuilder.Update;
13
16
  import com.google.common.base.Optional;
14
17
  import com.google.common.collect.ImmutableList;
15
18
  import com.google.common.collect.ImmutableMap;
@@ -86,6 +89,10 @@ public class CassandraOutputPlugin
86
89
  @Config("request_timeout")
87
90
  @ConfigDefault("12000")
88
91
  public int getRequestTimeout();
92
+
93
+ @Config("counter_columnName")
94
+ @ConfigDefault("null")
95
+ public Optional<String> getCounterColumnName();
89
96
  }
90
97
 
91
98
  private final Logger logger = Exec.getLogger(CassandraOutputPlugin.class);
@@ -135,18 +142,42 @@ public class CassandraOutputPlugin
135
142
  throw new RuntimeException("table `" + task.getTable() + "` is not found");
136
143
  }
137
144
 
138
- Insert insert = QueryBuilder.insertInto(task.getKeyspace(), task.getTable());
139
- if (task.getIfNotExists()) {
140
- insert.ifNotExists();
145
+ List<ColumnMetadata> columns = tableMetadata.getColumns();
146
+ boolean isCounterTable = columns.stream().anyMatch(col -> col.getType().getName() == DataType.Name.COUNTER);
147
+
148
+ BuiltStatement query;
149
+ if (isCounterTable) {
150
+ Update update = QueryBuilder.update(task.getKeyspace(), task.getTable());
151
+ if (task.getTtl().isPresent()) {
152
+ update.using(QueryBuilder.ttl(task.getTtl().get()));
153
+ }
154
+ for (ColumnMetadata column : tableMetadata.getColumns()) {
155
+ if (column.getType().getName() == DataType.Name.COUNTER) {
156
+ update.with(QueryBuilder.incr(column.getName(), QueryBuilder.bindMarker(column.getName())));
157
+ }
158
+ else{
159
+ update.where(QueryBuilder.eq(column.getName(), QueryBuilder.bindMarker(column.getName())));
160
+ }
161
+ }
162
+ query = update;
141
163
  }
142
- if (task.getTtl().isPresent()) {
143
- insert.using(QueryBuilder.ttl(task.getTtl().get()));
164
+ else {
165
+ Insert insert = QueryBuilder.insertInto(task.getKeyspace(), task.getTable());
166
+ if (task.getIfNotExists()) {
167
+ insert.ifNotExists();
168
+ }
169
+ if (task.getTtl().isPresent()) {
170
+ insert.using(QueryBuilder.ttl(task.getTtl().get()));
171
+ }
172
+ for (ColumnMetadata column : tableMetadata.getColumns()) {
173
+ insert.value(column.getName(), QueryBuilder.bindMarker(column.getName()));
174
+ }
175
+ query = insert;
144
176
  }
145
177
 
146
178
  ImmutableMap.Builder<String, CassandraColumnSetter> columnSettersBuilder = ImmutableMap.builder();
147
179
  ImmutableList.Builder<String> uuidColumnsBuilder = ImmutableList.builder();
148
180
  for (ColumnMetadata column : tableMetadata.getColumns()) {
149
- insert.value(column.getName(), QueryBuilder.bindMarker(column.getName()));
150
181
  columnSettersBuilder.put(column.getName(), CassandraColumnSetterFactory.createColumnSetter(column, cluster));
151
182
  switch (column.getType().getName()) {
152
183
  case UUID:
@@ -159,9 +190,9 @@ public class CassandraOutputPlugin
159
190
  List<ColumnSetterVisitor> columnVisitors = Lists.transform(schema.getColumns(), (column) ->
160
191
  new ColumnSetterVisitor(pageReader, columnSetters.get(column.getName())));
161
192
 
162
- logger.info("Insert Query: {}", insert.getQueryString());
193
+ logger.info("Insert Query: {}", query.getQueryString());
163
194
 
164
- PreparedStatement prepared = session.prepare(insert);
195
+ PreparedStatement prepared = session.prepare(query);
165
196
  if (task.getIdempotent()) {
166
197
  prepared.setIdempotent(task.getIdempotent());
167
198
  }
@@ -19,6 +19,7 @@ public class CassandraColumnSetterFactory
19
19
  case BOOLEAN:
20
20
  return new BooleanColumnSetter(cassandraColumn);
21
21
  case BIGINT:
22
+ case COUNTER:
22
23
  return new BigintColumnSetter(cassandraColumn);
23
24
  case INT:
24
25
  return new IntColumnSetter(cassandraColumn);
@@ -3,6 +3,7 @@ package org.embulk.output.cassandra;
3
3
  import com.datastax.driver.core.Cluster;
4
4
  import com.datastax.driver.core.DataType;
5
5
  import com.datastax.driver.core.LocalDate;
6
+ import com.datastax.driver.core.ResultSet;
6
7
  import com.datastax.driver.core.Row;
7
8
  import com.datastax.driver.core.Session;
8
9
  import com.datastax.driver.core.TupleType;
@@ -26,6 +27,7 @@ import java.nio.file.Path;
26
27
  import java.time.ZoneId;
27
28
  import java.time.ZonedDateTime;
28
29
  import java.util.Arrays;
30
+ import java.util.Comparator;
29
31
  import java.util.Date;
30
32
  import java.util.List;
31
33
  import java.util.Map;
@@ -83,13 +85,16 @@ public class TestCassandraOutputPlugin
83
85
  String createTableBasic = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_basic.cql");
84
86
  String createTableUuid = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_uuid.cql");
85
87
  String createTableComplex = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_complex.cql");
88
+ String createTableCounter = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_counter.cql");
86
89
  session.execute(createKeyspace);
87
90
  session.execute(createTableBasic);
88
91
  session.execute(createTableUuid);
89
92
  session.execute(createTableComplex);
93
+ session.execute(createTableCounter);
90
94
  session.execute("TRUNCATE embulk_test.test_basic");
91
95
  session.execute("TRUNCATE embulk_test.test_uuid");
92
96
  session.execute("TRUNCATE embulk_test.test_complex");
97
+ session.execute("TRUNCATE embulk_test.test_counter");
93
98
  }
94
99
 
95
100
  @After
@@ -139,6 +144,43 @@ public class TestCassandraOutputPlugin
139
144
  assertEquals(createDate(2018, 7, 1, 10, 0, 2, 0), row3.getTimestamp("timestamp_item"));
140
145
  }
141
146
 
147
+ @Test
148
+ public void testCounter() throws IOException
149
+ {
150
+ Path input = getInputPath("test1.csv");
151
+ ConfigSource config = loadYamlResource("test_counter.yaml");
152
+ config.set("hosts", getCassandraHostAsList());
153
+
154
+ assertEquals(0, session.execute("SELECT * FROM embulk_test.test_counter").all().size());
155
+
156
+ TestingEmbulk.RunResult result = embulk.runOutput(config, input);
157
+ assertEquals(3, result.getOutputTaskReports().get(0).get(Long.class, "inserted_record_count").longValue());
158
+
159
+ List<Row> rows = session.execute("SELECT * FROM embulk_test.test_counter").all();
160
+ rows.sort(Comparator.comparing(row -> row.getString("id")));
161
+ Row row1 = rows.get(0);
162
+ Row row2 = rows.get(1);
163
+ Row row3 = rows.get(2);
164
+ assertEquals("A001", row1.getString("id"));
165
+ assertEquals(9, row1.getLong("int_item"));
166
+ assertEquals(1, row1.getInt("int32_item"));
167
+ assertEquals(2, row1.getShort("smallint_item"));
168
+ assertTrue(row1.getBool("boolean_item"));
169
+ assertEquals(createDate(2018, 7, 1, 10, 0, 0, 0), row1.getTimestamp("timestamp_item"));
170
+ assertEquals("A002", row2.getString("id"));
171
+ assertEquals(0, row2.getLong("int_item"));
172
+ assertEquals(0, row2.getInt("int32_item"));
173
+ assertEquals(4, row2.getShort("smallint_item"));
174
+ assertTrue(row2.getBool("boolean_item"));
175
+ assertEquals(createDate(2018, 7, 1, 10, 0, 1, 0), row2.getTimestamp("timestamp_item"));
176
+ assertEquals("A003", row3.getString("id"));
177
+ assertEquals(9, row3.getLong("int_item"));
178
+ assertEquals(0, row3.getInt("int32_item"));
179
+ assertEquals(8, row3.getShort("smallint_item"));
180
+ assertFalse(row3.getBool("boolean_item"));
181
+ assertEquals(createDate(2018, 7, 1, 10, 0, 2, 0), row3.getTimestamp("timestamp_item"));
182
+ }
183
+
142
184
  @Test
143
185
  public void testBasicWithTtl() throws IOException
144
186
  {
@@ -0,0 +1,11 @@
1
+ CREATE TABLE IF NOT EXISTS embulk_test.test_counter (
2
+ id text,
3
+ int_item counter,
4
+ int32_item int,
5
+ smallint_item smallint,
6
+ tinyint_item tinyint,
7
+ boolean_item boolean,
8
+ double_item double,
9
+ timestamp_item timestamp,
10
+ primary key (id, int32_item, smallint_item, tinyint_item, boolean_item, double_item, timestamp_item)
11
+ ) WITH compaction = { 'class' : 'SizeTieredCompactionStrategy' };
@@ -1,4 +1,4 @@
1
- id:string,int_item:long,int32_item:long,smallint_item:long,tinyint_item:long,boolean_item:boolean,varchar_item:string,timestamp_item:timestamp
2
- A001,9,4294967297,2,0,true,a,2018-07-01 10:00:00.000 UTC
3
- A002,0,0,4,0,true,b,2018-07-01 10:00:01.000 UTC
4
- A003,9,0,8,1,false,c,2018-07-01 10:00:02.000 UTC
1
+ id:string,int_item:long,int32_item:long,smallint_item:long,tinyint_item:long,boolean_item:boolean,varchar_item:string,timestamp_item:timestamp,double_item:double
2
+ A001,9,4294967297,2,0,true,a,2018-07-01 10:00:00.000 UTC,1.1
3
+ A002,0,0,4,0,true,b,2018-07-01 10:00:01.000 UTC,1.2
4
+ A003,9,0,8,1,false,c,2018-07-01 10:00:02.000 UTC,1.3
@@ -0,0 +1,3 @@
1
+ type: cassandra
2
+ keyspace: embulk_test
3
+ table: "test_counter"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-cassandra
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-02 00:00:00.000000000 Z
11
+ date: 2018-08-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -55,7 +55,7 @@ files:
55
55
  - classpath/asm-tree-5.0.3.jar
56
56
  - classpath/asm-util-5.0.3.jar
57
57
  - classpath/cassandra-driver-core-3.5.0.jar
58
- - classpath/embulk-output-cassandra-0.1.1.jar
58
+ - classpath/embulk-output-cassandra-0.2.0.jar
59
59
  - classpath/guava-19.0.jar
60
60
  - classpath/jffi-1.2.16-native.jar
61
61
  - classpath/jffi-1.2.16.jar
@@ -70,7 +70,9 @@ files:
70
70
  - classpath/slf4j-api-1.7.25.jar
71
71
  - config/checkstyle/checkstyle.xml
72
72
  - config/checkstyle/default.xml
73
+ - docker-compose.yml
73
74
  - embulk-output-cassandra.iml
75
+ - example.yml
74
76
  - gradle/wrapper/gradle-wrapper.jar
75
77
  - gradle/wrapper/gradle-wrapper.properties
76
78
  - gradlew
@@ -105,12 +107,14 @@ files:
105
107
  - src/test/resources/org/embulk/output/cassandra/create_keyspace.cql
106
108
  - src/test/resources/org/embulk/output/cassandra/create_table_test_basic.cql
107
109
  - src/test/resources/org/embulk/output/cassandra/create_table_test_complex.cql
110
+ - src/test/resources/org/embulk/output/cassandra/create_table_test_counter.cql
108
111
  - src/test/resources/org/embulk/output/cassandra/create_table_test_uuid.cql
109
112
  - src/test/resources/org/embulk/output/cassandra/test1.csv
110
113
  - src/test/resources/org/embulk/output/cassandra/test2.csv
111
114
  - src/test/resources/org/embulk/output/cassandra/test3.csv
112
115
  - src/test/resources/org/embulk/output/cassandra/test_basic.yaml
113
116
  - src/test/resources/org/embulk/output/cassandra/test_complex.yaml
117
+ - src/test/resources/org/embulk/output/cassandra/test_counter.yaml
114
118
  - src/test/resources/org/embulk/output/cassandra/test_uuid.yaml
115
119
  homepage: https://github.com/joker1007/embulk-output-cassandra
116
120
  licenses: