embulk-output-cassandra 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e128e8bd86c8b5573a718700e449160a47e9fa8d
4
- data.tar.gz: 5bf5acfb7cc3d84360a5f7889f60f5575608000f
3
+ metadata.gz: ab4a3fa7b0b477c2c6ebed457fa48f248ce7a6ae
4
+ data.tar.gz: a7355ea7423d856a3533e2b3d4dbbb50d05c60be
5
5
  SHA512:
6
- metadata.gz: 623b2b2b23945c8ebd2519b41e580636581b4ac4fcd305c1f60d84ccb73f7bad89ae0abfbfa20d35cb5598b507b161ead4ae0b8c9235650de538611a8a763d4c
7
- data.tar.gz: 7c9a6cfd7e008a4621b866b516057bb0d6277a3c875a6a8f95eeb735b977ae7b21e8325cf40d1f5e53c7da76004e45d54ec942e47b8958d24b7441a47e54aa53
6
+ metadata.gz: 0a8b7043a53d758515a06274ffd00955013d8533d447e512a01228f17e63e378e9d70a5f149f54e9451b52a74718951261e3e6a6493889f8d6213b06db480ea3
7
+ data.tar.gz: f51f4f392c31bb8928e97b14ef4059a523578883517ea390e3beac76ffd84dc1f30b464cac999d05e478fc6ae4ad102739a26590b219dd5437901c00debc1181
data/README.md CHANGED
@@ -11,10 +11,53 @@ Apache Cassandra output plugin for Embulk.
11
11
 
12
12
  ## Caution
13
13
  Currently, the version of the netty components conflicts with the one used by embulk-core.
14
+
14
15
  This problem is very severe.
16
+
15
17
  I tested this plugin on embulk-0.9.7.
16
18
  But future embulk version may break this plugin.
17
19
 
20
+ ## Supported Data Types
21
+
22
+ | CQL Type | Embulk Type | Description |
23
+ | -------- | ----------- | -------------- |
24
+ | ascii | string, boolean, long, double, timestamp, json | use `toString` or `toJson` |
25
+ | bigint | string, boolean(as 0 or 1), long, double | |
26
+ | blob | unsupported | |
27
+ | boolean | boolean, long, double | 0 == false, 1 == true |
28
+ | counter | unsupported | |
29
+ | date | string, timestamp | timestamp use `toEpochMilli` |
30
+ | decimal | string, boolean(as 0 or 1), long, double | |
31
+ | double | string, boolean(as 0 or 1), long, double | |
32
+ | float | string, boolean(as 0 or 1), long, double | |
33
+ | inet | string | |
34
+ | int | string, boolean(as 0 or 1), long, double | overflowed value is reset to 0 |
35
+ | list | json | |
36
+ | map (support only text key) | json | |
37
+ | set | json | |
38
+ | smallint | string, boolean(as 0 or 1), long, double | overflowed value is reset to 0 |
39
+ | text | string, boolean, long, double, timestamp, json | use `toString` or `toJson` |
40
+ | time | string, long, double, timestamp | long and double as nano seconds of day,<br>timestamp use `toEpochMilli` |
41
+ | timestamp | long, double, timestamp | long and double as epoch second |
42
+ | timeuuid | null | |
43
+ | uuid | null | |
44
+ | varchar | string, boolean, long, double, timestamp, json | use `toString` or `toJson` |
45
+ | varint | string, boolean(as 0 or 1), long, double | |
46
+ | UDT | unsupported | |
47
+
48
+ ## Insert Behavior
49
+ If an embulk record does not have a column, that column is treated as `unset`.
50
+ If a record with the same key already exists, the column is not touched.
51
+
52
+ ### Counter table
53
+ This plugin supports counter tables.
54
+
55
+ But a counter table supports only increment/decrement updates.
56
+
57
+ Because of this, the plugin uses the input value as the increment value.
58
+
59
+ For example, if the input data is {id: 1, count: 5}, the executed statement is `UPDATE tablename SET count = count + 5 WHERE id = 1`
60
+
18
61
  ## Configuration
19
62
 
20
63
  - **hosts**: list of seed hosts (list<string>, required)
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.1"
16
+ version = "0.2.0"
17
17
 
18
18
  sourceCompatibility = 1.8
19
19
  targetCompatibility = 1.8
@@ -0,0 +1,6 @@
1
+ version: "3"
2
+ services:
3
+ cassandra:
4
+ image: cassandra:latest
5
+ ports:
6
+ - '9042:9042'
@@ -1,5 +1,5 @@
1
1
  <?xml version="1.0" encoding="UTF-8"?>
2
- <module external.linked.project.id="embulk-output-cassandra" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="" external.system.module.version="0.1.0" type="JAVA_MODULE" version="4">
2
+ <module external.linked.project.id="embulk-output-cassandra" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="" external.system.module.version="0.1.1" type="JAVA_MODULE" version="4">
3
3
  <component name="NewModuleRootManager" inherit-compiler-output="true">
4
4
  <exclude-output />
5
5
  <content url="file://$MODULE_DIR$">
data/example.yml ADDED
@@ -0,0 +1,23 @@
1
+ in:
2
+ type: file
3
+ path_prefix: ./src/test/resources/org/embulk/output/cassandra/test1.csv
4
+ parser:
5
+ type: csv
6
+ skip_header_lines: 1
7
+ columns:
8
+ - {name: id, type: string}
9
+ - {name: int_item, type: long}
10
+ - {name: int32_item, type: long}
11
+ - {name: smallint_item, type: long}
12
+ - {name: tinyint_item, type: long}
13
+ - {name: boolean_item, type: boolean}
14
+ - {name: varchar_item, type: string}
15
+ - {name: timestamp_item, type: timestamp}
16
+
17
+ out:
18
+ type: cassandra
19
+ hosts:
20
+ - 127.0.0.1
21
+ keyspace: embulk_test
22
+ table: test_basic
23
+ idempotent: true
@@ -3,13 +3,16 @@ package org.embulk.output.cassandra;
3
3
  import com.datastax.driver.core.BoundStatement;
4
4
  import com.datastax.driver.core.Cluster;
5
5
  import com.datastax.driver.core.ColumnMetadata;
6
+ import com.datastax.driver.core.DataType;
6
7
  import com.datastax.driver.core.KeyspaceMetadata;
7
8
  import com.datastax.driver.core.PreparedStatement;
8
9
  import com.datastax.driver.core.Session;
9
10
  import com.datastax.driver.core.SocketOptions;
10
11
  import com.datastax.driver.core.TableMetadata;
12
+ import com.datastax.driver.core.querybuilder.BuiltStatement;
11
13
  import com.datastax.driver.core.querybuilder.Insert;
12
14
  import com.datastax.driver.core.querybuilder.QueryBuilder;
15
+ import com.datastax.driver.core.querybuilder.Update;
13
16
  import com.google.common.base.Optional;
14
17
  import com.google.common.collect.ImmutableList;
15
18
  import com.google.common.collect.ImmutableMap;
@@ -86,6 +89,10 @@ public class CassandraOutputPlugin
86
89
  @Config("request_timeout")
87
90
  @ConfigDefault("12000")
88
91
  public int getRequestTimeout();
92
+
93
+ @Config("counter_columnName")
94
+ @ConfigDefault("null")
95
+ public Optional<String> getCounterColumnName();
89
96
  }
90
97
 
91
98
  private final Logger logger = Exec.getLogger(CassandraOutputPlugin.class);
@@ -135,18 +142,42 @@ public class CassandraOutputPlugin
135
142
  throw new RuntimeException("table `" + task.getTable() + "` is not found");
136
143
  }
137
144
 
138
- Insert insert = QueryBuilder.insertInto(task.getKeyspace(), task.getTable());
139
- if (task.getIfNotExists()) {
140
- insert.ifNotExists();
145
+ List<ColumnMetadata> columns = tableMetadata.getColumns();
146
+ boolean isCounterTable = columns.stream().anyMatch(col -> col.getType().getName() == DataType.Name.COUNTER);
147
+
148
+ BuiltStatement query;
149
+ if (isCounterTable) {
150
+ Update update = QueryBuilder.update(task.getKeyspace(), task.getTable());
151
+ if (task.getTtl().isPresent()) {
152
+ update.using(QueryBuilder.ttl(task.getTtl().get()));
153
+ }
154
+ for (ColumnMetadata column : tableMetadata.getColumns()) {
155
+ if (column.getType().getName() == DataType.Name.COUNTER) {
156
+ update.with(QueryBuilder.incr(column.getName(), QueryBuilder.bindMarker(column.getName())));
157
+ }
158
+ else{
159
+ update.where(QueryBuilder.eq(column.getName(), QueryBuilder.bindMarker(column.getName())));
160
+ }
161
+ }
162
+ query = update;
141
163
  }
142
- if (task.getTtl().isPresent()) {
143
- insert.using(QueryBuilder.ttl(task.getTtl().get()));
164
+ else {
165
+ Insert insert = QueryBuilder.insertInto(task.getKeyspace(), task.getTable());
166
+ if (task.getIfNotExists()) {
167
+ insert.ifNotExists();
168
+ }
169
+ if (task.getTtl().isPresent()) {
170
+ insert.using(QueryBuilder.ttl(task.getTtl().get()));
171
+ }
172
+ for (ColumnMetadata column : tableMetadata.getColumns()) {
173
+ insert.value(column.getName(), QueryBuilder.bindMarker(column.getName()));
174
+ }
175
+ query = insert;
144
176
  }
145
177
 
146
178
  ImmutableMap.Builder<String, CassandraColumnSetter> columnSettersBuilder = ImmutableMap.builder();
147
179
  ImmutableList.Builder<String> uuidColumnsBuilder = ImmutableList.builder();
148
180
  for (ColumnMetadata column : tableMetadata.getColumns()) {
149
- insert.value(column.getName(), QueryBuilder.bindMarker(column.getName()));
150
181
  columnSettersBuilder.put(column.getName(), CassandraColumnSetterFactory.createColumnSetter(column, cluster));
151
182
  switch (column.getType().getName()) {
152
183
  case UUID:
@@ -159,9 +190,9 @@ public class CassandraOutputPlugin
159
190
  List<ColumnSetterVisitor> columnVisitors = Lists.transform(schema.getColumns(), (column) ->
160
191
  new ColumnSetterVisitor(pageReader, columnSetters.get(column.getName())));
161
192
 
162
- logger.info("Insert Query: {}", insert.getQueryString());
193
+ logger.info("Insert Query: {}", query.getQueryString());
163
194
 
164
- PreparedStatement prepared = session.prepare(insert);
195
+ PreparedStatement prepared = session.prepare(query);
165
196
  if (task.getIdempotent()) {
166
197
  prepared.setIdempotent(task.getIdempotent());
167
198
  }
@@ -19,6 +19,7 @@ public class CassandraColumnSetterFactory
19
19
  case BOOLEAN:
20
20
  return new BooleanColumnSetter(cassandraColumn);
21
21
  case BIGINT:
22
+ case COUNTER:
22
23
  return new BigintColumnSetter(cassandraColumn);
23
24
  case INT:
24
25
  return new IntColumnSetter(cassandraColumn);
@@ -3,6 +3,7 @@ package org.embulk.output.cassandra;
3
3
  import com.datastax.driver.core.Cluster;
4
4
  import com.datastax.driver.core.DataType;
5
5
  import com.datastax.driver.core.LocalDate;
6
+ import com.datastax.driver.core.ResultSet;
6
7
  import com.datastax.driver.core.Row;
7
8
  import com.datastax.driver.core.Session;
8
9
  import com.datastax.driver.core.TupleType;
@@ -26,6 +27,7 @@ import java.nio.file.Path;
26
27
  import java.time.ZoneId;
27
28
  import java.time.ZonedDateTime;
28
29
  import java.util.Arrays;
30
+ import java.util.Comparator;
29
31
  import java.util.Date;
30
32
  import java.util.List;
31
33
  import java.util.Map;
@@ -83,13 +85,16 @@ public class TestCassandraOutputPlugin
83
85
  String createTableBasic = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_basic.cql");
84
86
  String createTableUuid = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_uuid.cql");
85
87
  String createTableComplex = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_complex.cql");
88
+ String createTableCounter = EmbulkTests.readResource(RESOURCE_PATH + "create_table_test_counter.cql");
86
89
  session.execute(createKeyspace);
87
90
  session.execute(createTableBasic);
88
91
  session.execute(createTableUuid);
89
92
  session.execute(createTableComplex);
93
+ session.execute(createTableCounter);
90
94
  session.execute("TRUNCATE embulk_test.test_basic");
91
95
  session.execute("TRUNCATE embulk_test.test_uuid");
92
96
  session.execute("TRUNCATE embulk_test.test_complex");
97
+ session.execute("TRUNCATE embulk_test.test_counter");
93
98
  }
94
99
 
95
100
  @After
@@ -139,6 +144,43 @@ public class TestCassandraOutputPlugin
139
144
  assertEquals(createDate(2018, 7, 1, 10, 0, 2, 0), row3.getTimestamp("timestamp_item"));
140
145
  }
141
146
 
147
+ @Test
148
+ public void testCounter() throws IOException
149
+ {
150
+ Path input = getInputPath("test1.csv");
151
+ ConfigSource config = loadYamlResource("test_counter.yaml");
152
+ config.set("hosts", getCassandraHostAsList());
153
+
154
+ assertEquals(0, session.execute("SELECT * FROM embulk_test.test_counter").all().size());
155
+
156
+ TestingEmbulk.RunResult result = embulk.runOutput(config, input);
157
+ assertEquals(3, result.getOutputTaskReports().get(0).get(Long.class, "inserted_record_count").longValue());
158
+
159
+ List<Row> rows = session.execute("SELECT * FROM embulk_test.test_counter").all();
160
+ rows.sort(Comparator.comparing(row -> row.getString("id")));
161
+ Row row1 = rows.get(0);
162
+ Row row2 = rows.get(1);
163
+ Row row3 = rows.get(2);
164
+ assertEquals("A001", row1.getString("id"));
165
+ assertEquals(9, row1.getLong("int_item"));
166
+ assertEquals(1, row1.getInt("int32_item"));
167
+ assertEquals(2, row1.getShort("smallint_item"));
168
+ assertTrue(row1.getBool("boolean_item"));
169
+ assertEquals(createDate(2018, 7, 1, 10, 0, 0, 0), row1.getTimestamp("timestamp_item"));
170
+ assertEquals("A002", row2.getString("id"));
171
+ assertEquals(0, row2.getLong("int_item"));
172
+ assertEquals(0, row2.getInt("int32_item"));
173
+ assertEquals(4, row2.getShort("smallint_item"));
174
+ assertTrue(row2.getBool("boolean_item"));
175
+ assertEquals(createDate(2018, 7, 1, 10, 0, 1, 0), row2.getTimestamp("timestamp_item"));
176
+ assertEquals("A003", row3.getString("id"));
177
+ assertEquals(9, row3.getLong("int_item"));
178
+ assertEquals(0, row3.getInt("int32_item"));
179
+ assertEquals(8, row3.getShort("smallint_item"));
180
+ assertFalse(row3.getBool("boolean_item"));
181
+ assertEquals(createDate(2018, 7, 1, 10, 0, 2, 0), row3.getTimestamp("timestamp_item"));
182
+ }
183
+
142
184
  @Test
143
185
  public void testBasicWithTtl() throws IOException
144
186
  {
@@ -0,0 +1,11 @@
1
+ CREATE TABLE IF NOT EXISTS embulk_test.test_counter (
2
+ id text,
3
+ int_item counter,
4
+ int32_item int,
5
+ smallint_item smallint,
6
+ tinyint_item tinyint,
7
+ boolean_item boolean,
8
+ double_item double,
9
+ timestamp_item timestamp,
10
+ primary key (id, int32_item, smallint_item, tinyint_item, boolean_item, double_item, timestamp_item)
11
+ ) WITH compaction = { 'class' : 'SizeTieredCompactionStrategy' };
@@ -1,4 +1,4 @@
1
- id:string,int_item:long,int32_item:long,smallint_item:long,tinyint_item:long,boolean_item:boolean,varchar_item:string,timestamp_item:timestamp
2
- A001,9,4294967297,2,0,true,a,2018-07-01 10:00:00.000 UTC
3
- A002,0,0,4,0,true,b,2018-07-01 10:00:01.000 UTC
4
- A003,9,0,8,1,false,c,2018-07-01 10:00:02.000 UTC
1
+ id:string,int_item:long,int32_item:long,smallint_item:long,tinyint_item:long,boolean_item:boolean,varchar_item:string,timestamp_item:timestamp,double_item:double
2
+ A001,9,4294967297,2,0,true,a,2018-07-01 10:00:00.000 UTC,1.1
3
+ A002,0,0,4,0,true,b,2018-07-01 10:00:01.000 UTC,1.2
4
+ A003,9,0,8,1,false,c,2018-07-01 10:00:02.000 UTC,1.3
@@ -0,0 +1,3 @@
1
+ type: cassandra
2
+ keyspace: embulk_test
3
+ table: "test_counter"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-cassandra
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-02 00:00:00.000000000 Z
11
+ date: 2018-08-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -55,7 +55,7 @@ files:
55
55
  - classpath/asm-tree-5.0.3.jar
56
56
  - classpath/asm-util-5.0.3.jar
57
57
  - classpath/cassandra-driver-core-3.5.0.jar
58
- - classpath/embulk-output-cassandra-0.1.1.jar
58
+ - classpath/embulk-output-cassandra-0.2.0.jar
59
59
  - classpath/guava-19.0.jar
60
60
  - classpath/jffi-1.2.16-native.jar
61
61
  - classpath/jffi-1.2.16.jar
@@ -70,7 +70,9 @@ files:
70
70
  - classpath/slf4j-api-1.7.25.jar
71
71
  - config/checkstyle/checkstyle.xml
72
72
  - config/checkstyle/default.xml
73
+ - docker-compose.yml
73
74
  - embulk-output-cassandra.iml
75
+ - example.yml
74
76
  - gradle/wrapper/gradle-wrapper.jar
75
77
  - gradle/wrapper/gradle-wrapper.properties
76
78
  - gradlew
@@ -105,12 +107,14 @@ files:
105
107
  - src/test/resources/org/embulk/output/cassandra/create_keyspace.cql
106
108
  - src/test/resources/org/embulk/output/cassandra/create_table_test_basic.cql
107
109
  - src/test/resources/org/embulk/output/cassandra/create_table_test_complex.cql
110
+ - src/test/resources/org/embulk/output/cassandra/create_table_test_counter.cql
108
111
  - src/test/resources/org/embulk/output/cassandra/create_table_test_uuid.cql
109
112
  - src/test/resources/org/embulk/output/cassandra/test1.csv
110
113
  - src/test/resources/org/embulk/output/cassandra/test2.csv
111
114
  - src/test/resources/org/embulk/output/cassandra/test3.csv
112
115
  - src/test/resources/org/embulk/output/cassandra/test_basic.yaml
113
116
  - src/test/resources/org/embulk/output/cassandra/test_complex.yaml
117
+ - src/test/resources/org/embulk/output/cassandra/test_counter.yaml
114
118
  - src/test/resources/org/embulk/output/cassandra/test_uuid.yaml
115
119
  homepage: https://github.com/joker1007/embulk-output-cassandra
116
120
  licenses: