fluent-plugin-cassandra-driver 0.0.17 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 25ef90aa92ffd95954d6d08c03138a8f592754e0
4
- data.tar.gz: 2fb7c13faae6a94e60d9dc237af0100cd89ba692
3
+ metadata.gz: 59f881900aec4466eb669a9223ae1d59f2665a72
4
+ data.tar.gz: dc9e4d39436b007ea6dcbb988715f4579b1f94cc
5
5
  SHA512:
6
- metadata.gz: fbad4b4edb328675f0fcebcc6c2e56cb249fdffd3250b06ee48acfc6adbfc03976544538834caeb8a51d4e43acdb53faf0ef7255b6ed0defb2e5d88672f13fe0
7
- data.tar.gz: 8c059b565f715bd75179f899951a1cc36e245fe62a034327ca46a0097eec30be88337c778ae03c8a4eaf30cab7d55f3d630adc89095ef46417174683084c72b5
6
+ metadata.gz: 0d2aa721973077d178571756c4d3af3c26267b7876001d06f204284bc594da0f3fa7841f030a0b5268206453be1273a5b77190bd115f624fdff559737dac6cd9
7
+ data.tar.gz: e569006e18921befbf58713238f5fdb3f04d5ca065083edf4e0e134191e1b4af263a01b2caf0a69a9b69b1831f3bf74db645a78a6e49edbd288184549afbd7be
data/README.md CHANGED
@@ -10,17 +10,16 @@ and Cassandra 1.2 - 3.x
10
10
  via RubyGems
11
11
 
12
12
  fluent-gem install fluent-plugin-cassandra-driver
13
+ td-agent-gem install fluent-plugin-cassandra-driver
13
14
 
14
15
  # Quick Start
15
16
 
16
17
  ## Cassandra Configuration
17
- # create keyspace (via CQL)
18
- CREATE KEYSPACE \"metrics\" WITH strategy_class='org.apache.cassandra.locator.SimpleStrategy' AND strategy_options:replication_factor=1;
18
+ # Create keyspace (via CQL)
19
+ CREATE KEYSPACE metrics WITH strategy_class='org.apache.cassandra.locator.SimpleStrategy' AND strategy_options:replication_factor=1;
19
20
 
20
- # create table (column family)
21
- CREATE TABLE logs (id varchar, ts bigint, payload text, PRIMARY KEY (id, ts)) WITH CLUSTERING ORDER BY (ts DESC);
22
-
23
- # NOTE: schema definition should match that specified in the Fluentd.conf configuration file (see below)
21
+ # Create table (column family)
22
+ CREATE TABLE logs (id varchar, timestamp timestamp, json text, PRIMARY KEY (id, timestamp)) WITH CLUSTERING ORDER BY (timestamp DESC);
24
23
 
25
24
  ## Fluentd.conf Configuration
26
25
  <match cassandra.**>
@@ -28,12 +27,24 @@ via RubyGems
28
27
  hosts 127.0.0.1 # comma delimited string of hosts
29
28
  keyspace metrics # cassandra keyspace
30
29
  column_family logs # cassandra column family
31
- ttl 60 # cassandra ttl *optional => default is 0*
32
- schema # cassandra column family schema *hash where keys => column names and values => data types* for example: {:id => :string}
33
- data_keys # comma delimited string of the fluentd hash's keys
34
- pop_data_keys # keep or pop key/values from the fluentd hash when storing it as json
30
+ ttl 60 # cassandra ttl (optional, default is 0)
31
+ schema # cassandra column family schema (see example below)
32
+ pop_data_keys # keep or pop key/values from the fluentd hash when storing it as json (optional, default is true)
33
+ json_column json # column in which to store all remaining data from fluentd (optional)
35
34
  </match>
36
35
 
36
+ ### Schema example
37
+ # hash of hashes :column_family_key => {:fluentd_record_key => :type_from_list}
38
+ '{:id => {:id => nil}, :timestamp => {:timestamp => :time}}'
39
+
40
+ Available mappings:
41
+ * :integer
42
+ * :string
43
+ * :timeuuid
44
+ * :time
45
+
46
+ All nil types will be recognized as string.
47
+
37
48
  # Tests
38
49
 
39
50
  TODO
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.17
1
+ 0.0.18
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: fluent-plugin-cassandra-driver 0.0.17 ruby lib
5
+ # stub: fluent-plugin-cassandra-driver 0.0.18 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "fluent-plugin-cassandra-driver".freeze
9
- s.version = "0.0.17"
9
+ s.version = "0.0.18"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib".freeze]
13
13
  s.authors = ["Yaroslav Lukyanov".freeze]
14
- s.date = "2016-11-10"
14
+ s.date = "2016-11-12"
15
15
  s.description = "Fluent output plugin for Cassandra via Datastax Ruby Driver for Apache Cassandra".freeze
16
16
  s.email = "c_sharp@mail.ru".freeze
17
17
  s.extra_rdoc_files = [
@@ -8,15 +8,17 @@ module Fluent
8
8
 
9
9
  config_param :hosts, :string
10
10
  config_param :keyspace, :string
11
- config_param :columnfamily, :string
11
+ config_param :column_family, :string
12
12
  config_param :ttl, :integer, :default => 0
13
13
  config_param :schema, :string
14
- config_param :data_keys, :string
15
14
 
16
15
  # remove keys from the fluentd json event as they're processed
17
16
  # for individual columns?
18
17
  config_param :pop_data_keys, :bool, :default => true
19
18
 
19
+ # column to store all data keys as json
20
+ config_param :json_column, :string
21
+
20
22
  def session
21
23
  @session ||= get_session(self.hosts, self.keyspace)
22
24
  end
@@ -27,16 +29,16 @@ module Fluent
27
29
  # perform validations
28
30
  raise ConfigError, "'Hosts' is required by Cassandra output (ex: localhost, 127.0.0.1, ec2-54-242-141-252.compute-1.amazonaws.com" if self.hosts.nil?
29
31
  raise ConfigError, "'Keyspace' is required by Cassandra output (ex: FluentdLoggers)" if self.keyspace.nil?
30
- raise ConfigError, "'ColumnFamily' is required by Cassandra output (ex: events)" if self.columnfamily.nil?
31
- raise ConfigError, "'Schema' is required by Cassandra output (ex: id,ts,payload)" if self.schema.nil?
32
- raise ConfigError, "'Schema' must contain at least two column names (ex: id,ts,payload)" if self.schema.split(',').count < 2
33
- raise ConfigError, "'DataKeys' is required by Cassandra output (ex: tag,created_at,data)" if self.data_keys.nil?
32
+ raise ConfigError, "'ColumnFamily' is required by Cassandra output (ex: events)" if self.column_family.nil?
33
+ raise ConfigError, "'Schema' is required by Cassandra output" if self.schema.nil?
34
34
 
35
35
  # convert schema from string to hash
36
36
  # NOTE: ok to use eval b/c this isn't a user
37
37
  # supplied string
38
38
  self.schema = eval(self.schema)
39
39
 
40
+ raise ConfigError, "'Schema' must contain at least one column" if self.schema.keys.length < 1
41
+
40
42
  # convert data keys from string to array
41
43
  self.data_keys = self.data_keys.split(',')
42
44
 
@@ -62,12 +64,12 @@ module Fluent
62
64
  chunk.msgpack_each { |record|
63
65
  $log.debug "Sending a new record to Cassandra: #{record.to_json}"
64
66
 
65
- values = build_insert_values(self.schema.keys, self.data_keys, record, self.pop_data_keys)
67
+ values = build_insert_values(record)
66
68
 
67
- cql = "INSERT INTO #{self.columnfamily} (#{self.schema.keys.join(',')}) VALUES (#{values.length.times.map { '?' }.join(',')}) USING TTL #{self.ttl}"
69
+ cql = "INSERT INTO #{self.column_family} (#{values.keys.join(',')}) VALUES (#{values.keys.map { |key| ":#{key}" }.join(',')}) USING TTL #{self.ttl}"
68
70
 
69
71
  $log.debug "CQL query: #{cql}"
70
- $log.debug "Running with values: #{values.to_json}"
72
+ $log.debug "Running with arguments: #{values.to_json}"
71
73
 
72
74
  begin
73
75
  @session.execute(cql, arguments: values)
@@ -87,44 +89,31 @@ module Fluent
87
89
  cluster.connect(keyspace)
88
90
  end
89
91
 
90
- def build_insert_values(schema_keys, data_keys, record, pop_data_keys)
91
- values = data_keys.map.with_index do |key, index|
92
- value = record[key]
92
+ def build_insert_values(record)
93
+ values = self.schema.map.with_index { |column_family_key, mapping|
94
+ record_key, type = mapping.first
95
+ value = record[record_key]
93
96
 
94
- case self.schema[schema_keys[index]]
95
- when :string
96
- value = value.to_s
97
+ case type
97
98
  when :integer
98
99
  value = value.to_i
99
100
  when :timeuuid
100
101
  value = ::Cassandra::Uuid::Generator.new.at(Time.parse(value))
101
102
  when :time
102
103
  value = Time.parse(value)
104
+ when :string
103
105
  else
106
+ value = value.to_s
104
107
  end
105
108
 
106
- value
107
- end
108
-
109
- data_keys.each { |key| record.delete(key) } if pop_data_keys
110
-
111
- # if we have one more schema key than data keys,
112
- # we can then infer that we should store the event
113
- # as a string representation of the corresponding
114
- # json object in the last schema column
115
- if schema_keys.count == data_keys.count + 1
116
- values << if record.count > 0
117
- "'#{record.to_json}'"
118
- else
119
- # by this point, the extra schema column has been
120
- # added to insert cql statement, so we must put
121
- # something in it
122
- # TODO: detect this scenario earlier and don't
123
- # specify the column name/value at all
124
- # when constructing the cql stmt
125
- "''"
126
- end
127
- end
109
+ [column_family_key, value]
110
+ }.to_h
111
+
112
+ self.schema.each { |mapping| record.delete(mapping.first.first) } if self.pop_data_keys
113
+
114
+ # if any data remains in the record and a json column is configured
115
+ # then store all remaining data in that column
116
+ values[self.json_column] = record.to_json if self.json_column and record.length > 0
128
117
 
129
118
  values
130
119
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-cassandra-driver
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.17
4
+ version: 0.0.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yaroslav Lukyanov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-10 00:00:00.000000000 Z
11
+ date: 2016-11-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd