fluent-plugin-cassandra-driver 0.0.17 → 0.0.18

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 25ef90aa92ffd95954d6d08c03138a8f592754e0
4
- data.tar.gz: 2fb7c13faae6a94e60d9dc237af0100cd89ba692
3
+ metadata.gz: 59f881900aec4466eb669a9223ae1d59f2665a72
4
+ data.tar.gz: dc9e4d39436b007ea6dcbb988715f4579b1f94cc
5
5
  SHA512:
6
- metadata.gz: fbad4b4edb328675f0fcebcc6c2e56cb249fdffd3250b06ee48acfc6adbfc03976544538834caeb8a51d4e43acdb53faf0ef7255b6ed0defb2e5d88672f13fe0
7
- data.tar.gz: 8c059b565f715bd75179f899951a1cc36e245fe62a034327ca46a0097eec30be88337c778ae03c8a4eaf30cab7d55f3d630adc89095ef46417174683084c72b5
6
+ metadata.gz: 0d2aa721973077d178571756c4d3af3c26267b7876001d06f204284bc594da0f3fa7841f030a0b5268206453be1273a5b77190bd115f624fdff559737dac6cd9
7
+ data.tar.gz: e569006e18921befbf58713238f5fdb3f04d5ca065083edf4e0e134191e1b4af263a01b2caf0a69a9b69b1831f3bf74db645a78a6e49edbd288184549afbd7be
data/README.md CHANGED
@@ -10,17 +10,16 @@ and Cassandra 1.2 - 3.x
10
10
  via RubyGems
11
11
 
12
12
  fluent-gem install fluent-plugin-cassandra-driver
13
+ td-agent-gem install fluent-plugin-cassandra-driver
13
14
 
14
15
  # Quick Start
15
16
 
16
17
  ## Cassandra Configuration
17
- # create keyspace (via CQL)
18
- CREATE KEYSPACE \"metrics\" WITH strategy_class='org.apache.cassandra.locator.SimpleStrategy' AND strategy_options:replication_factor=1;
18
+ # Create keyspace (via CQL)
19
+ CREATE KEYSPACE metrics WITH strategy_class='org.apache.cassandra.locator.SimpleStrategy' AND strategy_options:replication_factor=1;
19
20
 
20
- # create table (column family)
21
- CREATE TABLE logs (id varchar, ts bigint, payload text, PRIMARY KEY (id, ts)) WITH CLUSTERING ORDER BY (ts DESC);
22
-
23
- # NOTE: schema definition should match that specified in the Fluentd.conf configuration file (see below)
21
+ # Create table (column family)
22
+ CREATE TABLE logs (id varchar, timestamp timestamp, json text, PRIMARY KEY (id, timestamp)) WITH CLUSTERING ORDER BY (timestamp DESC);
24
23
 
25
24
  ## Fluentd.conf Configuration
26
25
  <match cassandra.**>
@@ -28,12 +27,24 @@ via RubyGems
28
27
  hosts 127.0.0.1 # comma delimited string of hosts
29
28
  keyspace metrics # cassandra keyspace
30
29
  columnfamily logs # cassandra column family
31
- ttl 60 # cassandra ttl *optional => default is 0*
32
- schema # cassandra column family schema *hash where keys => column names and values => data types* for example: {:id => :string}
33
- data_keys # comma delimited string of the fluentd hash's keys
34
- pop_data_keys # keep or pop key/values from the fluentd hash when storing it as json
30
+ ttl 60 # cassandra ttl (optional, default is 0)
31
+ schema # cassandra column family schema (see example below)
32
+ pop_data_keys # keep or pop key/values from the fluentd hash when storing it as json (optional, default is true)
33
+ json_column json # column in which to store all remaining data from fluentd (optional)
35
34
  </match>
36
35
 
36
+ ### Schema example
37
+ # hash of hashes :column_family_key => {:fluentd_record_key => :type_from_list}
38
+ '{:id => {:id => nil}, :timestamp => {:timestamp => :time}}'
39
+
40
+ Available mappings:
41
+ * :integer
42
+ * :string
43
+ * :timeuuid
44
+ * :time
45
+
46
+ All nil types will be recognized as string.
47
+
37
48
  # Tests
38
49
 
39
50
  TODO
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.17
1
+ 0.0.18
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: fluent-plugin-cassandra-driver 0.0.17 ruby lib
5
+ # stub: fluent-plugin-cassandra-driver 0.0.18 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "fluent-plugin-cassandra-driver".freeze
9
- s.version = "0.0.17"
9
+ s.version = "0.0.18"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib".freeze]
13
13
  s.authors = ["Yaroslav Lukyanov".freeze]
14
- s.date = "2016-11-10"
14
+ s.date = "2016-11-12"
15
15
  s.description = "Fluent output plugin for Cassandra via Datastax Ruby Driver for Apache Cassandra".freeze
16
16
  s.email = "c_sharp@mail.ru".freeze
17
17
  s.extra_rdoc_files = [
@@ -8,15 +8,17 @@ module Fluent
8
8
 
9
9
  config_param :hosts, :string
10
10
  config_param :keyspace, :string
11
- config_param :columnfamily, :string
11
+ config_param :column_family, :string
12
12
  config_param :ttl, :integer, :default => 0
13
13
  config_param :schema, :string
14
- config_param :data_keys, :string
15
14
 
16
15
  # remove keys from the fluentd json event as they're processed
17
16
  # for individual columns?
18
17
  config_param :pop_data_keys, :bool, :default => true
19
18
 
19
+ # column to store all data keys as json
20
+ config_param :json_column, :string
21
+
20
22
  def session
21
23
  @session ||= get_session(self.hosts, self.keyspace)
22
24
  end
@@ -27,16 +29,16 @@ module Fluent
27
29
  # perform validations
28
30
  raise ConfigError, "'Hosts' is required by Cassandra output (ex: localhost, 127.0.0.1, ec2-54-242-141-252.compute-1.amazonaws.com" if self.hosts.nil?
29
31
  raise ConfigError, "'Keyspace' is required by Cassandra output (ex: FluentdLoggers)" if self.keyspace.nil?
30
- raise ConfigError, "'ColumnFamily' is required by Cassandra output (ex: events)" if self.columnfamily.nil?
31
- raise ConfigError, "'Schema' is required by Cassandra output (ex: id,ts,payload)" if self.schema.nil?
32
- raise ConfigError, "'Schema' must contain at least two column names (ex: id,ts,payload)" if self.schema.split(',').count < 2
33
- raise ConfigError, "'DataKeys' is required by Cassandra output (ex: tag,created_at,data)" if self.data_keys.nil?
32
+ raise ConfigError, "'ColumnFamily' is required by Cassandra output (ex: events)" if self.column_family.nil?
33
+ raise ConfigError, "'Schema' is required by Cassandra output" if self.schema.nil?
34
34
 
35
35
  # convert schema from string to hash
36
36
  # NOTE: ok to use eval b/c this isn't a user
37
37
  # supplied string
38
38
  self.schema = eval(self.schema)
39
39
 
40
+ raise ConfigError, "'Schema' must contain at least one column" if self.schema.keys.length < 1
41
+
40
42
  # convert data keys from string to array
41
43
  self.data_keys = self.data_keys.split(',')
42
44
 
@@ -62,12 +64,12 @@ module Fluent
62
64
  chunk.msgpack_each { |record|
63
65
  $log.debug "Sending a new record to Cassandra: #{record.to_json}"
64
66
 
65
- values = build_insert_values(self.schema.keys, self.data_keys, record, self.pop_data_keys)
67
+ values = build_insert_values(record)
66
68
 
67
- cql = "INSERT INTO #{self.columnfamily} (#{self.schema.keys.join(',')}) VALUES (#{values.length.times.map { '?' }.join(',')}) USING TTL #{self.ttl}"
69
+ cql = "INSERT INTO #{self.column_family} (#{values.keys.join(',')}) VALUES (#{values.keys.map { |key| ":#{key}" }.join(',')}) USING TTL #{self.ttl}"
68
70
 
69
71
  $log.debug "CQL query: #{cql}"
70
- $log.debug "Running with values: #{values.to_json}"
72
+ $log.debug "Running with arguments: #{values.to_json}"
71
73
 
72
74
  begin
73
75
  @session.execute(cql, arguments: values)
@@ -87,44 +89,31 @@ module Fluent
87
89
  cluster.connect(keyspace)
88
90
  end
89
91
 
90
- def build_insert_values(schema_keys, data_keys, record, pop_data_keys)
91
- values = data_keys.map.with_index do |key, index|
92
- value = record[key]
92
+ def build_insert_values(record)
93
+ values = self.schema.map.with_index { |column_family_key, mapping|
94
+ record_key, type = mapping.first
95
+ value = record[record_key]
93
96
 
94
- case self.schema[schema_keys[index]]
95
- when :string
96
- value = value.to_s
97
+ case type
97
98
  when :integer
98
99
  value = value.to_i
99
100
  when :timeuuid
100
101
  value = ::Cassandra::Uuid::Generator.new.at(Time.parse(value))
101
102
  when :time
102
103
  value = Time.parse(value)
104
+ when :string
103
105
  else
106
+ value = value.to_s
104
107
  end
105
108
 
106
- value
107
- end
108
-
109
- data_keys.each { |key| record.delete(key) } if pop_data_keys
110
-
111
- # if we have one more schema key than data keys,
112
- # we can then infer that we should store the event
113
- # as a string representation of the corresponding
114
- # json object in the last schema column
115
- if schema_keys.count == data_keys.count + 1
116
- values << if record.count > 0
117
- "'#{record.to_json}'"
118
- else
119
- # by this point, the extra schema column has been
120
- # added to insert cql statement, so we must put
121
- # something in it
122
- # TODO: detect this scenario earlier and don't
123
- # specify the column name/value at all
124
- # when constructing the cql stmt
125
- "''"
126
- end
127
- end
109
+ [column_family_key, value]
110
+ }.to_h
111
+
112
+ self.schema.each { |mapping| record.delete(mapping.first.first) } if self.pop_data_keys
113
+
114
+ # if any data remains in the record and a json column is configured,
115
+ # then store all remaining data in that column
116
+ values[self.json_column] = record.to_json if self.json_column and record.length > 0
128
117
 
129
118
  values
130
119
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-cassandra-driver
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.17
4
+ version: 0.0.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yaroslav Lukyanov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-10 00:00:00.000000000 Z
11
+ date: 2016-11-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd