fluent-plugin-output-solr 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ea700ae417e3291c67e294e9e64af1bd87315385
4
- data.tar.gz: 42a95923a0023f2be86a63e247513607bd4fa259
3
+ metadata.gz: b2190d11bd62fbf4efbd390ae997e2ee4d962051
4
+ data.tar.gz: fd86da124f5ed9b94fdaffdb0c55e076e888806d
5
5
  SHA512:
6
- metadata.gz: 148ec34675f9f1c1ba835848f88227786afba04f47ce709fee423c28f4dbed5a630f587cb2dd99c73f52e9496449bc72e2d4f2b7374270b697b3195c2a06f04b
7
- data.tar.gz: 3ceb8cdfdec6e4105f0783414efaa7d217309d8f1e816839ea115779b48c72e0d9874651a6dde31ab227ac9d7aa4305ce9a908e2329791b3ae15ef165bb0bb27
6
+ metadata.gz: cc5031b355be897054a0577a9f2d0623e13274002ee8534ed0d20de9c4f7c8f851d8cf52017ce3c3d4d2fcf8d5f113a5090cb70ded850fa71f839ca293913c3e
7
+ data.tar.gz: 21da9bb4a7a7954ec3363e7acae5a11fe6fbf5ae0789e1bf48207b66be698766c5a6189c86288020f55ff20ef26be04da94be3f8253c44935935b5c9576b1594
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Fluent::Plugin::OutSolr
2
2
 
3
- This is a [Fluentd](http://fluentd.org/) plugin for send data to [Apache Solr](http://lucene.apache.org/solr/).
3
+ This is a [Fluentd](http://fluentd.org/) output plugin for sending data to [Apache Solr](http://lucene.apache.org/solr/). It supports [SolrCloud](https://cwiki.apache.org/confluence/display/solr/SolrCloud) as well as Standalone Solr.
4
4
 
5
5
  ## Installation
6
6
 
@@ -18,17 +18,83 @@ Or install it yourself as:
18
18
 
19
19
  $ gem install fluent-plugin-output-solr
20
20
 
21
- ## Examples
21
+ ## Config parameters
22
22
 
23
- ### Sent to standalone Solr
23
+ ### url
24
+
25
+ The Solr server url (for example http://localhost:8983/solr/collection1).
26
+
27
+ ```
28
+ url http://localhost:8983/solr/collection1
29
+ ```
30
+
31
+ ### zk_host
32
+
33
+ The ZooKeeper connection string that SolrCloud refers to (for example localhost:2181/solr).
34
+
35
+ ```
36
+ zk_host localhost:2181/solr
37
+ ```
38
+
39
+ ### collection
40
+
41
+ The SolrCloud collection name (default collection1).
42
+
43
+ ```
44
+ collection collection1
45
+ ```
46
+
47
+ ### defined_fields
48
+
49
+ The fields defined in the Solr schema.xml. If omitted, the plugin retrieves the field list via the Solr Schema API.
50
+
51
+ ```
52
+ defined_fields ["id", "title"]
53
+ ```
54
+
55
+ ### ignore_undefined_fields
56
+
57
+ Drop record fields that are not defined in the Solr schema.xml (default false).
58
+
59
+ ```
60
+ ignore_undefined_fields false
61
+ ```
62
+
63
+ ### unique_key_field
64
+
65
+ The name of the unique key field in the Solr schema.xml. If omitted, the plugin retrieves the unique key via the Solr Schema API.
66
+
67
+ ```
68
+ unique_key_field id
69
+ ```
70
+
71
+ ### timestamp_field
72
+
73
+ The name of the field in the Solr schema.xml that stores the event timestamp (default event_timestamp).
74
+
75
+ ```
76
+ timestamp_field event_timestamp
77
+ ```
78
+
79
+ ### flush_size
80
+
81
+ The number of events to queue up before writing to Solr (default 100).
82
+
83
+ ```
84
+ flush_size 100
85
+ ```
86
+
87
+
88
+ ## Plugin setup examples
89
+
90
+ ### Send to standalone Solr using data-driven schemaless mode.
24
91
  ```
25
92
  <match something.logs>
26
93
  @type solr
27
94
 
95
+ # The Solr server url (for example http://localhost:8983/solr/collection1).
28
96
  url http://localhost:8983/solr/collection1
29
97
 
30
- batch_size 100
31
-
32
98
  buffer_type memory
33
99
  buffer_queue_limit 64m
34
100
  buffer_chunk_limit 8m
@@ -36,15 +102,16 @@ Or install it yourself as:
36
102
  </match>
37
103
  ```
38
104
 
39
- ### Sent to SolrCloud
105
+ ### Send to SolrCloud using data-driven schemaless mode.
40
106
  ```
41
107
  <match something.logs>
42
108
  @type solr
43
109
 
110
+ # The ZooKeeper connection string that SolrCloud refers to (for example localhost:2181/solr).
44
111
  zk_host localhost:2181/solr
45
- collection collection1
46
112
 
47
- batch_size 100
113
+ # The SolrCloud collection name (default collection1).
114
+ collection collection1
48
115
 
49
116
  buffer_type memory
50
117
  buffer_queue_limit 64m
@@ -53,6 +120,78 @@ Or install it yourself as:
53
120
  </match>
54
121
  ```
55
122
 
123
+ ## Solr setup examples
124
+
125
+ ### How to setup Standalone Solr using data-driven schemaless mode.
126
+
127
+ 1.Download and install Solr
128
+
129
+ ```sh
130
+ $ mkdir $HOME/solr
131
+ $ cd $HOME/solr
132
+ $ wget https://archive.apache.org/dist/lucene/solr/5.4.0/solr-5.4.0.tgz
133
+ $ tar zxvf solr-5.4.0.tgz
134
+ $ cd solr-5.4.0
135
+ ```
136
+
137
+ 2.Start standalone Solr
138
+
139
+ ```sh
140
+ $ ./bin/solr start -p 8983 -s server/solr
141
+ ```
142
+
143
+ 3.Create core
144
+
145
+ ```sh
146
+ $ ./bin/solr create -c collection1 -d server/solr/configsets/data_driven_schema_configs -n collection1_configs
147
+ ```
148
+
149
+ ### How to setup SolrCloud using data-driven schemaless mode (shards=1 and replicationfactor=2).
150
+
151
+ 1.Download and install ZooKeeper
152
+
153
+ ```sh
154
+ $ mkdir $HOME/zookeeper
155
+ $ cd $HOME/zookeeper
156
+ $ wget https://archive.apache.org/dist/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz
157
+ $ tar zxvf zookeeper-3.4.6.tar.gz
158
+ $ cd zookeeper-3.4.6
159
+ $ cp -p ./conf/zoo_sample.cfg ./conf/zoo.cfg
160
+ ```
161
+
162
+ 2.Start standalone ZooKeeper
163
+
164
+ ```sh
165
+ $ ./bin/zkServer.sh start
166
+ ```
167
+
168
+ 3.Download and install Solr
169
+
170
+ ```sh
171
+ $ mkdir $HOME/solr
172
+ $ cd $HOME/solr
173
+ $ wget https://archive.apache.org/dist/lucene/solr/5.4.0/solr-5.4.0.tgz
174
+ $ tar zxvf solr-5.4.0.tgz
175
+ $ cd solr-5.4.0
176
+ $ ./server/scripts/cloud-scripts/zkcli.sh -zkhost localhost:2181 -cmd clear /solr
177
+ $ ./server/scripts/cloud-scripts/zkcli.sh -zkhost localhost:2181 -cmd makepath /solr
178
+ $ cp -pr server/solr server/solr1
179
+ $ cp -pr server/solr server/solr2
180
+ ```
181
+
182
+ 4.Start SolrCloud
183
+
184
+ ```sh
185
+ $ ./bin/solr start -p 8983 -z localhost:2181/solr -s server/solr1
186
+ $ ./bin/solr start -p 8985 -z localhost:2181/solr -s server/solr2
187
+ ```
188
+
189
+ 5.Create collection
190
+
191
+ ```sh
192
+ $ ./bin/solr create -c collection1 -d server/solr1/configsets/data_driven_schema_configs -n collection1_configs -shards 1 -replicationFactor 2
193
+ ```
194
+
56
195
  ## Development
57
196
 
58
197
  After checking out the repo, run `bundle install` to install dependencies. Then, run `rake test` to run the tests.
@@ -4,12 +4,12 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "fluent-plugin-output-solr"
7
- spec.version = "0.1.1"
7
+ spec.version = "0.2.0"
8
8
  spec.authors = ["Minoru Osuka"]
9
9
  spec.email = ["minoru.osuka@gmail.com"]
10
10
 
11
- spec.summary = %q{Solr output plugin for Fluent event collector}
12
- spec.description = spec.summary
11
+ spec.summary = "Fluent output plugin for sending data to Apache Solr."
12
+ spec.description = "Fluent output plugin for sending data to Apache Solr. It support SolrCloud not only Standalone Solr."
13
13
  spec.homepage = "https://github.com/mosuka/fluent-plugin-output-solr"
14
14
 
15
15
  spec.license = "Apache-2.0"
@@ -7,19 +7,40 @@ module Fluent
7
7
  class SolrOutput < BufferedOutput
8
8
  Fluent::Plugin.register_output('solr', self)
9
9
 
10
+ DEFAULT_COLLECTION = 'collection1'
11
+ DEFAULT_IGNORE_UNDEFINED_FIELDS = false
12
+ DEFAULT_TIMESTAMP_FIELD = 'event_timestamp'
13
+ DEFAULT_FLUSH_SIZE = 100
14
+
15
+ MODE_STANDALONE = 'Standalone'
16
+ MODE_SOLRCLOUD = 'SolrCloud'
17
+
18
+ include Fluent::SetTagKeyMixin
19
+ config_set_default :include_tag_key, false
20
+
21
+ include Fluent::SetTimeKeyMixin
22
+ config_set_default :include_time_key, false
23
+
10
24
  config_param :url, :string, :default => nil,
11
- :desc => 'The Solr server url (for example http://localhost:8983/solr/collection1).'
25
+ :desc => 'The Solr server url (for example http://localhost:8983/solr/collection1).'
12
26
 
13
27
  config_param :zk_host, :string, :default => nil,
14
- :desc => 'The ZooKeeper connection string that SolrCloud refers to (for example localhost:2181/solr).'
15
- config_param :collection, :string, :default => 'collection1',
16
- :desc => 'The SolrCloud collection name.'
28
+ :desc => 'The ZooKeeper connection string that SolrCloud refers to (for example localhost:2181/solr).'
29
+ config_param :collection, :string, :default => DEFAULT_COLLECTION,
30
+ :desc => 'The SolrCloud collection name (default collection1).'
17
31
 
18
- config_param :batch_size, :integer, :default => 100,
19
- :desc => 'The batch size used in update.'
32
+ config_param :defined_fields, :array, :default => nil,
33
+ :desc => 'The defined fields in the Solr schema.xml. If omitted, it will get fields via Solr Schema API.'
34
+ config_param :ignore_undefined_fields, :bool, :default => DEFAULT_IGNORE_UNDEFINED_FIELDS,
35
+ :desc => 'Ignore undefined fields in the Solr schema.xml.'
20
36
 
21
- MODE_STANDALONE = 'Standalone'
22
- MODE_SOLRCLOUD = 'SolrCloud'
37
+ config_param :unique_key_field, :string, :default => nil,
38
+ :desc => 'A field name of unique key in the Solr schema.xml. If omitted, it will get unique key via Solr Schema API.'
39
+ config_param :timestamp_field, :string, :default => DEFAULT_TIMESTAMP_FIELD,
40
+ :desc => 'A field name of event timestamp in the Solr schema.xml (default event_timestamp).'
41
+
42
+ config_param :flush_size, :integer, :default => DEFAULT_FLUSH_SIZE,
43
+ :desc => 'A number of events to queue up before writing to Solr (default 100).'
23
44
 
24
45
  def initialize
25
46
  super
@@ -31,9 +52,15 @@ module Fluent
31
52
  @url = conf['url']
32
53
 
33
54
  @zk_host = conf['zk_host']
34
- @collection = conf['collection']
55
+ @collection = conf.has_key?('collection') ? conf['collection'] : DEFAULT_COLLECTION
56
+
57
+ @defined_fields = conf['defined_fields']
58
+ @ignore_undefined_field = conf.has_key?('ignore_undefined_field') ? conf['ignore_undefined_field'] : DEFAULT_IGNORE_UNDEFINED_FIELDS
35
59
 
36
- @batch_size = conf['batch_size'].to_i
60
+ @unique_key_field = conf['unique_key_field']
61
+ @timestamp_field = conf.has_key?('timestamp_field') ? conf['timestamp_field'] : DEFAULT_TIMESTAMP_FIELD
62
+
63
+ @flush_size = conf.has_key?('flush_size') ? conf['flush_size'].to_i : DEFAULT_FLUSH_SIZE
37
64
  end
38
65
 
39
66
  def start
@@ -73,19 +100,33 @@ module Fluent
73
100
  def write(chunk)
74
101
  documents = []
75
102
 
103
+ @fields = @defined_fields.nil? ? get_fields : @defined_fields
104
+ @unique_key = @unique_key_field.nil? ? get_unique_key : @unique_key_field
105
+
76
106
  chunk.msgpack_each do |tag, time, record|
77
- unless record.has_key?('id') then
78
- record.merge!({'id' => SecureRandom.uuid})
107
+
108
+ unless record.has_key?(@unique_key) then
109
+ record.merge!({@unique_key => SecureRandom.uuid})
110
+ end
111
+
112
+ record.merge!({@timestamp_field => Time.at(time).utc.strftime('%FT%TZ')})
113
+
114
+ if @ignore_undefined_fields then
115
+ record.each_key do |key|
116
+ unless @fields.include?(key) then
117
+ record.delete(key)
118
+ end
119
+ end
79
120
  end
80
121
 
81
122
  documents << record
82
-
83
- if documents.count >= @batch_size
123
+
124
+ if documents.count >= @flush_size
84
125
  update documents
85
126
  documents.clear
86
127
  end
87
128
  end
88
-
129
+
89
130
  update documents unless documents.empty?
90
131
  end
91
132
 
@@ -97,6 +138,47 @@ module Fluent
97
138
  @solr.add documents, collection: @collection, :params => {:commit => true}
98
139
  log.info "Added %d document(s) to Solr" % documents.count
99
140
  end
141
+ rescue Exception => e
142
+ log.warn("An error occurred while indexing: #{e.message}")
143
+ end
144
+
145
+ def get_unique_key
146
+ response = nil
147
+
148
+ if @mode == MODE_STANDALONE then
149
+ response = @solr.get 'schema/uniquekey'
150
+ elsif @mode == MODE_SOLRCLOUD then
151
+ response = @solr.get 'schema/uniquekey', collection: @collection
152
+ end
153
+
154
+ unique_key = response['uniqueKey']
155
+ log.info ("Unique key: #{unique_key}")
156
+
157
+ return unique_key
158
+
159
+ rescue Exception => e
160
+ log.warn("An error occurred while indexing: #{e.message}")
161
+ end
162
+
163
+ def get_fields
164
+ response = nil
165
+
166
+ if @mode == MODE_STANDALONE then
167
+ response = @solr.get 'schema/fields'
168
+ elsif @mode == MODE_SOLRCLOUD then
169
+ response = @solr.get 'schema/fields', collection: @collection
170
+ end
171
+
172
+ fields = []
173
+ response['fields'].each do |field|
174
+ fields.push(field['name'])
175
+ end
176
+ log.info ("Fields: #{fields}")
177
+
178
+ return fields
179
+
180
+ rescue Exception => e
181
+ log.warn("An error occurred while indexing: #{e.message}")
100
182
  end
101
183
  end
102
184
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-output-solr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minoru Osuka
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-01-07 00:00:00.000000000 Z
11
+ date: 2016-01-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -150,7 +150,8 @@ dependencies:
150
150
  - - ~>
151
151
  - !ruby/object:Gem::Version
152
152
  version: 1.1.8
153
- description: Solr output plugin for Fluent event collector
153
+ description: Fluent output plugin for sending data to Apache Solr. It support SolrCloud
154
+ not only Standalone Solr.
154
155
  email:
155
156
  - minoru.osuka@gmail.com
156
157
  executables: []
@@ -188,5 +189,5 @@ rubyforge_project:
188
189
  rubygems_version: 2.0.14
189
190
  signing_key:
190
191
  specification_version: 4
191
- summary: Solr output plugin for Fluent event collector
192
+ summary: Fluent output plugin for sending data to Apache Solr.
192
193
  test_files: []