fluent-plugin-output-solr 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +147 -8
- data/fluent-plugin-output-solr.gemspec +3 -3
- data/lib/fluent/plugin/out_solr.rb +97 -15
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b2190d11bd62fbf4efbd390ae997e2ee4d962051
|
4
|
+
data.tar.gz: fd86da124f5ed9b94fdaffdb0c55e076e888806d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc5031b355be897054a0577a9f2d0623e13274002ee8534ed0d20de9c4f7c8f851d8cf52017ce3c3d4d2fcf8d5f113a5090cb70ded850fa71f839ca293913c3e
|
7
|
+
data.tar.gz: 21da9bb4a7a7954ec3363e7acae5a11fe6fbf5ae0789e1bf48207b66be698766c5a6189c86288020f55ff20ef26be04da94be3f8253c44935935b5c9576b1594
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Fluent::Plugin::OutSolr
|
2
2
|
|
3
|
-
This is a [Fluentd](http://fluentd.org/) plugin for send data to [Apache Solr](http://lucene.apache.org/solr/).
|
3
|
+
This is a [Fluentd](http://fluentd.org/) output plugin for sending data to [Apache Solr](http://lucene.apache.org/solr/). It supports [SolrCloud](https://cwiki.apache.org/confluence/display/solr/SolrCloud) as well as standalone Solr.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -18,17 +18,83 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
$ gem install fluent-plugin-output-solr
|
20
20
|
|
21
|
-
##
|
21
|
+
## Config parameters
|
22
22
|
|
23
|
-
###
|
23
|
+
### url
|
24
|
+
|
25
|
+
The Solr server url (for example http://localhost:8983/solr/collection1).
|
26
|
+
|
27
|
+
```
|
28
|
+
url http://localhost:8983/solr/collection1
|
29
|
+
```
|
30
|
+
|
31
|
+
### zk_host
|
32
|
+
|
33
|
+
The ZooKeeper connection string that SolrCloud refers to (for example localhost:2181/solr).
|
34
|
+
|
35
|
+
```
|
36
|
+
zk_host localhost:2181/solr
|
37
|
+
```
|
38
|
+
|
39
|
+
### collection
|
40
|
+
|
41
|
+
The SolrCloud collection name (default collection1).
|
42
|
+
|
43
|
+
```
|
44
|
+
collection collection1
|
45
|
+
```
|
46
|
+
|
47
|
+
### defined_fields
|
48
|
+
|
49
|
+
The defined fields in the Solr schema.xml. If omitted, it will get fields via Solr Schema API.
|
50
|
+
|
51
|
+
```
|
52
|
+
defined_fields ["id", "title"]
|
53
|
+
```
|
54
|
+
|
55
|
+
### ignore_undefined_fields
|
56
|
+
|
57
|
+
Ignore undefined fields in the Solr schema.xml.
|
58
|
+
|
59
|
+
```
|
60
|
+
ignore_undefined_fields false
|
61
|
+
```
|
62
|
+
|
63
|
+
### unique_key_field
|
64
|
+
|
65
|
+
A field name of unique key in the Solr schema.xml. If omitted, it will get unique key via Solr Schema API.
|
66
|
+
|
67
|
+
```
|
68
|
+
unique_key_field id
|
69
|
+
```
|
70
|
+
|
71
|
+
### timestamp_field
|
72
|
+
|
73
|
+
A field name of event timestamp in the Solr schema.xml (default event_timestamp).
|
74
|
+
|
75
|
+
```
|
76
|
+
timestamp_field event_timestamp
|
77
|
+
```
|
78
|
+
|
79
|
+
### flush_size
|
80
|
+
|
81
|
+
A number of events to queue up before writing to Solr (default 100).
|
82
|
+
|
83
|
+
```
|
84
|
+
flush_size 100
|
85
|
+
```
|
86
|
+
|
87
|
+
|
88
|
+
## Plugin setup examples
|
89
|
+
|
90
|
+
### Sent to standalone Solr using data-driven schemaless mode.
|
24
91
|
```
|
25
92
|
<match something.logs>
|
26
93
|
@type solr
|
27
94
|
|
95
|
+
# The Solr server url (for example http://localhost:8983/solr/collection1).
|
28
96
|
url http://localhost:8983/solr/collection1
|
29
97
|
|
30
|
-
batch_size 100
|
31
|
-
|
32
98
|
buffer_type memory
|
33
99
|
buffer_queue_limit 64m
|
34
100
|
buffer_chunk_limit 8m
|
@@ -36,15 +102,16 @@ Or install it yourself as:
|
|
36
102
|
</match>
|
37
103
|
```
|
38
104
|
|
39
|
-
### Sent to SolrCloud
|
105
|
+
### Sent to SolrCloud using data-driven schemaless mode.
|
40
106
|
```
|
41
107
|
<match something.logs>
|
42
108
|
@type solr
|
43
109
|
|
110
|
+
# The ZooKeeper connection string that SolrCloud refers to (for example localhost:2181/solr).
|
44
111
|
zk_host localhost:2181/solr
|
45
|
-
collection collection1
|
46
112
|
|
47
|
-
|
113
|
+
# The SolrCloud collection name (default collection1).
|
114
|
+
collection collection1
|
48
115
|
|
49
116
|
buffer_type memory
|
50
117
|
buffer_queue_limit 64m
|
@@ -53,6 +120,78 @@ Or install it yourself as:
|
|
53
120
|
</match>
|
54
121
|
```
|
55
122
|
|
123
|
+
## Solr setup examples
|
124
|
+
|
125
|
+
### How to setup Standalone Solr using data-driven schemaless mode.
|
126
|
+
|
127
|
+
1.Download and install Solr
|
128
|
+
|
129
|
+
```sh
|
130
|
+
$ mkdir $HOME/solr
|
131
|
+
$ cd $HOME/solr
|
132
|
+
$ wget https://archive.apache.org/dist/lucene/solr/5.4.0/solr-5.4.0.tgz
|
133
|
+
$ tar zxvf solr-5.4.0.tgz
|
134
|
+
$ cd solr-5.4.0
|
135
|
+
```
|
136
|
+
|
137
|
+
2.Start standalone Solr
|
138
|
+
|
139
|
+
```sh
|
140
|
+
$ ./bin/solr start -p 8983 -s server/solr
|
141
|
+
```
|
142
|
+
|
143
|
+
3.Create core
|
144
|
+
|
145
|
+
```sh
|
146
|
+
$ ./bin/solr create -c collection1 -d server/solr/configsets/data_driven_schema_configs -n collection1_configs
|
147
|
+
```
|
148
|
+
|
149
|
+
### How to setup SolrCloud using data-driven schemaless mode (shards=1 and replicationfactor=2).
|
150
|
+
|
151
|
+
1.Download and install ZooKeeper
|
152
|
+
|
153
|
+
```sh
|
154
|
+
$ mkdir $HOME/zookeeper
|
155
|
+
$ cd $HOME/zookeeper
|
156
|
+
$ wget https://archive.apache.org/dist/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz
|
157
|
+
$ tar zxvf zookeeper-3.4.6.tar.gz
|
158
|
+
$ cd zookeeper-3.4.6
|
159
|
+
$ cp -p ./conf/zoo_sample.cfg ./conf/zoo.cfg
|
160
|
+
```
|
161
|
+
|
162
|
+
2.Start standalone ZooKeeper
|
163
|
+
|
164
|
+
```sh
|
165
|
+
$ ./bin/zkServer.sh start
|
166
|
+
```
|
167
|
+
|
168
|
+
3.Download and install Solr
|
169
|
+
|
170
|
+
```sh
|
171
|
+
$ mkdir $HOME/solr
|
172
|
+
$ cd $HOME/solr
|
173
|
+
$ wget https://archive.apache.org/dist/lucene/solr/5.4.0/solr-5.4.0.tgz
|
174
|
+
$ tar zxvf solr-5.4.0.tgz
|
175
|
+
$ cd solr-5.4.0
|
176
|
+
$ ./server/scripts/cloud-scripts/zkcli.sh -zkhost localhost:2181 -cmd clear /solr
|
177
|
+
$ ./server/scripts/cloud-scripts/zkcli.sh -zkhost localhost:2181 -cmd makepath /solr
|
178
|
+
$ cp -pr server/solr server/solr1
|
179
|
+
$ cp -pr server/solr server/solr2
|
180
|
+
```
|
181
|
+
|
182
|
+
4.Start SolrCloud
|
183
|
+
|
184
|
+
```sh
|
185
|
+
$ ./bin/solr start -p 8983 -z localhost:2181/solr -s server/solr1
|
186
|
+
$ ./bin/solr start -p 8985 -z localhost:2181/solr -s server/solr2
|
187
|
+
```
|
188
|
+
|
189
|
+
5.Create collection
|
190
|
+
|
191
|
+
```sh
|
192
|
+
$ ./bin/solr create -c collection1 -d server/solr1/configsets/data_driven_schema_configs -n collection1_configs -shards 1 -replicationFactor 2
|
193
|
+
```
|
194
|
+
|
56
195
|
## Development
|
57
196
|
|
58
197
|
After checking out the repo, run `bundle install` to install dependencies. Then, run `rake test` to run the tests.
|
@@ -4,12 +4,12 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "fluent-plugin-output-solr"
|
7
|
-
spec.version = "0.
|
7
|
+
spec.version = "0.2.0"
|
8
8
|
spec.authors = ["Minoru Osuka"]
|
9
9
|
spec.email = ["minoru.osuka@gmail.com"]
|
10
10
|
|
11
|
-
spec.summary =
|
12
|
-
spec.description =
|
11
|
+
spec.summary = "Fluent output plugin for sending data to Apache Solr."
|
12
|
+
spec.description = "Fluent output plugin for sending data to Apache Solr. It support SolrCloud not only Standalone Solr."
|
13
13
|
spec.homepage = "https://github.com/mosuka/fluent-plugin-output-solr"
|
14
14
|
|
15
15
|
spec.license = "Apache-2.0"
|
@@ -7,19 +7,40 @@ module Fluent
|
|
7
7
|
class SolrOutput < BufferedOutput
|
8
8
|
Fluent::Plugin.register_output('solr', self)
|
9
9
|
|
10
|
+
DEFAULT_COLLECTION = 'collection1'
|
11
|
+
DEFAULT_IGNORE_UNDEFINED_FIELDS = false
|
12
|
+
DEFAULT_TIMESTAMP_FIELD = 'event_timestamp'
|
13
|
+
DEFAULT_FLUSH_SIZE = 100
|
14
|
+
|
15
|
+
MODE_STANDALONE = 'Standalone'
|
16
|
+
MODE_SOLRCLOUD = 'SolrCloud'
|
17
|
+
|
18
|
+
include Fluent::SetTagKeyMixin
|
19
|
+
config_set_default :include_tag_key, false
|
20
|
+
|
21
|
+
include Fluent::SetTimeKeyMixin
|
22
|
+
config_set_default :include_time_key, false
|
23
|
+
|
10
24
|
config_param :url, :string, :default => nil,
|
11
|
-
|
25
|
+
:desc => 'The Solr server url (for example http://localhost:8983/solr/collection1).'
|
12
26
|
|
13
27
|
config_param :zk_host, :string, :default => nil,
|
14
|
-
|
15
|
-
config_param :collection, :string, :default =>
|
16
|
-
|
28
|
+
:desc => 'The ZooKeeper connection string that SolrCloud refers to (for example localhost:2181/solr).'
|
29
|
+
config_param :collection, :string, :default => DEFAULT_COLLECTION,
|
30
|
+
:desc => 'The SolrCloud collection name (default collection1).'
|
17
31
|
|
18
|
-
config_param :
|
19
|
-
|
32
|
+
config_param :defined_fields, :array, :default => nil,
|
33
|
+
:desc => 'The defined fields in the Solr schema.xml. If omitted, it will get fields via Solr Schema API.'
|
34
|
+
config_param :ignore_undefined_fields, :bool, :default => DEFAULT_IGNORE_UNDEFINED_FIELDS,
|
35
|
+
:desc => 'Ignore undefined fields in the Solr schema.xml.'
|
20
36
|
|
21
|
-
|
22
|
-
|
37
|
+
config_param :unique_key_field, :string, :default => nil,
|
38
|
+
:desc => 'A field name of unique key in the Solr schema.xml. If omitted, it will get unique key via Solr Schema API.'
|
39
|
+
config_param :timestamp_field, :string, :default => DEFAULT_TIMESTAMP_FIELD,
|
40
|
+
:desc => 'A field name of event timestamp in the Solr schema.xml (default event_timestamp).'
|
41
|
+
|
42
|
+
config_param :flush_size, :integer, :default => DEFAULT_FLUSH_SIZE,
|
43
|
+
:desc => 'A number of events to queue up before writing to Solr (default 100).'
|
23
44
|
|
24
45
|
def initialize
|
25
46
|
super
|
@@ -31,9 +52,15 @@ module Fluent
|
|
31
52
|
@url = conf['url']
|
32
53
|
|
33
54
|
@zk_host = conf['zk_host']
|
34
|
-
@collection = conf['collection']
|
55
|
+
@collection = conf.has_key?('collection') ? conf['collection'] : DEFAULT_COLLECTION
|
56
|
+
|
57
|
+
@defined_fields = conf['defined_fields']
|
58
|
+
@ignore_undefined_field = conf.has_key?('ignore_undefined_field') ? conf['ignore_undefined_field'] : DEFAULT_IGNORE_UNDEFINED_FIELDS
|
35
59
|
|
36
|
-
@
|
60
|
+
@unique_key_field = conf['unique_key_field']
|
61
|
+
@timestamp_field = conf.has_key?('timestamp_field') ? conf['timestamp_field'] : DEFAULT_TIMESTAMP_FIELD
|
62
|
+
|
63
|
+
@flush_size = conf.has_key?('flush_size') ? conf['flush_size'].to_i : DEFAULT_FLUSH_SIZE
|
37
64
|
end
|
38
65
|
|
39
66
|
def start
|
@@ -73,19 +100,33 @@ module Fluent
|
|
73
100
|
def write(chunk)
|
74
101
|
documents = []
|
75
102
|
|
103
|
+
@fields = @defined_fields.nil? ? get_fields : @defined_fields
|
104
|
+
@unique_key = @unique_key_field.nil? ? get_unique_key : @unique_key_field
|
105
|
+
|
76
106
|
chunk.msgpack_each do |tag, time, record|
|
77
|
-
|
78
|
-
|
107
|
+
|
108
|
+
unless record.has_key?(@unique_key) then
|
109
|
+
record.merge!({@unique_key => SecureRandom.uuid})
|
110
|
+
end
|
111
|
+
|
112
|
+
record.merge!({@timestamp_field => Time.at(time).utc.strftime('%FT%TZ')})
|
113
|
+
|
114
|
+
if @ignore_undefined_fields then
|
115
|
+
record.each_key do |key|
|
116
|
+
unless @fields.include?(key) then
|
117
|
+
record.delete(key)
|
118
|
+
end
|
119
|
+
end
|
79
120
|
end
|
80
121
|
|
81
122
|
documents << record
|
82
|
-
|
83
|
-
if documents.count >= @
|
123
|
+
|
124
|
+
if documents.count >= @flush_size
|
84
125
|
update documents
|
85
126
|
documents.clear
|
86
127
|
end
|
87
128
|
end
|
88
|
-
|
129
|
+
|
89
130
|
update documents unless documents.empty?
|
90
131
|
end
|
91
132
|
|
@@ -97,6 +138,47 @@ module Fluent
|
|
97
138
|
@solr.add documents, collection: @collection, :params => {:commit => true}
|
98
139
|
log.info "Added %d document(s) to Solr" % documents.count
|
99
140
|
end
|
141
|
+
rescue Exception => e
|
142
|
+
log.warn("An error occurred while indexing: #{e.message}")
|
143
|
+
end
|
144
|
+
|
145
|
+
def get_unique_key
|
146
|
+
response = nil
|
147
|
+
|
148
|
+
if @mode == MODE_STANDALONE then
|
149
|
+
response = @solr.get 'schema/uniquekey'
|
150
|
+
elsif @mode == MODE_SOLRCLOUD then
|
151
|
+
response = @solr.get 'schema/uniquekey', collection: @collection
|
152
|
+
end
|
153
|
+
|
154
|
+
unique_key = response['uniqueKey']
|
155
|
+
log.info ("Unique key: #{unique_key}")
|
156
|
+
|
157
|
+
return unique_key
|
158
|
+
|
159
|
+
rescue Exception => e
|
160
|
+
log.warn("An error occurred while indexing: #{e.message}")
|
161
|
+
end
|
162
|
+
|
163
|
+
def get_fields
|
164
|
+
response = nil
|
165
|
+
|
166
|
+
if @mode == MODE_STANDALONE then
|
167
|
+
response = @solr.get 'schema/fields'
|
168
|
+
elsif @mode == MODE_SOLRCLOUD then
|
169
|
+
response = @solr.get 'schema/fields', collection: @collection
|
170
|
+
end
|
171
|
+
|
172
|
+
fields = []
|
173
|
+
response['fields'].each do |field|
|
174
|
+
fields.push(field['name'])
|
175
|
+
end
|
176
|
+
log.info ("Fields: #{fields}")
|
177
|
+
|
178
|
+
return fields
|
179
|
+
|
180
|
+
rescue Exception => e
|
181
|
+
log.warn("An error occurred while indexing: #{e.message}")
|
100
182
|
end
|
101
183
|
end
|
102
184
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-output-solr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Minoru Osuka
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fluentd
|
@@ -150,7 +150,8 @@ dependencies:
|
|
150
150
|
- - ~>
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 1.1.8
|
153
|
-
description:
|
153
|
+
description: Fluent output plugin for sending data to Apache Solr. It support SolrCloud
|
154
|
+
not only Standalone Solr.
|
154
155
|
email:
|
155
156
|
- minoru.osuka@gmail.com
|
156
157
|
executables: []
|
@@ -188,5 +189,5 @@ rubyforge_project:
|
|
188
189
|
rubygems_version: 2.0.14
|
189
190
|
signing_key:
|
190
191
|
specification_version: 4
|
191
|
-
summary:
|
192
|
+
summary: Fluent output plugin for sending data to Apache Solr.
|
192
193
|
test_files: []
|