fluent-plugin-output-solr 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +147 -8
- data/fluent-plugin-output-solr.gemspec +3 -3
- data/lib/fluent/plugin/out_solr.rb +97 -15
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b2190d11bd62fbf4efbd390ae997e2ee4d962051
|
4
|
+
data.tar.gz: fd86da124f5ed9b94fdaffdb0c55e076e888806d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc5031b355be897054a0577a9f2d0623e13274002ee8534ed0d20de9c4f7c8f851d8cf52017ce3c3d4d2fcf8d5f113a5090cb70ded850fa71f839ca293913c3e
|
7
|
+
data.tar.gz: 21da9bb4a7a7954ec3363e7acae5a11fe6fbf5ae0789e1bf48207b66be698766c5a6189c86288020f55ff20ef26be04da94be3f8253c44935935b5c9576b1594
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Fluent::Plugin::OutSolr
|
2
2
|
|
3
|
-
This is a [Fluentd](http://fluentd.org/) plugin for send data to [Apache Solr](http://lucene.apache.org/solr/).
|
3
|
+
This is a [Fluentd](http://fluentd.org/) output plugin for sending data to [Apache Solr](http://lucene.apache.org/solr/). It supports [SolrCloud](https://cwiki.apache.org/confluence/display/solr/SolrCloud) as well as standalone Solr.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -18,17 +18,83 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
$ gem install fluent-plugin-output-solr
|
20
20
|
|
21
|
-
##
|
21
|
+
## Config parameters
|
22
22
|
|
23
|
-
###
|
23
|
+
### url
|
24
|
+
|
25
|
+
The Solr server url (for example http://localhost:8983/solr/collection1).
|
26
|
+
|
27
|
+
```
|
28
|
+
url http://localhost:8983/solr/collection1
|
29
|
+
```
|
30
|
+
|
31
|
+
### zk_host
|
32
|
+
|
33
|
+
The ZooKeeper connection string that SolrCloud refers to (for example localhost:2181/solr).
|
34
|
+
|
35
|
+
```
|
36
|
+
zk_host localhost:2181/solr
|
37
|
+
```
|
38
|
+
|
39
|
+
### collection
|
40
|
+
|
41
|
+
The SolrCloud collection name (default collection1).
|
42
|
+
|
43
|
+
```
|
44
|
+
collection collection1
|
45
|
+
```
|
46
|
+
|
47
|
+
### defined_fields
|
48
|
+
|
49
|
+
The fields defined in the Solr schema.xml. If omitted, the plugin retrieves the fields via the Solr Schema API.
|
50
|
+
|
51
|
+
```
|
52
|
+
defined_fields ["id", "title"]
|
53
|
+
```
|
54
|
+
|
55
|
+
### ignore_undefined_fields
|
56
|
+
|
57
|
+
Ignore undefined fields in the Solr schema.xml.
|
58
|
+
|
59
|
+
```
|
60
|
+
ignore_undefined_fields false
|
61
|
+
```
|
62
|
+
|
63
|
+
### unique_key_field
|
64
|
+
|
65
|
+
The name of the unique key field in the Solr schema.xml. If omitted, the plugin retrieves the unique key via the Solr Schema API.
|
66
|
+
|
67
|
+
```
|
68
|
+
unique_key_field id
|
69
|
+
```
|
70
|
+
|
71
|
+
### timestamp_field
|
72
|
+
|
73
|
+
The name of the event timestamp field in the Solr schema.xml (default event_timestamp).
|
74
|
+
|
75
|
+
```
|
76
|
+
timestamp_field event_timestamp
|
77
|
+
```
|
78
|
+
|
79
|
+
### flush_size
|
80
|
+
|
81
|
+
The number of events to queue up before writing to Solr (default 100).
|
82
|
+
|
83
|
+
```
|
84
|
+
flush_size 100
|
85
|
+
```
|
86
|
+
|
87
|
+
|
88
|
+
## Plugin setup examples
|
89
|
+
|
90
|
+
### Send to standalone Solr using data-driven schemaless mode.
|
24
91
|
```
|
25
92
|
<match something.logs>
|
26
93
|
@type solr
|
27
94
|
|
95
|
+
# The Solr server url (for example http://localhost:8983/solr/collection1).
|
28
96
|
url http://localhost:8983/solr/collection1
|
29
97
|
|
30
|
-
batch_size 100
|
31
|
-
|
32
98
|
buffer_type memory
|
33
99
|
buffer_queue_limit 64m
|
34
100
|
buffer_chunk_limit 8m
|
@@ -36,15 +102,16 @@ Or install it yourself as:
|
|
36
102
|
</match>
|
37
103
|
```
|
38
104
|
|
39
|
-
### Sent to SolrCloud
|
105
|
+
### Send to SolrCloud using data-driven schemaless mode.
|
40
106
|
```
|
41
107
|
<match something.logs>
|
42
108
|
@type solr
|
43
109
|
|
110
|
+
# The ZooKeeper connection string that SolrCloud refers to (for example localhost:2181/solr).
|
44
111
|
zk_host localhost:2181/solr
|
45
|
-
collection collection1
|
46
112
|
|
47
|
-
|
113
|
+
# The SolrCloud collection name (default collection1).
|
114
|
+
collection collection1
|
48
115
|
|
49
116
|
buffer_type memory
|
50
117
|
buffer_queue_limit 64m
|
@@ -53,6 +120,78 @@ Or install it yourself as:
|
|
53
120
|
</match>
|
54
121
|
```
|
55
122
|
|
123
|
+
## Solr setup examples
|
124
|
+
|
125
|
+
### How to set up standalone Solr using data-driven schemaless mode.
|
126
|
+
|
127
|
+
1. Download and install Solr
|
128
|
+
|
129
|
+
```sh
|
130
|
+
$ mkdir $HOME/solr
|
131
|
+
$ cd $HOME/solr
|
132
|
+
$ wget https://archive.apache.org/dist/lucene/solr/5.4.0/solr-5.4.0.tgz
|
133
|
+
$ tar zxvf solr-5.4.0.tgz
|
134
|
+
$ cd solr-5.4.0
|
135
|
+
```
|
136
|
+
|
137
|
+
2. Start standalone Solr
|
138
|
+
|
139
|
+
```sh
|
140
|
+
$ ./bin/solr start -p 8983 -s server/solr
|
141
|
+
```
|
142
|
+
|
143
|
+
3. Create core
|
144
|
+
|
145
|
+
```sh
|
146
|
+
$ ./bin/solr create -c collection1 -d server/solr/configsets/data_driven_schema_configs -n collection1_configs
|
147
|
+
```
|
148
|
+
|
149
|
+
### How to set up SolrCloud using data-driven schemaless mode (shards=1 and replicationFactor=2).
|
150
|
+
|
151
|
+
1. Download and install ZooKeeper
|
152
|
+
|
153
|
+
```sh
|
154
|
+
$ mkdir $HOME/zookeeper
|
155
|
+
$ cd $HOME/zookeeper
|
156
|
+
$ wget https://archive.apache.org/dist/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz
|
157
|
+
$ tar zxvf zookeeper-3.4.6.tar.gz
|
158
|
+
$ cd zookeeper-3.4.6
|
159
|
+
$ cp -p ./conf/zoo_sample.cfg ./conf/zoo.cfg
|
160
|
+
```
|
161
|
+
|
162
|
+
2. Start standalone ZooKeeper
|
163
|
+
|
164
|
+
```sh
|
165
|
+
$ ./bin/zkServer.sh start
|
166
|
+
```
|
167
|
+
|
168
|
+
3. Download and install Solr
|
169
|
+
|
170
|
+
```sh
|
171
|
+
$ mkdir $HOME/solr
|
172
|
+
$ cd $HOME/solr
|
173
|
+
$ wget https://archive.apache.org/dist/lucene/solr/5.4.0/solr-5.4.0.tgz
|
174
|
+
$ tar zxvf solr-5.4.0.tgz
|
175
|
+
$ cd solr-5.4.0
|
176
|
+
$ ./server/scripts/cloud-scripts/zkcli.sh -zkhost localhost:2181 -cmd clear /solr
|
177
|
+
$ ./server/scripts/cloud-scripts/zkcli.sh -zkhost localhost:2181 -cmd makepath /solr
|
178
|
+
$ cp -pr server/solr server/solr1
|
179
|
+
$ cp -pr server/solr server/solr2
|
180
|
+
```
|
181
|
+
|
182
|
+
4. Start SolrCloud
|
183
|
+
|
184
|
+
```sh
|
185
|
+
$ ./bin/solr start -p 8983 -z localhost:2181/solr -s server/solr1
|
186
|
+
$ ./bin/solr start -p 8985 -z localhost:2181/solr -s server/solr2
|
187
|
+
```
|
188
|
+
|
189
|
+
5. Create collection
|
190
|
+
|
191
|
+
```sh
|
192
|
+
$ ./bin/solr create -c collection1 -d server/solr1/configsets/data_driven_schema_configs -n collection1_configs -shards 1 -replicationFactor 2
|
193
|
+
```
|
194
|
+
|
56
195
|
## Development
|
57
196
|
|
58
197
|
After checking out the repo, run `bundle install` to install dependencies. Then, run `rake test` to run the tests.
|
@@ -4,12 +4,12 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "fluent-plugin-output-solr"
|
7
|
-
spec.version = "0.
|
7
|
+
spec.version = "0.2.0"
|
8
8
|
spec.authors = ["Minoru Osuka"]
|
9
9
|
spec.email = ["minoru.osuka@gmail.com"]
|
10
10
|
|
11
|
-
spec.summary =
|
12
|
-
spec.description =
|
11
|
+
spec.summary = "Fluent output plugin for sending data to Apache Solr."
|
12
|
+
spec.description = "Fluent output plugin for sending data to Apache Solr. It support SolrCloud not only Standalone Solr."
|
13
13
|
spec.homepage = "https://github.com/mosuka/fluent-plugin-output-solr"
|
14
14
|
|
15
15
|
spec.license = "Apache-2.0"
|
@@ -7,19 +7,40 @@ module Fluent
|
|
7
7
|
class SolrOutput < BufferedOutput
|
8
8
|
Fluent::Plugin.register_output('solr', self)
|
9
9
|
|
10
|
+
DEFAULT_COLLECTION = 'collection1'
|
11
|
+
DEFAULT_IGNORE_UNDEFINED_FIELDS = false
|
12
|
+
DEFAULT_TIMESTAMP_FIELD = 'event_timestamp'
|
13
|
+
DEFAULT_FLUSH_SIZE = 100
|
14
|
+
|
15
|
+
MODE_STANDALONE = 'Standalone'
|
16
|
+
MODE_SOLRCLOUD = 'SolrCloud'
|
17
|
+
|
18
|
+
include Fluent::SetTagKeyMixin
|
19
|
+
config_set_default :include_tag_key, false
|
20
|
+
|
21
|
+
include Fluent::SetTimeKeyMixin
|
22
|
+
config_set_default :include_time_key, false
|
23
|
+
|
10
24
|
config_param :url, :string, :default => nil,
|
11
|
-
|
25
|
+
:desc => 'The Solr server url (for example http://localhost:8983/solr/collection1).'
|
12
26
|
|
13
27
|
config_param :zk_host, :string, :default => nil,
|
14
|
-
|
15
|
-
config_param :collection, :string, :default =>
|
16
|
-
|
28
|
+
:desc => 'The ZooKeeper connection string that SolrCloud refers to (for example localhost:2181/solr).'
|
29
|
+
config_param :collection, :string, :default => DEFAULT_COLLECTION,
|
30
|
+
:desc => 'The SolrCloud collection name (default collection1).'
|
17
31
|
|
18
|
-
config_param :
|
19
|
-
|
32
|
+
config_param :defined_fields, :array, :default => nil,
|
33
|
+
:desc => 'The defined fields in the Solr schema.xml. If omitted, it will get fields via Solr Schema API.'
|
34
|
+
config_param :ignore_undefined_fields, :bool, :default => DEFAULT_IGNORE_UNDEFINED_FIELDS,
|
35
|
+
:desc => 'Ignore undefined fields in the Solr schema.xml.'
|
20
36
|
|
21
|
-
|
22
|
-
|
37
|
+
config_param :unique_key_field, :string, :default => nil,
|
38
|
+
:desc => 'A field name of unique key in the Solr schema.xml. If omitted, it will get unique key via Solr Schema API.'
|
39
|
+
config_param :timestamp_field, :string, :default => DEFAULT_TIMESTAMP_FIELD,
|
40
|
+
:desc => 'A field name of event timestamp in the Solr schema.xml (default event_timestamp).'
|
41
|
+
|
42
|
+
config_param :flush_size, :integer, :default => DEFAULT_FLUSH_SIZE,
|
43
|
+
:desc => 'A number of events to queue up before writing to Solr (default 100).'
|
23
44
|
|
24
45
|
def initialize
|
25
46
|
super
|
@@ -31,9 +52,15 @@ module Fluent
|
|
31
52
|
@url = conf['url']
|
32
53
|
|
33
54
|
@zk_host = conf['zk_host']
|
34
|
-
@collection = conf['collection']
|
55
|
+
@collection = conf.has_key?('collection') ? conf['collection'] : DEFAULT_COLLECTION
|
56
|
+
|
57
|
+
@defined_fields = conf['defined_fields']
|
58
|
+
@ignore_undefined_field = conf.has_key?('ignore_undefined_field') ? conf['ignore_undefined_field'] : DEFAULT_IGNORE_UNDEFINED_FIELDS
|
35
59
|
|
36
|
-
@
|
60
|
+
@unique_key_field = conf['unique_key_field']
|
61
|
+
@timestamp_field = conf.has_key?('timestamp_field') ? conf['timestamp_field'] : DEFAULT_TIMESTAMP_FIELD
|
62
|
+
|
63
|
+
@flush_size = conf.has_key?('flush_size') ? conf['flush_size'].to_i : DEFAULT_FLUSH_SIZE
|
37
64
|
end
|
38
65
|
|
39
66
|
def start
|
@@ -73,19 +100,33 @@ module Fluent
|
|
73
100
|
def write(chunk)
|
74
101
|
documents = []
|
75
102
|
|
103
|
+
@fields = @defined_fields.nil? ? get_fields : @defined_fields
|
104
|
+
@unique_key = @unique_key_field.nil? ? get_unique_key : @unique_key_field
|
105
|
+
|
76
106
|
chunk.msgpack_each do |tag, time, record|
|
77
|
-
|
78
|
-
|
107
|
+
|
108
|
+
unless record.has_key?(@unique_key) then
|
109
|
+
record.merge!({@unique_key => SecureRandom.uuid})
|
110
|
+
end
|
111
|
+
|
112
|
+
record.merge!({@timestamp_field => Time.at(time).utc.strftime('%FT%TZ')})
|
113
|
+
|
114
|
+
if @ignore_undefined_fields then
|
115
|
+
record.each_key do |key|
|
116
|
+
unless @fields.include?(key) then
|
117
|
+
record.delete(key)
|
118
|
+
end
|
119
|
+
end
|
79
120
|
end
|
80
121
|
|
81
122
|
documents << record
|
82
|
-
|
83
|
-
if documents.count >= @
|
123
|
+
|
124
|
+
if documents.count >= @flush_size
|
84
125
|
update documents
|
85
126
|
documents.clear
|
86
127
|
end
|
87
128
|
end
|
88
|
-
|
129
|
+
|
89
130
|
update documents unless documents.empty?
|
90
131
|
end
|
91
132
|
|
@@ -97,6 +138,47 @@ module Fluent
|
|
97
138
|
@solr.add documents, collection: @collection, :params => {:commit => true}
|
98
139
|
log.info "Added %d document(s) to Solr" % documents.count
|
99
140
|
end
|
141
|
+
rescue Exception => e
|
142
|
+
log.warn("An error occurred while indexing: #{e.message}")
|
143
|
+
end
|
144
|
+
|
145
|
+
def get_unique_key
|
146
|
+
response = nil
|
147
|
+
|
148
|
+
if @mode == MODE_STANDALONE then
|
149
|
+
response = @solr.get 'schema/uniquekey'
|
150
|
+
elsif @mode == MODE_SOLRCLOUD then
|
151
|
+
response = @solr.get 'schema/uniquekey', collection: @collection
|
152
|
+
end
|
153
|
+
|
154
|
+
unique_key = response['uniqueKey']
|
155
|
+
log.info ("Unique key: #{unique_key}")
|
156
|
+
|
157
|
+
return unique_key
|
158
|
+
|
159
|
+
rescue Exception => e
|
160
|
+
log.warn("An error occurred while indexing: #{e.message}")
|
161
|
+
end
|
162
|
+
|
163
|
+
def get_fields
|
164
|
+
response = nil
|
165
|
+
|
166
|
+
if @mode == MODE_STANDALONE then
|
167
|
+
response = @solr.get 'schema/fields'
|
168
|
+
elsif @mode == MODE_SOLRCLOUD then
|
169
|
+
response = @solr.get 'schema/fields', collection: @collection
|
170
|
+
end
|
171
|
+
|
172
|
+
fields = []
|
173
|
+
response['fields'].each do |field|
|
174
|
+
fields.push(field['name'])
|
175
|
+
end
|
176
|
+
log.info ("Fields: #{fields}")
|
177
|
+
|
178
|
+
return fields
|
179
|
+
|
180
|
+
rescue Exception => e
|
181
|
+
log.warn("An error occurred while indexing: #{e.message}")
|
100
182
|
end
|
101
183
|
end
|
102
184
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-output-solr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Minoru Osuka
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fluentd
|
@@ -150,7 +150,8 @@ dependencies:
|
|
150
150
|
- - ~>
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 1.1.8
|
153
|
-
description:
|
153
|
+
description: Fluent output plugin for sending data to Apache Solr. It support SolrCloud
|
154
|
+
not only Standalone Solr.
|
154
155
|
email:
|
155
156
|
- minoru.osuka@gmail.com
|
156
157
|
executables: []
|
@@ -188,5 +189,5 @@ rubyforge_project:
|
|
188
189
|
rubygems_version: 2.0.14
|
189
190
|
signing_key:
|
190
191
|
specification_version: 4
|
191
|
-
summary:
|
192
|
+
summary: Fluent output plugin for sending data to Apache Solr.
|
192
193
|
test_files: []
|