logstash-input-mongodb 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/DEVELOPER.md +2 -2
- data/Gemfile +2 -2
- data/LICENSE +13 -13
- data/README.md +71 -64
- data/lib/logstash/inputs/mongodb.rb +353 -356
- data/logstash-input-mongodb.gemspec +38 -38
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5239efc622206199789eedf0e1af75242fc6262d
|
4
|
+
data.tar.gz: d254084e1e1b9791b02e4f3207260da0f11cf046
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d717f6a55dbe745b3e4c66bb43465559c7c409dc9d0191636ab49f23c3c054d6b55f2283ba45aa11a5ac245c6040925185cada2e5637606c903f84f29d32ec83
|
7
|
+
data.tar.gz: 61562cf62e60a8f6055d4f31849dc06d5033bffdf01ed1858e71938d3d04d77a88a1278cd1f6efeebfc5b74e81c52ed0ed1619c85ffeac411cbf3536d1f2accc
|
data/DEVELOPER.md
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
# logstash-input-example
|
2
|
-
Example input plugin. This should help bootstrap your effort to write your own input plugin!
|
1
|
+
# logstash-input-example
|
2
|
+
Example input plugin. This should help bootstrap your effort to write your own input plugin!
|
data/Gemfile
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
source 'https://rubygems.org'
|
2
|
-
gemspec
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
gemspec
|
data/LICENSE
CHANGED
@@ -1,13 +1,13 @@
|
|
1
|
-
Copyright (c) 2012-2015 Elasticsearch <http://www.elasticsearch.org>
|
2
|
-
|
3
|
-
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
you may not use this file except in compliance with the License.
|
5
|
-
You may obtain a copy of the License at
|
6
|
-
|
7
|
-
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
|
9
|
-
Unless required by applicable law or agreed to in writing, software
|
10
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
See the License for the specific language governing permissions and
|
13
|
-
limitations under the License.
|
1
|
+
Copyright (c) 2012-2015 Elasticsearch <http://www.elasticsearch.org>
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/README.md
CHANGED
@@ -1,64 +1,71 @@
|
|
1
|
-
# Logstash Plugin
|
2
|
-
|
3
|
-
This is a plugin for [Logstash](https://github.com/elasticsearch/logstash).
|
4
|
-
|
5
|
-
It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
|
6
|
-
|
7
|
-
## Documentation
|
8
|
-
|
9
|
-
This is a logstash plugin for pulling data out of mongodb and processing with logstash. It will connect to the database specified in `uri`, use the `collection` attribute to find collections to pull documents from, start at the first collection it finds and pull the number of documents specified in `batch_size`, save it's progress in an sqlite database who's location is specified by `placeholder_db_dir` and `placeholder_db_name` and repeat. It will continue this until it no longer finds documents newer than ones that it has processed, sleep for a moment, then continue to loop over the collections.
|
10
|
-
|
11
|
-
This was designed for parsing logs that were written into mongodb. This means that it may not re-parse db entries that were changed and already parsed.
|
12
|
-
|
13
|
-
|
14
|
-
### Installation
|
15
|
-
|
16
|
-
+ Logstash installed from ZIP | TGZ
|
17
|
-
+ bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
|
18
|
-
|
19
|
-
+ Logstash from GIT
|
20
|
-
+ git clone https://github.com/elastic/logstash.git
|
21
|
-
+ cd logstash
|
22
|
-
+ (ensure that the correct jruby is installed for the version of logstash you are installing)
|
23
|
-
+ rake test:install-core
|
24
|
-
+ bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
|
25
|
-
+ bin/plugin install --development
|
26
|
-
|
27
|
-
### Configuration Options
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
}
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
1
|
+
# Logstash Plugin
|
2
|
+
|
3
|
+
This is a plugin for [Logstash](https://github.com/elasticsearch/logstash).
|
4
|
+
|
5
|
+
It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
|
6
|
+
|
7
|
+
## Documentation
|
8
|
+
|
9
|
+
This is a Logstash plugin for pulling data out of MongoDB and processing it with Logstash. It will connect to the database specified in `uri`, use the `collection` attribute to find collections to pull documents from, start at the first collection it finds and pull the number of documents specified in `batch_size`, save its progress in an SQLite database whose location is specified by `placeholder_db_dir` and `placeholder_db_name`, and repeat. It will continue this until it no longer finds documents newer than the ones it has processed, sleep for a moment, then continue to loop over the collections.
|
10
|
+
|
11
|
+
This was designed for parsing logs that were written into mongodb. This means that it may not re-parse db entries that were changed and already parsed.
|
12
|
+
|
13
|
+
|
14
|
+
### Installation
|
15
|
+
|
16
|
+
+ Logstash installed from ZIP | TGZ
|
17
|
+
+ bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
|
18
|
+
|
19
|
+
+ Logstash from GIT
|
20
|
+
+ git clone https://github.com/elastic/logstash.git
|
21
|
+
+ cd logstash
|
22
|
+
+ (ensure that the correct jruby is installed for the version of logstash you are installing)
|
23
|
+
+ rake test:install-core
|
24
|
+
+ bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
|
25
|
+
+ bin/plugin install --development
|
26
|
+
|
27
|
+
### Configuration Options
|
28
|
+
|
29
|
+
```
|
30
|
+
Name Type Description
|
31
|
+
uri [String] A MongoDB URI for your database or cluster (check the MongoDB documentation for further info on this) [No Default, Required]
|
32
|
+
placeholder_db_dir [String] Path where the place holder database will be stored locally to disk [No Default, Required]
|
33
|
+
This gets created by the plugin so the directory needs to be writeable by the user that logstash is running as
|
34
|
+
placeholder_db_name [String] Name of the database file that will be created [Default: logstash_sqlite.db]
|
35
|
+
collection [String] A regex that will be used to find desired collections. [No Default, Required]
|
36
|
+
generateId [Boolean] If true, this will add a field '_id' that contains the MongoDB Document id
|
37
|
+
batch_size [Int] Size of the batch of mongo documents to pull at a time [Default: 30]
|
38
|
+
parse_method [String] Built in parsing of the mongodb document object [Default: 'flatten']
|
39
|
+
dig_fields [Array] An array of fields that should employ the dig method
|
40
|
+
dig_dig_fields [Array] This provides a second level of hash flattening after the initial dig has been done
|
41
|
+
```
|
42
|
+
|
43
|
+
|
44
|
+
### Configuration
|
45
|
+
|
46
|
+
Example
|
47
|
+
```
|
48
|
+
input {
|
49
|
+
mongodb {
|
50
|
+
uri => 'mongodb://10.0.0.30/my-logs?ssl=true'
|
51
|
+
placeholder_db_dir => '/opt/logstash-mongodb/'
|
52
|
+
placeholder_db_name => 'logstash_sqlite.db'
|
53
|
+
collection => 'events_'
|
54
|
+
batch_size => 5000
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
filter {
|
59
|
+
date {
|
60
|
+
match => [ "logdate", "ISO8601" ]
|
61
|
+
}
|
62
|
+
}
|
63
|
+
|
64
|
+
output {
|
65
|
+
redis {
|
66
|
+
host => "localhost"
|
67
|
+
data_type => "list"
|
68
|
+
key => "logstash-mylogs"
|
69
|
+
}
|
70
|
+
}
|
71
|
+
```
|
@@ -1,356 +1,353 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
require "logstash/inputs/base"
|
3
|
-
require "logstash/namespace"
|
4
|
-
require "logstash/timestamp"
|
5
|
-
require "stud/interval"
|
6
|
-
require "socket" # for Socket.gethostname
|
7
|
-
require "json"
|
8
|
-
require "mongo"
|
9
|
-
|
10
|
-
include Mongo
|
11
|
-
|
12
|
-
class LogStash::Inputs::MongoDB < LogStash::Inputs::Base
|
13
|
-
config_name "mongodb"
|
14
|
-
|
15
|
-
# If undefined, Logstash will complain, even if codec is unused.
|
16
|
-
default :codec, "plain"
|
17
|
-
|
18
|
-
# Example URI: mongodb://mydb.host:27017/mydbname?ssl=true
|
19
|
-
config :uri, :validate => :string, :required => true
|
20
|
-
|
21
|
-
# The directory that will contain the sqlite database file.
|
22
|
-
config :placeholder_db_dir, :validate => :string, :required => true
|
23
|
-
|
24
|
-
# The name of the sqlite databse file
|
25
|
-
config :placeholder_db_name, :validate => :string, :default => "logstash_sqlite.db"
|
26
|
-
|
27
|
-
# Any table to exclude by name
|
28
|
-
config :exclude_tables, :validate => :array, :default => []
|
29
|
-
|
30
|
-
config :batch_size, :avlidate => :number, :default => 30
|
31
|
-
|
32
|
-
config :since_table, :validate => :string, :default => "logstash_since"
|
33
|
-
|
34
|
-
# The collection to use. Is turned into a regex so 'events' will match 'events_20150227'
|
35
|
-
# Example collection: events_20150227 or events_
|
36
|
-
config :collection, :validate => :string, :required => true
|
37
|
-
|
38
|
-
# This allows you to select the method you would like to use to parse your data
|
39
|
-
config :parse_method, :validate => :string, :default => 'flatten'
|
40
|
-
|
41
|
-
# If not flattening you can dig to flatten select fields
|
42
|
-
config :dig_fields, :validate => :array, :default => []
|
43
|
-
|
44
|
-
# This is the second level of hash flattening
|
45
|
-
config :dig_dig_fields, :validate => :array, :default => []
|
46
|
-
|
47
|
-
# If true, store the @timestamp field in mongodb as an ISODate type instead
|
48
|
-
# of an ISO8601 string. For more information about this, see
|
49
|
-
# http://www.mongodb.org/display/DOCS/Dates
|
50
|
-
config :isodate, :validate => :boolean, :default => false
|
51
|
-
|
52
|
-
# Number of seconds to wait after failure before retrying
|
53
|
-
config :retry_delay, :validate => :number, :default => 3, :required => false
|
54
|
-
|
55
|
-
# If true, an "_id" field will be added to the document before insertion.
|
56
|
-
# The "_id" field will use the timestamp of the event and overwrite an existing
|
57
|
-
# "_id" field in the event.
|
58
|
-
config :generateId, :validate => :boolean, :default => false
|
59
|
-
|
60
|
-
config :unpack_mongo_id, :validate => :boolean, :default => false
|
61
|
-
|
62
|
-
# The message string to use in the event.
|
63
|
-
config :message, :validate => :string, :default => "Default message..."
|
64
|
-
|
65
|
-
# Set how frequently messages should be sent.
|
66
|
-
# The default, `1`, means send a message every second.
|
67
|
-
config :interval, :validate => :number, :default => 1
|
68
|
-
|
69
|
-
SINCE_TABLE = :since_table
|
70
|
-
|
71
|
-
public
|
72
|
-
def init_placeholder_table(sqlitedb)
|
73
|
-
begin
|
74
|
-
sqlitedb.create_table "#{SINCE_TABLE}" do
|
75
|
-
String :table
|
76
|
-
Int :place
|
77
|
-
end
|
78
|
-
rescue
|
79
|
-
@logger.debug("since table already exists")
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
public
|
84
|
-
def init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
|
85
|
-
@logger.debug("init placeholder for #{since_table}_#{mongo_collection_name}")
|
86
|
-
since = sqlitedb[SINCE_TABLE]
|
87
|
-
mongo_collection = mongodb.collection(mongo_collection_name)
|
88
|
-
first_entry = mongo_collection.find({}).sort('_id' => 1).limit(1).first
|
89
|
-
first_entry_id = first_entry['_id'].to_s
|
90
|
-
since.insert(:table => "#{since_table}_#{mongo_collection_name}", :place => first_entry_id)
|
91
|
-
return first_entry_id
|
92
|
-
end
|
93
|
-
|
94
|
-
public
|
95
|
-
def get_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
|
96
|
-
since = sqlitedb[SINCE_TABLE]
|
97
|
-
x = since.where(:table => "#{since_table}_#{mongo_collection_name}")
|
98
|
-
if x[:place].nil? || x[:place] == 0
|
99
|
-
first_entry_id = init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
|
100
|
-
@logger.debug("FIRST ENTRY ID for #{mongo_collection_name} is #{first_entry_id}")
|
101
|
-
return first_entry_id
|
102
|
-
else
|
103
|
-
@logger.debug("placeholder already exists, it is #{x[:place]}")
|
104
|
-
return x[:place][:place]
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
public
|
109
|
-
def update_placeholder(sqlitedb, since_table, mongo_collection_name, place)
|
110
|
-
#@logger.debug("updating placeholder for #{since_table}_#{mongo_collection_name} to #{place}")
|
111
|
-
since = sqlitedb[SINCE_TABLE]
|
112
|
-
since.where(:table => "#{since_table}_#{mongo_collection_name}").update(:place => place)
|
113
|
-
end
|
114
|
-
|
115
|
-
public
|
116
|
-
def get_all_tables(mongodb)
|
117
|
-
return @mongodb.collection_names
|
118
|
-
end
|
119
|
-
|
120
|
-
public
|
121
|
-
def get_collection_names(mongodb, collection)
|
122
|
-
collection_names = []
|
123
|
-
@mongodb.collection_names.each do |coll|
|
124
|
-
if /#{collection}/ =~ coll
|
125
|
-
collection_names.push(coll)
|
126
|
-
@logger.debug("Added #{coll} to the collection list as it matches our collection search")
|
127
|
-
end
|
128
|
-
end
|
129
|
-
return collection_names
|
130
|
-
end
|
131
|
-
|
132
|
-
public
|
133
|
-
def get_cursor_for_collection(mongodb, mongo_collection_name, last_id_object, batch_size)
|
134
|
-
collection = mongodb.collection(mongo_collection_name)
|
135
|
-
# Need to make this sort by date in object id then get the first of the series
|
136
|
-
# db.events_20150320.find().limit(1).sort({ts:1})
|
137
|
-
return collection.find({:_id => {:$gt => last_id_object}}).limit(batch_size)
|
138
|
-
end
|
139
|
-
|
140
|
-
public
|
141
|
-
def update_watched_collections(mongodb, collection, sqlitedb)
|
142
|
-
collections = get_collection_names(mongodb, collection)
|
143
|
-
collection_data = {}
|
144
|
-
collections.each do |my_collection|
|
145
|
-
init_placeholder_table(sqlitedb)
|
146
|
-
last_id = get_placeholder(sqlitedb, since_table, mongodb, my_collection)
|
147
|
-
if !collection_data[my_collection]
|
148
|
-
collection_data[my_collection] = { :name => my_collection, :last_id => last_id }
|
149
|
-
end
|
150
|
-
end
|
151
|
-
return collection_data
|
152
|
-
end
|
153
|
-
|
154
|
-
public
|
155
|
-
def register
|
156
|
-
require "jdbc/sqlite3"
|
157
|
-
require "sequel"
|
158
|
-
placeholder_db_path = File.join(@placeholder_db_dir, @placeholder_db_name)
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
@logger.debug("
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
end
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
end # def run
|
355
|
-
|
356
|
-
end # class LogStash::Inputs::Example
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/inputs/base"
|
3
|
+
require "logstash/namespace"
|
4
|
+
require "logstash/timestamp"
|
5
|
+
require "stud/interval"
|
6
|
+
require "socket" # for Socket.gethostname
|
7
|
+
require "json"
|
8
|
+
require "mongo"
|
9
|
+
|
10
|
+
include Mongo
|
11
|
+
|
12
|
+
class LogStash::Inputs::MongoDB < LogStash::Inputs::Base
|
13
|
+
config_name "mongodb"
|
14
|
+
|
15
|
+
# If undefined, Logstash will complain, even if codec is unused.
|
16
|
+
default :codec, "plain"
|
17
|
+
|
18
|
+
# Example URI: mongodb://mydb.host:27017/mydbname?ssl=true
|
19
|
+
config :uri, :validate => :string, :required => true
|
20
|
+
|
21
|
+
# The directory that will contain the sqlite database file.
|
22
|
+
config :placeholder_db_dir, :validate => :string, :required => true
|
23
|
+
|
24
|
+
# The name of the sqlite database file
|
25
|
+
config :placeholder_db_name, :validate => :string, :default => "logstash_sqlite.db"
|
26
|
+
|
27
|
+
# Any table to exclude by name
|
28
|
+
config :exclude_tables, :validate => :array, :default => []
|
29
|
+
|
30
|
+
config :batch_size, :avlidate => :number, :default => 30
|
31
|
+
|
32
|
+
config :since_table, :validate => :string, :default => "logstash_since"
|
33
|
+
|
34
|
+
# The collection to use. Is turned into a regex so 'events' will match 'events_20150227'
|
35
|
+
# Example collection: events_20150227 or events_
|
36
|
+
config :collection, :validate => :string, :required => true
|
37
|
+
|
38
|
+
# This allows you to select the method you would like to use to parse your data
|
39
|
+
config :parse_method, :validate => :string, :default => 'flatten'
|
40
|
+
|
41
|
+
# If not flattening you can dig to flatten select fields
|
42
|
+
config :dig_fields, :validate => :array, :default => []
|
43
|
+
|
44
|
+
# This is the second level of hash flattening
|
45
|
+
config :dig_dig_fields, :validate => :array, :default => []
|
46
|
+
|
47
|
+
# If true, store the @timestamp field in mongodb as an ISODate type instead
|
48
|
+
# of an ISO8601 string. For more information about this, see
|
49
|
+
# http://www.mongodb.org/display/DOCS/Dates
|
50
|
+
config :isodate, :validate => :boolean, :default => false
|
51
|
+
|
52
|
+
# Number of seconds to wait after failure before retrying
|
53
|
+
config :retry_delay, :validate => :number, :default => 3, :required => false
|
54
|
+
|
55
|
+
# If true, an "_id" field will be added to the document before insertion.
|
56
|
+
# The "_id" field will use the timestamp of the event and overwrite an existing
|
57
|
+
# "_id" field in the event.
|
58
|
+
config :generateId, :validate => :boolean, :default => false
|
59
|
+
|
60
|
+
config :unpack_mongo_id, :validate => :boolean, :default => false
|
61
|
+
|
62
|
+
# The message string to use in the event.
|
63
|
+
config :message, :validate => :string, :default => "Default message..."
|
64
|
+
|
65
|
+
# Set how frequently messages should be sent.
|
66
|
+
# The default, `1`, means send a message every second.
|
67
|
+
config :interval, :validate => :number, :default => 1
|
68
|
+
|
69
|
+
SINCE_TABLE = :since_table
|
70
|
+
|
71
|
+
public
|
72
|
+
def init_placeholder_table(sqlitedb)
|
73
|
+
begin
|
74
|
+
sqlitedb.create_table "#{SINCE_TABLE}" do
|
75
|
+
String :table
|
76
|
+
Int :place
|
77
|
+
end
|
78
|
+
rescue
|
79
|
+
@logger.debug("since table already exists")
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
public
|
84
|
+
def init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
|
85
|
+
@logger.debug("init placeholder for #{since_table}_#{mongo_collection_name}")
|
86
|
+
since = sqlitedb[SINCE_TABLE]
|
87
|
+
mongo_collection = mongodb.collection(mongo_collection_name)
|
88
|
+
first_entry = mongo_collection.find({}).sort('_id' => 1).limit(1).first
|
89
|
+
first_entry_id = first_entry['_id'].to_s
|
90
|
+
since.insert(:table => "#{since_table}_#{mongo_collection_name}", :place => first_entry_id)
|
91
|
+
return first_entry_id
|
92
|
+
end
|
93
|
+
|
94
|
+
public
|
95
|
+
def get_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
|
96
|
+
since = sqlitedb[SINCE_TABLE]
|
97
|
+
x = since.where(:table => "#{since_table}_#{mongo_collection_name}")
|
98
|
+
if x[:place].nil? || x[:place] == 0
|
99
|
+
first_entry_id = init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
|
100
|
+
@logger.debug("FIRST ENTRY ID for #{mongo_collection_name} is #{first_entry_id}")
|
101
|
+
return first_entry_id
|
102
|
+
else
|
103
|
+
@logger.debug("placeholder already exists, it is #{x[:place]}")
|
104
|
+
return x[:place][:place]
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
public
|
109
|
+
def update_placeholder(sqlitedb, since_table, mongo_collection_name, place)
|
110
|
+
#@logger.debug("updating placeholder for #{since_table}_#{mongo_collection_name} to #{place}")
|
111
|
+
since = sqlitedb[SINCE_TABLE]
|
112
|
+
since.where(:table => "#{since_table}_#{mongo_collection_name}").update(:place => place)
|
113
|
+
end
|
114
|
+
|
115
|
+
public
|
116
|
+
def get_all_tables(mongodb)
|
117
|
+
return @mongodb.collection_names
|
118
|
+
end
|
119
|
+
|
120
|
+
public
|
121
|
+
def get_collection_names(mongodb, collection)
|
122
|
+
collection_names = []
|
123
|
+
@mongodb.collection_names.each do |coll|
|
124
|
+
if /#{collection}/ =~ coll
|
125
|
+
collection_names.push(coll)
|
126
|
+
@logger.debug("Added #{coll} to the collection list as it matches our collection search")
|
127
|
+
end
|
128
|
+
end
|
129
|
+
return collection_names
|
130
|
+
end
|
131
|
+
|
132
|
+
public
|
133
|
+
def get_cursor_for_collection(mongodb, mongo_collection_name, last_id_object, batch_size)
|
134
|
+
collection = mongodb.collection(mongo_collection_name)
|
135
|
+
# Need to make this sort by date in object id then get the first of the series
|
136
|
+
# db.events_20150320.find().limit(1).sort({ts:1})
|
137
|
+
return collection.find({:_id => {:$gt => last_id_object}}).limit(batch_size)
|
138
|
+
end
|
139
|
+
|
140
|
+
public
|
141
|
+
def update_watched_collections(mongodb, collection, sqlitedb)
|
142
|
+
collections = get_collection_names(mongodb, collection)
|
143
|
+
collection_data = {}
|
144
|
+
collections.each do |my_collection|
|
145
|
+
init_placeholder_table(sqlitedb)
|
146
|
+
last_id = get_placeholder(sqlitedb, since_table, mongodb, my_collection)
|
147
|
+
if !collection_data[my_collection]
|
148
|
+
collection_data[my_collection] = { :name => my_collection, :last_id => last_id }
|
149
|
+
end
|
150
|
+
end
|
151
|
+
return collection_data
|
152
|
+
end
|
153
|
+
|
154
|
+
public
|
155
|
+
def register
|
156
|
+
require "jdbc/sqlite3"
|
157
|
+
require "sequel"
|
158
|
+
placeholder_db_path = File.join(@placeholder_db_dir, @placeholder_db_name)
|
159
|
+
conn = Mongo::Client.new(@uri)
|
160
|
+
|
161
|
+
@host = Socket.gethostname
|
162
|
+
@logger.info("Registering MongoDB input")
|
163
|
+
|
164
|
+
@mongodb = conn.database
|
165
|
+
@sqlitedb = Sequel.connect("jdbc:sqlite:#{placeholder_db_path}")
|
166
|
+
|
167
|
+
# Should check to see if there are new matching tables at a predefined interval or on some trigger
|
168
|
+
@collection_data = update_watched_collections(@mongodb, @collection, @sqlitedb)
|
169
|
+
end # def register
|
170
|
+
|
171
|
+
class BSON::OrderedHash
|
172
|
+
def to_h
|
173
|
+
inject({}) { |acc, element| k,v = element; acc[k] = (if v.class == BSON::OrderedHash then v.to_h else v end); acc }
|
174
|
+
end
|
175
|
+
|
176
|
+
def to_json
|
177
|
+
JSON.parse(self.to_h.to_json, :allow_nan => true)
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
181
|
+
def flatten(my_hash)
|
182
|
+
new_hash = {}
|
183
|
+
@logger.debug("Raw Hash: #{my_hash}")
|
184
|
+
if my_hash.respond_to? :each
|
185
|
+
my_hash.each do |k1,v1|
|
186
|
+
if v1.is_a?(Hash)
|
187
|
+
v1.each do |k2,v2|
|
188
|
+
if v2.is_a?(Hash)
|
189
|
+
# puts "Found a nested hash"
|
190
|
+
result = flatten(v2)
|
191
|
+
result.each do |k3,v3|
|
192
|
+
new_hash[k1.to_s+"_"+k2.to_s+"_"+k3.to_s] = v3
|
193
|
+
end
|
194
|
+
# puts "result: "+result.to_s+" k2: "+k2.to_s+" v2: "+v2.to_s
|
195
|
+
else
|
196
|
+
new_hash[k1.to_s+"_"+k2.to_s] = v2
|
197
|
+
end
|
198
|
+
end
|
199
|
+
else
|
200
|
+
# puts "Key: "+k1.to_s+" is not a hash"
|
201
|
+
new_hash[k1.to_s] = v1
|
202
|
+
end
|
203
|
+
end
|
204
|
+
else
|
205
|
+
@logger.debug("Flatten [ERROR]: hash did not respond to :each")
|
206
|
+
end
|
207
|
+
@logger.debug("Flattened Hash: #{new_hash}")
|
208
|
+
return new_hash
|
209
|
+
end
|
210
|
+
|
211
|
+
def run(queue)
|
212
|
+
sleep_min = 0.01
|
213
|
+
sleep_max = 5
|
214
|
+
sleeptime = sleep_min
|
215
|
+
|
216
|
+
begin
|
217
|
+
@logger.debug("Tailing MongoDB")
|
218
|
+
@logger.debug("Collection data is: #{@collection_data}")
|
219
|
+
loop do
|
220
|
+
@collection_data.each do |index, collection|
|
221
|
+
collection_name = collection[:name]
|
222
|
+
@logger.debug("collection_data is: #{@collection_data}")
|
223
|
+
last_id = @collection_data[index][:last_id]
|
224
|
+
#@logger.debug("last_id is #{last_id}", :index => index, :collection => collection_name)
|
225
|
+
# get batch of events starting at the last_place if it is set
|
226
|
+
last_id_object = BSON::ObjectId(last_id)
|
227
|
+
cursor = get_cursor_for_collection(@mongodb, collection_name, last_id_object, batch_size)
|
228
|
+
cursor.each do |doc|
|
229
|
+
logdate = DateTime.parse(doc['_id'].generation_time.to_s)
|
230
|
+
event = LogStash::Event.new("host" => @host)
|
231
|
+
decorate(event)
|
232
|
+
event["logdate"] = logdate.iso8601
|
233
|
+
log_entry = doc.to_h.to_s
|
234
|
+
log_entry['_id'] = log_entry['_id'].to_s
|
235
|
+
event["log_entry"] = log_entry
|
236
|
+
event["mongo_id"] = doc['_id'].to_s
|
237
|
+
@logger.debug("mongo_id: "+doc['_id'].to_s)
|
238
|
+
#@logger.debug("EVENT looks like: "+event.to_s)
|
239
|
+
#@logger.debug("Sent message: "+doc.to_h.to_s)
|
240
|
+
#@logger.debug("EVENT looks like: "+event.to_s)
|
241
|
+
# Extract the HOST_ID and PID from the MongoDB BSON::ObjectID
|
242
|
+
if @unpack_mongo_id
|
243
|
+
doc_hex_bytes = doc['_id'].to_s.each_char.each_slice(2).map {|b| b.join.to_i(16) }
|
244
|
+
doc_obj_bin = doc_hex_bytes.pack("C*").unpack("a4 a3 a2 a3")
|
245
|
+
host_id = doc_obj_bin[1].unpack("S")
|
246
|
+
process_id = doc_obj_bin[2].unpack("S")
|
247
|
+
event['host_id'] = host_id.first.to_i
|
248
|
+
event['process_id'] = process_id.first.to_i
|
249
|
+
end
|
250
|
+
|
251
|
+
if @parse_method == 'flatten'
|
252
|
+
# Flatten the JSON so that the data is usable in Kibana
|
253
|
+
flat_doc = flatten(doc)
|
254
|
+
# Check for different types of expected values and add them to the event
|
255
|
+
if flat_doc['info_message'] && (flat_doc['info_message'] =~ /collection stats: .+/)
|
256
|
+
# Some custom stuff I'm having to do to fix formatting in past logs...
|
257
|
+
sub_value = flat_doc['info_message'].sub("collection stats: ", "")
|
258
|
+
JSON.parse(sub_value).each do |k1,v1|
|
259
|
+
flat_doc["collection_stats_#{k1.to_s}"] = v1
|
260
|
+
end
|
261
|
+
end
|
262
|
+
|
263
|
+
flat_doc.each do |k,v|
|
264
|
+
# Check for an integer
|
265
|
+
@logger.debug("key: #{k.to_s} value: #{v.to_s}")
|
266
|
+
if v.is_a? Numeric
|
267
|
+
event[k.to_s] = v
|
268
|
+
elsif v.is_a? String
|
269
|
+
if v == "NaN"
|
270
|
+
event[k.to_s] = Float::NAN
|
271
|
+
elsif /\A[-+]?\d+[.][\d]+\z/ == v
|
272
|
+
event[k.to_s] = v.to_f
|
273
|
+
elsif (/\A[-+]?\d+\z/ === v) || (v.is_a? Integer)
|
274
|
+
event[k.to_s] = v.to_i
|
275
|
+
else
|
276
|
+
event[k.to_s] = v
|
277
|
+
end
|
278
|
+
else
|
279
|
+
event[k.to_s] = v.to_s unless k.to_s == "_id" || k.to_s == "tags"
|
280
|
+
if (k.to_s == "tags") && (v.is_a? Array)
|
281
|
+
event['tags'] = v
|
282
|
+
end
|
283
|
+
end
|
284
|
+
end
|
285
|
+
elsif @parse_method == 'dig'
|
286
|
+
# Dig into the JSON and flatten select elements
|
287
|
+
doc.each do |k, v|
|
288
|
+
if k != "_id"
|
289
|
+
if (@dig_fields.include? k) && (v.respond_to? :each)
|
290
|
+
v.each do |kk, vv|
|
291
|
+
if (@dig_dig_fields.include? kk) && (vv.respond_to? :each)
|
292
|
+
vv.each do |kkk, vvv|
|
293
|
+
if /\A[-+]?\d+\z/ === vvv
|
294
|
+
event["#{k}_#{kk}_#{kkk}"] = vvv.to_i
|
295
|
+
else
|
296
|
+
event["#{k}_#{kk}_#{kkk}"] = vvv.to_s
|
297
|
+
end
|
298
|
+
end
|
299
|
+
else
|
300
|
+
if /\A[-+]?\d+\z/ === vv
|
301
|
+
event["#{k}_#{kk}"] = vv.to_i
|
302
|
+
else
|
303
|
+
event["#{k}_#{kk}"] = vv.to_s
|
304
|
+
end
|
305
|
+
end
|
306
|
+
end
|
307
|
+
else
|
308
|
+
if /\A[-+]?\d+\z/ === v
|
309
|
+
event[k] = v.to_i
|
310
|
+
else
|
311
|
+
event[k] = v.to_s
|
312
|
+
end
|
313
|
+
end
|
314
|
+
end
|
315
|
+
end
|
316
|
+
elsif @parse_method == 'simple'
|
317
|
+
doc.each do |k, v|
|
318
|
+
if v.is_a? Numeric
|
319
|
+
event[k] = v.abs
|
320
|
+
elsif v.is_a? Array
|
321
|
+
event[k] = v
|
322
|
+
elsif v == "NaN"
|
323
|
+
event[k] = Float::NAN
|
324
|
+
else
|
325
|
+
event[k] = v.to_s
|
326
|
+
end
|
327
|
+
end
|
328
|
+
end
|
329
|
+
|
330
|
+
queue << event
|
331
|
+
@collection_data[index][:last_id] = doc['_id'].to_s
|
332
|
+
end
|
333
|
+
# Store the last-seen doc in the database
|
334
|
+
update_placeholder(@sqlitedb, since_table, collection_name, @collection_data[index][:last_id])
|
335
|
+
end
|
336
|
+
@logger.debug("Updating watch collections")
|
337
|
+
@collection_data = update_watched_collections(@mongodb, @collection, @sqlitedb)
|
338
|
+
|
339
|
+
# nothing found in that iteration
|
340
|
+
# sleep a bit
|
341
|
+
@logger.debug("No new rows. Sleeping.", :time => sleeptime)
|
342
|
+
sleeptime = [sleeptime * 2, sleep_max].min
|
343
|
+
sleep(sleeptime)
|
344
|
+
#sleeptime = sleep_min
|
345
|
+
end
|
346
|
+
rescue LogStash::ShutdownSignal
|
347
|
+
if @interrupted
|
348
|
+
@logger.debug("Mongo Input shutting down")
|
349
|
+
end
|
350
|
+
end
|
351
|
+
end # def run
|
352
|
+
|
353
|
+
end # class LogStash::Inputs::Example
|
@@ -1,38 +1,38 @@
|
|
1
|
-
Gem::Specification.new do |s|
|
2
|
-
s.name = 'logstash-input-mongodb'
|
3
|
-
s.version = '0.3.1'
|
4
|
-
s.licenses = ['Apache License (2.0)']
|
5
|
-
s.summary = "This takes entries from mongodb as an input to logstash."
|
6
|
-
s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
|
7
|
-
s.authors = ["Philip Hutchins"]
|
8
|
-
s.email = 'flipture@gmail.com'
|
9
|
-
s.homepage = "http://www.phutchins.com"
|
10
|
-
s.require_paths = ["lib"]
|
11
|
-
|
12
|
-
# Files
|
13
|
-
s.files = Dir[
|
14
|
-
'lib/**/*',
|
15
|
-
'spec/**/*',
|
16
|
-
'vendor/**/*',
|
17
|
-
'*.gemspec',
|
18
|
-
'*.md',
|
19
|
-
'CONTRIBUTORS',
|
20
|
-
'Gemfile',
|
21
|
-
'LICENSE',
|
22
|
-
'NOTICE.TXT'
|
23
|
-
]
|
24
|
-
# Tests
|
25
|
-
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
26
|
-
|
27
|
-
# Special flag to let us know this is actually a logstash plugin
|
28
|
-
s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" }
|
29
|
-
|
30
|
-
# Gem dependencies
|
31
|
-
s.add_runtime_dependency 'logstash-core', ">= 2.0.0.beta2", "< 3.0.0"
|
32
|
-
s.add_runtime_dependency 'logstash-codec-plain'
|
33
|
-
s.add_runtime_dependency 'stud'
|
34
|
-
s.add_runtime_dependency 'jdbc-sqlite3', '3.8.10.1'
|
35
|
-
s.add_runtime_dependency 'sequel'
|
36
|
-
s.add_runtime_dependency 'mongo', '>= 2.0.0'
|
37
|
-
s.add_development_dependency 'logstash-devutils'
|
38
|
-
end
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.name = 'logstash-input-mongodb'
|
3
|
+
s.version = '0.3.2'
|
4
|
+
s.licenses = ['Apache License (2.0)']
|
5
|
+
s.summary = "This takes entries from mongodb as an input to logstash."
|
6
|
+
s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
|
7
|
+
s.authors = ["Philip Hutchins"]
|
8
|
+
s.email = 'flipture@gmail.com'
|
9
|
+
s.homepage = "http://www.phutchins.com"
|
10
|
+
s.require_paths = ["lib"]
|
11
|
+
|
12
|
+
# Files
|
13
|
+
s.files = Dir[
|
14
|
+
'lib/**/*',
|
15
|
+
'spec/**/*',
|
16
|
+
'vendor/**/*',
|
17
|
+
'*.gemspec',
|
18
|
+
'*.md',
|
19
|
+
'CONTRIBUTORS',
|
20
|
+
'Gemfile',
|
21
|
+
'LICENSE',
|
22
|
+
'NOTICE.TXT'
|
23
|
+
]
|
24
|
+
# Tests
|
25
|
+
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
26
|
+
|
27
|
+
# Special flag to let us know this is actually a logstash plugin
|
28
|
+
s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" }
|
29
|
+
|
30
|
+
# Gem dependencies
|
31
|
+
s.add_runtime_dependency 'logstash-core', ">= 2.0.0.beta2", "< 3.0.0"
|
32
|
+
s.add_runtime_dependency 'logstash-codec-plain'
|
33
|
+
s.add_runtime_dependency 'stud'
|
34
|
+
s.add_runtime_dependency 'jdbc-sqlite3', '3.8.10.1'
|
35
|
+
s.add_runtime_dependency 'sequel'
|
36
|
+
s.add_runtime_dependency 'mongo', '>= 2.0.0'
|
37
|
+
s.add_development_dependency 'logstash-devutils'
|
38
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-input-mongodb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Philip Hutchins
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: logstash-core
|
@@ -151,7 +151,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
151
151
|
version: '0'
|
152
152
|
requirements: []
|
153
153
|
rubyforge_project:
|
154
|
-
rubygems_version: 2.4.
|
154
|
+
rubygems_version: 2.4.6
|
155
155
|
signing_key:
|
156
156
|
specification_version: 4
|
157
157
|
summary: This takes entries from mongodb as an input to logstash.
|