logstash-input-mongodb 0.3.1 → 0.3.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 610e56ecb9dbe13f8a94e0abd903b3889ec32d22
-   data.tar.gz: f8232dc0ee30a7d3fb877be02d7e36c72a6cb96a
+   metadata.gz: 5239efc622206199789eedf0e1af75242fc6262d
+   data.tar.gz: d254084e1e1b9791b02e4f3207260da0f11cf046
  SHA512:
-   metadata.gz: 61eb59603580d59b6b837a53d174bf79a1b292cbdf6797b513ab82b700d64571ff37ef8e6205be2b5f203b6a0cdc9f1c7eea4ffca4617f0746485431bbc153b9
-   data.tar.gz: 7cec076e0eb7a7de7e62e1945cb8ccbcc626f015628fda9ba2ddcb407b17128f02b16782b9d2553128da8c84cc314c059b86be0795248326e8e5b4680689fd7e
+   metadata.gz: d717f6a55dbe745b3e4c66bb43465559c7c409dc9d0191636ab49f23c3c054d6b55f2283ba45aa11a5ac245c6040925185cada2e5637606c903f84f29d32ec83
+   data.tar.gz: 61562cf62e60a8f6055d4f31849dc06d5033bffdf01ed1858e71938d3d04d77a88a1278cd1f6efeebfc5b74e81c52ed0ed1619c85ffeac411cbf3536d1f2accc
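
As a sanity check, the new checksums above can be reproduced locally: a `.gem` file is a tar archive whose `metadata.gz` and `data.tar.gz` members are exactly what `checksums.yaml` describes. A minimal Ruby sketch, assuming the 0.3.2 gem has already been unpacked into the current directory with `tar -xf logstash-input-mongodb-0.3.2.gem`:

```
# Recompute the digests that checksums.yaml records for the two
# archive members; they should match the "+" lines above.
require "digest"

%w[metadata.gz data.tar.gz].each do |member|
  puts "#{member} SHA1:   #{Digest::SHA1.file(member).hexdigest}"
  puts "#{member} SHA512: #{Digest::SHA512.file(member).hexdigest}"
end
```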
@@ -1,2 +1,2 @@
- # logstash-input-example
- Example input plugin. This should help bootstrap your effort to write your own input plugin!
+ # logstash-input-example
+ Example input plugin. This should help bootstrap your effort to write your own input plugin!
data/Gemfile CHANGED
@@ -1,2 +1,2 @@
- source 'https://rubygems.org'
- gemspec
+ source 'https://rubygems.org'
+ gemspec
data/LICENSE CHANGED
@@ -1,13 +1,13 @@
- Copyright (c) 2012-2015 Elasticsearch <http://www.elasticsearch.org>
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
+ Copyright (c) 2012-2015 Elasticsearch <http://www.elasticsearch.org>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
data/README.md CHANGED
@@ -1,64 +1,71 @@
- # Logstash Plugin
-
- This is a plugin for [Logstash](https://github.com/elasticsearch/logstash).
-
- It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
-
- ## Documentation
-
- This is a logstash plugin for pulling data out of mongodb and processing with logstash. It will connect to the database specified in `uri`, use the `collection` attribute to find collections to pull documents from, start at the first collection it finds and pull the number of documents specified in `batch_size`, save it's progress in an sqlite database who's location is specified by `placeholder_db_dir` and `placeholder_db_name` and repeat. It will continue this until it no longer finds documents newer than ones that it has processed, sleep for a moment, then continue to loop over the collections.
-
- This was designed for parsing logs that were written into mongodb. This means that it may not re-parse db entries that were changed and already parsed.
-
-
- ### Installation
-
- + Logstash installed from ZIP | TGZ
-   + bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
-
- + Logstash from GIT
-   + git clone https://github.com/elastic/logstash.git
-   + cd logstash
-   + (ensure that the correct jruby is installed for the version of logstash you are installing)
-   + rake test:install-core
-   + bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
-   + bin/plugin install --development
-
- ### Configuration Options
-
- uri: A MongoDB URI for your database or cluster (check the MongoDB documentation for further info on this) [No Default, Required]
- placeholder_db_dir: Path where the place holder database will be stored locally to disk [No Default, Required]
-   This gets created by the plugin so the directory needs to be writeable by the user that logstash is running as
- placeholder_db_name: Name of the database file that will be created [Default: logstash_sqlite.db]
- collection: A regex that will be used to find desired collecitons. [No Default, Required]
- batch_size: Size of the batch of mongo documents to pull at a time [Default: 30]
-
-
- ### Configuration
-
- Example
- ```
- input {
-   mongodb {
-     uri => 'mongodb://10.0.0.30/my-logs?ssl=true'
-     placeholder_db_dir => '/opt/logstash-mongodb/'
-     placeholder_db_name => 'logstash_sqlite.db'
-     collection => 'events_'
-     batch_size => 5000
-   }
- }
-
- filter {
-   date {
-     match => [ "logdate", "ISO8601" ]
-   }
- }
-
- output {
-   redis {
-     host => "localhost"
-     data_type => "list"
-     key => "logstash-mylogs"
-   }
- }
- ```
+ # Logstash Plugin
+
+ This is a plugin for [Logstash](https://github.com/elasticsearch/logstash).
+
+ It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
+
+ ## Documentation
+
+ This is a logstash plugin for pulling data out of mongodb and processing with logstash. It will connect to the database specified in `uri`, use the `collection` attribute to find collections to pull documents from, start at the first collection it finds and pull the number of documents specified in `batch_size`, save it's progress in an sqlite database who's location is specified by `placeholder_db_dir` and `placeholder_db_name` and repeat. It will continue this until it no longer finds documents newer than ones that it has processed, sleep for a moment, then continue to loop over the collections.
+
+ This was designed for parsing logs that were written into mongodb. This means that it may not re-parse db entries that were changed and already parsed.
+
+
+ ### Installation
+
+ + Logstash installed from ZIP | TGZ
+   + bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
+
+ + Logstash from GIT
+   + git clone https://github.com/elastic/logstash.git
+   + cd logstash
+   + (ensure that the correct jruby is installed for the version of logstash you are installing)
+   + rake test:install-core
+   + bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
+   + bin/plugin install --development
+
+ ### Configuration Options
+
+ ```
+ Name                 Type       Description
+ uri                  [String]   A MongoDB URI for your database or cluster (check the MongoDB documentation for further info on this) [No Default, Required]
+ placeholder_db_dir   [String]   Path where the place holder database will be stored locally to disk [No Default, Required]
+                                 This gets created by the plugin so the directory needs to be writeable by the user that logstash is running as
+ placeholder_db_name  [String]   Name of the database file that will be created [Default: logstash_sqlite.db]
+ collection           [String]   A regex that will be used to find desired collecitons. [No Default, Required]
+ generateId           [Boolean]  If true, this will add a field '_id' that contains the MongoDB Document id
+ batch_size           [Int]      Size of the batch of mongo documents to pull at a time [Default: 30]
+ parse_method         [String]   Built in parsing of the mongodb document object [Default: 'flatten']
+ dig_fields           [Array]    An array of fields that should employ the dig method
+ dig_dig_fields       [Array]    This provides a second level of hash flattening after the initial dig has been done
+ ```
+
+
+ ### Configuration
+
+ Example
+ ```
+ input {
+   mongodb {
+     uri => 'mongodb://10.0.0.30/my-logs?ssl=true'
+     placeholder_db_dir => '/opt/logstash-mongodb/'
+     placeholder_db_name => 'logstash_sqlite.db'
+     collection => 'events_'
+     batch_size => 5000
+   }
+ }
+
+ filter {
+   date {
+     match => [ "logdate", "ISO8601" ]
+   }
+ }
+
+ output {
+   redis {
+     host => "localhost"
+     data_type => "list"
+     key => "logstash-mylogs"
+   }
+ }
+ ```
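
The `dig` options documented above are easiest to see with concrete data. Below is a minimal plain-Ruby sketch of the flattening that `dig_fields` and `dig_dig_fields` imply; the field names and values are hypothetical, not taken from the gem:

```
# Hypothetical config values: dig one level into "stats",
# and a second level into its "mem" sub-hash.
dig_fields     = ["stats"]
dig_dig_fields = ["mem"]

doc = { "stats" => { "uptime" => "99", "mem" => { "res" => "512" } } }

event = {}
doc.each do |k, v|
  next unless dig_fields.include?(k) && v.respond_to?(:each)
  v.each do |kk, vv|
    if dig_dig_fields.include?(kk) && vv.respond_to?(:each)
      # Two levels deep: join all three keys with underscores.
      vv.each { |kkk, vvv| event["#{k}_#{kk}_#{kkk}"] = vvv }
    else
      # One level deep: join the two keys.
      event["#{k}_#{kk}"] = vv
    end
  end
end

p event  # => {"stats_uptime"=>"99", "stats_mem_res"=>"512"}
```

The plugin additionally coerces integer-looking strings with `to_i`; that detail is omitted here to keep the sketch short.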
data/lib/logstash/inputs/mongodb.rb CHANGED
@@ -1,356 +1,353 @@
- # encoding: utf-8
- require "logstash/inputs/base"
- require "logstash/namespace"
- require "logstash/timestamp"
- require "stud/interval"
- require "socket" # for Socket.gethostname
- require "json"
- require "mongo"
-
- include Mongo
-
- class LogStash::Inputs::MongoDB < LogStash::Inputs::Base
-   config_name "mongodb"
-
-   # If undefined, Logstash will complain, even if codec is unused.
-   default :codec, "plain"
-
-   # Example URI: mongodb://mydb.host:27017/mydbname?ssl=true
-   config :uri, :validate => :string, :required => true
-
-   # The directory that will contain the sqlite database file.
-   config :placeholder_db_dir, :validate => :string, :required => true
-
-   # The name of the sqlite databse file
-   config :placeholder_db_name, :validate => :string, :default => "logstash_sqlite.db"
-
-   # Any table to exclude by name
-   config :exclude_tables, :validate => :array, :default => []
-
-   config :batch_size, :avlidate => :number, :default => 30
-
-   config :since_table, :validate => :string, :default => "logstash_since"
-
-   # The collection to use. Is turned into a regex so 'events' will match 'events_20150227'
-   # Example collection: events_20150227 or events_
-   config :collection, :validate => :string, :required => true
-
-   # This allows you to select the method you would like to use to parse your data
-   config :parse_method, :validate => :string, :default => 'flatten'
-
-   # If not flattening you can dig to flatten select fields
-   config :dig_fields, :validate => :array, :default => []
-
-   # This is the second level of hash flattening
-   config :dig_dig_fields, :validate => :array, :default => []
-
-   # If true, store the @timestamp field in mongodb as an ISODate type instead
-   # of an ISO8601 string. For more information about this, see
-   # http://www.mongodb.org/display/DOCS/Dates
-   config :isodate, :validate => :boolean, :default => false
-
-   # Number of seconds to wait after failure before retrying
-   config :retry_delay, :validate => :number, :default => 3, :required => false
-
-   # If true, an "_id" field will be added to the document before insertion.
-   # The "_id" field will use the timestamp of the event and overwrite an existing
-   # "_id" field in the event.
-   config :generateId, :validate => :boolean, :default => false
-
-   config :unpack_mongo_id, :validate => :boolean, :default => false
-
-   # The message string to use in the event.
-   config :message, :validate => :string, :default => "Default message..."
-
-   # Set how frequently messages should be sent.
-   # The default, `1`, means send a message every second.
-   config :interval, :validate => :number, :default => 1
-
-   SINCE_TABLE = :since_table
-
-   public
-   def init_placeholder_table(sqlitedb)
-     begin
-       sqlitedb.create_table "#{SINCE_TABLE}" do
-         String :table
-         Int :place
-       end
-     rescue
-       @logger.debug("since table already exists")
-     end
-   end
-
-   public
-   def init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
-     @logger.debug("init placeholder for #{since_table}_#{mongo_collection_name}")
-     since = sqlitedb[SINCE_TABLE]
-     mongo_collection = mongodb.collection(mongo_collection_name)
-     first_entry = mongo_collection.find({}).sort('_id' => 1).limit(1).first
-     first_entry_id = first_entry['_id'].to_s
-     since.insert(:table => "#{since_table}_#{mongo_collection_name}", :place => first_entry_id)
-     return first_entry_id
-   end
-
-   public
-   def get_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
-     since = sqlitedb[SINCE_TABLE]
-     x = since.where(:table => "#{since_table}_#{mongo_collection_name}")
-     if x[:place].nil? || x[:place] == 0
-       first_entry_id = init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
-       @logger.debug("FIRST ENTRY ID for #{mongo_collection_name} is #{first_entry_id}")
-       return first_entry_id
-     else
-       @logger.debug("placeholder already exists, it is #{x[:place]}")
-       return x[:place][:place]
-     end
-   end
-
-   public
-   def update_placeholder(sqlitedb, since_table, mongo_collection_name, place)
-     #@logger.debug("updating placeholder for #{since_table}_#{mongo_collection_name} to #{place}")
-     since = sqlitedb[SINCE_TABLE]
-     since.where(:table => "#{since_table}_#{mongo_collection_name}").update(:place => place)
-   end
-
-   public
-   def get_all_tables(mongodb)
-     return @mongodb.collection_names
-   end
-
-   public
-   def get_collection_names(mongodb, collection)
-     collection_names = []
-     @mongodb.collection_names.each do |coll|
-       if /#{collection}/ =~ coll
-         collection_names.push(coll)
-         @logger.debug("Added #{coll} to the collection list as it matches our collection search")
-       end
-     end
-     return collection_names
-   end
-
-   public
-   def get_cursor_for_collection(mongodb, mongo_collection_name, last_id_object, batch_size)
-     collection = mongodb.collection(mongo_collection_name)
-     # Need to make this sort by date in object id then get the first of the series
-     # db.events_20150320.find().limit(1).sort({ts:1})
-     return collection.find({:_id => {:$gt => last_id_object}}).limit(batch_size)
-   end
-
-   public
-   def update_watched_collections(mongodb, collection, sqlitedb)
-     collections = get_collection_names(mongodb, collection)
-     collection_data = {}
-     collections.each do |my_collection|
-       init_placeholder_table(sqlitedb)
-       last_id = get_placeholder(sqlitedb, since_table, mongodb, my_collection)
-       if !collection_data[my_collection]
-         collection_data[my_collection] = { :name => my_collection, :last_id => last_id }
-       end
-     end
-     return collection_data
-   end
-
-   public
-   def register
-     require "jdbc/sqlite3"
-     require "sequel"
-     placeholder_db_path = File.join(@placeholder_db_dir, @placeholder_db_name)
-     mongo_uri = Mongo::URI.new(@uri)
-     hosts_array = mongo_uri.servers
-     db_name = mongo_uri.database
-     ssl_enabled = mongo_uri.options[:ssl]
-     conn = Mongo::Client.new(hosts_array, ssl: ssl_enabled, database: db_name)
-
-     if @db_auths
-       @db_auths.each do |auth|
-         if !auth['db_name'].nil?
-           conn.add_auth(auth['db_name'], auth['username'], auth['password'], nil)
-         end
-       end
-       conn.apply_saved_authentication()
-     end
-
-     @host = Socket.gethostname
-     @logger.info("Registering MongoDB input")
-
-     @mongodb = conn.database
-     @sqlitedb = Sequel.connect("jdbc:sqlite:#{placeholder_db_path}")
-
-     # Should check to see if there are new matching tables at a predefined interval or on some trigger
-     @collection_data = update_watched_collections(@mongodb, @collection, @sqlitedb)
-   end # def register
-
-   class BSON::OrderedHash
-     def to_h
-       inject({}) { |acc, element| k,v = element; acc[k] = (if v.class == BSON::OrderedHash then v.to_h else v end); acc }
-     end
-
-     def to_json
-       JSON.parse(self.to_h.to_json, :allow_nan => true)
-     end
-   end
-
-   def flatten(my_hash)
-     new_hash = {}
-     @logger.debug("Raw Hash: #{my_hash}")
-     if my_hash.respond_to? :each
-       my_hash.each do |k1,v1|
-         if v1.is_a?(Hash)
-           v1.each do |k2,v2|
-             if v2.is_a?(Hash)
-               # puts "Found a nested hash"
-               result = flatten(v2)
-               result.each do |k3,v3|
-                 new_hash[k1.to_s+"_"+k2.to_s+"_"+k3.to_s] = v3
-               end
-               # puts "result: "+result.to_s+" k2: "+k2.to_s+" v2: "+v2.to_s
-             else
-               new_hash[k1.to_s+"_"+k2.to_s] = v2
-             end
-           end
-         else
-           # puts "Key: "+k1.to_s+" is not a hash"
-           new_hash[k1.to_s] = v1
-         end
-       end
-     else
-       @logger.debug("Flatten [ERROR]: hash did not respond to :each")
-     end
-     @logger.debug("Flattened Hash: #{new_hash}")
-     return new_hash
-   end
-
-   def run(queue)
-     sleep_min = 0.01
-     sleep_max = 5
-     sleeptime = sleep_min
-
-     begin
-       @logger.debug("Tailing MongoDB")
-       @logger.debug("Collection data is: #{@collection_data}")
-       loop do
-         @collection_data.each do |index, collection|
-           collection_name = collection[:name]
-           @logger.debug("collection_data is: #{@collection_data}")
-           last_id = @collection_data[index][:last_id]
-           #@logger.debug("last_id is #{last_id}", :index => index, :collection => collection_name)
-           # get batch of events starting at the last_place if it is set
-           last_id_object = BSON::ObjectId(last_id)
-           cursor = get_cursor_for_collection(@mongodb, collection_name, last_id_object, batch_size)
-           cursor.each do |doc|
-             logdate = DateTime.parse(doc['_id'].generation_time.to_s)
-             event = LogStash::Event.new("host" => @host)
-             decorate(event)
-             event["logdate"] = logdate.iso8601
-             log_entry = doc.to_h.to_s
-             log_entry['_id'] = log_entry['_id'].to_s
-             event["log_entry"] = log_entry
-             event["mongo_id"] = doc['_id'].to_s
-             @logger.debug("mongo_id: "+doc['_id'].to_s)
-             #@logger.debug("EVENT looks like: "+event.to_s)
-             #@logger.debug("Sent message: "+doc.to_h.to_s)
-             #@logger.debug("EVENT looks like: "+event.to_s)
-             # Extract the HOST_ID and PID from the MongoDB BSON::ObjectID
-             if @unpack_mongo_id
-               doc_hex_bytes = doc['_id'].to_s.each_char.each_slice(2).map {|b| b.join.to_i(16) }
-               doc_obj_bin = doc_hex_bytes.pack("C*").unpack("a4 a3 a2 a3")
-               host_id = doc_obj_bin[1].unpack("S")
-               process_id = doc_obj_bin[2].unpack("S")
-               event['host_id'] = host_id.first.to_i
-               event['process_id'] = process_id.first.to_i
-             end
-
-             if @parse_method == 'flatten'
-               # Flatten the JSON so that the data is usable in Kibana
-               flat_doc = flatten(doc)
-               # Check for different types of expected values and add them to the event
-               if flat_doc['info_message'] && (flat_doc['info_message'] =~ /collection stats: .+/)
-                 # Some custom stuff I'm having to do to fix formatting in past logs...
-                 sub_value = flat_doc['info_message'].sub("collection stats: ", "")
-                 JSON.parse(sub_value).each do |k1,v1|
-                   flat_doc["collection_stats_#{k1.to_s}"] = v1
-                 end
-               end
-
-               flat_doc.each do |k,v|
-                 # Check for an integer
-                 @logger.debug("key: #{k.to_s} value: #{v.to_s}")
-                 if v.is_a? Numeric
-                   event[k.to_s] = v
-                 elsif v.is_a? String
-                   if v == "NaN"
-                     event[k.to_s] = Float::NAN
-                   elsif /\A[-+]?\d+[.][\d]+\z/ == v
-                     event[k.to_s] = v.to_f
-                   elsif (/\A[-+]?\d+\z/ === v) || (v.is_a? Integer)
-                     event[k.to_s] = v.to_i
-                   else
-                     event[k.to_s] = v
-                   end
-                 else
-                   event[k.to_s] = v.to_s unless k.to_s == "_id" || k.to_s == "tags"
-                   if (k.to_s == "tags") && (v.is_a? Array)
-                     event['tags'] = v
-                   end
-                 end
-               end
-             elsif @parse_method == 'dig'
-               # Dig into the JSON and flatten select elements
-               doc.each do |k, v|
-                 if k != "_id"
-                   if (@dig_fields.include? k) && (v.respond_to? :each)
-                     v.each do |kk, vv|
-                       if (@dig_dig_fields.include? kk) && (vv.respond_to? :each)
-                         vv.each do |kkk, vvv|
-                           if /\A[-+]?\d+\z/ === vvv
-                             event["#{k}_#{kk}_#{kkk}"] = vvv.to_i
-                           else
-                             event["#{k}_#{kk}_#{kkk}"] = vvv.to_s
-                           end
-                         end
-                       else
-                         if /\A[-+]?\d+\z/ === vv
-                           event["#{k}_#{kk}"] = vv.to_i
-                         else
-                           event["#{k}_#{kk}"] = vv.to_s
-                         end
-                       end
-                     end
-                   else
-                     if /\A[-+]?\d+\z/ === v
-                       event[k] = v.to_i
-                     else
-                       event[k] = v.to_s
-                     end
-                   end
-                 end
-               end
-             else
-               # Should probably do some sanitization here and insert the doc as raw as possible for parsing in logstash
-             end
-
-             queue << event
-             @collection_data[index][:last_id] = doc['_id'].to_s
-           end
-           # Store the last-seen doc in the database
-           update_placeholder(@sqlitedb, since_table, collection_name, @collection_data[index][:last_id])
-         end
-         @logger.debug("Updating watch collections")
-         @collection_data = update_watched_collections(@mongodb, @collection, @sqlitedb)
-
-         # nothing found in that iteration
-         # sleep a bit
-         @logger.debug("No new rows. Sleeping.", :time => sleeptime)
-         sleeptime = [sleeptime * 2, sleep_max].min
-         sleep(sleeptime)
-         #sleeptime = sleep_min
-       end
-     rescue LogStash::ShutdownSignal
-       if @interrupted
-         @logger.debug("Mongo Input shutting down")
-       end
-     end
-   end # def run
-
- end # class LogStash::Inputs::Example
+ # encoding: utf-8
+ require "logstash/inputs/base"
+ require "logstash/namespace"
+ require "logstash/timestamp"
+ require "stud/interval"
+ require "socket" # for Socket.gethostname
+ require "json"
+ require "mongo"
+
+ include Mongo
+
+ class LogStash::Inputs::MongoDB < LogStash::Inputs::Base
+   config_name "mongodb"
+
+   # If undefined, Logstash will complain, even if codec is unused.
+   default :codec, "plain"
+
+   # Example URI: mongodb://mydb.host:27017/mydbname?ssl=true
+   config :uri, :validate => :string, :required => true
+
+   # The directory that will contain the sqlite database file.
+   config :placeholder_db_dir, :validate => :string, :required => true
+
+   # The name of the sqlite databse file
+   config :placeholder_db_name, :validate => :string, :default => "logstash_sqlite.db"
+
+   # Any table to exclude by name
+   config :exclude_tables, :validate => :array, :default => []
+
+   config :batch_size, :avlidate => :number, :default => 30
+
+   config :since_table, :validate => :string, :default => "logstash_since"
+
+   # The collection to use. Is turned into a regex so 'events' will match 'events_20150227'
+   # Example collection: events_20150227 or events_
+   config :collection, :validate => :string, :required => true
+
+   # This allows you to select the method you would like to use to parse your data
+   config :parse_method, :validate => :string, :default => 'flatten'
+
+   # If not flattening you can dig to flatten select fields
+   config :dig_fields, :validate => :array, :default => []
+
+   # This is the second level of hash flattening
+   config :dig_dig_fields, :validate => :array, :default => []
+
+   # If true, store the @timestamp field in mongodb as an ISODate type instead
+   # of an ISO8601 string. For more information about this, see
+   # http://www.mongodb.org/display/DOCS/Dates
+   config :isodate, :validate => :boolean, :default => false
+
+   # Number of seconds to wait after failure before retrying
+   config :retry_delay, :validate => :number, :default => 3, :required => false
+
+   # If true, an "_id" field will be added to the document before insertion.
+   # The "_id" field will use the timestamp of the event and overwrite an existing
+   # "_id" field in the event.
+   config :generateId, :validate => :boolean, :default => false
+
+   config :unpack_mongo_id, :validate => :boolean, :default => false
+
+   # The message string to use in the event.
+   config :message, :validate => :string, :default => "Default message..."
+
+   # Set how frequently messages should be sent.
+   # The default, `1`, means send a message every second.
+   config :interval, :validate => :number, :default => 1
+
+   SINCE_TABLE = :since_table
+
+   public
+   def init_placeholder_table(sqlitedb)
+     begin
+       sqlitedb.create_table "#{SINCE_TABLE}" do
+         String :table
+         Int :place
+       end
+     rescue
+       @logger.debug("since table already exists")
+     end
+   end
+
+   public
+   def init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
+     @logger.debug("init placeholder for #{since_table}_#{mongo_collection_name}")
+     since = sqlitedb[SINCE_TABLE]
+     mongo_collection = mongodb.collection(mongo_collection_name)
+     first_entry = mongo_collection.find({}).sort('_id' => 1).limit(1).first
+     first_entry_id = first_entry['_id'].to_s
+     since.insert(:table => "#{since_table}_#{mongo_collection_name}", :place => first_entry_id)
+     return first_entry_id
+   end
+
+   public
+   def get_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
+     since = sqlitedb[SINCE_TABLE]
+     x = since.where(:table => "#{since_table}_#{mongo_collection_name}")
+     if x[:place].nil? || x[:place] == 0
+       first_entry_id = init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
+       @logger.debug("FIRST ENTRY ID for #{mongo_collection_name} is #{first_entry_id}")
+       return first_entry_id
+     else
+       @logger.debug("placeholder already exists, it is #{x[:place]}")
+       return x[:place][:place]
+     end
+   end
+
+   public
+   def update_placeholder(sqlitedb, since_table, mongo_collection_name, place)
+     #@logger.debug("updating placeholder for #{since_table}_#{mongo_collection_name} to #{place}")
+     since = sqlitedb[SINCE_TABLE]
+     since.where(:table => "#{since_table}_#{mongo_collection_name}").update(:place => place)
+   end
+
+   public
+   def get_all_tables(mongodb)
+     return @mongodb.collection_names
+   end
+
+   public
+   def get_collection_names(mongodb, collection)
+     collection_names = []
+     @mongodb.collection_names.each do |coll|
+       if /#{collection}/ =~ coll
+         collection_names.push(coll)
+         @logger.debug("Added #{coll} to the collection list as it matches our collection search")
+       end
+     end
+     return collection_names
+   end
+
+   public
+   def get_cursor_for_collection(mongodb, mongo_collection_name, last_id_object, batch_size)
+     collection = mongodb.collection(mongo_collection_name)
+     # Need to make this sort by date in object id then get the first of the series
+     # db.events_20150320.find().limit(1).sort({ts:1})
+     return collection.find({:_id => {:$gt => last_id_object}}).limit(batch_size)
+   end
+
+   public
+   def update_watched_collections(mongodb, collection, sqlitedb)
+     collections = get_collection_names(mongodb, collection)
+     collection_data = {}
+     collections.each do |my_collection|
+       init_placeholder_table(sqlitedb)
+       last_id = get_placeholder(sqlitedb, since_table, mongodb, my_collection)
+       if !collection_data[my_collection]
+         collection_data[my_collection] = { :name => my_collection, :last_id => last_id }
+       end
+     end
+     return collection_data
+   end
+
+   public
+   def register
+     require "jdbc/sqlite3"
+     require "sequel"
+     placeholder_db_path = File.join(@placeholder_db_dir, @placeholder_db_name)
+     conn = Mongo::Client.new(@uri)
+
+     @host = Socket.gethostname
+     @logger.info("Registering MongoDB input")
+
+     @mongodb = conn.database
+     @sqlitedb = Sequel.connect("jdbc:sqlite:#{placeholder_db_path}")
+
+     # Should check to see if there are new matching tables at a predefined interval or on some trigger
+     @collection_data = update_watched_collections(@mongodb, @collection, @sqlitedb)
+   end # def register
+
+   class BSON::OrderedHash
+     def to_h
+       inject({}) { |acc, element| k,v = element; acc[k] = (if v.class == BSON::OrderedHash then v.to_h else v end); acc }
+     end
+
+     def to_json
+       JSON.parse(self.to_h.to_json, :allow_nan => true)
+     end
+   end
+
+   def flatten(my_hash)
+     new_hash = {}
+     @logger.debug("Raw Hash: #{my_hash}")
+     if my_hash.respond_to? :each
+       my_hash.each do |k1,v1|
+         if v1.is_a?(Hash)
+           v1.each do |k2,v2|
+             if v2.is_a?(Hash)
+               # puts "Found a nested hash"
+               result = flatten(v2)
+               result.each do |k3,v3|
+                 new_hash[k1.to_s+"_"+k2.to_s+"_"+k3.to_s] = v3
+               end
+               # puts "result: "+result.to_s+" k2: "+k2.to_s+" v2: "+v2.to_s
+             else
+               new_hash[k1.to_s+"_"+k2.to_s] = v2
+             end
+           end
+         else
+           # puts "Key: "+k1.to_s+" is not a hash"
+           new_hash[k1.to_s] = v1
+         end
+       end
+     else
+       @logger.debug("Flatten [ERROR]: hash did not respond to :each")
+     end
+     @logger.debug("Flattened Hash: #{new_hash}")
+     return new_hash
+   end
+
+   def run(queue)
+     sleep_min = 0.01
+     sleep_max = 5
+     sleeptime = sleep_min
+
+     begin
+       @logger.debug("Tailing MongoDB")
+       @logger.debug("Collection data is: #{@collection_data}")
+       loop do
+         @collection_data.each do |index, collection|
+           collection_name = collection[:name]
+           @logger.debug("collection_data is: #{@collection_data}")
+           last_id = @collection_data[index][:last_id]
+           #@logger.debug("last_id is #{last_id}", :index => index, :collection => collection_name)
+           # get batch of events starting at the last_place if it is set
+           last_id_object = BSON::ObjectId(last_id)
+           cursor = get_cursor_for_collection(@mongodb, collection_name, last_id_object, batch_size)
+           cursor.each do |doc|
+             logdate = DateTime.parse(doc['_id'].generation_time.to_s)
+             event = LogStash::Event.new("host" => @host)
+             decorate(event)
+             event["logdate"] = logdate.iso8601
+             log_entry = doc.to_h.to_s
+             log_entry['_id'] = log_entry['_id'].to_s
+             event["log_entry"] = log_entry
+             event["mongo_id"] = doc['_id'].to_s
+             @logger.debug("mongo_id: "+doc['_id'].to_s)
+             #@logger.debug("EVENT looks like: "+event.to_s)
+             #@logger.debug("Sent message: "+doc.to_h.to_s)
+             #@logger.debug("EVENT looks like: "+event.to_s)
+             # Extract the HOST_ID and PID from the MongoDB BSON::ObjectID
+             if @unpack_mongo_id
+               doc_hex_bytes = doc['_id'].to_s.each_char.each_slice(2).map {|b| b.join.to_i(16) }
+               doc_obj_bin = doc_hex_bytes.pack("C*").unpack("a4 a3 a2 a3")
+               host_id = doc_obj_bin[1].unpack("S")
+               process_id = doc_obj_bin[2].unpack("S")
+               event['host_id'] = host_id.first.to_i
+               event['process_id'] = process_id.first.to_i
+             end
+
+             if @parse_method == 'flatten'
+               # Flatten the JSON so that the data is usable in Kibana
+               flat_doc = flatten(doc)
+               # Check for different types of expected values and add them to the event
+               if flat_doc['info_message'] && (flat_doc['info_message'] =~ /collection stats: .+/)
+                 # Some custom stuff I'm having to do to fix formatting in past logs...
+                 sub_value = flat_doc['info_message'].sub("collection stats: ", "")
+                 JSON.parse(sub_value).each do |k1,v1|
+                   flat_doc["collection_stats_#{k1.to_s}"] = v1
+                 end
+               end
+
+               flat_doc.each do |k,v|
+                 # Check for an integer
+                 @logger.debug("key: #{k.to_s} value: #{v.to_s}")
+                 if v.is_a? Numeric
+                   event[k.to_s] = v
+                 elsif v.is_a? String
+                   if v == "NaN"
+                     event[k.to_s] = Float::NAN
+                   elsif /\A[-+]?\d+[.][\d]+\z/ == v
+                     event[k.to_s] = v.to_f
+                   elsif (/\A[-+]?\d+\z/ === v) || (v.is_a? Integer)
+                     event[k.to_s] = v.to_i
+                   else
+                     event[k.to_s] = v
+                   end
+                 else
+                   event[k.to_s] = v.to_s unless k.to_s == "_id" || k.to_s == "tags"
+                   if (k.to_s == "tags") && (v.is_a? Array)
+                     event['tags'] = v
+                   end
+                 end
+               end
+             elsif @parse_method == 'dig'
+               # Dig into the JSON and flatten select elements
+               doc.each do |k, v|
+                 if k != "_id"
+                   if (@dig_fields.include? k) && (v.respond_to? :each)
+                     v.each do |kk, vv|
+                       if (@dig_dig_fields.include? kk) && (vv.respond_to? :each)
+                         vv.each do |kkk, vvv|
+                           if /\A[-+]?\d+\z/ === vvv
+                             event["#{k}_#{kk}_#{kkk}"] = vvv.to_i
+                           else
+                             event["#{k}_#{kk}_#{kkk}"] = vvv.to_s
+                           end
+                         end
+                       else
+                         if /\A[-+]?\d+\z/ === vv
+                           event["#{k}_#{kk}"] = vv.to_i
+                         else
+                           event["#{k}_#{kk}"] = vv.to_s
+                         end
+                       end
+                     end
+                   else
+                     if /\A[-+]?\d+\z/ === v
+                       event[k] = v.to_i
+                     else
+                       event[k] = v.to_s
+                     end
+                   end
+                 end
+               end
+             elsif @parse_method == 'simple'
+               doc.each do |k, v|
+                 if v.is_a? Numeric
+                   event[k] = v.abs
+                 elsif v.is_a? Array
+                   event[k] = v
+                 elsif v == "NaN"
+                   event[k] = Float::NAN
+                 else
+                   event[k] = v.to_s
+                 end
+               end
+             end
+
+             queue << event
+             @collection_data[index][:last_id] = doc['_id'].to_s
+           end
+           # Store the last-seen doc in the database
+           update_placeholder(@sqlitedb, since_table, collection_name, @collection_data[index][:last_id])
+         end
+         @logger.debug("Updating watch collections")
+         @collection_data = update_watched_collections(@mongodb, @collection, @sqlitedb)
+
+         # nothing found in that iteration
+         # sleep a bit
+         @logger.debug("No new rows. Sleeping.", :time => sleeptime)
+         sleeptime = [sleeptime * 2, sleep_max].min
+         sleep(sleeptime)
+         #sleeptime = sleep_min
+       end
+     rescue LogStash::ShutdownSignal
+       if @interrupted
+         @logger.debug("Mongo Input shutting down")
+       end
+     end
+   end # def run
+
+ end # class LogStash::Inputs::Example
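
Two functional changes stand out in this file: `register` now hands the configured URI string straight to `Mongo::Client.new`, letting the mongo 2.x driver parse the connection string itself instead of the old manual `Mongo::URI` and `db_auths` handling, and the `run` loop gains a third `parse_method`, `simple`. Below is a standalone sketch of what the `simple` branch does to a document; it mirrors the logic above but is not a drop-in, and note that `v.abs` strips the sign from negative numbers:

```
# Mirrors the new 'simple' parse branch: numerics keep their absolute
# value, arrays pass through, the literal string "NaN" becomes a float
# NaN, and everything else is stringified.
def simple_parse(doc)
  doc.each_with_object({}) do |(k, v), event|
    event[k] = if v.is_a?(Numeric) then v.abs
               elsif v.is_a?(Array) then v
               elsif v == "NaN"     then Float::NAN
               else v.to_s
               end
  end
end

p simple_parse("level" => -3, "tags" => ["a", "b"], "score" => "NaN")
# => {"level"=>3, "tags"=>["a", "b"], "score"=>NaN}
```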
data/logstash-input-mongodb.gemspec CHANGED
@@ -1,38 +1,38 @@
- Gem::Specification.new do |s|
-   s.name = 'logstash-input-mongodb'
-   s.version = '0.3.1'
-   s.licenses = ['Apache License (2.0)']
-   s.summary = "This takes entries from mongodb as an input to logstash."
-   s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
-   s.authors = ["Philip Hutchins"]
-   s.email = 'flipture@gmail.com'
-   s.homepage = "http://www.phutchins.com"
-   s.require_paths = ["lib"]
-
-   # Files
-   s.files = Dir[
-     'lib/**/*',
-     'spec/**/*',
-     'vendor/**/*',
-     '*.gemspec',
-     '*.md',
-     'CONTRIBUTORS',
-     'Gemfile',
-     'LICENSE',
-     'NOTICE.TXT'
-   ]
-   # Tests
-   s.test_files = s.files.grep(%r{^(test|spec|features)/})
-
-   # Special flag to let us know this is actually a logstash plugin
-   s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" }
-
-   # Gem dependencies
-   s.add_runtime_dependency 'logstash-core', ">= 2.0.0.beta2", "< 3.0.0"
-   s.add_runtime_dependency 'logstash-codec-plain'
-   s.add_runtime_dependency 'stud'
-   s.add_runtime_dependency 'jdbc-sqlite3', '3.8.10.1'
-   s.add_runtime_dependency 'sequel'
-   s.add_runtime_dependency 'mongo', '>= 2.0.0'
-   s.add_development_dependency 'logstash-devutils'
- end
+ Gem::Specification.new do |s|
+   s.name = 'logstash-input-mongodb'
+   s.version = '0.3.2'
+   s.licenses = ['Apache License (2.0)']
+   s.summary = "This takes entries from mongodb as an input to logstash."
+   s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
+   s.authors = ["Philip Hutchins"]
+   s.email = 'flipture@gmail.com'
+   s.homepage = "http://www.phutchins.com"
+   s.require_paths = ["lib"]
+
+   # Files
+   s.files = Dir[
+     'lib/**/*',
+     'spec/**/*',
+     'vendor/**/*',
+     '*.gemspec',
+     '*.md',
+     'CONTRIBUTORS',
+     'Gemfile',
+     'LICENSE',
+     'NOTICE.TXT'
+   ]
+   # Tests
+   s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+   # Special flag to let us know this is actually a logstash plugin
+   s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" }
+
+   # Gem dependencies
+   s.add_runtime_dependency 'logstash-core', ">= 2.0.0.beta2", "< 3.0.0"
+   s.add_runtime_dependency 'logstash-codec-plain'
+   s.add_runtime_dependency 'stud'
+   s.add_runtime_dependency 'jdbc-sqlite3', '3.8.10.1'
+   s.add_runtime_dependency 'sequel'
+   s.add_runtime_dependency 'mongo', '>= 2.0.0'
+   s.add_development_dependency 'logstash-devutils'
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: logstash-input-mongodb
  version: !ruby/object:Gem::Version
-   version: 0.3.1
+   version: 0.3.2
  platform: ruby
  authors:
  - Philip Hutchins
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-11-25 00:00:00.000000000 Z
+ date: 2015-12-16 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: logstash-core
@@ -151,7 +151,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
        version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.4.5.1
+ rubygems_version: 2.4.6
  signing_key:
  specification_version: 4
  summary: This takes entries from mongodb as an input to logstash.