logstash-input-mongodb 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 610e56ecb9dbe13f8a94e0abd903b3889ec32d22
4
- data.tar.gz: f8232dc0ee30a7d3fb877be02d7e36c72a6cb96a
3
+ metadata.gz: 5239efc622206199789eedf0e1af75242fc6262d
4
+ data.tar.gz: d254084e1e1b9791b02e4f3207260da0f11cf046
5
5
  SHA512:
6
- metadata.gz: 61eb59603580d59b6b837a53d174bf79a1b292cbdf6797b513ab82b700d64571ff37ef8e6205be2b5f203b6a0cdc9f1c7eea4ffca4617f0746485431bbc153b9
7
- data.tar.gz: 7cec076e0eb7a7de7e62e1945cb8ccbcc626f015628fda9ba2ddcb407b17128f02b16782b9d2553128da8c84cc314c059b86be0795248326e8e5b4680689fd7e
6
+ metadata.gz: d717f6a55dbe745b3e4c66bb43465559c7c409dc9d0191636ab49f23c3c054d6b55f2283ba45aa11a5ac245c6040925185cada2e5637606c903f84f29d32ec83
7
+ data.tar.gz: 61562cf62e60a8f6055d4f31849dc06d5033bffdf01ed1858e71938d3d04d77a88a1278cd1f6efeebfc5b74e81c52ed0ed1619c85ffeac411cbf3536d1f2accc
@@ -1,2 +1,2 @@
1
- # logstash-input-example
2
- Example input plugin. This should help bootstrap your effort to write your own input plugin!
1
+ # logstash-input-example
2
+ Example input plugin. This should help bootstrap your effort to write your own input plugin!
data/Gemfile CHANGED
@@ -1,2 +1,2 @@
1
- source 'https://rubygems.org'
2
- gemspec
1
+ source 'https://rubygems.org'
2
+ gemspec
data/LICENSE CHANGED
@@ -1,13 +1,13 @@
1
- Copyright (c) 2012-2015 Elasticsearch <http://www.elasticsearch.org>
2
-
3
- Licensed under the Apache License, Version 2.0 (the "License");
4
- you may not use this file except in compliance with the License.
5
- You may obtain a copy of the License at
6
-
7
- http://www.apache.org/licenses/LICENSE-2.0
8
-
9
- Unless required by applicable law or agreed to in writing, software
10
- distributed under the License is distributed on an "AS IS" BASIS,
11
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- See the License for the specific language governing permissions and
13
- limitations under the License.
1
+ Copyright (c) 2012-2015 Elasticsearch <http://www.elasticsearch.org>
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/README.md CHANGED
@@ -1,64 +1,71 @@
1
- # Logstash Plugin
2
-
3
- This is a plugin for [Logstash](https://github.com/elasticsearch/logstash).
4
-
5
- It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
6
-
7
- ## Documentation
8
-
9
- This is a logstash plugin for pulling data out of mongodb and processing with logstash. It will connect to the database specified in `uri`, use the `collection` attribute to find collections to pull documents from, start at the first collection it finds and pull the number of documents specified in `batch_size`, save it's progress in an sqlite database who's location is specified by `placeholder_db_dir` and `placeholder_db_name` and repeat. It will continue this until it no longer finds documents newer than ones that it has processed, sleep for a moment, then continue to loop over the collections.
10
-
11
- This was designed for parsing logs that were written into mongodb. This means that it may not re-parse db entries that were changed and already parsed.
12
-
13
-
14
- ### Installation
15
-
16
- + Logstash installed from ZIP | TGZ
17
- + bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
18
-
19
- + Logstash from GIT
20
- + git clone https://github.com/elastic/logstash.git
21
- + cd logstash
22
- + (ensure that the correct jruby is installed for the version of logstash you are installing)
23
- + rake test:install-core
24
- + bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
25
- + bin/plugin install --development
26
-
27
- ### Configuration Options
28
-
29
- uri: A MongoDB URI for your database or cluster (check the MongoDB documentation for further info on this) [No Default, Required]
30
- placeholder_db_dir: Path where the place holder database will be stored locally to disk [No Default, Required]
31
- This gets created by the plugin so the directory needs to be writeable by the user that logstash is running as
32
- placeholder_db_name: Name of the database file that will be created [Default: logstash_sqlite.db]
33
- collection: A regex that will be used to find desired collecitons. [No Default, Required]
34
- batch_size: Size of the batch of mongo documents to pull at a time [Default: 30]
35
-
36
-
37
- ### Configuration
38
-
39
- Example
40
- ```
41
- input {
42
- mongodb {
43
- uri => 'mongodb://10.0.0.30/my-logs?ssl=true'
44
- placeholder_db_dir => '/opt/logstash-mongodb/'
45
- placeholder_db_name => 'logstash_sqlite.db'
46
- collection => 'events_'
47
- batch_size => 5000
48
- }
49
- }
50
-
51
- filter {
52
- date {
53
- match => [ "logdate", "ISO8601" ]
54
- }
55
- }
56
-
57
- output {
58
- redis {
59
- host => "localhost"
60
- data_type => "list"
61
- key => "logstash-mylogs"
62
- }
63
- }
64
- ```
1
+ # Logstash Plugin
2
+
3
+ This is a plugin for [Logstash](https://github.com/elasticsearch/logstash).
4
+
5
+ It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
6
+
7
+ ## Documentation
8
+
9
+ This is a logstash plugin for pulling data out of mongodb and processing with logstash. It will connect to the database specified in `uri`, use the `collection` attribute to find collections to pull documents from, start at the first collection it finds and pull the number of documents specified in `batch_size`, save its progress in an sqlite database whose location is specified by `placeholder_db_dir` and `placeholder_db_name` and repeat. It will continue this until it no longer finds documents newer than ones that it has processed, sleep for a moment, then continue to loop over the collections.
10
+
11
+ This was designed for parsing logs that were written into mongodb. This means that it may not re-parse db entries that were changed and already parsed.
12
+
13
+
14
+ ### Installation
15
+
16
+ + Logstash installed from ZIP | TGZ
17
+ + bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
18
+
19
+ + Logstash from GIT
20
+ + git clone https://github.com/elastic/logstash.git
21
+ + cd logstash
22
+ + (ensure that the correct jruby is installed for the version of logstash you are installing)
23
+ + rake test:install-core
24
+ + bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
25
+ + bin/plugin install --development
26
+
27
+ ### Configuration Options
28
+
29
+ ```
30
+ Name Type Description
31
+ uri [String] A MongoDB URI for your database or cluster (check the MongoDB documentation for further info on this) [No Default, Required]
32
+ placeholder_db_dir [String] Path where the place holder database will be stored locally to disk [No Default, Required]
33
+ This gets created by the plugin so the directory needs to be writeable by the user that logstash is running as
34
+ placeholder_db_name [String] Name of the database file that will be created [Default: logstash_sqlite.db]
35
+ collection [String] A regex that will be used to find desired collections. [No Default, Required]
36
+ generateId [Boolean] If true, this will add a field '_id' that contains the MongoDB Document id
37
+ batch_size [Int] Size of the batch of mongo documents to pull at a time [Default: 30]
38
+ parse_method [String] Built in parsing of the mongodb document object [Default: 'flatten']
39
+ dig_fields [Array] An array of fields that should employ the dig method
40
+ dig_dig_fields [Array] This provides a second level of hash flattening after the initial dig has been done
41
+ ```
42
+
43
+
44
+ ### Configuration
45
+
46
+ Example
47
+ ```
48
+ input {
49
+ mongodb {
50
+ uri => 'mongodb://10.0.0.30/my-logs?ssl=true'
51
+ placeholder_db_dir => '/opt/logstash-mongodb/'
52
+ placeholder_db_name => 'logstash_sqlite.db'
53
+ collection => 'events_'
54
+ batch_size => 5000
55
+ }
56
+ }
57
+
58
+ filter {
59
+ date {
60
+ match => [ "logdate", "ISO8601" ]
61
+ }
62
+ }
63
+
64
+ output {
65
+ redis {
66
+ host => "localhost"
67
+ data_type => "list"
68
+ key => "logstash-mylogs"
69
+ }
70
+ }
71
+ ```
@@ -1,356 +1,353 @@
1
- # encoding: utf-8
2
- require "logstash/inputs/base"
3
- require "logstash/namespace"
4
- require "logstash/timestamp"
5
- require "stud/interval"
6
- require "socket" # for Socket.gethostname
7
- require "json"
8
- require "mongo"
9
-
10
- include Mongo
11
-
12
- class LogStash::Inputs::MongoDB < LogStash::Inputs::Base
13
- config_name "mongodb"
14
-
15
- # If undefined, Logstash will complain, even if codec is unused.
16
- default :codec, "plain"
17
-
18
- # Example URI: mongodb://mydb.host:27017/mydbname?ssl=true
19
- config :uri, :validate => :string, :required => true
20
-
21
- # The directory that will contain the sqlite database file.
22
- config :placeholder_db_dir, :validate => :string, :required => true
23
-
24
- # The name of the sqlite databse file
25
- config :placeholder_db_name, :validate => :string, :default => "logstash_sqlite.db"
26
-
27
- # Any table to exclude by name
28
- config :exclude_tables, :validate => :array, :default => []
29
-
30
- config :batch_size, :avlidate => :number, :default => 30
31
-
32
- config :since_table, :validate => :string, :default => "logstash_since"
33
-
34
- # The collection to use. Is turned into a regex so 'events' will match 'events_20150227'
35
- # Example collection: events_20150227 or events_
36
- config :collection, :validate => :string, :required => true
37
-
38
- # This allows you to select the method you would like to use to parse your data
39
- config :parse_method, :validate => :string, :default => 'flatten'
40
-
41
- # If not flattening you can dig to flatten select fields
42
- config :dig_fields, :validate => :array, :default => []
43
-
44
- # This is the second level of hash flattening
45
- config :dig_dig_fields, :validate => :array, :default => []
46
-
47
- # If true, store the @timestamp field in mongodb as an ISODate type instead
48
- # of an ISO8601 string. For more information about this, see
49
- # http://www.mongodb.org/display/DOCS/Dates
50
- config :isodate, :validate => :boolean, :default => false
51
-
52
- # Number of seconds to wait after failure before retrying
53
- config :retry_delay, :validate => :number, :default => 3, :required => false
54
-
55
- # If true, an "_id" field will be added to the document before insertion.
56
- # The "_id" field will use the timestamp of the event and overwrite an existing
57
- # "_id" field in the event.
58
- config :generateId, :validate => :boolean, :default => false
59
-
60
- config :unpack_mongo_id, :validate => :boolean, :default => false
61
-
62
- # The message string to use in the event.
63
- config :message, :validate => :string, :default => "Default message..."
64
-
65
- # Set how frequently messages should be sent.
66
- # The default, `1`, means send a message every second.
67
- config :interval, :validate => :number, :default => 1
68
-
69
- SINCE_TABLE = :since_table
70
-
71
- public
72
- def init_placeholder_table(sqlitedb)
73
- begin
74
- sqlitedb.create_table "#{SINCE_TABLE}" do
75
- String :table
76
- Int :place
77
- end
78
- rescue
79
- @logger.debug("since table already exists")
80
- end
81
- end
82
-
83
- public
84
- def init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
85
- @logger.debug("init placeholder for #{since_table}_#{mongo_collection_name}")
86
- since = sqlitedb[SINCE_TABLE]
87
- mongo_collection = mongodb.collection(mongo_collection_name)
88
- first_entry = mongo_collection.find({}).sort('_id' => 1).limit(1).first
89
- first_entry_id = first_entry['_id'].to_s
90
- since.insert(:table => "#{since_table}_#{mongo_collection_name}", :place => first_entry_id)
91
- return first_entry_id
92
- end
93
-
94
- public
95
- def get_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
96
- since = sqlitedb[SINCE_TABLE]
97
- x = since.where(:table => "#{since_table}_#{mongo_collection_name}")
98
- if x[:place].nil? || x[:place] == 0
99
- first_entry_id = init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
100
- @logger.debug("FIRST ENTRY ID for #{mongo_collection_name} is #{first_entry_id}")
101
- return first_entry_id
102
- else
103
- @logger.debug("placeholder already exists, it is #{x[:place]}")
104
- return x[:place][:place]
105
- end
106
- end
107
-
108
- public
109
- def update_placeholder(sqlitedb, since_table, mongo_collection_name, place)
110
- #@logger.debug("updating placeholder for #{since_table}_#{mongo_collection_name} to #{place}")
111
- since = sqlitedb[SINCE_TABLE]
112
- since.where(:table => "#{since_table}_#{mongo_collection_name}").update(:place => place)
113
- end
114
-
115
- public
116
- def get_all_tables(mongodb)
117
- return @mongodb.collection_names
118
- end
119
-
120
- public
121
- def get_collection_names(mongodb, collection)
122
- collection_names = []
123
- @mongodb.collection_names.each do |coll|
124
- if /#{collection}/ =~ coll
125
- collection_names.push(coll)
126
- @logger.debug("Added #{coll} to the collection list as it matches our collection search")
127
- end
128
- end
129
- return collection_names
130
- end
131
-
132
- public
133
- def get_cursor_for_collection(mongodb, mongo_collection_name, last_id_object, batch_size)
134
- collection = mongodb.collection(mongo_collection_name)
135
- # Need to make this sort by date in object id then get the first of the series
136
- # db.events_20150320.find().limit(1).sort({ts:1})
137
- return collection.find({:_id => {:$gt => last_id_object}}).limit(batch_size)
138
- end
139
-
140
- public
141
- def update_watched_collections(mongodb, collection, sqlitedb)
142
- collections = get_collection_names(mongodb, collection)
143
- collection_data = {}
144
- collections.each do |my_collection|
145
- init_placeholder_table(sqlitedb)
146
- last_id = get_placeholder(sqlitedb, since_table, mongodb, my_collection)
147
- if !collection_data[my_collection]
148
- collection_data[my_collection] = { :name => my_collection, :last_id => last_id }
149
- end
150
- end
151
- return collection_data
152
- end
153
-
154
- public
155
- def register
156
- require "jdbc/sqlite3"
157
- require "sequel"
158
- placeholder_db_path = File.join(@placeholder_db_dir, @placeholder_db_name)
159
- mongo_uri = Mongo::URI.new(@uri)
160
- hosts_array = mongo_uri.servers
161
- db_name = mongo_uri.database
162
- ssl_enabled = mongo_uri.options[:ssl]
163
- conn = Mongo::Client.new(hosts_array, ssl: ssl_enabled, database: db_name)
164
-
165
- if @db_auths
166
- @db_auths.each do |auth|
167
- if !auth['db_name'].nil?
168
- conn.add_auth(auth['db_name'], auth['username'], auth['password'], nil)
169
- end
170
- end
171
- conn.apply_saved_authentication()
172
- end
173
-
174
- @host = Socket.gethostname
175
- @logger.info("Registering MongoDB input")
176
-
177
- @mongodb = conn.database
178
- @sqlitedb = Sequel.connect("jdbc:sqlite:#{placeholder_db_path}")
179
-
180
- # Should check to see if there are new matching tables at a predefined interval or on some trigger
181
- @collection_data = update_watched_collections(@mongodb, @collection, @sqlitedb)
182
- end # def register
183
-
184
- class BSON::OrderedHash
185
- def to_h
186
- inject({}) { |acc, element| k,v = element; acc[k] = (if v.class == BSON::OrderedHash then v.to_h else v end); acc }
187
- end
188
-
189
- def to_json
190
- JSON.parse(self.to_h.to_json, :allow_nan => true)
191
- end
192
- end
193
-
194
- def flatten(my_hash)
195
- new_hash = {}
196
- @logger.debug("Raw Hash: #{my_hash}")
197
- if my_hash.respond_to? :each
198
- my_hash.each do |k1,v1|
199
- if v1.is_a?(Hash)
200
- v1.each do |k2,v2|
201
- if v2.is_a?(Hash)
202
- # puts "Found a nested hash"
203
- result = flatten(v2)
204
- result.each do |k3,v3|
205
- new_hash[k1.to_s+"_"+k2.to_s+"_"+k3.to_s] = v3
206
- end
207
- # puts "result: "+result.to_s+" k2: "+k2.to_s+" v2: "+v2.to_s
208
- else
209
- new_hash[k1.to_s+"_"+k2.to_s] = v2
210
- end
211
- end
212
- else
213
- # puts "Key: "+k1.to_s+" is not a hash"
214
- new_hash[k1.to_s] = v1
215
- end
216
- end
217
- else
218
- @logger.debug("Flatten [ERROR]: hash did not respond to :each")
219
- end
220
- @logger.debug("Flattened Hash: #{new_hash}")
221
- return new_hash
222
- end
223
-
224
- def run(queue)
225
- sleep_min = 0.01
226
- sleep_max = 5
227
- sleeptime = sleep_min
228
-
229
- begin
230
- @logger.debug("Tailing MongoDB")
231
- @logger.debug("Collection data is: #{@collection_data}")
232
- loop do
233
- @collection_data.each do |index, collection|
234
- collection_name = collection[:name]
235
- @logger.debug("collection_data is: #{@collection_data}")
236
- last_id = @collection_data[index][:last_id]
237
- #@logger.debug("last_id is #{last_id}", :index => index, :collection => collection_name)
238
- # get batch of events starting at the last_place if it is set
239
- last_id_object = BSON::ObjectId(last_id)
240
- cursor = get_cursor_for_collection(@mongodb, collection_name, last_id_object, batch_size)
241
- cursor.each do |doc|
242
- logdate = DateTime.parse(doc['_id'].generation_time.to_s)
243
- event = LogStash::Event.new("host" => @host)
244
- decorate(event)
245
- event["logdate"] = logdate.iso8601
246
- log_entry = doc.to_h.to_s
247
- log_entry['_id'] = log_entry['_id'].to_s
248
- event["log_entry"] = log_entry
249
- event["mongo_id"] = doc['_id'].to_s
250
- @logger.debug("mongo_id: "+doc['_id'].to_s)
251
- #@logger.debug("EVENT looks like: "+event.to_s)
252
- #@logger.debug("Sent message: "+doc.to_h.to_s)
253
- #@logger.debug("EVENT looks like: "+event.to_s)
254
- # Extract the HOST_ID and PID from the MongoDB BSON::ObjectID
255
- if @unpack_mongo_id
256
- doc_hex_bytes = doc['_id'].to_s.each_char.each_slice(2).map {|b| b.join.to_i(16) }
257
- doc_obj_bin = doc_hex_bytes.pack("C*").unpack("a4 a3 a2 a3")
258
- host_id = doc_obj_bin[1].unpack("S")
259
- process_id = doc_obj_bin[2].unpack("S")
260
- event['host_id'] = host_id.first.to_i
261
- event['process_id'] = process_id.first.to_i
262
- end
263
-
264
- if @parse_method == 'flatten'
265
- # Flatten the JSON so that the data is usable in Kibana
266
- flat_doc = flatten(doc)
267
- # Check for different types of expected values and add them to the event
268
- if flat_doc['info_message'] && (flat_doc['info_message'] =~ /collection stats: .+/)
269
- # Some custom stuff I'm having to do to fix formatting in past logs...
270
- sub_value = flat_doc['info_message'].sub("collection stats: ", "")
271
- JSON.parse(sub_value).each do |k1,v1|
272
- flat_doc["collection_stats_#{k1.to_s}"] = v1
273
- end
274
- end
275
-
276
- flat_doc.each do |k,v|
277
- # Check for an integer
278
- @logger.debug("key: #{k.to_s} value: #{v.to_s}")
279
- if v.is_a? Numeric
280
- event[k.to_s] = v
281
- elsif v.is_a? String
282
- if v == "NaN"
283
- event[k.to_s] = Float::NAN
284
- elsif /\A[-+]?\d+[.][\d]+\z/ == v
285
- event[k.to_s] = v.to_f
286
- elsif (/\A[-+]?\d+\z/ === v) || (v.is_a? Integer)
287
- event[k.to_s] = v.to_i
288
- else
289
- event[k.to_s] = v
290
- end
291
- else
292
- event[k.to_s] = v.to_s unless k.to_s == "_id" || k.to_s == "tags"
293
- if (k.to_s == "tags") && (v.is_a? Array)
294
- event['tags'] = v
295
- end
296
- end
297
- end
298
- elsif @parse_method == 'dig'
299
- # Dig into the JSON and flatten select elements
300
- doc.each do |k, v|
301
- if k != "_id"
302
- if (@dig_fields.include? k) && (v.respond_to? :each)
303
- v.each do |kk, vv|
304
- if (@dig_dig_fields.include? kk) && (vv.respond_to? :each)
305
- vv.each do |kkk, vvv|
306
- if /\A[-+]?\d+\z/ === vvv
307
- event["#{k}_#{kk}_#{kkk}"] = vvv.to_i
308
- else
309
- event["#{k}_#{kk}_#{kkk}"] = vvv.to_s
310
- end
311
- end
312
- else
313
- if /\A[-+]?\d+\z/ === vv
314
- event["#{k}_#{kk}"] = vv.to_i
315
- else
316
- event["#{k}_#{kk}"] = vv.to_s
317
- end
318
- end
319
- end
320
- else
321
- if /\A[-+]?\d+\z/ === v
322
- event[k] = v.to_i
323
- else
324
- event[k] = v.to_s
325
- end
326
- end
327
- end
328
- end
329
- else
330
- # Should probably do some sanitization here and insert the doc as raw as possible for parsing in logstash
331
- end
332
-
333
- queue << event
334
- @collection_data[index][:last_id] = doc['_id'].to_s
335
- end
336
- # Store the last-seen doc in the database
337
- update_placeholder(@sqlitedb, since_table, collection_name, @collection_data[index][:last_id])
338
- end
339
- @logger.debug("Updating watch collections")
340
- @collection_data = update_watched_collections(@mongodb, @collection, @sqlitedb)
341
-
342
- # nothing found in that iteration
343
- # sleep a bit
344
- @logger.debug("No new rows. Sleeping.", :time => sleeptime)
345
- sleeptime = [sleeptime * 2, sleep_max].min
346
- sleep(sleeptime)
347
- #sleeptime = sleep_min
348
- end
349
- rescue LogStash::ShutdownSignal
350
- if @interrupted
351
- @logger.debug("Mongo Input shutting down")
352
- end
353
- end
354
- end # def run
355
-
356
- end # class LogStash::Inputs::Example
1
+ # encoding: utf-8
2
+ require "logstash/inputs/base"
3
+ require "logstash/namespace"
4
+ require "logstash/timestamp"
5
+ require "stud/interval"
6
+ require "socket" # for Socket.gethostname
7
+ require "json"
8
+ require "mongo"
9
+
10
+ include Mongo
11
+
12
+ class LogStash::Inputs::MongoDB < LogStash::Inputs::Base
13
+ config_name "mongodb"
14
+
15
+ # If undefined, Logstash will complain, even if codec is unused.
16
+ default :codec, "plain"
17
+
18
+ # Example URI: mongodb://mydb.host:27017/mydbname?ssl=true
19
+ config :uri, :validate => :string, :required => true
20
+
21
+ # The directory that will contain the sqlite database file.
22
+ config :placeholder_db_dir, :validate => :string, :required => true
23
+
24
+ # The name of the sqlite database file
25
+ config :placeholder_db_name, :validate => :string, :default => "logstash_sqlite.db"
26
+
27
+ # Any table to exclude by name
28
+ config :exclude_tables, :validate => :array, :default => []
29
+
30
+ config :batch_size, :avlidate => :number, :default => 30
31
+
32
+ config :since_table, :validate => :string, :default => "logstash_since"
33
+
34
+ # The collection to use. Is turned into a regex so 'events' will match 'events_20150227'
35
+ # Example collection: events_20150227 or events_
36
+ config :collection, :validate => :string, :required => true
37
+
38
+ # This allows you to select the method you would like to use to parse your data
39
+ config :parse_method, :validate => :string, :default => 'flatten'
40
+
41
+ # If not flattening you can dig to flatten select fields
42
+ config :dig_fields, :validate => :array, :default => []
43
+
44
+ # This is the second level of hash flattening
45
+ config :dig_dig_fields, :validate => :array, :default => []
46
+
47
+ # If true, store the @timestamp field in mongodb as an ISODate type instead
48
+ # of an ISO8601 string. For more information about this, see
49
+ # http://www.mongodb.org/display/DOCS/Dates
50
+ config :isodate, :validate => :boolean, :default => false
51
+
52
+ # Number of seconds to wait after failure before retrying
53
+ config :retry_delay, :validate => :number, :default => 3, :required => false
54
+
55
+ # If true, an "_id" field will be added to the document before insertion.
56
+ # The "_id" field will use the timestamp of the event and overwrite an existing
57
+ # "_id" field in the event.
58
+ config :generateId, :validate => :boolean, :default => false
59
+
60
+ config :unpack_mongo_id, :validate => :boolean, :default => false
61
+
62
+ # The message string to use in the event.
63
+ config :message, :validate => :string, :default => "Default message..."
64
+
65
+ # Set how frequently messages should be sent.
66
+ # The default, `1`, means send a message every second.
67
+ config :interval, :validate => :number, :default => 1
68
+
69
+ SINCE_TABLE = :since_table
70
+
71
+ public
72
+ def init_placeholder_table(sqlitedb)
73
+ begin
74
+ sqlitedb.create_table "#{SINCE_TABLE}" do
75
+ String :table
76
+ Int :place
77
+ end
78
+ rescue
79
+ @logger.debug("since table already exists")
80
+ end
81
+ end
82
+
83
+ public
84
+ def init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
85
+ @logger.debug("init placeholder for #{since_table}_#{mongo_collection_name}")
86
+ since = sqlitedb[SINCE_TABLE]
87
+ mongo_collection = mongodb.collection(mongo_collection_name)
88
+ first_entry = mongo_collection.find({}).sort('_id' => 1).limit(1).first
89
+ first_entry_id = first_entry['_id'].to_s
90
+ since.insert(:table => "#{since_table}_#{mongo_collection_name}", :place => first_entry_id)
91
+ return first_entry_id
92
+ end
93
+
94
+ public
95
+ def get_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
96
+ since = sqlitedb[SINCE_TABLE]
97
+ x = since.where(:table => "#{since_table}_#{mongo_collection_name}")
98
+ if x[:place].nil? || x[:place] == 0
99
+ first_entry_id = init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
100
+ @logger.debug("FIRST ENTRY ID for #{mongo_collection_name} is #{first_entry_id}")
101
+ return first_entry_id
102
+ else
103
+ @logger.debug("placeholder already exists, it is #{x[:place]}")
104
+ return x[:place][:place]
105
+ end
106
+ end
107
+
108
+ public
109
+ def update_placeholder(sqlitedb, since_table, mongo_collection_name, place)
110
+ #@logger.debug("updating placeholder for #{since_table}_#{mongo_collection_name} to #{place}")
111
+ since = sqlitedb[SINCE_TABLE]
112
+ since.where(:table => "#{since_table}_#{mongo_collection_name}").update(:place => place)
113
+ end
114
+
115
+ public
116
+ def get_all_tables(mongodb)
117
+ return @mongodb.collection_names
118
+ end
119
+
120
+ public
121
+ def get_collection_names(mongodb, collection)
122
+ collection_names = []
123
+ @mongodb.collection_names.each do |coll|
124
+ if /#{collection}/ =~ coll
125
+ collection_names.push(coll)
126
+ @logger.debug("Added #{coll} to the collection list as it matches our collection search")
127
+ end
128
+ end
129
+ return collection_names
130
+ end
131
+
132
+ public
133
+ def get_cursor_for_collection(mongodb, mongo_collection_name, last_id_object, batch_size)
134
+ collection = mongodb.collection(mongo_collection_name)
135
+ # Need to make this sort by date in object id then get the first of the series
136
+ # db.events_20150320.find().limit(1).sort({ts:1})
137
+ return collection.find({:_id => {:$gt => last_id_object}}).limit(batch_size)
138
+ end
139
+
140
+ public
141
+ def update_watched_collections(mongodb, collection, sqlitedb)
142
+ collections = get_collection_names(mongodb, collection)
143
+ collection_data = {}
144
+ collections.each do |my_collection|
145
+ init_placeholder_table(sqlitedb)
146
+ last_id = get_placeholder(sqlitedb, since_table, mongodb, my_collection)
147
+ if !collection_data[my_collection]
148
+ collection_data[my_collection] = { :name => my_collection, :last_id => last_id }
149
+ end
150
+ end
151
+ return collection_data
152
+ end
153
+
154
+ public
155
+ def register
156
+ require "jdbc/sqlite3"
157
+ require "sequel"
158
+ placeholder_db_path = File.join(@placeholder_db_dir, @placeholder_db_name)
159
+ conn = Mongo::Client.new(@uri)
160
+
161
+ @host = Socket.gethostname
162
+ @logger.info("Registering MongoDB input")
163
+
164
+ @mongodb = conn.database
165
+ @sqlitedb = Sequel.connect("jdbc:sqlite:#{placeholder_db_path}")
166
+
167
+ # Should check to see if there are new matching tables at a predefined interval or on some trigger
168
+ @collection_data = update_watched_collections(@mongodb, @collection, @sqlitedb)
169
+ end # def register
170
+
171
+ class BSON::OrderedHash
172
+ def to_h
173
+ inject({}) { |acc, element| k,v = element; acc[k] = (if v.class == BSON::OrderedHash then v.to_h else v end); acc }
174
+ end
175
+
176
+ def to_json
177
+ JSON.parse(self.to_h.to_json, :allow_nan => true)
178
+ end
179
+ end
180
+
181
+ def flatten(my_hash)
182
+ new_hash = {}
183
+ @logger.debug("Raw Hash: #{my_hash}")
184
+ if my_hash.respond_to? :each
185
+ my_hash.each do |k1,v1|
186
+ if v1.is_a?(Hash)
187
+ v1.each do |k2,v2|
188
+ if v2.is_a?(Hash)
189
+ # puts "Found a nested hash"
190
+ result = flatten(v2)
191
+ result.each do |k3,v3|
192
+ new_hash[k1.to_s+"_"+k2.to_s+"_"+k3.to_s] = v3
193
+ end
194
+ # puts "result: "+result.to_s+" k2: "+k2.to_s+" v2: "+v2.to_s
195
+ else
196
+ new_hash[k1.to_s+"_"+k2.to_s] = v2
197
+ end
198
+ end
199
+ else
200
+ # puts "Key: "+k1.to_s+" is not a hash"
201
+ new_hash[k1.to_s] = v1
202
+ end
203
+ end
204
+ else
205
+ @logger.debug("Flatten [ERROR]: hash did not respond to :each")
206
+ end
207
+ @logger.debug("Flattened Hash: #{new_hash}")
208
+ return new_hash
209
+ end
210
+
211
+ def run(queue)
212
+ sleep_min = 0.01
213
+ sleep_max = 5
214
+ sleeptime = sleep_min
215
+
216
+ begin
217
+ @logger.debug("Tailing MongoDB")
218
+ @logger.debug("Collection data is: #{@collection_data}")
219
+ loop do
220
+ @collection_data.each do |index, collection|
221
+ collection_name = collection[:name]
222
+ @logger.debug("collection_data is: #{@collection_data}")
223
+ last_id = @collection_data[index][:last_id]
224
+ #@logger.debug("last_id is #{last_id}", :index => index, :collection => collection_name)
225
+ # get batch of events starting at the last_place if it is set
226
+ last_id_object = BSON::ObjectId(last_id)
227
+ cursor = get_cursor_for_collection(@mongodb, collection_name, last_id_object, batch_size)
228
+ cursor.each do |doc|
229
+ logdate = DateTime.parse(doc['_id'].generation_time.to_s)
230
+ event = LogStash::Event.new("host" => @host)
231
+ decorate(event)
232
+ event["logdate"] = logdate.iso8601
233
+ log_entry = doc.to_h.to_s
234
+ log_entry['_id'] = log_entry['_id'].to_s
235
+ event["log_entry"] = log_entry
236
+ event["mongo_id"] = doc['_id'].to_s
237
+ @logger.debug("mongo_id: "+doc['_id'].to_s)
238
+ #@logger.debug("EVENT looks like: "+event.to_s)
239
+ #@logger.debug("Sent message: "+doc.to_h.to_s)
240
+ #@logger.debug("EVENT looks like: "+event.to_s)
241
+ # Extract the HOST_ID and PID from the MongoDB BSON::ObjectID
242
+ if @unpack_mongo_id
243
+ doc_hex_bytes = doc['_id'].to_s.each_char.each_slice(2).map {|b| b.join.to_i(16) }
244
+ doc_obj_bin = doc_hex_bytes.pack("C*").unpack("a4 a3 a2 a3")
245
+ host_id = doc_obj_bin[1].unpack("S")
246
+ process_id = doc_obj_bin[2].unpack("S")
247
+ event['host_id'] = host_id.first.to_i
248
+ event['process_id'] = process_id.first.to_i
249
+ end
250
+
251
+ if @parse_method == 'flatten'
252
+ # Flatten the JSON so that the data is usable in Kibana
253
+ flat_doc = flatten(doc)
254
+ # Check for different types of expected values and add them to the event
255
+ if flat_doc['info_message'] && (flat_doc['info_message'] =~ /collection stats: .+/)
256
+ # Some custom stuff I'm having to do to fix formatting in past logs...
257
+ sub_value = flat_doc['info_message'].sub("collection stats: ", "")
258
+ JSON.parse(sub_value).each do |k1,v1|
259
+ flat_doc["collection_stats_#{k1.to_s}"] = v1
260
+ end
261
+ end
262
+
263
+ flat_doc.each do |k,v|
264
+ # Check for an integer
265
+ @logger.debug("key: #{k.to_s} value: #{v.to_s}")
266
+ if v.is_a? Numeric
267
+ event[k.to_s] = v
268
+ elsif v.is_a? String
269
+ if v == "NaN"
270
+ event[k.to_s] = Float::NAN
271
+ elsif /\A[-+]?\d+[.][\d]+\z/ == v
272
+ event[k.to_s] = v.to_f
273
+ elsif (/\A[-+]?\d+\z/ === v) || (v.is_a? Integer)
274
+ event[k.to_s] = v.to_i
275
+ else
276
+ event[k.to_s] = v
277
+ end
278
+ else
279
+ event[k.to_s] = v.to_s unless k.to_s == "_id" || k.to_s == "tags"
280
+ if (k.to_s == "tags") && (v.is_a? Array)
281
+ event['tags'] = v
282
+ end
283
+ end
284
+ end
285
+ elsif @parse_method == 'dig'
286
+ # Dig into the JSON and flatten select elements
287
+ doc.each do |k, v|
288
+ if k != "_id"
289
+ if (@dig_fields.include? k) && (v.respond_to? :each)
290
+ v.each do |kk, vv|
291
+ if (@dig_dig_fields.include? kk) && (vv.respond_to? :each)
292
+ vv.each do |kkk, vvv|
293
+ if /\A[-+]?\d+\z/ === vvv
294
+ event["#{k}_#{kk}_#{kkk}"] = vvv.to_i
295
+ else
296
+ event["#{k}_#{kk}_#{kkk}"] = vvv.to_s
297
+ end
298
+ end
299
+ else
300
+ if /\A[-+]?\d+\z/ === vv
301
+ event["#{k}_#{kk}"] = vv.to_i
302
+ else
303
+ event["#{k}_#{kk}"] = vv.to_s
304
+ end
305
+ end
306
+ end
307
+ else
308
+ if /\A[-+]?\d+\z/ === v
309
+ event[k] = v.to_i
310
+ else
311
+ event[k] = v.to_s
312
+ end
313
+ end
314
+ end
315
+ end
316
+ elsif @parse_method == 'simple'
317
+ doc.each do |k, v|
318
+ if v.is_a? Numeric
319
+ event[k] = v.abs
320
+ elsif v.is_a? Array
321
+ event[k] = v
322
+ elsif v == "NaN"
323
+ event[k] = Float::NAN
324
+ else
325
+ event[k] = v.to_s
326
+ end
327
+ end
328
+ end
329
+
330
+ queue << event
331
+ @collection_data[index][:last_id] = doc['_id'].to_s
332
+ end
333
+ # Store the last-seen doc in the database
334
+ update_placeholder(@sqlitedb, since_table, collection_name, @collection_data[index][:last_id])
335
+ end
336
+ @logger.debug("Updating watch collections")
337
+ @collection_data = update_watched_collections(@mongodb, @collection, @sqlitedb)
338
+
339
+ # nothing found in that iteration
340
+ # sleep a bit
341
+ @logger.debug("No new rows. Sleeping.", :time => sleeptime)
342
+ sleeptime = [sleeptime * 2, sleep_max].min
343
+ sleep(sleeptime)
344
+ #sleeptime = sleep_min
345
+ end
346
+ rescue LogStash::ShutdownSignal
347
+ if @interrupted
348
+ @logger.debug("Mongo Input shutting down")
349
+ end
350
+ end
351
+ end # def run
352
+
353
+ end # class LogStash::Inputs::MongoDB
@@ -1,38 +1,38 @@
1
- Gem::Specification.new do |s|
2
- s.name = 'logstash-input-mongodb'
3
- s.version = '0.3.1'
4
- s.licenses = ['Apache License (2.0)']
5
- s.summary = "This takes entries from mongodb as an input to logstash."
6
- s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
7
- s.authors = ["Philip Hutchins"]
8
- s.email = 'flipture@gmail.com'
9
- s.homepage = "http://www.phutchins.com"
10
- s.require_paths = ["lib"]
11
-
12
- # Files
13
- s.files = Dir[
14
- 'lib/**/*',
15
- 'spec/**/*',
16
- 'vendor/**/*',
17
- '*.gemspec',
18
- '*.md',
19
- 'CONTRIBUTORS',
20
- 'Gemfile',
21
- 'LICENSE',
22
- 'NOTICE.TXT'
23
- ]
24
- # Tests
25
- s.test_files = s.files.grep(%r{^(test|spec|features)/})
26
-
27
- # Special flag to let us know this is actually a logstash plugin
28
- s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" }
29
-
30
- # Gem dependencies
31
- s.add_runtime_dependency 'logstash-core', ">= 2.0.0.beta2", "< 3.0.0"
32
- s.add_runtime_dependency 'logstash-codec-plain'
33
- s.add_runtime_dependency 'stud'
34
- s.add_runtime_dependency 'jdbc-sqlite3', '3.8.10.1'
35
- s.add_runtime_dependency 'sequel'
36
- s.add_runtime_dependency 'mongo', '>= 2.0.0'
37
- s.add_development_dependency 'logstash-devutils'
38
- end
1
+ Gem::Specification.new do |s|
2
+ s.name = 'logstash-input-mongodb'
3
+ s.version = '0.3.2'
4
+ s.licenses = ['Apache License (2.0)']
5
+ s.summary = "This takes entries from mongodb as an input to logstash."
6
+ s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
7
+ s.authors = ["Philip Hutchins"]
8
+ s.email = 'flipture@gmail.com'
9
+ s.homepage = "http://www.phutchins.com"
10
+ s.require_paths = ["lib"]
11
+
12
+ # Files
13
+ s.files = Dir[
14
+ 'lib/**/*',
15
+ 'spec/**/*',
16
+ 'vendor/**/*',
17
+ '*.gemspec',
18
+ '*.md',
19
+ 'CONTRIBUTORS',
20
+ 'Gemfile',
21
+ 'LICENSE',
22
+ 'NOTICE.TXT'
23
+ ]
24
+ # Tests
25
+ s.test_files = s.files.grep(%r{^(test|spec|features)/})
26
+
27
+ # Special flag to let us know this is actually a logstash plugin
28
+ s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" }
29
+
30
+ # Gem dependencies
31
+ s.add_runtime_dependency 'logstash-core', ">= 2.0.0.beta2", "< 3.0.0"
32
+ s.add_runtime_dependency 'logstash-codec-plain'
33
+ s.add_runtime_dependency 'stud'
34
+ s.add_runtime_dependency 'jdbc-sqlite3', '3.8.10.1'
35
+ s.add_runtime_dependency 'sequel'
36
+ s.add_runtime_dependency 'mongo', '>= 2.0.0'
37
+ s.add_development_dependency 'logstash-devutils'
38
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-mongodb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Philip Hutchins
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-25 00:00:00.000000000 Z
11
+ date: 2015-12-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: logstash-core
@@ -151,7 +151,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
151
151
  version: '0'
152
152
  requirements: []
153
153
  rubyforge_project:
154
- rubygems_version: 2.4.5.1
154
+ rubygems_version: 2.4.6
155
155
  signing_key:
156
156
  specification_version: 4
157
157
  summary: This takes entries from mongodb as an input to logstash.