logstash-input-mongodb 0.3.0 → 0.3.1
- checksums.yaml +5 -13
- data/DEVELOPER.md +2 -2
- data/Gemfile +2 -2
- data/LICENSE +13 -13
- data/README.md +64 -64
- data/lib/logstash/inputs/mongodb.rb +356 -356
- data/logstash-input-mongodb.gemspec +38 -28
- data/spec/inputs/example_spec.rb +0 -0
- metadata +25 -25
- data/Gemfile.lock +0 -106
- data/Rakefile +0 -1
- data/logstash-input-mongodb-0.1.3.gem +0 -0
- data/logstash-input-mongodb-0.2.0.gem +0 -0
- data/test/flattener_test.rb +0 -73
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
 ---
-!binary "U0hBMQ==":
-  metadata.gz: !binary |-
-
-  data.tar.gz: !binary |-
-    YzAyZDJhYTM0MDFiZmM0YjkxNDg1Y2ZkYTc4YTE5ZTNmMmZkMTUyYg==
+SHA1:
+  metadata.gz: 610e56ecb9dbe13f8a94e0abd903b3889ec32d22
+  data.tar.gz: f8232dc0ee30a7d3fb877be02d7e36c72a6cb96a
 SHA512:
-  metadata.gz: !binary |-
-
-    N2ExMzc0OTkwNzU3NDIyNDRiMDYyZGQyYTczOTY3MGM0YmNkZWUxNTllNjUw
-    MWI4NjdjNjYwOWIzOTRhMDFmZGI3MGFkZDFlZTAwMjhkZDUxN2U=
-  data.tar.gz: !binary |-
-    MTM0MjAyNTcyN2FmNmI5ZjMwZTRjNjA4NzU5OTgzODYwOTdiY2I0NDY5YWJi
-    ZjNhYjE0OTA4Y2U3MjZkMjJjOGJiZjc4OGVhOTQ3MjliODM0ZDJjNGYzYmU1
-    M2QwYjI1ZThiMTM5YmUzMTNmZDM5NTgyYzNjNDA0MjZhZjMyZjU=
+  metadata.gz: 61eb59603580d59b6b837a53d174bf79a1b292cbdf6797b513ab82b700d64571ff37ef8e6205be2b5f203b6a0cdc9f1c7eea4ffca4617f0746485431bbc153b9
+  data.tar.gz: 7cec076e0eb7a7de7e62e1945cb8ccbcc626f015628fda9ba2ddcb407b17128f02b16782b9d2553128da8c84cc314c059b86be0795248326e8e5b4680689fd7e
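The 0.3.1 package moves from base64 `!binary` checksum encoding to plain hex digests. If you want to check a downloaded package against the values above, here is a minimal sketch (not part of the diff): a `.gem` file is an ordinary tar archive whose `metadata.gz` and `data.tar.gz` members are what checksums.yaml digests, so after `tar -xf logstash-input-mongodb-0.3.1.gem` you can recompute them with Ruby's standard Digest library.

require 'digest'

# Recompute the digests recorded in checksums.yaml for an unpacked gem.
# Assumes metadata.gz and data.tar.gz sit in the current directory.
%w[metadata.gz data.tar.gz].each do |member|
  puts "#{member}:"
  puts "  SHA1:   #{Digest::SHA1.file(member).hexdigest}"
  puts "  SHA512: #{Digest::SHA512.file(member).hexdigest}"
end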
data/DEVELOPER.md
CHANGED
@@ -1,2 +1,2 @@
(both lines were removed and re-added with identical content; shown once)
 # logstash-input-example
 Example input plugin. This should help bootstrap your effort to write your own input plugin!
data/Gemfile
CHANGED
@@ -1,2 +1,2 @@
(both lines were removed and re-added with identical content; shown once)
 source 'https://rubygems.org'
 gemspec
data/LICENSE
CHANGED
@@ -1,13 +1,13 @@
(all 13 lines were removed and re-added with identical content; shown once)
 Copyright (c) 2012-2015 Elasticsearch <http://www.elasticsearch.org>
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
     http://www.apache.org/licenses/LICENSE-2.0
 
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
data/README.md
CHANGED
@@ -1,64 +1,64 @@
(every line was removed and re-added; apart from the two lines marked with - and + below, the content is identical)
 # Logstash Plugin
 
 This is a plugin for [Logstash](https://github.com/elasticsearch/logstash).
 
 It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
 
 ## Documentation
 
 This is a logstash plugin for pulling data out of mongodb and processing with logstash. It will connect to the database specified in `uri`, use the `collection` attribute to find collections to pull documents from, start at the first collection it finds and pull the number of documents specified in `batch_size`, save its progress in an sqlite database whose location is specified by `placeholder_db_dir` and `placeholder_db_name` and repeat. It will continue this until it no longer finds documents newer than ones that it has processed, sleep for a moment, then continue to loop over the collections.
 
 This was designed for parsing logs that were written into mongodb. This means that it may not re-parse db entries that were changed and already parsed.
 
 
 ### Installation
 
 + Logstash installed from ZIP | TGZ
-+ bin/plugin install /path/to/logstash-input-mongodb-0.
++ bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
 
 + Logstash from GIT
 + git clone https://github.com/elastic/logstash.git
 + cd logstash
 + (ensure that the correct jruby is installed for the version of logstash you are installing)
 + rake test:install-core
-+ bin/plugin install /path/to/logstash-input-mongodb-0.
++ bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
 + bin/plugin install --development
 
 ### Configuration Options
 
 uri: A MongoDB URI for your database or cluster (check the MongoDB documentation for further info on this) [No Default, Required]
 placeholder_db_dir: Path where the place holder database will be stored locally to disk [No Default, Required]
     This gets created by the plugin so the directory needs to be writeable by the user that logstash is running as
 placeholder_db_name: Name of the database file that will be created [Default: logstash_sqlite.db]
 collection: A regex that will be used to find desired collections. [No Default, Required]
 batch_size: Size of the batch of mongo documents to pull at a time [Default: 30]
 
 
 ### Configuration
 
 Example
 ```
 input {
   mongodb {
     uri => 'mongodb://10.0.0.30/my-logs?ssl=true'
     placeholder_db_dir => '/opt/logstash-mongodb/'
     placeholder_db_name => 'logstash_sqlite.db'
     collection => 'events_'
     batch_size => 5000
   }
 }
 
 filter {
   date {
     match => [ "logdate", "ISO8601" ]
   }
 }
 
 output {
   redis {
     host => "localhost"
     data_type => "list"
     key => "logstash-mylogs"
   }
 }
 ```
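The progress tracking the Documentation section describes is just a one-table sqlite file. As an illustration (not from the gem), the sketch below reads that table with Sequel the same way the plugin does; it assumes the since_table schema created by init_placeholder_table in the plugin source below, the `/opt/logstash-mongodb/` path from the example config above, and JRuby, since the plugin loads the JDBC sqlite driver (adjust the adapter if you try this elsewhere).

require 'sequel'
require 'jdbc/sqlite3' # JRuby; the plugin itself loads the same driver

# Rows are keyed "<since_table>_<collection>" and store the last-processed
# BSON ObjectId as a string; the plugin resumes each collection from there.
db = Sequel.connect('jdbc:sqlite:/opt/logstash-mongodb/logstash_sqlite.db')
db[:since_table].each do |row|
  puts "#{row[:table]} resumes after ObjectId #{row[:place]}"
end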
data/lib/logstash/inputs/mongodb.rb
CHANGED
@@ -1,356 +1,356 @@
(all 356 lines were removed and re-added with identical content; the file is shown once below)

# encoding: utf-8
require "logstash/inputs/base"
require "logstash/namespace"
require "logstash/timestamp"
require "stud/interval"
require "socket" # for Socket.gethostname
require "json"
require "mongo"

include Mongo

class LogStash::Inputs::MongoDB < LogStash::Inputs::Base
  config_name "mongodb"

  # If undefined, Logstash will complain, even if codec is unused.
  default :codec, "plain"

  # Example URI: mongodb://mydb.host:27017/mydbname?ssl=true
  config :uri, :validate => :string, :required => true

  # The directory that will contain the sqlite database file.
  config :placeholder_db_dir, :validate => :string, :required => true

  # The name of the sqlite database file
  config :placeholder_db_name, :validate => :string, :default => "logstash_sqlite.db"

  # Any table to exclude by name
  config :exclude_tables, :validate => :array, :default => []

  config :batch_size, :validate => :number, :default => 30

  config :since_table, :validate => :string, :default => "logstash_since"

  # The collection to use. Is turned into a regex so 'events' will match 'events_20150227'
  # Example collection: events_20150227 or events_
  config :collection, :validate => :string, :required => true

  # This allows you to select the method you would like to use to parse your data
  config :parse_method, :validate => :string, :default => 'flatten'

  # If not flattening you can dig to flatten select fields
  config :dig_fields, :validate => :array, :default => []

  # This is the second level of hash flattening
  config :dig_dig_fields, :validate => :array, :default => []

  # If true, store the @timestamp field in mongodb as an ISODate type instead
  # of an ISO8601 string. For more information about this, see
  # http://www.mongodb.org/display/DOCS/Dates
  config :isodate, :validate => :boolean, :default => false

  # Number of seconds to wait after failure before retrying
  config :retry_delay, :validate => :number, :default => 3, :required => false

  # If true, an "_id" field will be added to the document before insertion.
  # The "_id" field will use the timestamp of the event and overwrite an existing
  # "_id" field in the event.
  config :generateId, :validate => :boolean, :default => false

  config :unpack_mongo_id, :validate => :boolean, :default => false

  # The message string to use in the event.
  config :message, :validate => :string, :default => "Default message..."

  # Set how frequently messages should be sent.
  # The default, `1`, means send a message every second.
  config :interval, :validate => :number, :default => 1

  SINCE_TABLE = :since_table

  public
  def init_placeholder_table(sqlitedb)
    begin
      sqlitedb.create_table "#{SINCE_TABLE}" do
        String :table
        Int :place
      end
    rescue
      @logger.debug("since table already exists")
    end
  end

  public
  def init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
    @logger.debug("init placeholder for #{since_table}_#{mongo_collection_name}")
    since = sqlitedb[SINCE_TABLE]
    mongo_collection = mongodb.collection(mongo_collection_name)
    first_entry = mongo_collection.find({}).sort('_id' => 1).limit(1).first
    first_entry_id = first_entry['_id'].to_s
    since.insert(:table => "#{since_table}_#{mongo_collection_name}", :place => first_entry_id)
    return first_entry_id
  end

  public
  def get_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
    since = sqlitedb[SINCE_TABLE]
    x = since.where(:table => "#{since_table}_#{mongo_collection_name}")
    if x[:place].nil? || x[:place] == 0
      first_entry_id = init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
      @logger.debug("FIRST ENTRY ID for #{mongo_collection_name} is #{first_entry_id}")
      return first_entry_id
    else
      @logger.debug("placeholder already exists, it is #{x[:place]}")
      return x[:place][:place]
    end
  end

  public
  def update_placeholder(sqlitedb, since_table, mongo_collection_name, place)
    #@logger.debug("updating placeholder for #{since_table}_#{mongo_collection_name} to #{place}")
    since = sqlitedb[SINCE_TABLE]
    since.where(:table => "#{since_table}_#{mongo_collection_name}").update(:place => place)
  end

  public
  def get_all_tables(mongodb)
    return @mongodb.collection_names
  end

  public
  def get_collection_names(mongodb, collection)
    collection_names = []
    @mongodb.collection_names.each do |coll|
      if /#{collection}/ =~ coll
        collection_names.push(coll)
        @logger.debug("Added #{coll} to the collection list as it matches our collection search")
      end
    end
    return collection_names
  end

  public
  def get_cursor_for_collection(mongodb, mongo_collection_name, last_id_object, batch_size)
    collection = mongodb.collection(mongo_collection_name)
    # Need to make this sort by date in object id then get the first of the series
    # db.events_20150320.find().limit(1).sort({ts:1})
    return collection.find({:_id => {:$gt => last_id_object}}).limit(batch_size)
  end

  public
  def update_watched_collections(mongodb, collection, sqlitedb)
    collections = get_collection_names(mongodb, collection)
    collection_data = {}
    collections.each do |my_collection|
      init_placeholder_table(sqlitedb)
      last_id = get_placeholder(sqlitedb, since_table, mongodb, my_collection)
      if !collection_data[my_collection]
        collection_data[my_collection] = { :name => my_collection, :last_id => last_id }
      end
    end
    return collection_data
  end

  public
  def register
    require "jdbc/sqlite3"
    require "sequel"
    placeholder_db_path = File.join(@placeholder_db_dir, @placeholder_db_name)
    mongo_uri = Mongo::URI.new(@uri)
    hosts_array = mongo_uri.servers
    db_name = mongo_uri.database
    ssl_enabled = mongo_uri.options[:ssl]
    conn = Mongo::Client.new(hosts_array, ssl: ssl_enabled, database: db_name)

    if @db_auths
      @db_auths.each do |auth|
        if !auth['db_name'].nil?
          conn.add_auth(auth['db_name'], auth['username'], auth['password'], nil)
        end
      end
      conn.apply_saved_authentication()
    end

    @host = Socket.gethostname
    @logger.info("Registering MongoDB input")

    @mongodb = conn.database
    @sqlitedb = Sequel.connect("jdbc:sqlite:#{placeholder_db_path}")

    # Should check to see if there are new matching tables at a predefined interval or on some trigger
    @collection_data = update_watched_collections(@mongodb, @collection, @sqlitedb)
  end # def register

  class BSON::OrderedHash
    def to_h
      inject({}) { |acc, element| k,v = element; acc[k] = (if v.class == BSON::OrderedHash then v.to_h else v end); acc }
    end

    def to_json
      JSON.parse(self.to_h.to_json, :allow_nan => true)
    end
  end

  def flatten(my_hash)
    new_hash = {}
    @logger.debug("Raw Hash: #{my_hash}")
    if my_hash.respond_to? :each
      my_hash.each do |k1,v1|
        if v1.is_a?(Hash)
          v1.each do |k2,v2|
            if v2.is_a?(Hash)
              # puts "Found a nested hash"
              result = flatten(v2)
              result.each do |k3,v3|
                new_hash[k1.to_s+"_"+k2.to_s+"_"+k3.to_s] = v3
              end
              # puts "result: "+result.to_s+" k2: "+k2.to_s+" v2: "+v2.to_s
            else
              new_hash[k1.to_s+"_"+k2.to_s] = v2
            end
          end
        else
          # puts "Key: "+k1.to_s+" is not a hash"
          new_hash[k1.to_s] = v1
        end
      end
    else
      @logger.debug("Flatten [ERROR]: hash did not respond to :each")
    end
    @logger.debug("Flattened Hash: #{new_hash}")
    return new_hash
  end

  def run(queue)
    sleep_min = 0.01
    sleep_max = 5
    sleeptime = sleep_min

    begin
      @logger.debug("Tailing MongoDB")
      @logger.debug("Collection data is: #{@collection_data}")
      loop do
        @collection_data.each do |index, collection|
          collection_name = collection[:name]
          @logger.debug("collection_data is: #{@collection_data}")
          last_id = @collection_data[index][:last_id]
          #@logger.debug("last_id is #{last_id}", :index => index, :collection => collection_name)
          # get batch of events starting at the last_place if it is set
          last_id_object = BSON::ObjectId(last_id)
          cursor = get_cursor_for_collection(@mongodb, collection_name, last_id_object, batch_size)
          cursor.each do |doc|
            logdate = DateTime.parse(doc['_id'].generation_time.to_s)
            event = LogStash::Event.new("host" => @host)
            decorate(event)
            event["logdate"] = logdate.iso8601
            log_entry = doc.to_h.to_s
            log_entry['_id'] = log_entry['_id'].to_s
            event["log_entry"] = log_entry
            event["mongo_id"] = doc['_id'].to_s
            @logger.debug("mongo_id: "+doc['_id'].to_s)
            #@logger.debug("EVENT looks like: "+event.to_s)
            #@logger.debug("Sent message: "+doc.to_h.to_s)
            #@logger.debug("EVENT looks like: "+event.to_s)
            # Extract the HOST_ID and PID from the MongoDB BSON::ObjectID
            if @unpack_mongo_id
              doc_hex_bytes = doc['_id'].to_s.each_char.each_slice(2).map {|b| b.join.to_i(16) }
              doc_obj_bin = doc_hex_bytes.pack("C*").unpack("a4 a3 a2 a3")
              host_id = doc_obj_bin[1].unpack("S")
              process_id = doc_obj_bin[2].unpack("S")
              event['host_id'] = host_id.first.to_i
              event['process_id'] = process_id.first.to_i
            end

            if @parse_method == 'flatten'
              # Flatten the JSON so that the data is usable in Kibana
              flat_doc = flatten(doc)
              # Check for different types of expected values and add them to the event
              if flat_doc['info_message'] && (flat_doc['info_message'] =~ /collection stats: .+/)
                # Some custom stuff I'm having to do to fix formatting in past logs...
                sub_value = flat_doc['info_message'].sub("collection stats: ", "")
                JSON.parse(sub_value).each do |k1,v1|
                  flat_doc["collection_stats_#{k1.to_s}"] = v1
                end
              end

              flat_doc.each do |k,v|
                # Check for an integer
                @logger.debug("key: #{k.to_s} value: #{v.to_s}")
                if v.is_a? Numeric
                  event[k.to_s] = v
                elsif v.is_a? String
                  if v == "NaN"
                    event[k.to_s] = Float::NAN
                  elsif /\A[-+]?\d+[.][\d]+\z/ === v
                    event[k.to_s] = v.to_f
                  elsif (/\A[-+]?\d+\z/ === v) || (v.is_a? Integer)
                    event[k.to_s] = v.to_i
                  else
                    event[k.to_s] = v
                  end
                else
                  event[k.to_s] = v.to_s unless k.to_s == "_id" || k.to_s == "tags"
                  if (k.to_s == "tags") && (v.is_a? Array)
                    event['tags'] = v
                  end
                end
              end
            elsif @parse_method == 'dig'
              # Dig into the JSON and flatten select elements
              doc.each do |k, v|
                if k != "_id"
                  if (@dig_fields.include? k) && (v.respond_to? :each)
                    v.each do |kk, vv|
                      if (@dig_dig_fields.include? kk) && (vv.respond_to? :each)
                        vv.each do |kkk, vvv|
                          if /\A[-+]?\d+\z/ === vvv
                            event["#{k}_#{kk}_#{kkk}"] = vvv.to_i
                          else
                            event["#{k}_#{kk}_#{kkk}"] = vvv.to_s
                          end
                        end
                      else
                        if /\A[-+]?\d+\z/ === vv
                          event["#{k}_#{kk}"] = vv.to_i
                        else
                          event["#{k}_#{kk}"] = vv.to_s
                        end
                      end
                    end
                  else
                    if /\A[-+]?\d+\z/ === v
                      event[k] = v.to_i
                    else
                      event[k] = v.to_s
                    end
                  end
                end
              end
            else
              # Should probably do some sanitization here and insert the doc as raw as possible for parsing in logstash
            end

            queue << event
            @collection_data[index][:last_id] = doc['_id'].to_s
          end
          # Store the last-seen doc in the database
          update_placeholder(@sqlitedb, since_table, collection_name, @collection_data[index][:last_id])
        end
        @logger.debug("Updating watch collections")
        @collection_data = update_watched_collections(@mongodb, @collection, @sqlitedb)

        # nothing found in that iteration
        # sleep a bit
        @logger.debug("No new rows. Sleeping.", :time => sleeptime)
        sleeptime = [sleeptime * 2, sleep_max].min
        sleep(sleeptime)
        #sleeptime = sleep_min
      end
    rescue LogStash::ShutdownSignal
      if @interrupted
        @logger.debug("Mongo Input shutting down")
      end
    end
  end # def run

end # class LogStash::Inputs::MongoDB
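For a sense of what the default parse_method => 'flatten' produces, here is a standalone sketch (not part of the gem) that mirrors the flatten method above with the logging and BSON specifics stripped out; the sample document is hypothetical. Nested keys are joined with "_" so every value becomes a top-level event field, which is what makes the data usable in Kibana.

def flatten(hash)
  flat = {}
  hash.each do |k1, v1|
    if v1.is_a?(Hash)
      v1.each do |k2, v2|
        if v2.is_a?(Hash)
          # Recurse below the second level, prefixing with both parent keys
          flatten(v2).each { |k3, v3| flat["#{k1}_#{k2}_#{k3}"] = v3 }
        else
          flat["#{k1}_#{k2}"] = v2
        end
      end
    else
      flat[k1.to_s] = v1
    end
  end
  flat
end

doc = { 'request' => { 'timings' => { 'db' => 12, 'view' => 48 }, 'path' => '/login' }, 'status' => 200 }
p flatten(doc)
# => {"request_timings_db"=>12, "request_timings_view"=>48, "request_path"=>"/login", "status"=>200}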