logstash-input-mongodb 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/DEVELOPER.md +2 -2
- data/Gemfile +2 -2
- data/LICENSE +13 -13
- data/README.md +64 -64
- data/lib/logstash/inputs/mongodb.rb +356 -356
- data/logstash-input-mongodb.gemspec +38 -28
- data/spec/inputs/example_spec.rb +0 -0
- metadata +25 -25
- data/Gemfile.lock +0 -106
- data/Rakefile +0 -1
- data/logstash-input-mongodb-0.1.3.gem +0 -0
- data/logstash-input-mongodb-0.2.0.gem +0 -0
- data/test/flattener_test.rb +0 -73
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
 ---
-
-  metadata.gz: !binary |-
-
-  data.tar.gz: !binary |-
-    YzAyZDJhYTM0MDFiZmM0YjkxNDg1Y2ZkYTc4YTE5ZTNmMmZkMTUyYg==
+SHA1:
+  metadata.gz: 610e56ecb9dbe13f8a94e0abd903b3889ec32d22
+  data.tar.gz: f8232dc0ee30a7d3fb877be02d7e36c72a6cb96a
 SHA512:
-  metadata.gz: !binary |-
-
-    N2ExMzc0OTkwNzU3NDIyNDRiMDYyZGQyYTczOTY3MGM0YmNkZWUxNTllNjUw
-    MWI4NjdjNjYwOWIzOTRhMDFmZGI3MGFkZDFlZTAwMjhkZDUxN2U=
-  data.tar.gz: !binary |-
-    MTM0MjAyNTcyN2FmNmI5ZjMwZTRjNjA4NzU5OTgzODYwOTdiY2I0NDY5YWJi
-    ZjNhYjE0OTA4Y2U3MjZkMjJjOGJiZjc4OGVhOTQ3MjliODM0ZDJjNGYzYmU1
-    M2QwYjI1ZThiMTM5YmUzMTNmZDM5NTgyYzNjNDA0MjZhZjMyZjU=
+  metadata.gz: 61eb59603580d59b6b837a53d174bf79a1b292cbdf6797b513ab82b700d64571ff37ef8e6205be2b5f203b6a0cdc9f1c7eea4ffca4617f0746485431bbc153b9
+  data.tar.gz: 7cec076e0eb7a7de7e62e1945cb8ccbcc626f015628fda9ba2ddcb407b17128f02b16782b9d2553128da8c84cc314c059b86be0795248326e8e5b4680689fd7e
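The removed entries use the older base64/`!binary` block form; the added entries are plain hex digests. A minimal verification sketch of the new-style values against a downloaded copy of the gem, assuming `metadata.gz` and `data.tar.gz` have already been extracted from the `.gem` file (a `.gem` is an ordinary tar archive); this is illustrative only and not part of the package:

```
# Illustrative sketch: check the new-style checksums.yaml digests.
# Assumes metadata.gz and data.tar.gz sit in the current directory,
# extracted from the downloaded .gem beforehand.
require "digest"

expected = {
  "metadata.gz" => {
    "SHA1"   => "610e56ecb9dbe13f8a94e0abd903b3889ec32d22",
    "SHA512" => "61eb59603580d59b6b837a53d174bf79a1b292cbdf6797b513ab82b700d64571ff37ef8e6205be2b5f203b6a0cdc9f1c7eea4ffca4617f0746485431bbc153b9",
  },
  "data.tar.gz" => {
    "SHA1"   => "f8232dc0ee30a7d3fb877be02d7e36c72a6cb96a",
    "SHA512" => "7cec076e0eb7a7de7e62e1945cb8ccbcc626f015628fda9ba2ddcb407b17128f02b16782b9d2553128da8c84cc314c059b86be0795248326e8e5b4680689fd7e",
  },
}

expected.each do |file, digests|
  sha1_ok   = Digest::SHA1.file(file).hexdigest   == digests["SHA1"]
  sha512_ok = Digest::SHA512.file(file).hexdigest == digests["SHA512"]
  puts "#{file}: SHA1 #{sha1_ok ? 'ok' : 'mismatch'}, SHA512 #{sha512_ok ? 'ok' : 'mismatch'}"
end
```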
data/DEVELOPER.md
CHANGED
@@ -1,2 +1,2 @@
# logstash-input-example
Example input plugin. This should help bootstrap your effort to write your own input plugin!
data/Gemfile
CHANGED
@@ -1,2 +1,2 @@
source 'https://rubygems.org'
gemspec
data/LICENSE
CHANGED
@@ -1,13 +1,13 @@
Copyright (c) 2012-2015 Elasticsearch <http://www.elasticsearch.org>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
data/README.md
CHANGED
@@ -1,64 +1,64 @@
 # Logstash Plugin
 
 This is a plugin for [Logstash](https://github.com/elasticsearch/logstash).
 
 It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
 
 ## Documentation
 
 This is a logstash plugin for pulling data out of mongodb and processing with logstash. It will connect to the database specified in `uri`, use the `collection` attribute to find collections to pull documents from, start at the first collection it finds and pull the number of documents specified in `batch_size`, save it's progress in an sqlite database who's location is specified by `placeholder_db_dir` and `placeholder_db_name` and repeat. It will continue this until it no longer finds documents newer than ones that it has processed, sleep for a moment, then continue to loop over the collections.
 
 This was designed for parsing logs that were written into mongodb. This means that it may not re-parse db entries that were changed and already parsed.
 
 
 ### Installation
 
 + Logstash installed from ZIP | TGZ
-+ bin/plugin install /path/to/logstash-input-mongodb-0.
++ bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
 
 + Logstash from GIT
 + git clone https://github.com/elastic/logstash.git
 + cd logstash
 + (ensure that the correct jruby is installed for the version of logstash you are installing)
 + rake test:install-core
-+ bin/plugin install /path/to/logstash-input-mongodb-0.
++ bin/plugin install /path/to/logstash-input-mongodb-0.3.0.gem
 + bin/plugin install --development
 
 ### Configuration Options
 
 uri: A MongoDB URI for your database or cluster (check the MongoDB documentation for further info on this) [No Default, Required]
 placeholder_db_dir: Path where the place holder database will be stored locally to disk [No Default, Required]
 This gets created by the plugin so the directory needs to be writeable by the user that logstash is running as
 placeholder_db_name: Name of the database file that will be created [Default: logstash_sqlite.db]
 collection: A regex that will be used to find desired collecitons. [No Default, Required]
 batch_size: Size of the batch of mongo documents to pull at a time [Default: 30]
 
 
 ### Configuration
 
 Example
 ```
 input {
   mongodb {
     uri => 'mongodb://10.0.0.30/my-logs?ssl=true'
     placeholder_db_dir => '/opt/logstash-mongodb/'
     placeholder_db_name => 'logstash_sqlite.db'
     collection => 'events_'
     batch_size => 5000
   }
 }
 
 filter {
   date {
     match => [ "logdate", "ISO8601" ]
   }
 }
 
 output {
   redis {
     host => "localhost"
     data_type => "list"
     key => "logstash-mylogs"
   }
 }
 ```
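The `collection` setting is applied as a regular expression against every collection name in the database (see `get_collection_names` in the plugin source below). A small illustrative sketch of how the `events_` value from the example config selects collections; the collection names here are made up:

```
# Illustrative only: the plugin interpolates the collection setting into a
# regex and keeps every collection name that matches.
collection = 'events_'
names = ['events_20150226', 'events_20150227', 'sessions', 'system.indexes']
watched = names.select { |coll| /#{collection}/ =~ coll }
p watched
# => ["events_20150226", "events_20150227"]
```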
data/lib/logstash/inputs/mongodb.rb
CHANGED
@@ -1,356 +1,356 @@
# encoding: utf-8
require "logstash/inputs/base"
require "logstash/namespace"
require "logstash/timestamp"
require "stud/interval"
require "socket" # for Socket.gethostname
require "json"
require "mongo"

include Mongo

class LogStash::Inputs::MongoDB < LogStash::Inputs::Base
  config_name "mongodb"

  # If undefined, Logstash will complain, even if codec is unused.
  default :codec, "plain"

  # Example URI: mongodb://mydb.host:27017/mydbname?ssl=true
  config :uri, :validate => :string, :required => true

  # The directory that will contain the sqlite database file.
  config :placeholder_db_dir, :validate => :string, :required => true

  # The name of the sqlite databse file
  config :placeholder_db_name, :validate => :string, :default => "logstash_sqlite.db"

  # Any table to exclude by name
  config :exclude_tables, :validate => :array, :default => []

  config :batch_size, :avlidate => :number, :default => 30

  config :since_table, :validate => :string, :default => "logstash_since"

  # The collection to use. Is turned into a regex so 'events' will match 'events_20150227'
  # Example collection: events_20150227 or events_
  config :collection, :validate => :string, :required => true

  # This allows you to select the method you would like to use to parse your data
  config :parse_method, :validate => :string, :default => 'flatten'

  # If not flattening you can dig to flatten select fields
  config :dig_fields, :validate => :array, :default => []

  # This is the second level of hash flattening
  config :dig_dig_fields, :validate => :array, :default => []

  # If true, store the @timestamp field in mongodb as an ISODate type instead
  # of an ISO8601 string. For more information about this, see
  # http://www.mongodb.org/display/DOCS/Dates
  config :isodate, :validate => :boolean, :default => false

  # Number of seconds to wait after failure before retrying
  config :retry_delay, :validate => :number, :default => 3, :required => false

  # If true, an "_id" field will be added to the document before insertion.
  # The "_id" field will use the timestamp of the event and overwrite an existing
  # "_id" field in the event.
  config :generateId, :validate => :boolean, :default => false

  config :unpack_mongo_id, :validate => :boolean, :default => false

  # The message string to use in the event.
  config :message, :validate => :string, :default => "Default message..."

  # Set how frequently messages should be sent.
  # The default, `1`, means send a message every second.
  config :interval, :validate => :number, :default => 1

  SINCE_TABLE = :since_table

  public
  def init_placeholder_table(sqlitedb)
    begin
      sqlitedb.create_table "#{SINCE_TABLE}" do
        String :table
        Int :place
      end
    rescue
      @logger.debug("since table already exists")
    end
  end

  public
  def init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
    @logger.debug("init placeholder for #{since_table}_#{mongo_collection_name}")
    since = sqlitedb[SINCE_TABLE]
    mongo_collection = mongodb.collection(mongo_collection_name)
    first_entry = mongo_collection.find({}).sort('_id' => 1).limit(1).first
    first_entry_id = first_entry['_id'].to_s
    since.insert(:table => "#{since_table}_#{mongo_collection_name}", :place => first_entry_id)
    return first_entry_id
  end

  public
  def get_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
    since = sqlitedb[SINCE_TABLE]
    x = since.where(:table => "#{since_table}_#{mongo_collection_name}")
    if x[:place].nil? || x[:place] == 0
      first_entry_id = init_placeholder(sqlitedb, since_table, mongodb, mongo_collection_name)
      @logger.debug("FIRST ENTRY ID for #{mongo_collection_name} is #{first_entry_id}")
      return first_entry_id
    else
      @logger.debug("placeholder already exists, it is #{x[:place]}")
      return x[:place][:place]
    end
  end

  public
  def update_placeholder(sqlitedb, since_table, mongo_collection_name, place)
    #@logger.debug("updating placeholder for #{since_table}_#{mongo_collection_name} to #{place}")
    since = sqlitedb[SINCE_TABLE]
    since.where(:table => "#{since_table}_#{mongo_collection_name}").update(:place => place)
  end

  public
  def get_all_tables(mongodb)
    return @mongodb.collection_names
  end

  public
  def get_collection_names(mongodb, collection)
    collection_names = []
    @mongodb.collection_names.each do |coll|
      if /#{collection}/ =~ coll
        collection_names.push(coll)
        @logger.debug("Added #{coll} to the collection list as it matches our collection search")
      end
    end
    return collection_names
  end

  public
  def get_cursor_for_collection(mongodb, mongo_collection_name, last_id_object, batch_size)
    collection = mongodb.collection(mongo_collection_name)
    # Need to make this sort by date in object id then get the first of the series
    # db.events_20150320.find().limit(1).sort({ts:1})
    return collection.find({:_id => {:$gt => last_id_object}}).limit(batch_size)
  end

  public
  def update_watched_collections(mongodb, collection, sqlitedb)
    collections = get_collection_names(mongodb, collection)
    collection_data = {}
    collections.each do |my_collection|
      init_placeholder_table(sqlitedb)
      last_id = get_placeholder(sqlitedb, since_table, mongodb, my_collection)
      if !collection_data[my_collection]
        collection_data[my_collection] = { :name => my_collection, :last_id => last_id }
      end
    end
    return collection_data
  end

  public
  def register
    require "jdbc/sqlite3"
    require "sequel"
    placeholder_db_path = File.join(@placeholder_db_dir, @placeholder_db_name)
    mongo_uri = Mongo::URI.new(@uri)
    hosts_array = mongo_uri.servers
    db_name = mongo_uri.database
    ssl_enabled = mongo_uri.options[:ssl]
    conn = Mongo::Client.new(hosts_array, ssl: ssl_enabled, database: db_name)

    if @db_auths
      @db_auths.each do |auth|
        if !auth['db_name'].nil?
          conn.add_auth(auth['db_name'], auth['username'], auth['password'], nil)
        end
      end
      conn.apply_saved_authentication()
    end

    @host = Socket.gethostname
    @logger.info("Registering MongoDB input")

    @mongodb = conn.database
    @sqlitedb = Sequel.connect("jdbc:sqlite:#{placeholder_db_path}")

    # Should check to see if there are new matching tables at a predefined interval or on some trigger
    @collection_data = update_watched_collections(@mongodb, @collection, @sqlitedb)
  end # def register

  class BSON::OrderedHash
    def to_h
      inject({}) { |acc, element| k,v = element; acc[k] = (if v.class == BSON::OrderedHash then v.to_h else v end); acc }
    end

    def to_json
      JSON.parse(self.to_h.to_json, :allow_nan => true)
    end
  end

  def flatten(my_hash)
    new_hash = {}
    @logger.debug("Raw Hash: #{my_hash}")
    if my_hash.respond_to? :each
      my_hash.each do |k1,v1|
        if v1.is_a?(Hash)
          v1.each do |k2,v2|
            if v2.is_a?(Hash)
              # puts "Found a nested hash"
              result = flatten(v2)
              result.each do |k3,v3|
                new_hash[k1.to_s+"_"+k2.to_s+"_"+k3.to_s] = v3
              end
              # puts "result: "+result.to_s+" k2: "+k2.to_s+" v2: "+v2.to_s
            else
              new_hash[k1.to_s+"_"+k2.to_s] = v2
            end
          end
        else
          # puts "Key: "+k1.to_s+" is not a hash"
          new_hash[k1.to_s] = v1
        end
      end
    else
      @logger.debug("Flatten [ERROR]: hash did not respond to :each")
    end
    @logger.debug("Flattened Hash: #{new_hash}")
    return new_hash
  end

  def run(queue)
    sleep_min = 0.01
    sleep_max = 5
    sleeptime = sleep_min

    begin
      @logger.debug("Tailing MongoDB")
      @logger.debug("Collection data is: #{@collection_data}")
      loop do
        @collection_data.each do |index, collection|
          collection_name = collection[:name]
          @logger.debug("collection_data is: #{@collection_data}")
          last_id = @collection_data[index][:last_id]
          #@logger.debug("last_id is #{last_id}", :index => index, :collection => collection_name)
          # get batch of events starting at the last_place if it is set
          last_id_object = BSON::ObjectId(last_id)
          cursor = get_cursor_for_collection(@mongodb, collection_name, last_id_object, batch_size)
          cursor.each do |doc|
            logdate = DateTime.parse(doc['_id'].generation_time.to_s)
            event = LogStash::Event.new("host" => @host)
            decorate(event)
            event["logdate"] = logdate.iso8601
            log_entry = doc.to_h.to_s
            log_entry['_id'] = log_entry['_id'].to_s
            event["log_entry"] = log_entry
            event["mongo_id"] = doc['_id'].to_s
            @logger.debug("mongo_id: "+doc['_id'].to_s)
            #@logger.debug("EVENT looks like: "+event.to_s)
            #@logger.debug("Sent message: "+doc.to_h.to_s)
            #@logger.debug("EVENT looks like: "+event.to_s)
            # Extract the HOST_ID and PID from the MongoDB BSON::ObjectID
            if @unpack_mongo_id
              doc_hex_bytes = doc['_id'].to_s.each_char.each_slice(2).map {|b| b.join.to_i(16) }
              doc_obj_bin = doc_hex_bytes.pack("C*").unpack("a4 a3 a2 a3")
              host_id = doc_obj_bin[1].unpack("S")
              process_id = doc_obj_bin[2].unpack("S")
              event['host_id'] = host_id.first.to_i
              event['process_id'] = process_id.first.to_i
            end

            if @parse_method == 'flatten'
              # Flatten the JSON so that the data is usable in Kibana
              flat_doc = flatten(doc)
              # Check for different types of expected values and add them to the event
              if flat_doc['info_message'] && (flat_doc['info_message'] =~ /collection stats: .+/)
                # Some custom stuff I'm having to do to fix formatting in past logs...
                sub_value = flat_doc['info_message'].sub("collection stats: ", "")
                JSON.parse(sub_value).each do |k1,v1|
                  flat_doc["collection_stats_#{k1.to_s}"] = v1
                end
              end

              flat_doc.each do |k,v|
                # Check for an integer
                @logger.debug("key: #{k.to_s} value: #{v.to_s}")
                if v.is_a? Numeric
                  event[k.to_s] = v
                elsif v.is_a? String
                  if v == "NaN"
                    event[k.to_s] = Float::NAN
                  elsif /\A[-+]?\d+[.][\d]+\z/ == v
                    event[k.to_s] = v.to_f
                  elsif (/\A[-+]?\d+\z/ === v) || (v.is_a? Integer)
                    event[k.to_s] = v.to_i
                  else
                    event[k.to_s] = v
                  end
                else
                  event[k.to_s] = v.to_s unless k.to_s == "_id" || k.to_s == "tags"
                  if (k.to_s == "tags") && (v.is_a? Array)
                    event['tags'] = v
                  end
                end
              end
            elsif @parse_method == 'dig'
              # Dig into the JSON and flatten select elements
              doc.each do |k, v|
                if k != "_id"
                  if (@dig_fields.include? k) && (v.respond_to? :each)
                    v.each do |kk, vv|
                      if (@dig_dig_fields.include? kk) && (vv.respond_to? :each)
                        vv.each do |kkk, vvv|
                          if /\A[-+]?\d+\z/ === vvv
                            event["#{k}_#{kk}_#{kkk}"] = vvv.to_i
                          else
                            event["#{k}_#{kk}_#{kkk}"] = vvv.to_s
                          end
                        end
                      else
                        if /\A[-+]?\d+\z/ === vv
                          event["#{k}_#{kk}"] = vv.to_i
                        else
                          event["#{k}_#{kk}"] = vv.to_s
                        end
                      end
                    end
                  else
                    if /\A[-+]?\d+\z/ === v
                      event[k] = v.to_i
                    else
                      event[k] = v.to_s
                    end
                  end
                end
              end
            else
              # Should probably do some sanitization here and insert the doc as raw as possible for parsing in logstash
            end

            queue << event
            @collection_data[index][:last_id] = doc['_id'].to_s
          end
          # Store the last-seen doc in the database
          update_placeholder(@sqlitedb, since_table, collection_name, @collection_data[index][:last_id])
        end
        @logger.debug("Updating watch collections")
        @collection_data = update_watched_collections(@mongodb, @collection, @sqlitedb)

        # nothing found in that iteration
        # sleep a bit
        @logger.debug("No new rows. Sleeping.", :time => sleeptime)
        sleeptime = [sleeptime * 2, sleep_max].min
        sleep(sleeptime)
        #sleeptime = sleep_min
      end
    rescue LogStash::ShutdownSignal
      if @interrupted
        @logger.debug("Mongo Input shutting down")
      end
    end
  end # def run

end # class LogStash::Inputs::Example
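The sqlite placeholder ("since") bookkeeping implemented by `init_placeholder_table`, `get_placeholder` and `update_placeholder` above can be exercised on its own with Sequel. A standalone sketch, assuming plain MRI with the sqlite3 gem rather than the JDBC driver the plugin loads under JRuby, and with a made-up collection name and `_id`:

```
# Illustrative sketch of the since-table bookkeeping; not part of the gem.
require "sequel"

db = Sequel.sqlite  # in-memory stand-in for logstash_sqlite.db
db.create_table(:since_table) do
  String :table
  String :place  # the plugin declares Int :place but stores the last _id as a string
end

since = db[:since_table]
# First run: record the oldest _id seen for the watched collection.
since.insert(:table => "logstash_since_events_20150227", :place => "55148b3a1a3f5c0b2a000001")

# Later runs: read the stored place, process newer documents, then advance it.
row = since.where(:table => "logstash_since_events_20150227").first
puts row[:place]
since.where(:table => "logstash_since_events_20150227").update(:place => "55148b3a1a3f5c0b2a000002")
```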
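The `flatten` parse method above collapses nested documents into underscore-joined keys so each value becomes its own event field. A standalone re-statement of that logic (same behaviour, logging removed) with a made-up document, to show the input and output shape:

```
# Illustrative only: simplified version of the flatten method in the plugin source.
def flatten(my_hash)
  new_hash = {}
  my_hash.each do |k1, v1|
    if v1.is_a?(Hash)
      v1.each do |k2, v2|
        if v2.is_a?(Hash)
          flatten(v2).each { |k3, v3| new_hash["#{k1}_#{k2}_#{k3}"] = v3 }
        else
          new_hash["#{k1}_#{k2}"] = v2
        end
      end
    else
      new_hash[k1.to_s] = v1
    end
  end
  new_hash
end

doc = { "level" => "info", "context" => { "user" => { "id" => 42 }, "ip" => "10.0.0.1" } }
p flatten(doc)
# => {"level"=>"info", "context_user_id"=>42, "context_ip"=>"10.0.0.1"}
```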