logstash-input-cloudwatch_logs 0.10.3 → 1.0.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d3cf76d4fcae5d3c2145be37be2ee196bc443e67
4
- data.tar.gz: 07f81ee5d24b68ee92727b056ddcf0189035bcb8
3
+ metadata.gz: 0add333171d946f22e690ecd2fa6167115136761
4
+ data.tar.gz: 3f85bcf9b77be0aec9466e8fe014841597da164d
5
5
  SHA512:
6
- metadata.gz: 2038764e97fea3b2ef55bdca11b7910491317fc2bdcd55922e85ef649fc9830ff0015196463e29c45d873ab48a538ec240a8af2561201071206ad54d30cc93cb
7
- data.tar.gz: 663df20c6fb609a1a4eb4f98d7a44bdf42376b6b842df2817710dfe810e1692571fd20a30e511f5a8bff75115056e5ebb151fab729499d6a69d5505a7b241b65
6
+ metadata.gz: 0ddf9aba860508e42ae5ee675d687d97e123152eee16555c82e0ebf72e64ae09d7dcbaeefddd2a22c532bd17ddb009fee7b82bf7529418a8d1a2861ce5dd5f44
7
+ data.tar.gz: d2e00e97521353b991fb39b053d9b9d57ae46e4d07e019f775d2f81b565f337b8e886df84dd306021168bad9c8181b0e5289572c00b8653b8c47ef07bf9c8036
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # Release Notes for `logstash-input-cloudwatch_logs`
2
2
 
3
+ ## v1.0.0 Pre-Release (2017-06-24)
4
+ * BREAKING CHANGE: `log_group` must now be an array, adds support for specifying multiple groups or prefixes
5
+ * Refactored ingestion, fixes multiple memory leaks (Fixes [#24](https://github.com/lukewaite/logstash-input-cloudwatch-logs/issues/24))
6
+ * Pull only log_events since last ingestion (Fixes [#10](https://github.com/lukewaite/logstash-input-cloudwatch-logs/issues/10))
7
+ * Incrementally write to since_db on each page of data from the CWL API (Fixes [#4](https://github.com/lukewaite/logstash-input-cloudwatch-logs/issues/4))
8
+
3
9
  ## v0.10.3 (2017-05-07)
4
10
 
5
11
  ### Fixed
data/README.md CHANGED
@@ -20,7 +20,7 @@ and ingest all logs available in all of the matching groups.
20
20
  ### Parameters
21
21
  | Parameter | Input Type | Required | Default |
22
22
  |-----------|------------|----------|---------|
23
- | log_group | string | Yes | |
23
+ | log_group | array | Yes | |
24
24
  | log_group_prefix | boolean | No | `false` |
25
25
  | sincedb_path | string | No | `$HOME/.sincedb*` |
26
26
  | interval | number | No | 60 |
@@ -40,7 +40,7 @@ Other standard logstash parameters are available such as:
40
40
 
41
41
  input {
42
42
  cloudwatch_logs {
43
- log_group => "/aws/lambda/my-lambda"
43
+ log_group => [ "/aws/lambda/my-lambda" ]
44
44
  access_key_id => "AKIAXXXXXX"
45
45
  secret_access_key => "SECRET"
46
46
  }
@@ -4,11 +4,10 @@ require "logstash/namespace"
4
4
  require "logstash/plugin_mixins/aws_config"
5
5
  require "logstash/timestamp"
6
6
  require "time"
7
- require "tmpdir"
8
7
  require "stud/interval"
9
- require "stud/temporary"
10
8
  require "aws-sdk"
11
- require "logstash/inputs/cloudwatch/patch"
9
+ require "logstash/inputs/cloudwatch_logs/patch"
10
+ require "fileutils"
12
11
 
13
12
  Aws.eager_autoload!
14
13
 
@@ -28,9 +27,9 @@ class LogStash::Inputs::CloudWatch_Logs < LogStash::Inputs::Base
28
27
 
29
28
  default :codec, "plain"
30
29
 
31
- # Log group to pull logs from for this plugin. Will pull in all
32
- # streams inside of this log group.
33
- config :log_group, :validate => :string, :required => true
30
+ # Log group(s) to use as an input. If `log_group_prefix` is set
31
+ # to `true`, then each member of the array is treated as a prefix
32
+ config :log_group, :validate => :array, :required => true
34
33
 
35
34
  # Where to write the since database (keeps track of the date
36
35
  # the last handled log stream was updated). The default will write
@@ -50,93 +49,127 @@ class LogStash::Inputs::CloudWatch_Logs < LogStash::Inputs::Base
50
49
  public
51
50
  def register
52
51
  require "digest/md5"
53
-
54
- @logger.info("Registering cloudwatch_logs input", :log_group => @log_group)
52
+ @logger.trace("Registering cloudwatch_logs input", :log_group => @log_group)
53
+ settings = defined?(LogStash::SETTINGS) ? LogStash::SETTINGS : nil
54
+ @sincedb = {}
55
55
 
56
56
  Aws::ConfigService::Client.new(aws_options_hash)
57
-
58
57
  @cloudwatch = Aws::CloudWatchLogs::Client.new(aws_options_hash)
58
+
59
+ if @sincedb_path.nil?
60
+ if settings
61
+ datapath = File.join(settings.get_value("path.data"), "plugins", "inputs", "cloudwatch_logs")
62
+ # Ensure that the filepath exists before writing, since it's deeply nested.
63
+ FileUtils::mkdir_p datapath
64
+ @sincedb_path = File.join(datapath, ".sincedb_" + Digest::MD5.hexdigest(@log_group.join(",")))
65
+ end
66
+ end
67
+
68
+ # This section is going to be deprecated eventually, as path.data will be
69
+ # the default, not an environment variable (SINCEDB_DIR or HOME)
70
+ if @sincedb_path.nil? # If it is _still_ nil...
71
+ if ENV["SINCEDB_DIR"].nil? && ENV["HOME"].nil?
72
+ @logger.error("No SINCEDB_DIR or HOME environment variable set, I don't know where " \
73
+ "to keep track of the files I'm watching. Either set " \
74
+ "HOME or SINCEDB_DIR in your environment, or set sincedb_path in " \
75
+ "in your Logstash config for the file input with " \
76
+ "path '#{@path.inspect}'")
77
+ raise
78
+ end
79
+
80
+ #pick SINCEDB_DIR if available, otherwise use HOME
81
+ sincedb_dir = ENV["SINCEDB_DIR"] || ENV["HOME"]
82
+
83
+ @sincedb_path = File.join(sincedb_dir, ".sincedb_" + Digest::MD5.hexdigest(@log_group.join(",")))
84
+
85
+ @logger.info("No sincedb_path set, generating one based on the log_group setting",
86
+ :sincedb_path => @sincedb_path, :log_group => @log_group)
87
+ end
88
+
59
89
  end #def register
60
90
 
61
91
  # def run
62
92
  public
63
93
  def run(queue)
64
- while !stop?
65
- process_group(queue)
66
- Stud.stoppable_sleep(@interval)
94
+ @queue = queue
95
+ _sincedb_open
96
+
97
+ Stud.interval(@interval) do
98
+ groups = find_log_groups
99
+
100
+ groups.each do |group|
101
+ @logger.debug("calling process_group on #{group}")
102
+ process_group(group)
103
+ end # groups.each
67
104
  end
105
+
68
106
  end # def run
69
107
 
70
- # def list_new_streams
71
108
  public
72
- def list_new_streams()
109
+ def find_log_groups
73
110
  if @log_group_prefix
74
- log_groups = @cloudwatch.describe_log_groups(log_group_name_prefix: @log_group)
75
- groups = log_groups.log_groups.map {|n| n.log_group_name}
76
- while log_groups.next_token
77
- log_groups = @cloudwatch.describe_log_groups(log_group_name_prefix: @log_group, next_token: log_groups.next_token)
78
- groups += log_groups.log_groups.map {|n| n.log_group_name}
111
+ @logger.debug("log_group prefix is enabled, searching for log groups")
112
+ groups = []
113
+ next_token = nil
114
+ @log_group.each do |group|
115
+ loop do
116
+ log_groups = @cloudwatch.describe_log_groups(log_group_name_prefix: group, next_token: next_token)
117
+ groups += log_groups.log_groups.map {|n| n.log_group_name}
118
+ next_token = log_groups.next_token
119
+ @logger.debug("found #{log_groups.log_groups.length} log groups matching prefix #{group}")
120
+ break if next_token.nil?
121
+ end
79
122
  end
80
123
  else
81
- groups = [@log_group]
82
- end
83
- objects = []
84
- for log_group in groups
85
- objects.concat(list_new_streams_for_log_group(log_group))
124
+ @logger.debug("log_group_prefix not enabled")
125
+ groups = @log_group
86
126
  end
87
- objects
88
- end
89
-
90
- # def list_new_streams_for_log_group
91
- public
92
- def list_new_streams_for_log_group(log_group, token = nil, objects = [], stepback=0)
93
- params = {
94
- :log_group_name => log_group,
95
- :order_by => "LastEventTime",
96
- :descending => false
97
- }
98
-
99
- @logger.debug("CloudWatch Logs for log_group #{log_group}")
127
+ groups
128
+ end # def find_log_groups
100
129
 
101
- if token != nil
102
- params[:next_token] = token
103
- end
130
+ private
131
+ def process_group(group)
132
+ next_token = nil
133
+ loop do
134
+ if !@sincedb.member?(group)
135
+ @sincedb[group] = 0
136
+ end
137
+ params = {
138
+ :log_group_name => group,
139
+ :start_time => @sincedb[group],
140
+ :limit => 10,
141
+ :interleaved => true,
142
+ :next_token => next_token
143
+ }
144
+ resp = @cloudwatch.filter_log_events(params)
145
+
146
+ resp.events.each do |event|
147
+ process_log(event, group)
148
+ end
104
149
 
105
- begin
106
- streams = @cloudwatch.describe_log_streams(params)
107
- rescue Aws::CloudWatchLogs::Errors::ThrottlingException
108
- @logger.debug("CloudWatch Logs stepping back ", :stepback => 2 ** stepback * 60)
109
- sleep(2 ** stepback * 60)
110
- stepback += 1
111
- @logger.debug("CloudWatch Logs repeating list_new_streams again with token", :token => token)
112
- return list_new_streams_for_log_group(log_group, token=token, objects=objects, stepback=stepback)
113
- end
150
+ _sincedb_write
114
151
 
115
- objects.push(*streams.log_streams)
116
- if streams.next_token == nil
117
- @logger.debug("CloudWatch Logs hit end of tokens for streams")
118
- objects
119
- else
120
- @logger.debug("CloudWatch Logs calling list_new_streams again on token", :token => streams.next_token)
121
- list_new_streams_for_log_group(log_group, streams.next_token, objects)
152
+ next_token = resp.next_token
153
+ break if next_token.nil?
122
154
  end
123
- end # def list_new_streams_for_log_group
155
+ end #def process_group
124
156
 
125
157
  # def process_log
126
158
  private
127
- def process_log(queue, log, stream)
159
+ def process_log(log, group)
128
160
 
129
161
  @codec.decode(log.message.to_str) do |event|
130
162
  event.set("@timestamp", parse_time(log.timestamp))
131
- event.set("[cloudwatch][ingestion_time]", parse_time(log.ingestion_time))
132
- event.set("[cloudwatch][log_group]", stream.arn.split(/:/)[6])
133
- event.set("[cloudwatch][log_stream]", stream.log_stream_name)
163
+ event.set("[cloudwatch_logs][ingestion_time]", parse_time(log.ingestion_time))
164
+ event.set("[cloudwatch_logs][log_group]", group)
165
+ event.set("[cloudwatch_logs][log_stream]", log.log_stream_name)
166
+ event.set("[cloudwatch_logs][event_id]", log.event_id)
134
167
  decorate(event)
135
168
 
136
- queue << event
169
+ @queue << event
170
+ @sincedb[group] = log.timestamp + 1
137
171
  end
138
- end
139
- # def process_log
172
+ end # def process_log
140
173
 
141
174
  # def parse_time
142
175
  private
@@ -144,111 +177,39 @@ class LogStash::Inputs::CloudWatch_Logs < LogStash::Inputs::Base
144
177
  LogStash::Timestamp.at(data.to_i / 1000, (data.to_i % 1000) * 1000)
145
178
  end # def parse_time
146
179
 
147
- # def process_group
148
- public
149
- def process_group(queue)
150
- objects = list_new_streams
151
-
152
- last_read = sincedb.read
153
- current_window = DateTime.now.strftime('%Q')
154
-
155
- if last_read < 0
156
- last_read = 1
157
- end
158
-
159
- objects.each do |stream|
160
- if stream.last_ingestion_time && stream.last_ingestion_time > last_read
161
- process_log_stream(queue, stream, last_read, current_window)
162
- end
163
- end
164
-
165
- sincedb.write(current_window)
166
- end # def process_group
167
-
168
- # def process_log_stream
169
180
  private
170
- def process_log_stream(queue, stream, last_read, current_window, token = nil, stepback=0)
171
- @logger.debug("CloudWatch Logs processing stream",
172
- :log_stream => stream.log_stream_name,
173
- :log_group => stream.arn.split(":")[6],
174
- :lastRead => last_read,
175
- :currentWindow => current_window,
176
- :token => token
177
- )
178
-
179
- params = {
180
- :log_group_name => stream.arn.split(":")[6],
181
- :log_stream_name => stream.log_stream_name,
182
- :start_from_head => true
183
- }
184
-
185
- if token != nil
186
- params[:next_token] = token
187
- end
188
-
189
-
181
+ def _sincedb_open
190
182
  begin
191
- logs = @cloudwatch.get_log_events(params)
192
- rescue Aws::CloudWatchLogs::Errors::ThrottlingException
193
- @logger.debug("CloudWatch Logs stepping back ", :stepback => 2 ** stepback * 60)
194
- sleep(2 ** stepback * 60)
195
- stepback += 1
196
- @logger.debug("CloudWatch Logs repeating process_log_stream again with token", :token => token)
197
- return process_log_stream(queue, stream, last_read, current_window, token, stepback)
198
- end
199
-
200
- logs.events.each do |log|
201
- if log.ingestion_time > last_read
202
- process_log(queue, log, stream)
183
+ File.open(@sincedb_path) do |db|
184
+ @logger.debug? && @logger.debug("_sincedb_open: reading from #{@sincedb_path}")
185
+ db.each do |line|
186
+ group, pos = line.split(" ", 2)
187
+ @logger.debug? && @logger.debug("_sincedb_open: setting #{group} to #{pos.to_i}")
188
+ @sincedb[group] = pos.to_i
189
+ end
203
190
  end
191
+ rescue
192
+ #No existing sincedb to load
193
+ @logger.debug? && @logger.debug("_sincedb_open: error: #{@sincedb_path}: #{$!}")
204
194
  end
195
+ end # def _sincedb_open
205
196
 
206
- # if there are more pages, continue
207
- if logs.events.count != 0 && logs.next_forward_token != nil
208
- process_log_stream(queue, stream, last_read, current_window, logs.next_forward_token)
197
+ private
198
+ def _sincedb_write
199
+ begin
200
+ IO.write(@sincedb_path, serialize_sincedb, 0)
201
+ rescue Errno::EACCES
202
+ # probably no file handles free
203
+ # maybe it will work next time
204
+ @logger.debug? && @logger.debug("_sincedb_write: error: #{@sincedb_path}: #{$!}")
209
205
  end
210
- end # def process_log_stream
206
+ end # def _sincedb_write
211
207
 
212
- private
213
- def sincedb
214
- @sincedb ||= if @sincedb_path.nil?
215
- @logger.info("Using default generated file for the sincedb", :filename => sincedb_file)
216
- SinceDB::File.new(sincedb_file)
217
- else
218
- @logger.info("Using the provided sincedb_path",
219
- :sincedb_path => @sincedb_path)
220
- SinceDB::File.new(@sincedb_path)
221
- end
222
- end
223
208
 
224
209
  private
225
- def sincedb_file
226
- File.join(ENV["HOME"], ".sincedb_" + Digest::MD5.hexdigest("#{@log_group}"))
227
- end
228
-
229
- module SinceDB
230
- class File
231
- def initialize(file)
232
- @sincedb_path = file
233
- end
234
-
235
- def newer?(date)
236
- date > read
237
- end
238
-
239
- def read
240
- if ::File.exists?(@sincedb_path)
241
- since = ::File.read(@sincedb_path).chomp.strip.to_i
242
- else
243
- since = 1
244
- end
245
- return since
246
- end
247
-
248
- def write(since = nil)
249
- since = DateTime.now.strftime('%Q') if since.nil?
250
- ::File.open(@sincedb_path, 'w') { |file| file.write(since.to_s) }
251
- end
252
- end
210
+ def serialize_sincedb
211
+ @sincedb.map do |group, pos|
212
+ [group, pos].join(" ")
213
+ end.join("\n") + "\n"
253
214
  end
254
215
  end # class LogStash::Inputs::CloudWatch_Logs
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-input-cloudwatch_logs'
4
- s.version = '0.10.3'
4
+ s.version = '1.0.0.pre'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = 'Stream events from CloudWatch Logs.'
7
7
  s.description = 'This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program'
@@ -13,7 +13,7 @@ describe LogStash::Inputs::CloudWatch_Logs do
13
13
  {
14
14
  'access_key_id' => '1234',
15
15
  'secret_access_key' => 'secret',
16
- 'log_group' => 'sample-log-group',
16
+ 'log_group' => ['sample-log-group'],
17
17
  'region' => 'us-east-1'
18
18
  }
19
19
  }
@@ -23,4 +23,21 @@ describe LogStash::Inputs::CloudWatch_Logs do
23
23
  expect {subject.register}.to_not raise_error
24
24
  end
25
25
  end
26
+
27
+ describe '#run' do
28
+ let(:config) {
29
+ {
30
+ 'access_key_id' => '1234',
31
+ 'secret_access_key' => 'secret',
32
+ 'log_group' => ['sample-log-group'],
33
+ 'region' => 'us-east-1'
34
+ }
35
+ }
36
+ subject {LogStash::Inputs::CloudWatch_Logs.new(config)}
37
+
38
+ it "runs" do
39
+ subject.register
40
+ expect{subject.run({})}.to_not raise_error
41
+ end
42
+ end
26
43
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-cloudwatch_logs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.3
4
+ version: 1.0.0.pre
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luke Waite
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-07 00:00:00.000000000 Z
11
+ date: 2017-06-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -84,8 +84,8 @@ files:
84
84
  - LICENSE
85
85
  - NOTICE.TXT
86
86
  - README.md
87
- - lib/logstash/inputs/cloudwatch/patch.rb
88
87
  - lib/logstash/inputs/cloudwatch_logs.rb
88
+ - lib/logstash/inputs/cloudwatch_logs/patch.rb
89
89
  - logstash-input-cloudwatch_logs.gemspec
90
90
  - spec/inputs/cloudwatch_logs_spec.rb
91
91
  homepage: ''
@@ -105,9 +105,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
105
105
  version: '0'
106
106
  required_rubygems_version: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - ">="
108
+ - - ">"
109
109
  - !ruby/object:Gem::Version
110
- version: '0'
110
+ version: 1.3.1
111
111
  requirements: []
112
112
  rubyforge_project:
113
113
  rubygems_version: 2.4.8