logstash-output-picsv 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8fe57ac88a14e3ed8becf6f750c9f9708c3aeb38
4
+ data.tar.gz: bb239e83f2e5c3dd4915ab668fb4299b19d26a54
5
+ SHA512:
6
+ metadata.gz: 1a0259c8796a3180d6c9c757c19ae7abbd9eeeb27bf75f038f167d245197d6ef72698a3ad2ca6891344156a07aebcb824252a9546e1bf1aa83d11a580feaf5fc
7
+ data.tar.gz: bdcc0729fed0edd80b5f825a910d49a50b79b91e29c3a53f5985644c1542eb056f903cd7f1041db5a2b93104c4eec5dc2f69a8cb14ad942650196e681893b825
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+
2
+ .DS_Store
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Pull gems from the public RubyGems index.
source 'https://rubygems.org'
# Runtime and development dependencies are declared in the gemspec.
gemspec
# Development copy of Logstash core, pinned to the 2.4 branch on GitHub.
gem "logstash", :github => "elastic/logstash", :branch => "2.4"
data/README.md ADDED
@@ -0,0 +1,2 @@
1
+ # logstash-output-picsv
2
+ Logstash output plugin for IBM OA Predictive Insights
data/Rakefile ADDED
@@ -0,0 +1 @@
1
# Pulls in the standard Logstash plugin-development rake tasks.
require "logstash/devutils/rake"
@@ -0,0 +1,365 @@
1
############################################
#
# picsv
#
# Logstash mediation output for IBM OA Predictive Insights (PI)
#
# Version 030815.1 Robert Mckeown
# Version 030815.2 Jim Ray
#
############################################

require "csv"
require "logstash/namespace"
require "logstash/outputs/file"
require 'java' # for java.text.SimpleDateFormat / java.util.Date (JRuby)

# SCACSV - based upon the original Logstash CSV output.
#
# * Writes events to disk in CSV format.
# * Writes a PI header as the first line in each file.
# * Names each finished file per the PI convention, based upon the first
#   and last timestamps encountered in the data:
#   <group>__<startTime>__<endTime>.csv
class LogStash::Outputs::SCACSV < LogStash::Outputs::File

  config_name "scacsv"
  milestone 1

  # The field names from the event that should be written to the CSV file.
  # Fields are written to the CSV in the same order as the array.
  # If a field does not exist on the event, an empty string will be written.
  config :fields, :validate => :array, :required => true

  # If present, the values here over-ride the default header names
  # (which are simply the :fields values).
  config :header, :validate => :array, :required => false

  # Options for CSV output. This is passed directly to the Ruby stdlib to_csv function.
  # Full documentation is available here: [http://ruby-doc.org/stdlib-2.0.0/libdoc/csv/rdoc/index.html].
  # A typical use case would be to use alternative column or row separators
  # eg: `csv_options => {"col_sep" => "\t" "row_sep" => "\r\n"}` gives
  # tab-separated data with Windows line endings.
  config :csv_options, :validate => :hash, :required => false, :default => Hash.new

  # Name of the output group - used as a prefix in the renamed file.
  config :group, :validate => :string, :required => true

  # Maximum number of records per file; 0 disables the limit.
  config :max_size, :validate => :number, :default => 0

  # "" (disabled) or one of "minute", "five", "fifteen", "hour", "day":
  # close and rename the current file whenever a record crosses into the
  # next interval of that width.
  config :file_interval_width, :validate => :string, :default => ""

  # Seconds of inactivity after which the watchdog closes the current file.
  config :flush_interval, :validate => :number, :default => 60

  # Event field holding the record timestamp.
  config :time_field, :validate => :string, :default => "timestamp"

  # Java SimpleDateFormat pattern for :time_field, or "epoch" for epoch seconds.
  config :time_field_format, :validate => :string, :required => true

  # Java SimpleDateFormat pattern for the timestamps embedded in the output
  # file name. "" reuses :time_field_format; "epoch" emits raw epoch seconds.
  config :timestamp_output_format, :validate => :string, :default => ""

  # Force GMT when formatting the filename timestamps.
  config :force_GMT_filenames, :validate => :boolean, :default => false

  # Offset (seconds) added to start/end times before filename formatting.
  config :tz_offset, :validate => :number, :default => 0

  # When true, bump the filename end-time so it lands strictly after the
  # last data value.
  config :increment_time, :validate => :boolean, :default => false

  public
  def register
    super
    # to_csv expects symbol keys.
    @csv_options = Hash[@csv_options.map { |(k, v)| [k.to_sym, v] }]

    # Start and end times which we'll use to rename the files.
    @startTime = "missingStartTime"
    @endTime = "missingEndTime"
    @recordCount = 0

    @lastOutputTime = 0 # data time
    @flushInterval = @flush_interval.to_i

    # Background thread that closes idle files (see flushWatchdog).
    @timerThread = Thread.new { flushWatchdog(@flush_interval) }

    @currentOutputIntervalStartTime = 0

    # Map the configured interval name to a width in seconds.
    case @file_interval_width.upcase
    when "MINUTE"  then @fileIntervalWidthSeconds = 60
    when "FIVE"    then @fileIntervalWidthSeconds = 300
    when "FIFTEEN" then @fileIntervalWidthSeconds = 900
    when "HOUR"    then @fileIntervalWidthSeconds = 3600
    when "DAY"     then @fileIntervalWidthSeconds = 86400
    else                @fileIntervalWidthSeconds = 0 # not used
    end
    @closeOnIntervalBoundaries = @fileIntervalWidthSeconds > 0

    # Pre-build the input timestamp parser unless values are epoch seconds.
    @df = nil
    if @time_field_format != "epoch"
      @df = java.text.SimpleDateFormat.new(@time_field_format)
    end
  end

  # This thread ensures that we output (close and rename) a file every so
  # often: whenever records are pending and `delay` seconds have elapsed
  # since the last output.
  private
  def flushWatchdog(delay)
    @logger.debug("SCACSVFlushWatchdog - Last output time = " + @lastOutputTime.to_s)
    while true do
      @logger.debug("SCACSVFlushWatchdog - Time.now = " + Time.now.to_s + " $lastOutputTime=" + @lastOutputTime.to_s + " delay=" + delay.to_s)

      if (Time.now.to_i >= (@lastOutputTime.to_i + delay.to_i)) and (@recordCount > 0)
        @logger.debug("SCACSVFlushWatchdog - closeAndRenameCurrentFile")
        closeAndRenameCurrentFile
      end
      @logger.debug("SCACSVFlushWatchdog - Sleeping")
      sleep 1
    end
  end

  public
  def receive(event)
    return unless output?(event)

    @logger.debug("in SCACSV receive")

    if event['SCAWindowMarker']
      # Just eat the marker - don't output it. If we had at least one
      # record output, then close the file and move on.
      closeAndRenameCurrentFile if @recordCount >= 1
    else
      # See if we need to close the file because of a new interval boundary.
      if @closeOnIntervalBoundaries and @recordCount >= 1 and
         (@currentOutputIntervalStartTime != snapTimestampToInterval(timestampFromEventAsEpochSeconds(event), @fileIntervalWidthSeconds))
        closeAndRenameCurrentFile
      end

      @formattedPath = event.sprintf(@path)
      fd = open(@formattedPath)
      @logger.debug("SCACSVreceive - after opening fd=" + fd.to_s)

      if @recordCount == 0
        # Output header on first line - note, need a minimum of one record
        # for sensible output.
        if @header
          fd.write(@header.to_csv(@csv_options))
        else
          fd.write(@fields.to_csv(@csv_options))
        end
      end

      csv_values = @fields.map { |name| get_value(name, event) }
      fd.write(csv_values.to_csv(@csv_options))

      flush(fd)
      close_stale_files

      # Remember state.
      @recordCount = @recordCount + 1
      @lastOutputTime = Time.now

      # Capture the earliest time - assumption is that records are in order.
      if @recordCount == 1
        if !@closeOnIntervalBoundaries
          @startTime = timestampFromEventAsEpochSeconds(event)
        else
          @startTime = snapTimestampToInterval(timestampFromEventAsEpochSeconds(event), @fileIntervalWidthSeconds)
        end
      end

      # For every record, update endTime - again, assumption is that
      # records are in order.
      if !@closeOnIntervalBoundaries
        @endTime = timestampFromEventAsEpochSeconds(event)
      else
        @endTime = @startTime + @fileIntervalWidthSeconds - 1 # end of interval
      end

      # Remember start of boundary for next time.
      @currentOutputIntervalStartTime = @startTime if @closeOnIntervalBoundaries

      # BUG FIX: was a bare `max_size` reference; the config mixin only
      # sets the @max_size instance variable, so that raised NameError.
      if (@max_size > 0) and (@recordCount >= @max_size)
        # Have enough records, close it out.
        closeAndRenameCurrentFile
      end
    end

  end # def receive

  private
  # Convert `timestamp` to epoch seconds. When `dateFormat` (a java
  # SimpleDateFormat) is supplied it is used to parse the value; otherwise
  # the value is assumed to already be epoch SECONDS.
  def timestampAsEpochSeconds(timestamp, dateFormat)
    if !dateFormat.nil?
      dateFormat.parse(timestamp.to_s).getTime / 1000 # convert milliseconds to seconds
    else
      timestamp.to_i
    end
  end

  private
  # Epoch seconds for this event's configured time field.
  def timestampFromEventAsEpochSeconds(event)
    timestampAsEpochSeconds(event[@time_field], @df)
  end

  private
  # Snap `timestamp` down to the start of its `interval`-wide slot.
  # Both arguments are in seconds; relies on integer division.
  def snapTimestampToInterval(timestamp, interval)
    (timestamp / interval) * interval
  end

  private
  # Fetch a field value from the event; Hash values are serialised as
  # JSON so they survive the CSV encoding, everything else passes through.
  def get_value(name, event)
    val = event[name]
    case val
    when Hash
      val.to_json
    else
      val
    end
  end

  private
  # Build a java.util.Date from an epoch timestamp given either in
  # milliseconds (13 digits) or seconds (assumed 10 digits).
  def epochAsJavaDate(epochTimestamp)
    if epochTimestamp.to_s.length == 13
      java.util.Date.new(epochTimestamp.to_i)
    else
      # should be 10 digits (seconds)
      java.util.Date.new(epochTimestamp.to_i * 1000)
    end
  end

  # Format an epoch timestamp for use in the output file name.
  # Falls back to `missingString` (and logs) when the timestamp is nil or
  # formatting raises.
  def formatOutputTime(timestamp, time_field_format, timestamp_output_format, missingString)
    outputString = ""
    begin
      if timestamp.nil?
        # BUG FIX: was "#{group}" - only @group is defined.
        @logger.debug("SCACSV " + missingString + " for #{@group}")
      elsif timestamp_output_format == "epoch"
        outputString = timestamp.to_s
      elsif timestamp_output_format == ""
        # Use the input time_field format.
        if time_field_format == "epoch"
          outputString = timestamp.to_s
        else
          df = java.text.SimpleDateFormat.new(time_field_format)
          df.setTimeZone(java.util.TimeZone.getTimeZone("GMT")) if @force_GMT_filenames
          outputString = df.format(epochAsJavaDate(timestamp))
        end
      else # explicit java timeformat supplied
        df = java.text.SimpleDateFormat.new(timestamp_output_format)
        df.setTimeZone(java.util.TimeZone.getTimeZone("GMT")) if @force_GMT_filenames
        outputString = df.format(epochAsJavaDate(timestamp))
      end
    rescue Exception => e
      @logger.error("Exception determining output file timestamp. " + missingString, :exception => e)
      outputString = missingString
    end
    outputString
  end

  # Close every open file and rename it per the PI convention:
  # <group>__<startTime>__<endTime>.csv
  def closeAndRenameCurrentFile
    # Cloned and changed from the 'file.rb' output. Even though this is in
    # a loop - assumption is that we have one file here for the SCA CSV use.
    @files.each do |path, fd|
      begin
        fd.close
        @files.delete(path) # so it will be forgotten and we can open it up again if needed
        @logger.debug("closeAndRenameCurrentFile #{path}", :fd => fd)

        begin # determine start & end times; all timestamps are epoch seconds here
          if !@startTime.nil?
            @startTime = @startTime.to_i + @tz_offset
          end

          if !@endTime.nil?
            @endTime = @endTime.to_i + @tz_offset
            if @increment_time
              # Ensure the end-time on the filename is after the last data
              # value. BUG FIX: the increment was previously lost
              # (`@endTime = @endTime.to_i` added nothing - the intended
              # "+ 1000 # 1000ms = 1sec" had fallen into a comment).
              @endTime = @endTime.to_i + 1 # times are in seconds
            end
          end

          # Then do the conversion for output.
          @fileStartTime = formatOutputTime(@startTime, @time_field_format, @timestamp_output_format, "noStartTime")
          @fileEndTime   = formatOutputTime(@endTime,   @time_field_format, @timestamp_output_format, "noEndTime")
        rescue Exception => e
          @logger.error("Exception while flushing and closing files - preparing start/end time", :exception => e)
          raise
        end

        # Timestamps are strings here.
        # BUG FIX: was "#{group}" - only @group is defined; the NameError
        # was swallowed by the rescue below, so files were never renamed.
        newFilename = "#{@group}" + "__" + @fileStartTime + "__" + @fileEndTime + ".csv"

        if newFilename.include? '/'
          @logger.error("New filename " + newFilename + " cannot contain / characters. Check the timestamp format. / characters stripped from filename")
          newFilename = newFilename.delete '/'
        end

        realdirpath = File.dirname(File.realdirpath(path))
        oldFilename = File.basename(path)

        File.rename(realdirpath + "/" + oldFilename, realdirpath + "/" + newFilename)

        # Reset record count so we'll pick up a new start time, and put a
        # header on the next file when a new record comes in.
        @recordCount = 0
        @lastOutputTime = Time.now
      rescue Exception => e
        @logger.error("Exception while flushing and closing files.", :exception => e)
      end
    end
  end

  # Plugin shutdown: stop the watchdog, flush/rename the last file, finish.
  def teardown
    @logger.debug("SCACSV - Teardown: closing files")

    Thread.kill(@timerThread)
    closeAndRenameCurrentFile

    finished
  end

end # class LogStash::Outputs::SCACSV
@@ -0,0 +1,25 @@
1
Gem::Specification.new do |spec|
  # Identity
  spec.name     = 'logstash-output-picsv'
  spec.version  = "1.0.5"
  spec.licenses = ["Apache-2.0"]
  spec.authors  = ["ORIG:Robert Mckeown, Update:Jim Ray"]
  spec.email    = "raygj@us.ibm.com"
  spec.homepage = "https://github.com/raygj/logstash-output-picsv/README.md"

  # Description
  spec.summary = "Receives a stream of events and outputs files meeting the csv format for IBM Operation Analytics Predictive Insights"
  # Need to validate 1.5 standalone gemfile compatibility; gemfile exists!
  spec.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"

  # Packaged files come straight from git; tests are the spec/test trees.
  spec.files         = `git ls-files`.split($\)
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Special flag so Logstash recognises this gem as an output plugin.
  spec.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }

  # Gem dependencies
  spec.add_runtime_dependency "logstash-core", ">= 1.5.0", "< 3.0.0"
  spec.add_runtime_dependency "logstash-codec-plain", "~> 0"
  spec.add_development_dependency "logstash-devutils", "~> 0"
end
@@ -0,0 +1,21 @@
1
require "logstash/devutils/rspec/spec_helper"
require "logstash/outputs/base" # changed from /example to /base
require "logstash/outputs/picsv" # load the plugin actually under test
require "logstash/codecs/plain"
require "logstash/event"

# Smoke tests for the picsv output plugin.
# BUG FIX: this spec previously described LogStash::Outputs::Example (the
# plugin-template leftover) and asserted receive returned "Event received",
# which this file output never does - so the plugin was never exercised.
describe LogStash::Outputs::SCACSV do
  let(:sample_event) { LogStash::Event.new("timestamp" => "1473984000") }

  # Minimal valid configuration: all required settings supplied.
  let(:output) do
    LogStash::Outputs::SCACSV.new(
      "path"              => "/tmp/picsv_spec.csv",
      "fields"            => ["timestamp"],
      "group"             => "testgroup",
      "time_field_format" => "epoch"
    )
  end

  before do
    output.register
  end

  describe "receive message" do
    it "accepts an event without raising" do
      expect { output.receive(sample_event) }.not_to raise_error
    end
  end
end
metadata ADDED
@@ -0,0 +1,101 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-output-picsv
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.5
5
+ platform: ruby
6
+ authors:
7
+ - ORIG:Robert Mckeown, Update:Jim Ray
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-09-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: 1.5.0
19
+ - - "<"
20
+ - !ruby/object:Gem::Version
21
+ version: 3.0.0
22
+ name: logstash-core
23
+ prerelease: false
24
+ type: :runtime
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: 1.5.0
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: 3.0.0
33
+ - !ruby/object:Gem::Dependency
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - "~>"
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ name: logstash-codec-plain
40
+ prerelease: false
41
+ type: :runtime
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ - !ruby/object:Gem::Dependency
48
+ requirement: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - "~>"
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ name: logstash-devutils
54
+ prerelease: false
55
+ type: :development
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ description: This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program
62
+ email: raygj@us.ibm.com
63
+ executables: []
64
+ extensions: []
65
+ extra_rdoc_files: []
66
+ files:
67
+ - ".gitignore"
68
+ - Gemfile
69
+ - README.md
70
+ - Rakefile
71
+ - lib/logstash/outputs/picsv.rb
72
+ - logstash-output-picsv.gemspec
73
+ - spec/outputs/picsv_spec.rb
74
+ homepage: https://github.com/raygj/logstash-output-picsv/README.md
75
+ licenses:
76
+ - Apache-2.0
77
+ metadata:
78
+ logstash_plugin: 'true'
79
+ logstash_group: output
80
+ post_install_message:
81
+ rdoc_options: []
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 2.4.8
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: Receives a stream of events and outputs files meeting the csv format for IBM Operation Analytics Predictive Insights
100
+ test_files:
101
+ - spec/outputs/picsv_spec.rb