logstash-output-scacsv 1.0.0 → 1.0.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 02e5d4b3c9ceaa6e7968a2ec2e3563e1b9bb2119
- data.tar.gz: a979b0ca6d5bb260769f9f5e83d5e315e7e04728
+ metadata.gz: 373f6c4b2a800362409b49877ddfbb9448a1cc2f
+ data.tar.gz: 2b73b60065ab79367ea9e24a3b908c64a415a39b
  SHA512:
- metadata.gz: f0b09ab0d67f2e4cee37eb3bd34c3e75c06ebf16e60ea346d511dccc90d402dd73e4b4d713d9dbb990ff54f52bbce0e28a8bc3d66b95fe7329377b090dcdd677
- data.tar.gz: 9ba0172228e956cb5a0c7cb71a4851c6b8424f9d3d631fd2a5468e02c776fc0346f404f76fada3f6a2178c470381f107873d660792ffce9d8003d3d5f80ada29
+ metadata.gz: ce2af21c0d26bb9686975c5be44481d5ad5bdbfd0f77c4961189310db35fc83a6a4d466cbafbf5fe975df001e592d5172490e01fcd5090d80f7fcf7d8c314d62
+ data.tar.gz: 8d1e769135cf5ba7980a75d6bfed39750120801d15dbe596d6e8ffa5115432b039300e1dde5cc17ca505f2f5ed08b748b1791aebba6add9fb9e123e881d73168
data/README.md CHANGED
@@ -1,86 +1,174 @@
- # Logstash Plugin
+ <html>
+ <head>
+ <meta charset="UTF-8">
+ <title>Logstash for SCAPI - output scacsv</title>
+ <link rel="stylesheet" href="http://logstash.net/style.css">
+ </head>
+ <body>
+ <div class="container">
+ <div class="header">
+
+ <!--main content goes here, yo!-->
+ <div class="content_wrapper">
+ <h2>scacsv</h2>
+ <h3> Synopsis </h3>
+ Receives a stream of events and outputs files complying with the SCAPI requirements for headers and file naming.
+ Essentially, it bridges Logstash's 'streaming' approach and SCAPI's file-based input requirements.
+ This is what it might look like in your config file:
+ <pre><code>output {
+   scacsv {
+     <a href="#fields">fields</a> => ... # array (required)
+     <a href="#header">header</a> => ... # array (optional), default: {}
+     <a href="#path">path</a> => ... # string (required)
+     <a href="#group">group</a> => ... # string (required)
+     <a href="#max_size">max_size</a> => ... # number (optional), default: 0 (not used)
+     <a href="#flush_interval">flush_interval</a> => ... # number (optional), default: 60
+     <a href="#file_interval_width">file_interval_width</a> => ... # string (optional), default: ""
+     <a href="#time_field">time_field</a> => ... # string (optional), default: 'timestamp'
+     <a href="#time_field_format">time_field_format</a> => ... # string (required)
+     <a href="#timestamp_output_format">timestamp_output_format</a> => ... # string (optional), default: ""
+     <a href="#increment_time">increment_time</a> => ... # boolean (optional), default: false
+   }
+ }
+ </code></pre>
+ <h3> Details </h3>
+ Note: by default this plugin expects the supplied timestamps to be in epoch time. You can override this expectation and supply non-epoch timestamps, which will be used as-is, via the <a href="#keep_original_timestamps">keep_original_timestamps</a> configuration option. However, such non-epoch timestamps will not automatically be incremented when determining the end time of the file.
+ <h4>
+ <a name="fields">
+ fields
+ </a>
+ </h4>
+ <ul>
+ <li> Value type is <a href="http://logstash.net/docs/1.4.2/configuration#array">Array</a> </li>
+ <li> There is no default for this setting </li>
+ </ul>
+ <p>Specify which fields from the incoming event you wish to output, and in which order</p>
+ <h4>
+ <a name="header">
+ header
+ </a>
+ </h4>
+ <ul>
+ <li> Value type is <a href="http://logstash.net/docs/1.4.2/configuration#hash">Array</a> </li>
+ <li> Default value is {} </li>
+ </ul>
+ <p>
+ Used to specify a string to put as the header (first) line in the file. Useful if you want to override the default headers, which are determined from the fields setting
+ </p>
+ <h4>
+ <a name="path">
+ path
+ </a>
+ </h4>
+ <ul>
+ <li> Value type is <a href="http://logstash.net/docs/1.4.2/configuration#string">string</a> </li>
+ <li> Default value is "" </li>
+ </ul>
+ <p>Path of the temporary output file. Output is written to this file until it is time to close it, at which point it is renamed per the SCAPI file convention. The temporary output file path is then reused for the next set of output. For example, if outputting data for a CPU group, we might define the following path </p>
+ <p><code>path =&gt; "./cpu.csv"</code>.</p>
+ <h4>
+ <a name="group">
+ group (required setting)
+ </a>
+ </h4>
+ <ul>
+ <li> Value type is <a href="http://logstash.net/docs/1.4.2/configuration#string">string</a> </li>
+ <li> There is no default value for this setting. </li>
+ </ul>
+ <p>SCAPI input filenames must have a group identifier as part of the name. The filename generally has this format <code>&lt;group&gt;__&lt;starttime&gt;__&lt;endtime&gt;.csv</code>. This <code>group</code> parameter specifies that group name, and it is used as a prefix when the file is renamed from <code>path</code>. For example</p>
+ <p><code>group =&gt; "cpu"</code>.</p>
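The renaming step described above can be sketched in plain Ruby. This is a hypothetical illustration, not the plugin's actual code; the helper name `scapi_filename` and its string timestamps are assumptions:

```ruby
# Hypothetical sketch of the SCAPI naming convention
# <group>__<starttime>__<endtime>.csv used when the temporary
# file at `path` is renamed on close.
def scapi_filename(group, start_time, end_time)
  "#{group}__#{start_time}__#{end_time}.csv"
end

name = scapi_filename("cpu", "20150702100000", "20150702105959")
```
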
+ <h4>
+ <a name="max_size">
+ max_size
+ </a>
+ </h4>
+ <ul>
+ <li> Value type is <a href="../configuration#number">number</a> </li>
+ <li> Default value is 0 (meaning it is not used)</li>
+ </ul>
+ <p>This will close and rename a file once <code>max_size</code> events have been received. This limits the size of a file, and can sometimes be useful when 'chopping' a stream into chunks for use in SCAPI</p>
+ <h4>
+ <a name="flush_interval">
+ flush_interval
+ </a>
+ </h4>
+ <ul>
+ <li> Value type is <a href="../configuration#number">number</a> </li>
+ <li> Default value is 60 </li>
+ </ul>
+ <p>Amount of time (in seconds) to wait before flushing, closing and renaming a file if no events have been received. This ensures that after a period of idleness, we still output a SCAPI file.</p>
+ <h4>
+ <a name="file_interval_width">
+ file_interval_width
+ </a>
+ </h4>
+ <ul>
+ <li> Value type is <a href="../configuration#string">string</a> </li>
+ <li> Default value is "" (meaning it is not used). Allowed values are "MINUTE", "HOUR", "DAY"</li>
+ </ul>
+ <p>Setting this enables files to be closed on the specified boundaries. This is useful for breaking the incoming stream up on PI-preferred boundaries. If HOUR is set, for example, then all incoming data for a particular hour is put in a file for that hour; when data for the next hour arrives, that file is closed and a new one is opened</p>
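The boundary detection behind this option mirrors the `snapTimestampToInterval` helper in the plugin source: integer division truncates an epoch-seconds timestamp down to the start of its interval. A minimal sketch (method name `snap_to_interval` and the sample timestamp are illustrative):

```ruby
# Truncate an epoch-seconds timestamp down to the start of its
# enclosing interval (60 = MINUTE, 3600 = HOUR, 86400 = DAY).
def snap_to_interval(epoch_seconds, interval_seconds)
  (epoch_seconds / interval_seconds) * interval_seconds
end

ts = 1_435_838_523                  # some event timestamp, epoch seconds
hour_start = snap_to_interval(ts, 3600)
# Events whose snapped value matches the current interval start stay in
# the open file; a different snapped value triggers close-and-rename.
```
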
+ <h4>
+ <a name="time_field">
+ time_field
+ </a>
+ </h4>
+ <ul>
+ <li> Value type is <a href="../configuration#string">string</a> </li>
+ <li> Default value is "timestamp"</li>
+ </ul>
+ <p>Specify which field to use as the 'timestamp' when determining filename times. Values from the 'timestamp' field are used for <code>starttime</code> (first value seen) and <code>endtime</code> (last value seen) in the file name <code>&lt;group&gt;__&lt;starttime&gt;__&lt;endtime&gt;.csv</code></p>
+ <h4>
+ <a name="time_field_format">
+ time_field_format (required setting)
+ </a>
+ </h4>
+ <ul>
+ <li> Value type is <a href="../configuration#string">string</a> </li>
+ <li> There is no default value for this setting</li>
+ </ul>
+ <p>A format string, in Java SimpleDateFormat syntax, specifying how to interpret the time field values, e.g. <code>"yyyy-MM-dd HH:mm:ss"</code>. </p>
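The plugin itself parses this format with `java.text.SimpleDateFormat` under JRuby. As a plain-Ruby illustration of what the pattern means, the SimpleDateFormat pattern `"yyyy-MM-dd HH:mm:ss"` corresponds to the `strptime` pattern used below (the sample value is an assumption):

```ruby
require 'date'

# Plain-Ruby equivalent of parsing a "yyyy-MM-dd HH:mm:ss" time field.
# (The plugin uses java.text.SimpleDateFormat; %Y-%m-%d %H:%M:%S is the
# matching strptime pattern.)
t = DateTime.strptime("2015-07-02 10:15:30", "%Y-%m-%d %H:%M:%S")
epoch_seconds = t.to_time.to_i   # what the interval-snapping logic operates on
```
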
+ <h4>
+ <a name="timestamp_output_format">
+ timestamp_output_format
+ </a>
+ </h4>
+ <ul>
+ <li> Value type is <a href="../configuration#string">string</a> </li>
+ <li> If not specified, it uses the format declared by <code>time_field_format</code></li>
+ </ul>
+ <p>A format string, in Java SimpleDateFormat syntax, specifying how to output filename timestamps</p>
+ <h4>
+ <a name="increment_time">
+ increment_time
+ </a>
+ </h4>
+ <ul>
+ <li> Value type is <a href="../configuration#boolean">boolean</a> </li>
+ <li> Default value is false</li>
+ </ul>
+ <p>
+ By default, the supplied timestamp is left as-is. If set to <code>true</code>, the end timestamp is incremented by one second. This ensures that the end time is greater than the last event time in the file, per PI datafile requirements
+ </p>
+ <hr>
+ </div>
+ <div class="clear">
+ </div>
+ </div>
+ </div>
+ <!--closes main container div-->
+ <div class="clear">
+ </div>
+ <div class="footer">
+ <p>
+ Hello! I'm your friendly footer. If you're actually reading this, I'm impressed.
+ </p>
+ </div>
+ <noscript>
+ <div style="display:inline;">
+ <img height="1" width="1" style="border-style:none;" alt="" src="//googleads.g.doubleclick.net/pagead/viewthroughconversion/985891458/?value=0&amp;guid=ON&amp;script=0"/>
+ </div>
+ </noscript>
+ <script src="/js/patch.js?1.4.2"></script>
+ </body>
+ </html>
 
- This is a plugin for [Logstash](https://github.com/elasticsearch/logstash).
-
- It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
-
- ## Documentation
-
- Logstash provides infrastructure to automatically generate documentation for this plugin. We use the asciidoc format to write documentation so any comments in the source code will be first converted into asciidoc and then into html. All plugin documentation are placed under one [central location](http://www.elasticsearch.org/guide/en/logstash/current/).
-
- - For formatting code or config example, you can use the asciidoc `[source,ruby]` directive
- - For more asciidoc formatting tips, see the excellent reference here https://github.com/elasticsearch/docs#asciidoc-guide
-
- ## Need Help?
-
- Need help? Try #logstash on freenode IRC or the https://discuss.elastic.co/c/logstash discussion forum.
-
- ## Developing
-
- ### 1. Plugin Developement and Testing
-
- #### Code
- - To get started, you'll need JRuby with the Bundler gem installed.
-
- - Create a new plugin or clone and existing from the GitHub [logstash-plugins](https://github.com/logstash-plugins) organization. We also provide [example plugins](https://github.com/logstash-plugins?query=example).
-
- - Install dependencies
- ```sh
- bundle install
- ```
-
- #### Test
-
- - Update your dependencies
-
- ```sh
- bundle install
- ```
-
- - Run tests
-
- ```sh
- bundle exec rspec
- ```
-
- ### 2. Running your unpublished Plugin in Logstash
-
- #### 2.1 Run in a local Logstash clone
-
- - Edit Logstash `Gemfile` and add the local plugin path, for example:
- ```ruby
- gem "logstash-filter-awesome", :path => "/your/local/logstash-filter-awesome"
- ```
- - Install plugin
- ```sh
- bin/plugin install --no-verify
- ```
- - Run Logstash with your plugin
- ```sh
- bin/logstash -e 'filter {awesome {}}'
- ```
- At this point any modifications to the plugin code will be applied to this local Logstash setup. After modifying the plugin, simply rerun Logstash.
-
- #### 2.2 Run in an installed Logstash
-
- You can use the same **2.1** method to run your plugin in an installed Logstash by editing its `Gemfile` and pointing the `:path` to your local plugin development directory or you can build the gem and install it using:
-
- - Build your plugin gem
- ```sh
- gem build logstash-filter-awesome.gemspec
- ```
- - Install the plugin from the Logstash home
- ```sh
- bin/plugin install /your/local/plugin/logstash-filter-awesome.gem
- ```
- - Start Logstash and proceed to test the plugin
-
- ## Contributing
-
- All contributions are welcome: ideas, patches, documentation, bug reports, complaints, and even something you drew up on a napkin.
-
- Programming is not a required skill. Whatever you've seen about open source and maintainers or community members saying "send patches or die" - you will not see that here.
-
- It is more important to the community that you are able to contribute.
-
- For more information about contributing, see the [CONTRIBUTING](https://github.com/elasticsearch/logstash/blob/master/CONTRIBUTING.md) file.
@@ -4,7 +4,7 @@
  #
  # Logstash mediation output for SCAPI
  #
- # Version 160215.1 Robert Mckeown
+ # Version 170615.1 Robert Mckeown
  #
  ############################################
 
@@ -42,14 +42,13 @@ class LogStash::Outputs::SCACSV < LogStash::Outputs::File
  # Name of the output group - used as a prefix in the renamed file
  config :group, :validate => :string, :required => true
  config :max_size, :validate => :number, :default => 0
+ config :file_interval_width, :validate => :string, :default => "" # Allow "" or "hour","day" or "minute"
  config :flush_interval, :validate => :number, :default => 60
  config :time_field, :validate => :string, :default => "timestamp"
  # config :time_format, :validate => :string, :default => "%Y%m%d%H%M%S"
  config :time_field_format, :validate => :string, :required => true
  config :timestamp_output_format, :validate => :string, :default => "" # "yyyyMMddHHmmss" # java format
 
-
-
  config :tz_offset, :validate => :number, :default => 0
  config :increment_time, :validate => :boolean, :default => false
 
@@ -63,11 +62,35 @@ class LogStash::Outputs::SCACSV < LogStash::Outputs::File
  @endTime = "missingEndTime"
  @recordCount = 0
 
- @lastOutputTime = 0
+ @lastOutputTime = 0 # data time
  @flushInterval = @flush_interval.to_i
 
  @timerThread = Thread.new { flushWatchdog(@flush_interval) }
 
+ @currentOutputIntervalStartTime = 0
+ @fileIntervalWidthSeconds = 0
+ @closeOnIntervalBoundaries = false
+ case @file_interval_width.upcase
+ when "MINUTE"
+   @fileIntervalWidthSeconds = 60
+   @closeOnIntervalBoundaries = true
+ when "HOUR"
+   @fileIntervalWidthSeconds = 3600
+   @closeOnIntervalBoundaries = true
+ when "DAY"
+   @fileIntervalWidthSeconds = 86400
+   @closeOnIntervalBoundaries = true
+ else
+   @fileIntervalWidthSeconds = 0 # not used
+   @closeOnIntervalBoundaries = false
+ end
+
+ # Build the date parser once; when time_field_format is "epoch",
+ # timestamps are treated as epoch seconds and no parser is needed
+ @df = nil
+ if (@time_field_format != "epoch")
+   @df = java.text.SimpleDateFormat.new(@time_field_format)
+ end
+
  end
 
  # This thread ensures that we output (close and rename) a file every so often
@@ -101,6 +124,12 @@ class LogStash::Outputs::SCACSV < LogStash::Outputs::File
  closeAndRenameCurrentFile
  end
  else
+
+ # Now see if we need to close the file because we've crossed an interval boundary
+ if @closeOnIntervalBoundaries and @recordCount >= 1 and (@currentOutputIntervalStartTime != snapTimestampToInterval(timestampAsEpochSeconds(event), @fileIntervalWidthSeconds))
+   closeAndRenameCurrentFile
+ end
+
  @formattedPath = event.sprintf(@path)
  fd = open(@formattedPath)
  @logger.debug("SCACSVreceive - after opening fd=" + fd.to_s)
@@ -127,11 +156,27 @@ class LogStash::Outputs::SCACSV < LogStash::Outputs::File
 
  # capture the earliest - assumption is that records are in order
  if (@recordCount) == 1
- @startTime = event[@time_field]
+ if !@closeOnIntervalBoundaries
+   @startTime = event[@time_field]
+ else
+   # snap the start time down to the enclosing interval boundary
+   @startTime = snapTimestampToInterval(timestampAsEpochSeconds(event), @fileIntervalWidthSeconds)
+ end
  end
 
  # for every record, update endTime - again, assumption is that records are in order
- @endTime = event[@time_field]
+ if !@closeOnIntervalBoundaries
+   @endTime = event[@time_field]
+ else
+   @endTime = @startTime + @fileIntervalWidthSeconds - 1 # end of interval
+ end
+
+ # remember start of boundary for next time
+ if @closeOnIntervalBoundaries
+   @currentOutputIntervalStartTime = @startTime
+ end
+
 
  if ((@max_size > 0) and (@recordCount >= max_size))
  # Have enough records, close it out
@@ -142,6 +187,22 @@ class LogStash::Outputs::SCACSV < LogStash::Outputs::File
 
  end #def receive
 
+ private
+ def timestampAsEpochSeconds(event)
+   # rmck: come back and remove global refs here!
+   if !@df.nil?
+     @df.parse(event[@time_field])
+   else
+     # when @df is not set, we assume the value is already epoch seconds
+     event[@time_field].to_i
+   end
+ end
+
+ private
+ def snapTimestampToInterval(timestamp, interval)
+   # integer-divide down to the nearest interval boundary
+   (timestamp / interval) * interval
+ end
+
  private
  def get_value(name, event)
  val = event[name]
@@ -215,13 +276,11 @@ class LogStash::Outputs::SCACSV < LogStash::Outputs::File
  if (@time_field_format != "epoch")
  # if not epoch, then we expect java timestamp format
  # so must convert start/end times
+ nStartTime = @df.parse(@startTime)
+ nEndTime = @df.parse(@endTime)
 
- df = java.text.SimpleDateFormat.new(@time_field_format)
- nStartTime = df.parse(@startTime)
- nEndTime = df.parse(@endTime)
-
- @startTime = df.parse(@startTime).getTime
- @endTime = df.parse(@endTime).getTime
+ @startTime = @df.parse(@startTime).getTime
+ @endTime = @df.parse(@endTime).getTime
 
  end
 
@@ -235,7 +294,6 @@ class LogStash::Outputs::SCACSV < LogStash::Outputs::File
  @endTime = @endTime.to_i + @tz_offset
  if (@increment_time)
  # increment is used to ensure that the end-time on the filename is after the last data value
-
  @endTime = @endTime.to_i + 1000 # 1000ms = 1sec
 
  end
@@ -1,6 +1,6 @@
  Gem::Specification.new do |s|
  s.name = 'logstash-output-scacsv'
- s.version = "1.0.0"
+ s.version = "1.0.1"
  s.licenses = ["Apache License (2.0)"]
  s.summary = "Receives a stream of events and outputs files meeting the csv reqmts for IBM SmartCloudAnalytics Predictive Insights"
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: logstash-output-scacsv
  version: !ruby/object:Gem::Version
-   version: 1.0.0
+   version: 1.0.1
  platform: ruby
  authors:
  - Robert Mckeown
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-06-12 00:00:00.000000000 Z
+ date: 2015-07-02 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: logstash-core