logstash-input-sfdc_elf 1.0.2

Files changed (35)
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rubocop.yml +41 -0
  4. data/Gemfile +9 -0
  5. data/LICENSE.md +12 -0
  6. data/README.md +1 -0
  7. data/Rakefile +13 -0
  8. data/lib/logstash/inputs/sfdc_elf.rb +147 -0
  9. data/lib/logstash/inputs/sfdc_elf/client_with_streaming_support.rb +61 -0
  10. data/lib/logstash/inputs/sfdc_elf/queue_util.rb +176 -0
  11. data/lib/logstash/inputs/sfdc_elf/scheduler.rb +73 -0
  12. data/lib/logstash/inputs/sfdc_elf/state_persistor.rb +49 -0
  13. data/logstash-input-sfdc_elf.gemspec +29 -0
  14. data/spec/fixtures/auth_success_response.json +7 -0
  15. data/spec/fixtures/describe.json +3526 -0
  16. data/spec/fixtures/org_query_response.json +11 -0
  17. data/spec/fixtures/queue_util/create_event_ELF_list1.json +19 -0
  18. data/spec/fixtures/queue_util/create_event_ELF_list2.json +29 -0
  19. data/spec/fixtures/queue_util/create_event_ELF_list3.json +29 -0
  20. data/spec/fixtures/queue_util/create_event_sampledata1.csv +31 -0
  21. data/spec/fixtures/queue_util/create_event_sampledata2.csv +2 -0
  22. data/spec/fixtures/queue_util/create_event_sampledata3.csv +2 -0
  23. data/spec/fixtures/queue_util/eventlogfile_describe.json +990 -0
  24. data/spec/fixtures/queue_util/eventlogfile_list.json +62 -0
  25. data/spec/fixtures/queue_util/sample_data1.csv +5 -0
  26. data/spec/fixtures/queue_util/sample_data2.csv +5 -0
  27. data/spec/fixtures/queue_util/sample_data3.csv +1467 -0
  28. data/spec/fixtures/queue_util/sample_data4.csv +2 -0
  29. data/spec/fixtures/queue_util/sample_data5.csv +309 -0
  30. data/spec/inputs/sfdc_elf/queue_util_spec.rb +176 -0
  31. data/spec/inputs/sfdc_elf/scheduler_spec.rb +56 -0
  32. data/spec/inputs/sfdc_elf/state_persistor_spec.rb +58 -0
  33. data/spec/inputs/sfdc_elf_spec.rb +101 -0
  34. data/spec/spec_helper.rb +38 -0
  35. metadata +180 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+ metadata.gz: 156b5331cb015cdb70ac95c8dd4310d5eb890826
+ data.tar.gz: 0400cb3ec9a08acc72954d744c721c7dacfd1b1b
+ SHA512:
+ metadata.gz: e816e475d111272dc91011dac75ea5579098dbe0ead14a04e02e4b9b423ac58531fa285b8698f4b91cf95e70e8a8a43e99a81a3bf74223c05254e3a6b3df13a9
+ data.tar.gz: ac8df1947c0fff174c489e1a2c94de2f247184d1318ea1a48dd498ad440d21354081e11b2bccdc6964a96d3e1a0b6e8ab6e1abcc793073efd4764f96eaa27465
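For reference, a minimal sketch of recomputing these digests after unpacking the .gem archive (the file names are taken from the entries above):

```ruby
require 'digest'

# metadata.gz and data.tar.gz are the two members of the unpacked .gem archive.
%w[metadata.gz data.tar.gz].each do |name|
  puts "#{name} SHA1:   #{Digest::SHA1.file(name).hexdigest}"
  puts "#{name} SHA512: #{Digest::SHA512.file(name).hexdigest}"
end
```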
data/.gitignore ADDED
@@ -0,0 +1,11 @@
+ *.gem
+ Gemfile.lock
+ .bundle
+ vendor
+ *.iml
+ .idea
+ coverage
+ .DS_Store
+ lib/.DS_Store
+ lib/logstash/.DS_Store
+ lib/logstash/inputs/.DS_Store
data/.rubocop.yml ADDED
@@ -0,0 +1,41 @@
+ # Must be compact so that Logstash can locate plugins via "LogStash::Inputs::YourPluginName".
+ Style/ClassAndModuleChildren:
+   EnforcedStyle: compact
+
+ # ABC is a metric based on the number of Assignments, Branches, and Conditions.
+ # More info: http://c2.com/cgi/wiki?AbcMetric
+ AbcSize:
+   Max: 25
+
+ # Disable messages about using fail instead of raise inside a begin/rescue block.
+ SignalException:
+   Enabled: false
+
+ # Disable the message about providing an exception class and message as arguments to raise.
+ RaiseArgs:
+   EnforcedStyle: compact
+
+ Metrics/LineLength:
+   AllowURI: true
+   Max: 120
+
+ Metrics/MethodLength:
+   Enabled: false
+
+ Metrics/ClassLength:
+   Max: 150
+
+ EmptyLines:
+   Enabled: false
+
+ # Allow no blank line between an access modifier (public or private) and the method that follows.
+ Style/EmptyLinesAroundAccessModifier:
+   Enabled: false
+
+ # Allow chained method calls to place the dot at the start of the next line.
+ Style/DotPosition:
+   Enabled: false
+
+ # Allow the word "get" in accessor method names.
+ Style/AccessorMethodName:
+   Enabled: false
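For illustration, here is the compact class style the first rule above enforces, next to the nested style it forbids (a sketch; Logstash resolves plugin classes by their fully qualified constant name):

```ruby
# Compact style (enforced above): the full constant path appears in one place,
# which lets Logstash locate the plugin as LogStash::Inputs::SfdcElf.
class LogStash::Inputs::SfdcElf < LogStash::Inputs::Base
end

# Nested style (rejected by EnforcedStyle: compact):
module LogStash
  module Inputs
    class SfdcElf < LogStash::Inputs::Base
    end
  end
end
```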
data/Gemfile ADDED
@@ -0,0 +1,9 @@
+ source 'https://rubygems.org'
+ gemspec
+
+ group :test do
+   gem 'rubocop', '>= 0.27'
+   gem 'simplecov', '>= 0.9'
+   gem 'webmock'
+   gem 'timecop'
+ end
data/LICENSE.md ADDED
@@ -0,0 +1,12 @@
+ # Logstash plugin for EventLogFile End User License Agreement
+
+ Except as described below, Logstash plugin for EventLogFile is Copyright (c) 2015, salesforce.com, inc. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+
+ * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+ * Neither the name of salesforce.com, inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1 @@
+ # Logstash Plugin for Salesforce's Event Log File
data/Rakefile ADDED
@@ -0,0 +1,13 @@
+ @files = []
+
+ require 'rubocop/rake_task'
+ RuboCop::RakeTask.new
+
+ require 'rspec/core/rake_task'
+ RSpec::Core::RakeTask.new(:spec)
+
+ require 'logstash/devutils/rake'
+
+ task test: :spec
+
+ task default: :test
data/lib/logstash/inputs/sfdc_elf.rb ADDED
@@ -0,0 +1,147 @@
+ # encoding: utf-8
+ require 'logstash/inputs/base'
+ require 'logstash/namespace'
+ require_relative 'sfdc_elf/client_with_streaming_support'
+ require_relative 'sfdc_elf/queue_util'
+ require_relative 'sfdc_elf/state_persistor'
+ require_relative 'sfdc_elf/scheduler'
+
+ # This plugin enables Salesforce customers to load EventLogFile (ELF) data from their Force.com orgs. The plugin
+ # handles downloading the ELF CSV files, parsing them, and handling any schema changes transparently.
+ class LogStash::Inputs::SfdcElf < LogStash::Inputs::Base
+   LOG_KEY        = 'SFDC'
+   RETRY_ATTEMPTS = 3
+
+   config_name 'sfdc_elf'
+   default :codec, 'plain'
+
+   # Username for your Force.com organization.
+   config :username, validate: :string, required: true
+
+   # Password for your Force.com organization.
+   config :password, validate: :password, required: true
+
+   # Client id for your Force.com organization.
+   config :client_id, validate: :password, required: true
+
+   # Client secret for your Force.com organization.
+   config :client_secret, validate: :password, required: true
+
+   # The host to use for OAuth2 authentication.
+   config :host, validate: :string, default: 'login.salesforce.com'
+
+   # Only needed when your Force.com organization requires it.
+   # Security token for your Force.com organization. Go to My Settings > Personal > Reset My Security Token, then
+   # click the "Reset Security Token" button and the token will be emailed to you.
+   config :security_token, validate: :password, default: ''
+
+   # The path used to store the .sfdc_info_logstash state persistor file, for example `~/SomeDirectory`. Paths
+   # must be absolute, not relative.
+   config :path, validate: :string, default: Dir.home
+
+   # Specify how often the plugin should grab new data, in minutes.
+   config :poll_interval_in_minutes, validate: [*1..(24 * 60)], default: (24 * 60)
+
+
+   # The first part of the Logstash pipeline is register, where all instance variables are initialized.
+
+   public
+   def register
+     # Initialize the client.
+     @client = ClientWithStreamingSupport.new
+     @client.client_id = @client_id.value
+     @client.client_secret = @client_secret.value
+     @client.host = @host
+     @client.version = '33.0'
+
+     # Authenticate the client.
+     @logger.info("#{LOG_KEY}: trying to authenticate client")
+     @client.retryable_authenticate(username: @username,
+                                    password: @password.value + @security_token.value,
+                                    retry_attempts: RETRY_ATTEMPTS)
+     @logger.info("#{LOG_KEY}: authenticating succeeded")
+
+     # Save the org id to distinguish between multiple orgs.
+     @org_id = @client.query('select id from Organization')[0]['Id']
+
+     # Set up the time interval for the forever while loop.
+     @poll_interval_in_seconds = @poll_interval_in_minutes * 60
+
+     # Handle the @path config passed by the user. If the path does not exist, set @path to the home directory.
+     verify_path
+
+     # Handles parsing the data into event objects and enqueuing them to the queue.
+     @queue_util = QueueUtil.new
+
+     # Handles when to schedule the next run based on the @poll_interval_in_minutes config.
+     @scheduler = Scheduler.new(@poll_interval_in_seconds)
+
+     # Handles the state of the plugin based on the reads and writes of LogDates to the .sfdc_info_logstash file.
+     @state_persistor = StatePersistor.new(@path, @org_id)
+
+     # Grab the last indexed log date.
+     @last_indexed_log_date = @state_persistor.get_last_indexed_log_date
+     @logger.info("#{LOG_KEY}: @last_indexed_log_date = #{@last_indexed_log_date}")
+   end # def register
+
+
+   # The second stage of the Logstash pipeline is run, where it is expected to parse your data into event objects
+   # and then pass them into the queue to be used in the rest of the pipeline.
+
+   public
+   def run(queue)
+     @scheduler.schedule do
+       # Line for readable log statements.
+       @logger.info('---------------------------------------------------')
+
+       # Grab a list of SObjects, specifically EventLogFiles.
+       soql_expr = "SELECT Id, EventType, Logfile, LogDate, LogFileLength, LogFileFieldTypes
+                    FROM EventLogFile
+                    WHERE LogDate > #{@last_indexed_log_date} ORDER BY LogDate ASC "
+
+       query_result_list = @client.retryable_query(username: @username,
+                                                   password: @password.value + @security_token.value,
+                                                   retry_attempts: RETRY_ATTEMPTS,
+                                                   soql_expr: soql_expr)
+
+       @logger.info("#{LOG_KEY}: query result size = #{query_result_list.size}")
+
+       if !query_result_list.empty?
+         # query_result_list is in ascending order based on LogDate, so grab the last one in the list and save its
+         # LogDate to @last_indexed_log_date and .sfdc_info_logstash.
+         @last_indexed_log_date = query_result_list.last.LogDate.strftime('%FT%T.%LZ')
+
+         # TODO: grab tempfiles here!!
+
+         # Overwrite the .sfdc_info_logstash file with the @last_indexed_log_date.
+         # Note: we currently do not support deduplication, but will implement it soon.
+         # TODO: need to implement deduplication
+         # TODO: might have to move this after enqueue_events(), in case of a crash in between.
+         # TODO: can do all @state_persistor calls after the if statement
+         @state_persistor.update_last_indexed_log_date(@last_indexed_log_date)
+
+         # Create events from query_result_list, then simply append the events to the queue.
+         @queue_util.enqueue_events(query_result_list, queue, @client)
+       end
+     end # do loop
+   end # def run
+
+
+   # Handle the @path variable passed by the user. If the path does not exist, set @path to the home directory.
+
+   private
+   def verify_path
+     # Check if the path exists; if not, set @path to the home directory.
+     unless File.directory?(@path)
+       @logger.warn("#{LOG_KEY}: provided path does not exist or is invalid. path=#{@path}")
+       @path = Dir.home
+     end
+     @logger.info("#{LOG_KEY}: path = #{@path}")
+   end
+ end # class LogStash::Inputs::SfdcElf
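For context, here is a minimal sketch of driving this input the way its specs might, assuming the standard Logstash input lifecycle (register, then run) and the Logstash runtime on the load path; every credential value below is a placeholder, not a real setting:

```ruby
require 'logstash/inputs/sfdc_elf'

# All values below are placeholders for illustration only.
plugin = LogStash::Inputs::SfdcElf.new(
  'username'      => 'user@example.com',
  'password'      => 'my-password',
  'client_id'     => 'my-connected-app-id',
  'client_secret' => 'my-connected-app-secret',
  'poll_interval_in_minutes' => 60
)

plugin.register                    # authenticates and loads persisted state
queue = Queue.new                  # Logstash supplies the real pipeline queue
Thread.new { plugin.run(queue) }   # run blocks, polling on the schedule
event = queue.pop                  # each entry is a LogStash::Event
```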
data/lib/logstash/inputs/sfdc_elf/client_with_streaming_support.rb ADDED
@@ -0,0 +1,61 @@
+ # encoding: utf-8
+ require 'databasedotcom'
+
+ # This class subclasses the Databasedotcom Client object and adds streaming
+ # download support plus retryable authentication and retryable queries.
+ class ClientWithStreamingSupport < Databasedotcom::Client
+   # Constants
+   # LOG_KEY = 'SFDC - ClientWithStreamingSupport'
+   #
+   # def initialize
+   #   @logger = Cabin::Channel.get(LogStash)
+   # end
+
+   def streaming_download(path, output_stream)
+     connection = Net::HTTP.new(URI.parse(instance_url).host, 443)
+     connection.use_ssl = true
+     encoded_path = URI.escape(path)
+
+     req = Net::HTTP::Get.new(encoded_path, 'Authorization' => "OAuth #{oauth_token}")
+     connection.request(req) do |response|
+       raise SalesForceError.new(response) unless response.is_a?(Net::HTTPSuccess)
+       response.read_body do |chunk|
+         output_stream.write chunk
+       end
+     end
+   end
+
+   # This helper method is called whenever we need to initialize the client
+   # object or whenever the client token expires. It will attempt to
+   # authenticate up to 3 times with a 30 second delay between each retry.
+   # If the 3rd try fails, the exception is raised.
+
+   def retryable_authenticate(options = {})
+     1.upto(options[:retry_attempts]) do |count|
+       begin
+         # If no exception is thrown, break out of the loop.
+         authenticate(username: options[:username], password: options[:password])
+         break
+       rescue StandardError => e
+         # Sleep for 30 seconds after each of the first two failures. On the 3rd failure,
+         # raise the exception without sleeping.
+         if count == options[:retry_attempts]
+           raise e
+         else
+           sleep(30)
+         end
+       end
+     end
+   end # def retryable_authenticate
+
+
+   def retryable_query(options = {})
+     query(options[:soql_expr])
+   rescue Databasedotcom::SalesForceError => e
+     # Session has expired. Force user logout, then re-authenticate.
+     if e.message == 'Session expired or invalid'
+       retryable_authenticate(options)
+     else
+       raise e
+     end
+   end # def retryable_query
+ end # ClientWithStreamingSupport
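As an illustration, a minimal sketch of using this client on its own, assuming valid Connected App credentials (every value below is a placeholder) and a LogFile path taken from an EventLogFile record:

```ruby
require_relative 'client_with_streaming_support'

client = ClientWithStreamingSupport.new
client.client_id     = 'my-connected-app-id'      # placeholder
client.client_secret = 'my-connected-app-secret'  # placeholder
client.host          = 'login.salesforce.com'
client.version       = '33.0'

client.retryable_authenticate(username: 'user@example.com',
                              password: 'my-password',  # password + security token
                              retry_attempts: 3)

# Stream the CSV body straight to disk instead of buffering it all in memory.
# The path below is a made-up LogFile value for illustration.
File.open('event_log.csv', 'wb') do |out|
  client.streaming_download('/services/data/v33.0/sobjects/EventLogFile/0ATD000000001aaAAA/LogFile', out)
end
```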
data/lib/logstash/inputs/sfdc_elf/queue_util.rb ADDED
@@ -0,0 +1,176 @@
+ # encoding: utf-8
+ require 'csv'
+ require 'resolv'
+
+ # Handles parsing data into event objects and then enqueuing all of the events to the queue.
+ class QueueUtil
+   # Constants
+   LOG_KEY    = 'SFDC - QueueUtil'
+   SEPARATOR  = ','
+   QUOTE_CHAR = '"'
+
+   # Zip up the tempfile, which is a CSV file, and the field types, so that when parsing the CSV file we can
+   # accurately convert each field to its respective type, like Integers and Booleans.
+   EventLogFile = Struct.new(:field_types, :temp_file, :event_type)
+
+
+   def initialize
+     @logger = Cabin::Channel.get(LogStash)
+   end
+
+
+   # Given a list of query results, iterate through it and grab the CSV file associated with each. Then parse the
+   # CSV file line by line, generate the event object for each line, and enqueue it.
+
+   public
+   def enqueue_events(query_result_list, queue, client)
+     @logger.info("#{LOG_KEY}: enqueue events")
+
+     # Grab a list of Tempfiles that contain the CSV file data.
+     event_log_file_records = get_event_log_file_records(query_result_list, client)
+
+     # Iterate through each record.
+     event_log_file_records.each do |elf|
+       begin
+         # Create a local variable to simplify and make the code more readable.
+         tmp = elf.temp_file
+
+         # Get the schema from the first line in the tempfile. It will be in CSV format, so we parse it, and it
+         # will return an array.
+         schema = CSV.parse_line(tmp.readline, col_sep: SEPARATOR, quote_char: QUOTE_CHAR)
+
+         # Loop through the tempfile, line by line.
+         tmp.each_line do |line|
+           # Parse the current line; it will return a string array.
+           string_array = CSV.parse_line(line, col_sep: SEPARATOR, quote_char: QUOTE_CHAR)
+
+           # Convert the string array into its corresponding type array.
+           data = string_to_type_array(string_array, elf.field_types)
+
+           # create_event will return an event object.
+           queue << create_event(schema, data, elf.event_type)
+         end
+       ensure
+         # Close the tmp file and unlink it; doing this will delete the actual tempfile.
+         tmp.close
+         tmp.unlink
+       end
+     end # do loop, tempfile_list
+   end # def enqueue_events
+
+
+   # Convert the given string array to its corresponding type array and return it.
+
+   private
+   def string_to_type_array(string_array, field_types)
+     data = []
+
+     field_types.each_with_index do |type, i|
+       case type
+       when 'Number'
+         data[i] = (string_array[i].empty?) ? nil : string_array[i].to_f
+       when 'Boolean'
+         data[i] = (string_array[i].empty?) ? nil : (string_array[i] == '0')
+       when 'IP'
+         data[i] = valid_ip(string_array[i]) ? string_array[i] : nil
+       else # 'String', 'Id', 'EscapedString', 'Set'
+         data[i] = (string_array[i].empty?) ? nil : string_array[i]
+       end
+     end # do loop
+
+     data
+   end # def string_to_type_array
+
+
+   # Check if the given IP address is truly an IP address.
+
+   private
+   def valid_ip(ip)
+     ip =~ Resolv::IPv4::Regex ? true : false
+   end
+
+
+   # Based on the schema and data, we create the event object. At any point, if the data is nil we simply don't
+   # add it to the event object. Special handling is needed when the schema field 'TIMESTAMP' occurs; the data
+   # associated with it needs to be converted into a LogStash::Timestamp.
+
+   private
+   def create_event(schema, data, event_type)
+     # Initialize the event to be used. @timestamp and @version are automatically added.
+     event = LogStash::Event.new
+
+     # Add each column/data pair to the event.
+     data.each_index do |i|
+       # Grab the current key.
+       schema_name = schema[i]
+
+       # Handle when the field name is 'TIMESTAMP': change the @timestamp field to the actual time in the CSV
+       # file, converted to ISO 8601.
+       if schema_name == 'TIMESTAMP'
+         epochmillis = DateTime.parse(data[i]).to_time.to_f
+         event.timestamp = LogStash::Timestamp.at(epochmillis)
+       end
+
+       # Allow Elasticsearch indexes to have their type set to the EventType.
+       event['type'] = event_type.downcase
+
+       # Add the schema/data pair to the event object.
+       if data[i] != nil
+         event[schema_name] = data[i]
+       end
+     end
+
+     # Return the event.
+     event
+   end # def create_event
+
+
+   # This helper method takes as input a list/collection of SObjects, each of which contains a path to its
+   # respective CSV file. The path is stored in the LogFile field. Using that path, we are able to grab the actual
+   # CSV file via the @client.http_get method.
+   #
+   # After grabbing the CSV file, we store it using the standard Tempfile library. Tempfile will create a unique
+   # file each time, using 'sfdc_elf_tempfile' as the prefix, and finally we return a list of Tempfile objects,
+   # which the user can read and then close and unlink, which will delete the file.
+
+   public
+   def get_event_log_file_records(query_result_list, client)
+     @logger.info("#{LOG_KEY}: generating tempfile list")
+     result = []
+     query_result_list.each do |event_log_file|
+       # Get the path of the CSV file from the LogFile field, then stream the data to the Tempfile's .write method.
+       tmp = Tempfile.new('sfdc_elf_tempfile')
+       client.streaming_download(event_log_file.LogFile, tmp)
+
+       # Flushing will write the buffer into the Tempfile itself.
+       tmp.flush
+
+       # Rewind will move the file pointer from the end to the beginning of the file, so that users can simply
+       # call the read method.
+       tmp.rewind
+
+       # Append the EventLogFile object to the result list.
+       field_types = event_log_file.LogFileFieldTypes.split(',')
+       result << EventLogFile.new(field_types, tmp, event_log_file.EventType)
+
+       # Log the info from the event_log_file object.
+       @logger.info(" #{LOG_KEY}: Id = #{event_log_file.Id}")
+       @logger.info(" #{LOG_KEY}: EventType = #{event_log_file.EventType}")
+       @logger.info(" #{LOG_KEY}: LogFile = #{event_log_file.LogFile}")
+       @logger.info(" #{LOG_KEY}: LogDate = #{event_log_file.LogDate}")
+       @logger.info(" #{LOG_KEY}: LogFileLength = #{event_log_file.LogFileLength}")
+       @logger.info(" #{LOG_KEY}: LogFileFieldTypes = #{event_log_file.LogFileFieldTypes}")
+       @logger.info(' ......................................')
+     end
+     result
+   end # def get_event_log_file_records
+ end # QueueUtil
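To make the conversion rules above concrete, a small worked example with a hypothetical row (this assumes the Logstash runtime is loaded, since QueueUtil's constructor grabs a Cabin logger):

```ruby
util = QueueUtil.new
field_types  = ['Id', 'Number', 'Boolean', 'IP']  # from LogFileFieldTypes
string_array = ['015000000000001AAA', '42', '0', '10.0.0.1']

util.send(:string_to_type_array, string_array, field_types)
# => ['015000000000001AAA', 42.0, true, '10.0.0.1']
#
# Per the case statement above: 'Number' fields become Floats, a 'Boolean'
# field compares the raw string against '0', an 'IP' field is kept only if it
# matches Resolv::IPv4::Regex, and empty strings become nil.
```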