logstash-filter-empowclassifier 0.3.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +2 -0
- data/CONTRIBUTORS +11 -0
- data/Gemfile +2 -0
- data/LICENSE +11 -0
- data/README.md +90 -0
- data/lib/logstash/filters/center-client.rb +208 -0
- data/lib/logstash/filters/classification-request.rb +17 -0
- data/lib/logstash/filters/classifier-cache.rb +51 -0
- data/lib/logstash/filters/classifier.rb +325 -0
- data/lib/logstash/filters/cognito-client.rb +48 -0
- data/lib/logstash/filters/elastic-db.rb +128 -0
- data/lib/logstash/filters/empowclassifier.rb +249 -0
- data/lib/logstash/filters/field-handler.rb +127 -0
- data/lib/logstash/filters/local-classifier.rb +94 -0
- data/lib/logstash/filters/plugin-logic.rb +163 -0
- data/lib/logstash/filters/response.rb +36 -0
- data/lib/logstash/filters/utils.rb +46 -0
- data/logstash-filter-empowclassifier.gemspec +38 -0
- data/spec/filters/bulk-processor_spec.rb +92 -0
- data/spec/filters/center-client_spec.rb +88 -0
- data/spec/filters/classifier-cache_spec.rb +44 -0
- data/spec/filters/classifier_spec.rb +78 -0
- data/spec/filters/cognito-client_spec.rb +20 -0
- data/spec/filters/elastic-db_spec.rb +44 -0
- data/spec/filters/empowclassifier_spec.rb +103 -0
- data/spec/filters/field-handler_spec.rb +101 -0
- data/spec/filters/local-classifier_spec.rb +46 -0
- data/spec/filters/plugin-logic_spec.rb +127 -0
- data/spec/filters/utils_spec.rb +74 -0
- data/spec/spec_helper.rb +2 -0
- metadata +260 -0
@@ -0,0 +1,94 @@
|
|
1
|
+
require "concurrent"
require_relative 'classifier-cache'

module LogStash; module Filters; module Empow;
	# Two-tier local classification store: an in-memory TTL cache backed by an
	# optional persistent local db. Db lookups can run synchronously or be
	# dispatched to a single background worker (async mode), in which case a
	# cache miss returns nil immediately and the db result lands in the cache
	# for a later attempt.
	class LocalClassifier
		include LogStash::Util::Loggable

		# @param cache_size [Integer] max entries held by the in-memory cache
		# @param ttl [Numeric] seconds a db-sourced entry stays valid in the cache
		# @param async_local_db [Boolean] when true, db lookups are queued on a
		#   background worker instead of blocking the caller
		# @param local_db [Object, nil] persistent store responding to
		#   #query(product_type, product, term) and #save(...), or nil when no
		#   local db is configured
		def initialize(cache_size, ttl, async_local_db, local_db)
			@logger ||= self.logger

			@logger.debug("initializing in memory cache")
			@logger.debug("cache size #{cache_size}")
			@logger.debug("cache ttl #{ttl}")

			@cache = LogStash::Filters::Empow::ClassifierCache.new(cache_size, ttl)
			@ttl = ttl

			@local_db = local_db

			# single worker thread so local-db reads are serialized
			@local_db_workers = Concurrent::ThreadPoolExecutor.new(min_threads: 1, max_threads: 1)
			@async_local_db = async_local_db
		end

		# Shuts down the background worker pool, waiting up to 1 second.
		def close
			@logger.debug("shutting down local classifier")

			# FIX: the pool is always created, so shut it down and await it
			# unconditionally; the original skipped shutdown when no local db was
			# configured yet still called wait_for_termination on the live pool.
			@local_db_workers.shutdown
			@local_db_workers.wait_for_termination(1)

			@logger.debug("local classifier shut down")
		end

		# Returns the classification for key from the cache, falling back to the
		# local db. Returns nil on a miss (and always nil for an async-mode miss,
		# since the db lookup is only scheduled).
		def classify(key)
			if !key.nil?
				cached_result = @cache.classify(key)
				return cached_result if !cached_result.nil?
			end

			return classify_using_local_database(key)
		end

		# Caches val under key until expiration_time. No-op for a nil key.
		def add_to_cache(key, val, expiration_time)
			return if key.nil?

			@logger.debug? and @logger.debug("adding #{key} to cache")

			# FIX: honor the caller-supplied expiration; the original ignored the
			# expiration_time parameter and pinned every entry for a hard-coded
			# 3600 seconds (Time.now+3600).
			@cache.put(key, val, expiration_time)
		end

		# Persists val in the local db (when one is configured) and caches it.
		def save_to_cache_and_db(key, val, expiration_time)
			return if key.nil?

			@logger.debug? and @logger.debug("adding #{key} to the local db and cache")

			product_type = key[:product_type]
			product = key[:product]
			term = key[:term]

			# FIX: interpolate the term; the original emitted the literal string
			# "term", collapsing every term of a product onto a single doc id and
			# overwriting earlier classifications in the db.
			doc_id = "#{product_type}-#{product}-#{term}"

			@local_db.save(doc_id, product_type, product, term, val) if !@local_db.nil?
			add_to_cache(key, val, expiration_time)
		end

		# Synchronous local-db lookup; a hit refreshes the in-memory cache.
		# Returns the db result or nil.
		def read_from_local_database(key)
			res = @local_db.query(key[:product_type], key[:product], key[:term])

			if !res.nil?
				@logger.debug("adding result from db to local cache")
				add_to_cache(key, res, Time.now + @ttl)
			end

			return res
		end

		# Queues a db lookup on the worker pool; the result is only observable
		# through the cache on a later classify call.
		def read_from_local_database_async(key)
			@local_db_workers.post do
				read_from_local_database(key)
			end
		end

		# Db-backed lookup honoring the configured sync/async mode.
		def classify_using_local_database(key)
			return nil if @local_db.nil? # if a local db wasn't configured

			if (@async_local_db)
				read_from_local_database_async(key)
				return nil
			end

			return read_from_local_database(key)
		end
	end
end; end; end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
require 'time'
require "concurrent"
require_relative "classification-request"
require_relative "field-handler"
require_relative 'response'
require_relative 'utils'

module LogStash; module Filters; module Empow;
	# Orchestrates classification of logstash events: events that cannot be
	# classified immediately are "parked" and retried on each flush until a
	# final classification arrives, the parking time expires, or the parking
	# lot overflows (oldest event is then released unclassified).
	class PluginLogic
		include LogStash::Util::Loggable

		# NOTE(review): "classifer" is a typo ("classifier") preserved here for
		# compatibility; it is used consistently throughout this class.
		#
		# @param classifer classification backend responding to #classify/#close
		# @param field_handler converts events to classification requests
		# @param max_parking_time [Numeric] seconds an event may stay parked
		# @param max_parked_events [Integer] parking-lot capacity
		# @param tag_on_timeout [Array<String>] tags applied on parking timeout
		# @param tag_on_error [Array<String>] tags applied on request/response errors
		def initialize(classifer, field_handler, max_parking_time, max_parked_events, tag_on_timeout, tag_on_error)
			@logger ||= self.logger
			#@logger.info("initializing classifier")

			@field_handler = field_handler

			@max_parking_time = max_parking_time
			@max_parked_events = max_parked_events
			@tag_on_timeout = tag_on_timeout
			@tag_on_error = tag_on_error

			@classifer = classifer
			# thread-safe array: flush and filter may run on different threads
			@parked_events = Concurrent::Array.new
		end

		# Shuts down the underlying classifier.
		def close
			@classifer.close
		end

		# Classifies a single event.
		# Returns the event when it can be released immediately (classified, or
		# malformed and error-tagged, or evicted as the oldest parked event on
		# overflow); returns nil when the event was parked for a later flush.
		def classify(event)
			request = @field_handler.event_to_classification_request(event)

			if request.nil?
				# event lacks the fields needed to build a request: tag and release
				@tag_on_error.each{|tag| event.tag(tag)}
				return event
			end

			if classify_event(request, event)
				return event
			else
				park(event)

				# parking lot overflow: release the oldest parked event as-is
				if @parked_events.length > @max_parked_events
					tuple = @parked_events.shift

					if !tuple.nil?
						# todo: consider classifying the unparked event one last time
						return tuple[:event]
					end
				end

				return nil
			end
		end

		# Periodic flush hook: re-attempts classification of parked events and
		# returns the ones ready to be released. With options[:final], releases
		# everything regardless of state (pipeline shutdown).
		def flush(options = {})
			# tag flushed events,
			events_to_flush = []

			if options[:final] # indicating "final flush" special event, flush everything
				while tuple = @parked_events.shift do
					events_to_flush << tuple[:event]
				end
			else
				# delete_if removes events that process_parked_event released
				@parked_events.delete_if do |tuple|
					process_parked_event(tuple, events_to_flush)
				end
			end

			return events_to_flush
		end

		# Re-classifies one parked event tuple ({:event, :time}).
		# Appends released events to events_to_flush; returns true when the
		# tuple should be removed from the parking lot, false to keep waiting.
		private def process_parked_event(tuple, events_to_flush)
			event = tuple[:event]
			request = @field_handler.event_to_classification_request(event)

			begin
				res = @classifer.classify(request)

				if (parking_time_expired(tuple) or is_valid_classification(res))
					tag_event(res, event)

					# if we're releasing this event based on time expiration, tag it with timeout
					if res.nil? or !res.is_final
						@tag_on_timeout.each{|tag| event.tag(tag)}
					end

					events_to_flush << event
					return true
				end

			rescue StandardError => e
				@logger.error("an error occured while processing event, event flushed backed to the stream", :request => request, :backtrace => e.backtrace)
				return true # so that this event will be flushed out of the plugin
			end

			return false
		end

		private
		# True when the classification is an authorization failure.
		# NOTE(review): appears unused within this file — possibly kept for
		# callers elsewhere; verify before removing.
		def is_unauthorized(classification)
			return (!classification.nil? and classification.kind_of?(LogStash::Filters::Empow::UnauthorizedReponse))
		end

		private
		# Attempts an immediate classification; tags the event and returns true
		# only when a final classification was obtained.
		def classify_event(request, event)
			res = @classifer.classify(request)

			if is_valid_classification(res)
				tag_event(res, event)
				return true
			end

			return false
		end

		private
		# A classification is releasable only when present and final.
		def is_valid_classification(classification)
			return (!classification.nil? and classification.is_final())
		end

		private
		# Copies the classification response fields onto the event under
		# [empow_classification_response][*]; on failure, applies error tags and
		# records the response body as an empow error.
		def tag_event(classification, event)
			return if classification.nil?

			responseBody = classification.response

			@logger.debug("classification response", :classification => responseBody)

			response = responseBody["response"]

			if !response.nil? && response.size > 0
				response.each do |k, v|
					event.set("[empow_classification_response][#{k}]", v)
				end
			end

			if !classification.is_successful()
				@tag_on_error.each{|tag| event.tag(tag)}

				if (!responseBody.nil?)
					LogStash::Filters::Empow::Utils.add_error(event, responseBody)
				end
			end
		end

		private
		# Stores the event with its parking timestamp.
		def park(event)
			tuple = {}
			tuple[:event] = event
			tuple[:time] = Time.now

			@parked_events << tuple
		end

		private
		# True when the tuple has been parked longer than @max_parking_time.
		def parking_time_expired(tuple)
			return (Time.now - tuple[:time]) > @max_parking_time
		end
	end

end; end; end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module LogStash; module Filters; module Empow
	# Value objects describing the outcome of a classification attempt.
	#
	# response      - the raw payload returned by the classification backend
	# is_successful - whether the attempt succeeded
	# is_final      - whether this outcome is terminal (no retry expected)
	class AbstractResponse
		attr_reader :response, :is_successful, :is_final

		def initialize(response, is_successful, is_final)
			@response      = response
			@is_successful = is_successful
			@is_final      = is_final
		end
	end

	# Terminal, unsuccessful outcome.
	class FailureResponse < AbstractResponse
		def initialize(response)
			super(response, false, true)
		end
	end

	# Failure caused specifically by missing/invalid authorization.
	class UnauthorizedReponse < FailureResponse
	end

	# Terminal, successful outcome.
	class SuccessfulResponse < AbstractResponse
		def initialize(response)
			super(response, true, true)
		end
	end

	# Non-terminal outcome: classification is still pending.
	class InProgressResponse < AbstractResponse
		def initialize(response)
			super(response, true, false)
		end
	end
end; end; end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module LogStash; module Filters; module Empow

	# Stateless helpers shared by the empow filter plugins.
	class Utils
		# Values coerced to true/false without string normalization.
		TRUTHY_VALUES = [true, 1, '1'].freeze
		FALSEY_VALUES = [false, 0, '0'].freeze

		# True when txt is nil or contains only whitespace.
		def self.is_blank_string(txt)
			return (txt.nil? or txt.strip.length == 0)
		end

		# Coerces common boolean representations (true/false, 0/1, "0"/"1",
		# "true"/"false" in any case with surrounding whitespace) to a Boolean.
		# Returns nil when val is nil or unrecognized.
		def self.convert_to_boolean(val)
			return nil if val.nil?

			return true if TRUTHY_VALUES.include?(val)

			return false if FALSEY_VALUES.include?(val)

			return true if (val.is_a?(String) and val.downcase.strip == 'true')

			return false if (val.is_a?(String) and val.downcase.strip == 'false')

			return nil
		end

		# Records an error message on the event under 'empow_errors'.
		def self.add_error(event, msg)
			tag_empow_messages(event, msg, 'empow_errors')
		end

		# Records a warning message on the event under 'empow_warnings'.
		def self.add_warn(event, msg)
			tag_empow_messages(event, msg, 'empow_warnings')
		end

		# Appends msg to the event field named by block, de-duplicating entries.
		def self.tag_empow_messages(event, msg, block)
			messages = event.get(block)

			# using array instead of set, as set raises a logstash exception:
			# No enum constant org.logstash.bivalues.BiValues.ORG_JRUBY_RUBYOBJECTVAR0
			messages ||= Array.new
			messages << msg

			event.set(block, messages.uniq)
		end

		# FIX: a bare `private` has no effect on `def self.` methods, so the
		# original's `private` marker was a no-op; private_class_method actually
		# hides the helper as intended.
		private_class_method :tag_empow_messages
	end

end; end; end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
Gem::Specification.new do |s|
  s.name = 'logstash-filter-empowclassifier'
  s.version = '0.3.15'
  s.licenses = ['Apache-2.0']
  # FIX: the original single-quoted 'empow''s' is adjacent-literal
  # concatenation in Ruby (yielding "empows" with no apostrophe), and
  # "classifiction" was a typo.
  s.summary = "Logstash intent classification plugin client for accessing empow's classification cloud"
  #s.description = 'Write a longer description or delete this line.'
  s.homepage = 'http://www.empowcybersecurity.com'
  s.authors = ['empow', 'Assaf Abulafia', 'Rami Cohen']
  s.email = ''
  s.require_paths = ['lib']

  # Files
  s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT']
  # Tests

  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "filter" }

  # Gem dependencies
  s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
  s.add_runtime_dependency 'rest-client', '~> 1.8', '>= 1.8.0'
  s.add_runtime_dependency 'lru_redux', '~> 1.1', '>= 1.1.0'
  s.add_runtime_dependency 'json', '~> 1.8', '>= 1.8'
  #s.add_runtime_dependency 'rufus-scheduler'
  s.add_runtime_dependency 'hashie'
  #s.add_runtime_dependency "murmurhash3"

  s.add_development_dependency 'aws-sdk', '~> 3'

  s.add_development_dependency 'logstash-devutils'
  # s.add_runtime_dependency 'jwt', '~> 2.1', '>= 2.1.0'
  s.add_development_dependency "timecop", "~> 0.7"
  s.add_development_dependency "webmock", "~> 1.22", ">= 1.21.0"

  s.add_development_dependency 'elasticsearch'
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require_relative '../spec_helper'
require "logstash/filters/classifier"
require "logstash/filters/local-classifier"
require "logstash/filters/classification-request"
require "logstash/filters/center-client"
require "logstash/filters/response"
require 'timecop'

# Specs for BulkProcessor: verifies that a batch of pending requests is sent
# to the classification center and that each request is removed from the
# pending map afterwards — cached locally on success, cache-only on failure.
describe LogStash::Filters::Empow::Classification::BulkProcessor do
	#empow_user, empow_password, cache_size, ttl, async_local_db, elastic_hosts, elastic_index, elastic_username, elastic_password
	let(:time_between_attempts) { 1 }
	let(:batch_size) { 10 }
	let(:max_retries) { 5 }

	describe "test with mocked classifiers" do
		it "single failed log" do

			# freeze the clock so retry/backoff timing is deterministic
			Timecop.freeze(Time.now)

			# one pending request tuple, last attempted far in the past so it is
			# eligible for processing now
			req1 = "request1"
			val1 = {}
			val1[:retries] = 1
			val1[:task] = nil
			val1[:request] = req1
			val1[:last_executed] = Time.at(310953600)

			requests = Hash.new
			requests[req1] = val1

			local_classifier = instance_double(LogStash::Filters::Empow::LocalClassifier)
			allow(local_classifier).to receive(:classify).and_return(nil)
			allow(local_classifier).to receive(:close)

			# NOTE(review): FailureReponse (sic) — name differs from the
			# FailureResponse class in response.rb; presumably declared in
			# center-client.rb — verify.
			center_result = {}
			center_result[req1] = LogStash::Filters::Empow::FailureReponse.new("failure1")

			online_classifer = instance_double(LogStash::Filters::Empow::ClassificationCenterClient)
			allow(online_classifer).to receive(:classify).and_return(center_result)

			bulk_processor = described_class.new(max_retries, batch_size, time_between_attempts, requests, online_classifer, local_classifier)

			# a failed classification is cached (not persisted to the db)
			expect(online_classifer).to receive(:classify)
			expect(local_classifier).to receive(:add_to_cache)

			bulk_processor.execute

			#expect(local_classifier).to receive(:add_to_cache)

			# expect(res).to be_nil
			#save_to_cache_and_db

			# the request must have been removed from the pending map
			expect(requests[req1]).to be_nil

			#Timecop.freeze(Time.now + time_between_attempts)
			#Timecop.freeze(Time.now + 1 + time_between_attempts)
		end

		it "single successful log" do

			Timecop.freeze(Time.now)

			req1 = "request1"
			val1 = {}
			val1[:retries] = 1
			val1[:task] = nil
			val1[:request] = req1
			val1[:last_executed] = Time.at(310953600)

			requests = Hash.new
			requests[req1] = val1

			local_classifier = instance_double(LogStash::Filters::Empow::LocalClassifier)
			allow(local_classifier).to receive(:classify).and_return(nil)
			allow(local_classifier).to receive(:close)

			# NOTE(review): SuccessfulReponse (sic) — see note above on the
			# similarly spelled FailureReponse.
			center_result = {}
			center_result[req1] = LogStash::Filters::Empow::SuccessfulReponse.new("result1")

			online_classifer = instance_double(LogStash::Filters::Empow::ClassificationCenterClient)
			allow(online_classifer).to receive(:classify).and_return(center_result)

			bulk_processor = described_class.new(max_retries, batch_size, time_between_attempts, requests, online_classifer, local_classifier)

			# a successful classification is persisted to both the cache and db
			expect(online_classifer).to receive(:classify)
			expect(local_classifier).to receive(:save_to_cache_and_db)

			bulk_processor.execute

			expect(requests[req1]).to be_nil
		end
	end
end
|