logstash-filter-empowclassifier 0.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +2 -0
- data/CONTRIBUTORS +11 -0
- data/Gemfile +2 -0
- data/LICENSE +11 -0
- data/README.md +90 -0
- data/lib/logstash/filters/center-client.rb +208 -0
- data/lib/logstash/filters/classification-request.rb +17 -0
- data/lib/logstash/filters/classifier-cache.rb +51 -0
- data/lib/logstash/filters/classifier.rb +325 -0
- data/lib/logstash/filters/cognito-client.rb +48 -0
- data/lib/logstash/filters/elastic-db.rb +128 -0
- data/lib/logstash/filters/empowclassifier.rb +249 -0
- data/lib/logstash/filters/field-handler.rb +127 -0
- data/lib/logstash/filters/local-classifier.rb +94 -0
- data/lib/logstash/filters/plugin-logic.rb +163 -0
- data/lib/logstash/filters/response.rb +36 -0
- data/lib/logstash/filters/utils.rb +46 -0
- data/logstash-filter-empowclassifier.gemspec +38 -0
- data/spec/filters/bulk-processor_spec.rb +92 -0
- data/spec/filters/center-client_spec.rb +88 -0
- data/spec/filters/classifier-cache_spec.rb +44 -0
- data/spec/filters/classifier_spec.rb +78 -0
- data/spec/filters/cognito-client_spec.rb +20 -0
- data/spec/filters/elastic-db_spec.rb +44 -0
- data/spec/filters/empowclassifier_spec.rb +103 -0
- data/spec/filters/field-handler_spec.rb +101 -0
- data/spec/filters/local-classifier_spec.rb +46 -0
- data/spec/filters/plugin-logic_spec.rb +127 -0
- data/spec/filters/utils_spec.rb +74 -0
- data/spec/spec_helper.rb +2 -0
- metadata +260 -0
@@ -0,0 +1,94 @@
|
|
1
|
+
require "concurrent"
|
2
|
+
require_relative 'classifier-cache'
|
3
|
+
|
4
|
+
module LogStash; module Filters; module Empow;
  # Classifies keys via an in-memory TTL cache backed by an optional local
  # database. Database reads may run synchronously or on a background worker.
  class LocalClassifier
    include LogStash::Util::Loggable

    # @param cache_size [Integer] maximum entries held in the in-memory cache
    # @param ttl [Numeric] seconds a cached classification stays valid
    # @param async_local_db [Boolean] when true, cache misses trigger an async
    #   db read and #classify returns nil immediately
    # @param local_db [Object, nil] backing store responding to #query/#save, or nil
    def initialize(cache_size, ttl, async_local_db, local_db)
      @logger = self.logger

      @logger.debug("initializing in memory cache")
      @logger.debug("cache size #{cache_size}")
      @logger.debug("cache ttl #{ttl}")

      @cache = LogStash::Filters::Empow::ClassifierCache.new(cache_size, ttl)
      @ttl = ttl

      @local_db = local_db

      # single worker thread keeps local-db access serialized
      @local_db_workers = Concurrent::ThreadPoolExecutor.new(min_threads: 1, max_threads: 1)
      @async_local_db = async_local_db
    end

    # Shuts down the background worker pool, waiting up to one second.
    def close
      @logger.debug("shutting down local classifier")

      # the worker pool is always created in initialize, so always shut it
      # down (previously shutdown ran only when a local db was configured,
      # leaking the worker thread otherwise)
      @local_db_workers.shutdown
      @local_db_workers.wait_for_termination(1)
      @logger.debug("local classifier shut down")
    end

    # Returns the classification for key from the cache, falling back to the
    # local database. Returns nil when unknown, when key is nil, or when the
    # async path only scheduled a background read.
    def classify(key)
      unless key.nil?
        cached_result = @cache.classify(key)
        return cached_result unless cached_result.nil?
      end

      classify_using_local_database(key)
    end

    # Caches val under key until expiration_time. No-op for a nil key.
    def add_to_cache(key, val, expiration_time)
      return if key.nil?

      @logger.debug? and @logger.info("adding #{key} to cache")

      # honor the caller-supplied expiration; the original hardcoded
      # Time.now+3600 and silently ignored the expiration_time argument
      @cache.put(key, val, expiration_time)
    end

    # Persists val for key in the local database (when configured) and caches it.
    def save_to_cache_and_db(key, val, expiration_time)
      return if key.nil?

      @logger.debug? and @logger.info("adding #{key} to the local db and cache")

      product_type = key[:product_type]
      product = key[:product]
      term = key[:term]

      # interpolate the term value into the document id; the original used the
      # literal string "term", making every term of a product share one doc id
      # and overwrite each other in the local db
      doc_id = "#{product_type}-#{product}-#{term}"

      @local_db.save(doc_id, product_type, product, term, val) unless @local_db.nil?
      add_to_cache(key, val, expiration_time)
    end

    # Synchronous local-db lookup; refreshes the cache on a hit.
    def read_from_local_database(key)
      res = @local_db.query(key[:product_type], key[:product], key[:term])

      unless res.nil?
        @logger.debug("adding result from db to local cache")
        add_to_cache(key, res, Time.now + @ttl)
      end

      res
    end

    # Schedules a background db read; the result lands in the cache for a
    # later #classify call.
    def read_from_local_database_async(key)
      @local_db_workers.post do
        read_from_local_database(key)
      end
    end

    # Routes a cache miss to the local database (sync or async).
    # Returns nil when no db is configured or when reading asynchronously.
    def classify_using_local_database(key)
      return nil if @local_db.nil? # if a local db wasn't configured

      if @async_local_db
        read_from_local_database_async(key)
        return nil
      end

      read_from_local_database(key)
    end
  end
end; end; end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
require 'time'
|
2
|
+
require "concurrent"
|
3
|
+
require_relative "classification-request"
|
4
|
+
require_relative "field-handler"
|
5
|
+
require_relative 'response'
|
6
|
+
require_relative 'utils'
|
7
|
+
|
8
|
+
module LogStash; module Filters; module Empow;
  # Orchestrates event classification for the filter plugin: classifies
  # incoming events immediately when a final result is available, parks
  # unresolved events, and releases them on later flushes once a result
  # arrives or the parking time expires.
  class PluginLogic
    include LogStash::Util::Loggable

    # @param classifer [Object] classifier responding to #classify(request) and #close
    # @param field_handler [Object] converts events into classification requests
    # @param max_parking_time [Numeric] seconds an event may wait for classification
    # @param max_parked_events [Integer] cap on the parked-events queue
    # @param tag_on_timeout [Array<String>] tags applied to events released by timeout
    # @param tag_on_error [Array<String>] tags applied on classification errors
    def initialize(classifer, field_handler, max_parking_time, max_parked_events, tag_on_timeout, tag_on_error)
      @logger ||= self.logger
      #@logger.info("initializing classifier")

      @field_handler = field_handler

      @max_parking_time = max_parking_time
      @max_parked_events = max_parked_events
      @tag_on_timeout = tag_on_timeout
      @tag_on_error = tag_on_error

      @classifer = classifer
      # thread-safe list of {:event, :time} tuples awaiting classification
      @parked_events = Concurrent::Array.new
    end

    # Closes the underlying classifier.
    def close
      @classifer.close
    end

    # Classifies a single event.
    # Returns the event when done (classified, or tagged on request-build
    # failure); returns nil when the event was parked. When parking exceeds
    # max_parked_events, the oldest parked event is returned unclassified so
    # it re-enters the stream.
    def classify(event)
      request = @field_handler.event_to_classification_request(event)

      if request.nil?
        @tag_on_error.each{|tag| event.tag(tag)}
        return event
      end

      if classify_event(request, event)
        return event
      else
        park(event)

        if @parked_events.length > @max_parked_events
          tuple = @parked_events.shift

          if !tuple.nil?
            # todo: consider classifying the unparked event one last time
            return tuple[:event]
          end
        end

        return nil
      end
    end

    # Periodic/final flush hook. On a final flush every parked event is
    # released as-is; otherwise each parked event is re-classified and
    # released when a final classification arrived or its parking time
    # expired. Returns the events to push back into the pipeline.
    def flush(options = {})
      # tag flushed events,
      events_to_flush = []

      if options[:final] # indicating "final flush" special event, flush everything
        while tuple = @parked_events.shift do
          events_to_flush << tuple[:event]
        end
      else
        # delete_if drops a tuple exactly when process_parked_event returns
        # true, i.e. when the event was appended to events_to_flush
        @parked_events.delete_if do |tuple|
          process_parked_event(tuple, events_to_flush)
        end
      end

      return events_to_flush
    end

    # Re-classifies one parked event; appends it to events_to_flush and
    # returns true when it should leave the parking queue (final result,
    # timeout, or processing error).
    private def process_parked_event(tuple, events_to_flush)
      event = tuple[:event]
      request = @field_handler.event_to_classification_request(event)

      begin
        res = @classifer.classify(request)

        if (parking_time_expired(tuple) or is_valid_classification(res))
          tag_event(res, event)

          # if we're releasing this event based on time expiration, tag it with timeout
          if res.nil? or !res.is_final
            @tag_on_timeout.each{|tag| event.tag(tag)}
          end

          events_to_flush << event
          return true
        end

      rescue StandardError => e
        @logger.error("an error occured while processing event, event flushed backed to the stream", :request => request, :backtrace => e.backtrace)
        return true # so that this event will be flushed out of the plugin
      end

      return false
    end

    private
    # True when the classification is an unauthorized-failure response.
    # NOTE(review): not referenced anywhere in this class — presumably kept
    # for specs or future use; confirm before removing.
    def is_unauthorized(classification)
      return (!classification.nil? and classification.kind_of?(LogStash::Filters::Empow::UnauthorizedReponse))
    end

    private
    # Attempts classification; tags the event and returns true on a final result.
    def classify_event(request, event)
      res = @classifer.classify(request)

      if is_valid_classification(res)
        tag_event(res, event)
        return true
      end

      return false
    end

    private
    # A classification is usable once it is final (whether success or failure).
    def is_valid_classification(classification)
      return (!classification.nil? and classification.is_final())
    end

    private
    # Copies the classification's response fields onto the event under
    # [empow_classification_response] and applies error tags/messages for
    # unsuccessful classifications.
    # NOTE(review): responseBody["response"] is read before the nil guard
    # near the bottom — a nil response body would raise here; confirm
    # upstream always supplies a non-nil response on tagged events.
    def tag_event(classification, event)
      return if classification.nil?

      responseBody = classification.response

      @logger.debug("classification response", :classification => responseBody)

      response = responseBody["response"]

      if !response.nil? && response.size > 0
        response.each do |k, v|
          event.set("[empow_classification_response][#{k}]", v)
        end
      end

      if !classification.is_successful()
        @tag_on_error.each{|tag| event.tag(tag)}

        if (!responseBody.nil?)
          LogStash::Filters::Empow::Utils.add_error(event, responseBody)
        end
      end
    end

    private
    # Stores the event together with its arrival time for later flushing.
    def park(event)
      tuple = {}
      tuple[:event] = event
      tuple[:time] = Time.now

      @parked_events << tuple
    end

    private
    # True when the tuple has been parked longer than max_parking_time.
    def parking_time_expired(tuple)
      return (Time.now - tuple[:time]) > @max_parking_time
    end
  end

end; end; end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module LogStash; module Filters; module Empow;
  # Base value object for a classification result.
  # Exposes the raw response payload plus two flags: whether the
  # classification succeeded and whether it is final (no retry expected).
  class AbstractResponse
    attr_reader :response, :is_successful, :is_final

    def initialize(response, is_successful, is_final)
      @response, @is_successful, @is_final = response, is_successful, is_final
    end
  end

  # A final, unsuccessful classification.
  class FailureResponse < AbstractResponse
    def initialize(response)
      super(response, false, true)
    end
  end

  # A failure caused by missing/invalid credentials.
  # (Class name spelling is part of the public API and is kept as-is.)
  class UnauthorizedReponse < FailureResponse
  end

  # A final, successful classification.
  class SuccessfulResponse < AbstractResponse
    def initialize(response)
      super(response, true, true)
    end
  end

  # A classification that is still pending — successful so far, not final.
  class InProgressResponse < AbstractResponse
    def initialize(response)
      super(response, true, false)
    end
  end
end; end; end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module LogStash; module Filters; module Empow
  # Assorted helpers: boolean coercion, blank-string checks, and tagging
  # events with empow error/warning messages.
  class Utils
    # values accepted as true/false in addition to the strings "true"/"false"
    TRUTHY_VALUES = [true, 1, '1'].freeze
    FALSEY_VALUES = [false, 0, '0'].freeze

    # True when txt is nil or contains only whitespace.
    def self.is_blank_string(txt)
      return (txt.nil? or txt.strip.length == 0)
    end

    # Coerces val to true/false.
    # Returns nil when val is nil or not a recognized boolean representation.
    def self.convert_to_boolean(val)
      return nil if val.nil?

      return true if TRUTHY_VALUES.include?(val)

      return false if FALSEY_VALUES.include?(val)

      return true if (val.is_a?(String) and val.downcase.strip == 'true')

      return false if (val.is_a?(String) and val.downcase.strip == 'false')

      return nil
    end

    # Appends msg to the event's 'empow_errors' field (de-duplicated).
    def self.add_error(event, msg)
      tag_empow_messages(event, msg, 'empow_errors')
    end

    # Appends msg to the event's 'empow_warnings' field (de-duplicated).
    def self.add_warn(event, msg)
      tag_empow_messages(event, msg, 'empow_warnings')
    end

    # Adds msg to the named message field on the event, keeping entries unique.
    def self.tag_empow_messages(event, msg, block)
      messages = event.get(block)

      # using array instead of set, as set raises a logstash exception:
      # No enum constant org.logstash.bivalues.BiValues.ORG_JRUBY_RUBYOBJECTVAR0
      messages ||= Array.new
      messages << msg

      event.set(block, messages.uniq)
    end

    # a bare `private` marker has no effect on `def self.` singleton methods;
    # the original code intended this helper to be private but left it public
    private_class_method :tag_empow_messages
  end

end; end; end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
Gem::Specification.new do |s|
  s.name = 'logstash-filter-empowclassifier'
  s.version = '0.3.15'
  s.licenses = ['Apache-2.0']
  # fixed: the original used adjacent literals 'empow''s which concatenate to
  # "empows" (dropping the apostrophe), and misspelled "classification"
  s.summary = "Logstash intent classification plugin client for accessing empow's classification cloud"
  #s.description = 'Write a longer description or delete this line.'
  s.homepage = 'http://www.empowcybersecurity.com'
  s.authors = ['empow', 'Assaf Abulafia', 'Rami Cohen']
  s.email = ''
  s.require_paths = ['lib']

  # Files
  s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT']
  # Tests

  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "filter" }

  # Gem dependencies
  s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
  s.add_runtime_dependency 'rest-client', '~> 1.8', '>= 1.8.0'
  s.add_runtime_dependency 'lru_redux', '~> 1.1', '>= 1.1.0'
  s.add_runtime_dependency 'json', '~> 1.8', '>= 1.8'
  #s.add_runtime_dependency 'rufus-scheduler'
  s.add_runtime_dependency 'hashie'
  #s.add_runtime_dependency "murmurhash3"

  s.add_development_dependency 'aws-sdk', '~> 3'

  s.add_development_dependency 'logstash-devutils'
  # s.add_runtime_dependency 'jwt', '~> 2.1', '>= 2.1.0'
  s.add_development_dependency "timecop", "~> 0.7"
  s.add_development_dependency "webmock", "~> 1.22", ">= 1.21.0"

  s.add_development_dependency 'elasticsearch'
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
require "logstash/filters/classifier"
|
3
|
+
require "logstash/filters/local-classifier"
|
4
|
+
require "logstash/filters/classification-request"
|
5
|
+
require "logstash/filters/center-client"
|
6
|
+
require "logstash/filters/response"
|
7
|
+
require 'timecop'
|
8
|
+
|
9
|
+
# Specs for the bulk processor: verifies that a pending request is removed
# from the requests map once the classification center answers, and that the
# result is routed to the local classifier (cache-only on failure,
# cache + local db on success).
describe LogStash::Filters::Empow::Classification::BulkProcessor do
  #empow_user, empow_password, cache_size, ttl, async_local_db, elastic_hosts, elastic_index, elastic_username, elastic_password
  let(:time_between_attempts) { 1 }
  let(:batch_size) { 10 }
  let(:max_retries) { 5 }

  # Builds the pending-request bookkeeping tuple the processor consumes.
  def pending_request(request)
    {
      :retries => 1,
      :task => nil,
      :request => request,
      :last_executed => Time.at(310953600)
    }
  end

  # Returns a LocalClassifier double that never answers from cache.
  def stub_local_classifier
    local_classifier = instance_double(LogStash::Filters::Empow::LocalClassifier)
    allow(local_classifier).to receive(:classify).and_return(nil)
    allow(local_classifier).to receive(:close)
    local_classifier
  end

  # Returns a center-client double that answers with the given result map.
  def stub_online_classifier(center_result)
    online_classifier = instance_double(LogStash::Filters::Empow::ClassificationCenterClient)
    allow(online_classifier).to receive(:classify).and_return(center_result)
    online_classifier
  end

  describe "test with mocked classifiers" do
    it "single failed log" do
      Timecop.freeze(Time.now)

      req1 = "request1"
      requests = { req1 => pending_request(req1) }

      local_classifier = stub_local_classifier

      # fixed: the original referenced FailureReponse, which does not exist —
      # response.rb defines FailureResponse
      center_result = { req1 => LogStash::Filters::Empow::FailureResponse.new("failure1") }
      online_classifier = stub_online_classifier(center_result)

      bulk_processor = described_class.new(max_retries, batch_size, time_between_attempts, requests, online_classifier, local_classifier)

      expect(online_classifier).to receive(:classify)
      # a failed classification is cached but not persisted to the local db
      expect(local_classifier).to receive(:add_to_cache)

      bulk_processor.execute

      # the answered request must be cleared from the pending map
      expect(requests[req1]).to be_nil
    end

    it "single successful log" do
      Timecop.freeze(Time.now)

      req1 = "request1"
      requests = { req1 => pending_request(req1) }

      local_classifier = stub_local_classifier

      # fixed: the original referenced SuccessfulReponse (missing an "s"),
      # which does not exist — response.rb defines SuccessfulResponse
      center_result = { req1 => LogStash::Filters::Empow::SuccessfulResponse.new("result1") }
      online_classifier = stub_online_classifier(center_result)

      bulk_processor = described_class.new(max_retries, batch_size, time_between_attempts, requests, online_classifier, local_classifier)

      expect(online_classifier).to receive(:classify)
      # a successful classification is persisted to both cache and local db
      expect(local_classifier).to receive(:save_to_cache_and_db)

      bulk_processor.execute

      expect(requests[req1]).to be_nil
    end
  end
end
|