logstash-filter-threats_classifier 1.0.4
- checksums.yaml +7 -0
- data/CHANGELOG.md +2 -0
- data/CONTRIBUTORS +11 -0
- data/Gemfile +2 -0
- data/LICENSE +11 -0
- data/README.md +64 -0
- data/lib/logstash/filters/center-client.rb +213 -0
- data/lib/logstash/filters/classification-request.rb +17 -0
- data/lib/logstash/filters/classifier-cache.rb +51 -0
- data/lib/logstash/filters/classifier.rb +335 -0
- data/lib/logstash/filters/cognito-client.rb +48 -0
- data/lib/logstash/filters/elastic-db.rb +128 -0
- data/lib/logstash/filters/field-handler.rb +127 -0
- data/lib/logstash/filters/local-classifier.rb +94 -0
- data/lib/logstash/filters/plugin-logic.rb +166 -0
- data/lib/logstash/filters/response.rb +36 -0
- data/lib/logstash/filters/threats_classifier.rb +230 -0
- data/lib/logstash/filters/utils.rb +46 -0
- data/logstash-filter-threats_classifier.gemspec +38 -0
- data/spec/filters/bulk-processor_spec.rb +92 -0
- data/spec/filters/classifier-cache_spec.rb +44 -0
- data/spec/filters/classifier_spec.rb +78 -0
- data/spec/filters/cognito-client_spec.rb +20 -0
- data/spec/filters/field-handler_spec.rb +101 -0
- data/spec/filters/local-classifier_spec.rb +46 -0
- data/spec/filters/plugin-logic_spec.rb +127 -0
- data/spec/filters/threats-classifier_spec.rb +103 -0
- data/spec/filters/utils_spec.rb +74 -0
- data/spec/spec_helper.rb +2 -0
- metadata +256 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 8e639f011ddb86c409900ce85b797485251a1d042b4b1909d333a0abe4144869
+  data.tar.gz: 22650c4aef9462fa2cfb50f37949a8384ddc7a2ae7887996491ec1eacc872b2f
+SHA512:
+  metadata.gz: 2ec63f49cae8ccc6b7f55b709c45cad07d4f565ecc88afe829711f689e89973111d74392689835e42f4a7d2f9d3bf59ab3dd0317311fe0e1d7c0765532644b9a
+  data.tar.gz: e87a67d8b345d894b982c0d1cdaec12eabd43664ed52f86feda3d64a637712ed2901a975a79c82a44905917ec0cd896a9d3552ba1099a9169247276995fca0a4
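A built `.gem` file is a plain tar archive whose members include `metadata.gz` and `data.tar.gz`, so the digests above can be checked locally. A minimal sketch, assuming both files have already been extracted from the downloaded gem into the current directory:

```ruby
require 'digest'

# SHA256 digests copied from the checksums.yaml listed above.
expected_sha256 = {
  'metadata.gz' => '8e639f011ddb86c409900ce85b797485251a1d042b4b1909d333a0abe4144869',
  'data.tar.gz' => '22650c4aef9462fa2cfb50f37949a8384ddc7a2ae7887996491ec1eacc872b2f'
}

expected_sha256.each do |file, expected|
  actual = Digest::SHA256.file(file).hexdigest
  puts "#{file}: #{actual == expected ? 'OK' : 'MISMATCH'}"
end
```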
data/CHANGELOG.md
ADDED
data/CONTRIBUTORS
ADDED
@@ -0,0 +1,11 @@
+The following is a list of people who have contributed ideas, code, bug
+reports, or in general have helped logstash along its way.
+
+Contributors:
+Assaf Abulafia
+Rami Cohen
+
+Note: If you've sent us patches, bug reports, or otherwise contributed to
+Logstash, and you aren't on the list above and want to be, please let us know
+and we'll make sure you're here. Contributions from folks like you are what make
+open source awesome.
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,11 @@
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
data/README.md
ADDED
@@ -0,0 +1,64 @@
+# empow classification plugin
+
+This is a plugin for [Logstash](https://github.com/elastic/logstash).
+
+It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
+
+<a href="https://badge.fury.io/rb/logstash-filter-empowclassifier"><img src="https://badge.fury.io/rb/logstash-filter-empowclassifier.svg" alt="Gem Version" height="18"></a>
+
+# Using the empowclassifier plugin
+
+## Example
+A log may look like this before the classification (in JSON form):
+```
+{
+  "product_type": "IDS",
+  "product_name": "snort",
+  "threat": { "signature": "1:234" }
+}
+```
+
+After filtering with the plugin, the response would contain these fields:
+```
+{
+  "signatureTactics": [
+    {
+      "tactic": "Full compromise - active patterns",
+      "attackStage": "Infiltration",
+      "isSrcPerformer": true
+    }
+  ]
+}
+```
+signatureTactics is an array of the tactics classified by empow.
+
+Each result contains the actual tactic, the attack stage empow classified for this log (determined by the tactic and whether the source and destination are within the user's network), and whether the source was the performer or the victim of this attack.
+
+## Installing the plugin
+```sh
+bin/logstash-plugin install logstash-filter-empowclassifier
+```
+
+## Usage
+```
+input {
+  ...
+}
+
+filter {
+  empowclassifier {
+    username => "cosmo@kramerica.com"
+    password => "12345"
+  }
+}
+
+output {
+  ...
+}
+```
+
+I like rice. Rice is great if you're hungry and want 2000 of something.
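To make the output format above concrete, here is a small plain-Ruby sketch (not part of the plugin) that walks the `signatureTactics` array from the README's example response:

```ruby
# The classification fields from the README example, as a Ruby hash.
classified = {
  'signatureTactics' => [
    {
      'tactic'         => 'Full compromise - active patterns',
      'attackStage'    => 'Infiltration',
      'isSrcPerformer' => true
    }
  ]
}

classified['signatureTactics'].each do |t|
  role = t['isSrcPerformer'] ? 'performer' : 'victim'
  puts "#{t['attackStage']}: #{t['tactic']} (source is the #{role})"
end
```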
data/lib/logstash/filters/center-client.rb
ADDED
@@ -0,0 +1,213 @@
+require "rest-client"
+require "json"
+require 'aws-sdk'
+require_relative 'cognito-client'
+require_relative 'response'
+require_relative 'utils'
+
+
+module LogStash
+  module Filters
+    module Empow
+      class ClassificationCenterClient
+        include LogStash::Util::Loggable
+
+        def initialize(username, password, aws_client_id, url_base)
+          @logger = self.logger
+
+          @token = nil
+          @url_base = url_base
+
+          aws_region = 'us-east-2'
+
+          @cognito_client = LogStash::Filters::Empow::CognitoClient.new(username, password, aws_region, aws_client_id)
+
+          @last_authenticate_minute = 0
+        end
+
+        public
+        def authenticate
+          # fixme: should check token expiration and throttle connections on failure
+
+          @token = nil
+
+          @logger.debug("reconnecting to the classfication center")
+
+          current_minute = (Time.now.to_i / 60)
+          if @last_authenticate_minute < current_minute
+            @last_authenticate_minute = current_minute
+            @last_minute_failed_login_count = 0
+            @last_authentication_error = ''
+          end
+
+          # avoid too many authentication requests
+          if @last_minute_failed_login_count < 3
+            begin
+              @token = @cognito_client.authenticate
+            rescue Aws::CognitoIdentityProvider::Errors::NotAuthorizedException, Aws::CognitoIdentityProvider::Errors::UserNotFoundException, Aws::CognitoIdentityProvider::Errors::UserNotConfirmedException => e
+              @logger.warn("unable to authenticate with classification center", :error => e)
+              @last_authentication_error = e.to_s
+              inc_unsuccessful_logins()
+            rescue StandardError => e
+              @logger.warn("unable to authenticate with classification center", :error => e.class.name)
+              @last_authentication_error = e.class.name.to_s
+              inc_unsuccessful_logins()
+            end
+          end
+
+          return (!@token.nil?)
+        end
+
+        private def inc_unsuccessful_logins()
+          @last_minute_failed_login_count = @last_minute_failed_login_count + 1
+        end
+
+        public
+        def classify(requests)
+          authenticate if @token.nil? # try connecting if not already connected
+
+          res = nil
+
+          begin
+            res = classify_online(requests)
+
+          rescue RestClient::Unauthorized, RestClient::Forbidden, RestClient::UpgradeRequired => err
+            @logger.debug("reconnecting to the empow cloud", :error => err)
+
+            if !authenticate
+              return unauthorized_bulk_response(@last_authentication_error, requests)
+            end
+
+            begin
+              res = classify_online(requests)
+            rescue StandardError => e
+              @logger.debug("encountered an unexpected error on the 2nd attempt", :error => e, :backtrace => e.backtrace)
+
+              error_message = rescue_http_error_result(e)
+
+              return bulk_error(error_message, requests)
+            end
+
+          rescue StandardError => e
+            @logger.error("encountered an unexpected error while querying the center", :error => e)
+
+            error_message = rescue_http_error_result(e)
+
+            return bulk_error(error_message, requests)
+          end
+
+          if res.nil? || res.strip.length == 0
+            return bulk_error("no content", requests)
+          end
+
+          parsed_json = nil
+
+          begin
+            parsed_json = JSON.parse(res)
+          rescue StandardError => e
+            @logger.error("unable to parse json", :json => res)
+            return bulk_error("invalid request", requests)
+          end
+
+          return successful_response(requests, parsed_json)
+        end
+
+        private
+        def rescue_http_error_result(http_error)
+          if (http_error.nil? \
+              or (!defined?(http_error.http_body) or LogStash::Filters::Empow::Utils.is_blank_string(http_error.http_body)))
+            return http_error.to_s
+          else
+            err = http_error.http_body
+
+            begin
+              res = JSON.parse(err)
+              msg = res['message']
+
+              return err if LogStash::Filters::Empow::Utils.is_blank_string(msg)
+
+              return msg
+            rescue StandardError => e
+              @logger.debug("unable to read message body", :error => e)
+              return http_error.http_body
+            end
+          end
+        end
+
+        private
+        def classify_online(bulk_requests)
+          return nil if bulk_requests.nil? or bulk_requests.size == 0
+
+          payload = Array.new(bulk_requests.size)
+
+          bulk_size = bulk_requests.size
+
+          bulk_size.times do |i|
+            payload[i] = bulk_requests[i].to_h
+          end
+
+          payload_json = payload.to_json
+
+          @logger.debug("before online request", :payload => payload_json)
+
+          return RestClient::Request.execute(
+            method: :post,
+            url: "#{@url_base}/intent",
+            payload: payload_json,
+            timeout: 30,
+            headers: { content_type: 'application/json', accept: 'application/json', authorization: @token, Bulksize: bulk_size }
+          ).body
+        end
+
+        private
+        def unauthorized_bulk_response(error_message, requests)
+          return bulk_error_by_type(LogStash::Filters::Empow::UnauthorizedReponse, error_message, requests)
+        end
+
+        private
+        def bulk_error(error_message, requests)
+          return bulk_error_by_type(LogStash::Filters::Empow::FailureResponse, error_message, requests)
+        end
+
+        private
+        def bulk_error_by_type(my_type, error_message, requests)
+          results = Hash.new
+
+          requests.each do |req|
+            res = my_type.new(error_message)
+            results[req] = res
+          end
+
+          return results
+        end
+
+        def successful_response(requests, responses)
+
+          results = Hash.new
+
+          responses.each_with_index do |response, i|
+            req = requests[i]
+            res = nil
+
+            status = response['responseStatus']
+
+            case status
+            when 'SUCCESS'
+              res = LogStash::Filters::Empow::SuccessfulResponse.new(response)
+            when 'IN_PROGRESS'
+              res = LogStash::Filters::Empow::InProgressResponse.new(response)
+            else
+              failure_reason = response['failedReason']
+              res = LogStash::Filters::Empow::FailureResponse.new(failure_reason)
+            end
+
+            results[req] = res
+          end
+
+          return results
+        end
+
+      end
+    end
+  end
+end
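`classify` above POSTs the bulk payload to the `/intent` endpoint under `@url_base` and maps each element of the returned JSON array back to its request by position, branching on `responseStatus`. A standalone sketch of that branching, using an illustrative reply (only `responseStatus`, `failedReason` and `ttlseconds` are field names taken from the code in this diff; the rest of the payload is made up):

```ruby
require 'json'

# Hypothetical bulk reply; one element per request, in request order.
raw = <<~JSON
  [
    { "responseStatus": "SUCCESS", "ttlseconds": 3600 },
    { "responseStatus": "IN_PROGRESS" },
    { "responseStatus": "FAILED", "failedReason": "unknown signature" }
  ]
JSON

JSON.parse(raw).each_with_index do |response, i|
  case response['responseStatus']
  when 'SUCCESS'     then puts "request #{i}: classified"
  when 'IN_PROGRESS' then puts "request #{i}: not final yet, will be retried"
  else                    puts "request #{i}: failed - #{response['failedReason']}"
  end
end
```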
data/lib/logstash/filters/classification-request.rb
ADDED
@@ -0,0 +1,17 @@
+module LogStash; module Filters; module Empow;
+  class LogStash::Filters::Empow::ClassificationRequest < Struct.new(:product_type, :product, :term, :is_src_internal, :is_dst_internal)
+    def initialize(product_type, product, term, is_src_internal, is_dst_internal)
+      if product_type.nil?
+        raise ArgumentError, 'product type cannot be empty'
+      end
+
+      product_type = product_type.upcase.strip
+
+      unless product.nil?
+        product = product.downcase.strip
+      end
+
+      super(product_type, product, term, is_src_internal, is_dst_internal)
+    end
+  end
+end; end; end;
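A quick sketch of how this struct behaves; it needs only Ruby core, so it can be exercised by loading just this file. The term value `'1:234'` is an arbitrary placeholder, not necessarily what field-handler.rb actually builds:

```ruby
require_relative 'classification-request'

req = LogStash::Filters::Empow::ClassificationRequest.new('ids ', ' Snort', '1:234', true, false)

req.product_type   # => "IDS"   (upcased and stripped)
req.product        # => "snort" (downcased and stripped)

# Struct-based equality makes two normalized-equal requests interchangeable,
# e.g. as hash/cache keys:
other = LogStash::Filters::Empow::ClassificationRequest.new('IDS', 'snort', '1:234', true, false)
req == other       # => true
```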
data/lib/logstash/filters/classifier-cache.rb
ADDED
@@ -0,0 +1,51 @@
+require 'time'
+require "lru_redux"
+
+module LogStash
+  module Filters
+    module Empow
+      class ClassifierCache
+        include LogStash::Util::Loggable
+
+        def initialize(cache_size, ttl)
+          @logger ||= self.logger
+
+          @logger.debug("cache size #{cache_size}")
+
+          @lru_cache ||= LruRedux::TTL::ThreadSafeCache.new(cache_size, ttl)
+        end
+
+        def classify(key)
+          return nil if key.nil?
+
+          tuple = @lru_cache[key]
+
+          return nil if tuple.nil?
+
+          expiration_time = tuple[:expiration_time]
+
+          if Time.now > expiration_time
+            @lru_cache.evict(key)
+            return nil
+          end
+
+          res = tuple[:val]
+
+          return res
+        end
+
+        def put(key, val, expiration_time)
+          return if key.nil?
+
+          @logger.debug("caching new entry", :key => key, :val => val)
+
+          tuple = {}
+          tuple[:val] = val
+          tuple[:expiration_time] = expiration_time
+
+          @lru_cache[key] = tuple
+        end
+      end
+    end
+  end
+end
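A usage sketch for the cache, assuming a Logstash runtime (the class mixes in `LogStash::Util::Loggable`) and the `lru_redux` gem; entries effectively expire at the earlier of the constructor TTL and the per-entry `expiration_time` passed to `put`:

```ruby
cache = LogStash::Filters::Empow::ClassifierCache.new(1000, 3600)

key = 'IDS/snort/1:234' # any hashable object works as a key; a string is used here for brevity

# Cache a (made-up) classification result for five minutes.
cache.put(key, { 'responseStatus' => 'SUCCESS' }, Time.now + 300)

cache.classify(key) # => the cached value, or nil once the entry has expired or been evicted
```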
data/lib/logstash/filters/classifier.rb
ADDED
@@ -0,0 +1,335 @@
+require 'thread'
+require 'time'
+java_import java.util.concurrent.ArrayBlockingQueue
+java_import java.util.concurrent.TimeUnit
+java_import java.lang.InterruptedException
+
+require_relative 'response'
+
+module LogStash; module Filters; module Empow;
+  class Classifier
+    include LogStash::Util::Loggable
+
+    MAX_CONCURRENT_REQUESTS = 10000
+    BATCH_TIMEOUT = 10
+
+    def initialize(online_classifer, local_classifier, online_classification_workers, batch_size, batch_interval, max_retries, time_between_queries)
+      @logger ||= self.logger
+
+      @logger.info("initializing classifier")
+
+      @local_classifier = local_classifier
+      @online_classifer = online_classifer
+      @batch_interval = batch_interval
+      @time_between_queries = time_between_queries
+
+      @inflight_requests = Concurrent::Hash.new
+      @new_request_queue = java.util.concurrent.ArrayBlockingQueue.new(MAX_CONCURRENT_REQUESTS)
+
+      @bulk_processor = Classification::BulkProcessor.new(max_retries, batch_size, time_between_queries, @inflight_requests, online_classifer, local_classifier, online_classification_workers)
+
+      @worker_pool = Concurrent::FixedThreadPool.new(1)
+
+      @worker_pool.post do
+        while @worker_pool.running? do
+          begin
+            management_task()
+          rescue StandardError => e
+            @logger.error("encountered an error while running the management task", :error => e, :backtrace => e.backtrace)
+          end
+        end
+      end
+      @logger.debug("classifier initialized")
+
+      @last_action_time = Time.now
+    end
+
+    public
+    def close
+      @logger.info("shutting down empow's classifcation plugin")
+
+      @inflight_requests.clear()
+
+      @bulk_processor.close
+
+      @worker_pool.kill()
+      @worker_pool.wait_for_termination(5)
+
+      @logger.info("empow classifcation plugin closed")
+    end
+
+    private
+    def management_task
+      begin
+        current_time = Time.now
+
+        diff = (current_time - @bulk_processor.get_last_execution_time()).round
+
+        sleep_time = @batch_interval - diff
+
+        sleep_time = 0 if sleep_time < 0 # in case the rounding caused the number to be smaller than zero
+
+        dequeued_request = nil
+        begin
+          dequeued_request = @new_request_queue.poll(sleep_time, TimeUnit::SECONDS)
+        rescue java.lang.InterruptedException => e
+        end
+
+        # if this is a 'tick'
+        if dequeued_request.nil?
+          @bulk_processor.flush_current_batch
+        else
+          @bulk_processor.add_to_batch(dequeued_request)
+        end
+
+        # skip the 'tick' if the timer hasn't expired
+        return if current_time - @last_action_time < @time_between_queries
+
+        @last_action_time = current_time
+
+        @bulk_processor.retry_queued_requests()
+      rescue StandardError => e
+        @logger.error("encountered an error while running the management task", :error => e, :backtrace => e.backtrace)
+      end
+    end
+
+    public
+    def classify(request)
+      return nil if request.nil?
+
+      res = @local_classifier.classify(request)
+
+      @logger.trace("cached result", :request => request, :res => res)
+
+      return res if !res.nil?
+
+      request_online_classifiction(request)
+
+      return nil
+    end
+
+    private
+    def request_online_classifiction(req)
+      existing_request = @inflight_requests[req]
+
+      return if !existing_request.nil? # request already handled by a worker
+
+      @logger.debug("adding request to online classification queue", :request => req)
+
+      task = create_task(req)
+
+      # mark request as in progress
+      @inflight_requests[req] = task
+
+      res = @new_request_queue.offer(req)
+
+      @logger.warn("queue full, request rejected", :request => req) if !res
+    end
+
+    private
+    def create_task(request)
+      tuple = {}
+      tuple[:retries] = 0
+      tuple[:request] = request
+      tuple[:last_executed] = Time.at(310953600)
+
+      return tuple
+    end
+  end # class Classifier
+
+  module Classification
+
+    class BulkProcessor
+      include LogStash::Util::Loggable
+
+      ERROR_TTL_SECS = 60
+      THREAD_IDLE_TIME = 60
+      BATCH_TIMEOUT = 10
+
+      public
+      def initialize(max_retries, batch_size, sec_between_attempts, requests_queue, online_classifer, local_classifier, max_concurrent_threads)
+        @logger ||= self.logger
+
+        @max_retries = max_retries
+        @max_batch_size = batch_size
+        @sec_between_attempts = sec_between_attempts
+        @requests_queue = requests_queue
+        @online_classifer = online_classifer
+        @local_classifier = local_classifier
+
+        @online_classification_workers = Concurrent::ThreadPoolExecutor.new(min_threads: 1, max_threads: max_concurrent_threads, idletime: THREAD_IDLE_TIME)
+
+        clear_batch(Time.now)
+      end
+
+      public
+      def close
+        @online_classification_workers.kill()
+        @online_classification_workers.wait_for_termination(10)
+      end
+
+      public
+      def add_to_batch(request)
+        # add the new request to the batch
+        @current_batch_size = @current_batch_size + 1
+        @current_batch << request
+
+        flush_current_batch
+      end
+
+      public
+      def flush_current_batch
+        current_time = Time.now
+
+        # check if the current batch is full or timed out
+        if (@current_batch_size == @max_batch_size \
+            or (@current_batch_size > 0 and (current_time - @last_execution_time) > BATCH_TIMEOUT))
+
+          bulk_size = @current_batch_size
+          batch = @current_batch
+
+          @online_classification_workers.post do
+            st = Time.now
+            classify_online(batch)
+            et = Time.now
+            diff = (et - st)
+
+            @logger.debug("response received", :bulk_size => bulk_size, :time => diff)
+          end
+
+          clear_batch(current_time)
+        elsif @current_batch_size == 0
+          @last_execution_time = current_time
+        end
+      end
+
+      public
+      def get_last_execution_time
+        return @last_execution_time
+      end
+
+      private
+      def clear_batch(current_time)
+        @current_batch = Array.new
+        @current_batch_size = 0
+        @last_execution_time = current_time
+      end
+
+      public
+      def retry_queued_requests
+        @logger.debug("retrying queued requests")
+
+        current_time = Time.now
+        batch_size = 0
+        batch = Array.new
+
+        @requests_queue.each do |k, v|
+          last_execution_time = v[:last_executed]
+
+          if batch_size == @max_batch_size
+            @online_classification_workers.post do
+              classify_online(batch)
+            end
+
+            batch_size = 0
+            batch = Array.new
+          end
+
+          if last_execution_time + @sec_between_attempts > current_time
+            next
+          end
+
+          batch << k
+
+          v[:last_executed] = current_time
+          v[:retries] = v[:retries] + 1
+
+          batch_size = batch_size + 1
+        end
+
+        if batch_size > 0
+          @online_classification_workers.post do
+            classify_online(batch)
+          end
+        end
+
+        # remove requests that were in the queue for too long
+        @requests_queue.delete_if {|key, value| value[:retries] >= @max_retries }
+      end
+
+      private
+      def classify_online(bulk_request)
+
+        results = nil
+        current_time = Time.now
+
+        batch = Array.new
+
+        bulk_request.each do |req|
+          task = @requests_queue[req]
+
+          next if task.nil? # resolved by an earlier thread
+
+          task[:last_executed] = current_time
+          task[:retries] = task[:retries] + 1
+
+          batch << req
+        end
+
+        begin
+          results = @online_classifer.classify(batch)
+        rescue StandardError => e
+          @logger.debug("bulk request ended with a failure, all requests will be removed from queue", :error => e, :backtrace => e.backtrace)
+
+          batch.each do |req|
+            @requests_queue.delete(req)
+          end
+
+          return # nothing to process if the bulk request itself failed
+        end
+
+        if results.size != batch.size
+          @logger.warn("result array isn't the same size as the request array. requests: #{batch.size}. results: #{results.size}")
+          return
+        end
+
+        results.each do |request, res|
+          @logger.debug("processing response", :request => request, :response => res)
+
+          begin
+            expiration_time = Time.now + get_response_ttl(res)
+
+            if res.is_successful
+              # validate the response if needed
+              # put the result in memory and in the local db
+              @local_classifier.save_to_cache_and_db(request, res, expiration_time)
+            else
+              @local_classifier.add_to_cache(request, res, expiration_time) # log the failed result for tagging
+            end
+          rescue StandardError => e
+            @logger.error("encountered an error while trying to process result", :request => request, :error => e, :backtrace => e.backtrace)
+          end
+
+          if res.is_final # in case of anti-malware, the result may change till the classification process is done
+            @requests_queue.delete(request)
+          end
+        end
+      end
+
+      private def get_response_ttl(res)
+        return ERROR_TTL_SECS if !res.is_successful
+
+        responseBody = res.response
+
+        ttl = responseBody['ttlseconds']
+
+        if ttl.nil? or ttl < 0
+          ttl = 60
+        end
+
+        return ttl
+      end
+
+    end # class BulkProcessor
+
+  end # module Classification
+
+end; end; end
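`Classifier#classify` is non-blocking: it returns a locally cached result when one exists, and otherwise returns `nil` after queueing the request for the background batch workers. A hypothetical caller-side polling loop (not the plugin's actual code) that waits for the cache to be populated might look like this:

```ruby
def wait_for_classification(classifier, request, timeout = 30)
  deadline = Time.now + timeout

  loop do
    res = classifier.classify(request) # nil means the request was queued for online classification
    return res unless res.nil?
    return nil if Time.now > deadline

    sleep 1
  end
end
```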