logstash-filter-accesswatch 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/logstash/filters/accesswatch.rb +113 -96
- data/logstash-filter-accesswatch.gemspec +5 -6
- metadata +20 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2f75068c27fcfbe8f6630eeb56055d994e30fda75f0cf342fd94cb84ad2c06fc
|
4
|
+
data.tar.gz: 72df9db9018bd46970ac20827b17b26cc2ac11fbaee8e8348eaf57e67198888a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d6831752d2352e47e7613ecf1c1edd72f82bd904fb4475e19d8ef509cc8fcbe6475fc6aef8d49be1736262cfa81dd5790cf40a24b337137caa4db3d28891553
|
7
|
+
data.tar.gz: 61d7c93786d2590784452dafba8833733079f72e4bbe3f40a97bd12e412dfbd208f2d7a61117bf65b86f6e1650d1565ece9dd48fd6c31227f41f1a3b56bb27a2
|
@@ -1,130 +1,147 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require "logstash/filters/base"
|
3
3
|
require "logstash/namespace"
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require 'digest'
|
4
|
+
require "logstash/plugin_mixins/http_client"
|
5
|
+
require "json"
|
6
|
+
require "digest"
|
7
|
+
require "lru_redux"
|
9
8
|
|
10
9
|
# The Access Watch filter adds information about robots visiting
|
11
10
|
# your website based on data from our robots database.
|
12
|
-
#
|
13
|
-
# The following fields might be created:
|
14
|
-
# [identity][type] "robot" If the visitor is a robot.
|
15
|
-
# [reputation][status] string The reputation of the visitor (see below).
|
16
|
-
# [robot][id] number A unique robot identifier
|
17
|
-
# [robot][name] string A robot's name to display to the user.
|
18
|
-
# [robot][url] string A link to the robot's page on the Access Watch database.
|
19
|
-
#
|
20
|
-
# Access Watch defines the following reputation statuses:
|
21
|
-
#
|
22
|
-
# nice perfect, as far as we know you can trust this entity
|
23
|
-
# ok all right, so far no reason to worry about this entity
|
24
|
-
# suspicious warning, nothing really bad, but the entity is on our radar
|
25
|
-
# bad danger, there is good reasons to watch or block this entity
|
26
|
-
#
|
27
|
-
# This filter requires the Access Watch `robots.json` file to run.
|
28
|
-
#
|
29
11
|
|
30
12
|
class LogStash::Filters::Accesswatch < LogStash::Filters::Base
|
31
13
|
|
14
|
+
include LogStash::PluginMixins::HttpClient
|
15
|
+
|
32
16
|
config_name "accesswatch"
|
33
17
|
|
34
|
-
#
|
35
|
-
|
36
|
-
|
37
|
-
#
|
38
|
-
config :
|
18
|
+
# Your API Key
|
19
|
+
config :api_key, :validate => :string, :required => true
|
20
|
+
|
21
|
+
# The size of the local cache, 0 to deactivate
|
22
|
+
config :cache_size, :validate => :number, :default => 10000
|
39
23
|
|
40
24
|
# The field containing the IP address.
|
41
25
|
config :ip_source, :validate => :string, :required => true
|
42
26
|
|
43
27
|
# The field containing the User-Agent string.
|
44
|
-
config :
|
45
|
-
|
46
|
-
#
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
28
|
+
config :user_agent_source, :validate => :string, :required => true
|
29
|
+
|
30
|
+
# The destination field for address data
|
31
|
+
config :address_destination, :validate => :string
|
32
|
+
|
33
|
+
# The destination field for user-agent data
|
34
|
+
config :user_agent_destination, :validate => :string
|
35
|
+
|
36
|
+
# The destination field for robot data
|
37
|
+
config :robot_destination, :validate => :string
|
38
|
+
|
39
|
+
# The destination field for reputation data
|
40
|
+
config :reputation_destination, :validate => :string
|
41
|
+
|
42
|
+
@@address_keys = ["value", "hostname", "country_code", "flags"]
|
43
|
+
@@robot_keys = ["id", "name", "url"]
|
44
|
+
|
45
|
+
public
|
46
|
+
# Plugin setup hook: creates the local response cache.
#
# A cache holding up to `cache_size` entries is created only when that
# setting is greater than zero; otherwise @cache is left unset, which
# with_cache treats as "caching disabled".
#
# Returns the new cache, or nil when caching is disabled.
def register
  return unless @cache_size > 0

  @cache = LruRedux::ThreadSafeCache.new(@cache_size)
end
|
54
51
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
52
|
+
# Converts an HTTP response from the API into a result hash.
#
# response - object exposing `code` (HTTP status) and `body` (String).
#
# Returns
#   {:status => :success, :data => parsed_body}       when the status is 200
#   {:status => :error, :code => ..., :message => ...} otherwise
#
# A body that is not valid JSON (an HTML error page, an empty body) is
# reported as an :error result instead of raising, so that one bad response
# cannot abort event processing. In that case :code carries the HTTP status.
def handle_response(response)
  begin
    data = JSON.parse(response.body)
  rescue JSON::ParserError, TypeError
    # TypeError covers a nil body; ParserError covers empty or non-JSON text.
    return {:status => :error,
            :code => response.code,
            :message => "Invalid JSON in Access Watch response (HTTP #{response.code})"}
  end

  if response.code == 200
    {:status => :success,
     :data => data}
  elsif data.is_a?(Hash)
    # Error payloads carry their own "code" and "message" fields.
    {:status => :error,
     :code => data["code"],
     :message => data["message"]}
  else
    # Valid JSON, but not an object: there are no fields to read from it.
    {:status => :error,
     :code => response.code,
     :message => "Unexpected Access Watch response (HTTP #{response.code})"}
  end
end
|
63
|
+
|
64
|
+
# Builds the absolute API URL for an endpoint path.
#
# path - endpoint path appended verbatim to the API host, e.g. "/1.1/identity".
#
# Returns the URL as a String.
#
# NOTE(review): the host is addressed over plain http, so the Api-Key header
# set by the request helpers is presumably sent unencrypted - confirm whether
# an https endpoint should be used instead.
def url(path)
  host = "http://api.access.watch"
  "#{host}#{path}"
end
|
67
|
+
|
68
|
+
# Performs an authenticated GET request against the API.
#
# path - endpoint path, expanded to a full URL by `url`.
#
# The request carries the configured API key and asks for a JSON answer.
# `client` is presumably supplied by the included HTTP client mixin.
#
# Returns the result hash produced by handle_response.
def get_json(path)
  request_headers = {
    "Api-Key" => @api_key,
    "Accept" => "application/json",
    "User-Agent" => "Access Watch Logstash Plugin/0.2.0"
  }
  handle_response(client.get(url(path), headers: request_headers))
end
|
75
|
+
|
76
|
+
# Performs an authenticated POST request with a JSON body against the API.
#
# path - endpoint path, expanded to a full URL by `url`.
# data - object serialised with JSON.generate and sent as the request body.
#
# `client` is presumably supplied by the included HTTP client mixin.
#
# Returns the result hash produced by handle_response.
def post_json(path, data)
  request_headers = {
    "Api-Key" => @api_key,
    "Accept" => "application/json",
    "Content-Type" => "application/json",
    "User-Agent" => "Access Watch Logstash Plugin/0.2.0"
  }
  handle_response(
    client.post(url(path), headers: request_headers, body: JSON.generate(data))
  )
end
|
85
|
+
|
86
|
+
# Runs the given lookup block through the local cache, when one is configured.
#
# id    - cache key identifying the lookup.
# block - performs the actual lookup and returns its result.
#
# Only results shaped like {:status => :success, ...} are stored. Storing
# every result (as a plain getset would) also memoises :error results, so a
# single transient API failure would keep being replayed for that key until
# the entry is evicted.
#
# The lookup is done with separate read and write steps rather than inside a
# getset block, so the (potentially slow) block does not run while the cache
# is being accessed. Concurrent misses on the same key may therefore each run
# the block; the last successful result wins.
#
# Returns the cached value on a hit, otherwise the block's result.
def with_cache(id, &block)
  return block.call unless @cache

  hit = @cache[id]
  return hit unless hit.nil?

  result = block.call
  @cache[id] = result if result.is_a?(Hash) && result[:status] == :success
  result
end
|
93
|
+
|
94
|
+
# Looks up address data for an IP, going through the local cache.
#
# ip - the address, interpolated into both the cache key and the request path.
#
# Returns the result hash of the GET request (see handle_response).
def fetch_address(ip)
  cache_key = "ip-#{ip}"
  with_cache(cache_key) do
    get_json("/1.1/address/#{ip}")
  end
end
|
68
99
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
robots = JSON.parse(file)
|
73
|
-
robots.each {|robot|
|
74
|
-
if !robot['cidrs'].nil?
|
75
|
-
robot['cidrs'] = robot['cidrs'].collect {|cidr| cidr2range(cidr)}
|
76
|
-
end
|
100
|
+
# Looks up data for a User-Agent string, going through the local cache.
#
# user_agent - the raw User-Agent string; its MD5 digest forms the cache key
#              and the string itself is posted as the "value" field.
#
# Returns the result hash of the POST request (see handle_response).
def fetch_user_agent(user_agent)
  digest = Digest::MD5.hexdigest(user_agent)
  with_cache("ua-#{digest}") do
    post_json("/1.1/user-agent", {:value => user_agent})
  end
end
|
83
105
|
|
84
|
-
|
85
|
-
|
86
|
-
|
106
|
+
# Looks up the identity for an (IP, User-Agent) pair, going through the cache.
#
# ip         - the address; nil is replaced by an empty string.
# user_agent - the User-Agent string; nil is replaced by an empty string.
#
# The cache key combines the MD5 digests of both values.
#
# Returns the result hash of the POST request (see handle_response).
def fetch_identity(ip, user_agent)
  ip ||= ""
  user_agent ||= ""
  ip_digest = Digest::MD5.hexdigest(ip)
  ua_digest = Digest::MD5.hexdigest(user_agent)
  with_cache("identity-#{ip_digest}-#{ua_digest}") do
    post_json("/1.1/identity", {:address => ip, :user_agent => user_agent})
  end
end
|
88
113
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
if ip
|
96
|
-
i = ip.ipv4? ? ip.ipv4_mapped.to_i : ip.to_i # convert IP to arbitrary length integer
|
97
|
-
ip_candidates = @ip2robots[i]
|
98
|
-
cidrs = @ip2cidrs.search(i)
|
99
|
-
cidr_candidates = cidrs.collect {|cidr| @cidr2robots[cidr]}.reduce([], :concat) unless cidrs.nil?
|
100
|
-
end
|
101
|
-
# Look for robots with the same User-Agent
|
102
|
-
ua_candidates = []
|
103
|
-
if ua
|
104
|
-
ua_candidates = @ua2robots[Digest::MD5.hexdigest(ua)]
|
105
|
-
end
|
106
|
-
# Make a final decision
|
107
|
-
robots = ((ip_candidates | cidr_candidates) & ua_candidates)
|
108
|
-
if !robots.empty?
|
109
|
-
robot = robots[0]
|
110
|
-
url = "https://access.watch/database/robots/#{robot['reputation']}/#{robot['urlid'] or robot['id']}"
|
111
|
-
{'identity' => {'type' => 'robot'},
|
112
|
-
'robot' => {'id' => robot['id'],
|
113
|
-
'name' => robot['name'],
|
114
|
-
'url' => url},
|
115
|
-
'reputation' => {'status' => robot['reputation']}}
|
114
|
+
# Copies selected, non-blank entries of `data` into a field of the event.
#
# event       - object receiving the result through `set(destination, hash)`.
# destination - name of the target field; nothing is written when nil.
# data        - hash of values to copy; nothing is written when nil.
# keys        - optional whitelist of keys to keep; nil keeps every key.
#
# Entries whose value is nil or empty are dropped. Emptiness is only tested
# on values that respond to `empty?`, so scalar values such as numbers or
# booleans are kept instead of raising NoMethodError.
#
# Returns whatever `event.set` returns, or nil when nothing was written.
def augment(event, destination, data, keys=nil)
  return unless destination && data

  kept = data.select do |key, value|
    wanted = keys.nil? || keys.include?(key)
    blank = value.nil? || (value.respond_to?(:empty?) && value.empty?)
    wanted && !blank
  end
  event.set(destination, kept)
end
|
118
122
|
|
119
123
|
public
|
120
124
|
# Enriches an event with data looked up from its IP address and User-Agent.
#
# event - the event being processed; the source fields are read with `get`
#         and results are written through `augment`.
#
# Depending on which source settings are present, one lookup is performed
# (identity, address only, or user-agent only). Its data is copied to the
# configured destination fields only when the lookup reports :success.
# `filter_matched` is always called at the end.
#
# NOTE(review): ip_source and user_agent_source are both declared with
# :required => true, so the address-only and user-agent-only branches look
# unreachable - confirm the intent.
# NOTE(review): @user_agent_destination is only written in the last branch,
# never in the identity branch - confirm the intent.
def filter(event)
  ip = event.get(@ip_source)
  user_agent = event.get(@user_agent_source)

  if @ip_source && @user_agent_source
    lookup = fetch_identity(ip, user_agent)
    if lookup[:status] == :success
      identity = lookup[:data]
      augment(event, @address_destination, identity["address"], @@address_keys)
      augment(event, @robot_destination, identity["robot"], @@robot_keys)
      augment(event, @reputation_destination, identity["reputation"])
    end
  elsif @ip_source
    lookup = fetch_address(ip)
    augment(event, @address_destination, lookup[:data], @@address_keys) if lookup[:status] == :success
  else
    lookup = fetch_user_agent(user_agent)
    augment(event, @user_agent_destination, lookup[:data]) if lookup[:status] == :success
  end

  filter_matched(event)
end
|
@@ -1,10 +1,10 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
Gem::Specification.new do |s|
|
3
3
|
s.name = 'logstash-filter-accesswatch'
|
4
|
-
s.version = '0.
|
4
|
+
s.version = '0.2.0'
|
5
5
|
s.licenses = ['Apache-2.0']
|
6
6
|
s.summary = 'The Logstash filter plugin for Access Watch (http://access.watch).'
|
7
|
-
s.description = 'The Access Watch filter adds information about robots visiting your website based on data from our
|
7
|
+
s.description = 'The Access Watch filter adds information about robots visiting your website based on data from our robot database.'
|
8
8
|
s.homepage = 'http://access.watch'
|
9
9
|
s.authors = ['Benoît Fleury']
|
10
10
|
s.email = 'benoit@access.watch'
|
@@ -12,16 +12,15 @@ Gem::Specification.new do |s|
|
|
12
12
|
|
13
13
|
# Files
|
14
14
|
s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','Gemfile','LICENSE']
|
15
|
-
# Tests
|
16
|
-
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
17
15
|
|
18
16
|
# Special flag to let us know this is actually a logstash plugin
|
19
17
|
s.metadata = { 'logstash_plugin' => 'true',
|
20
18
|
'logstash_group' => 'filter' }
|
21
19
|
|
22
20
|
# Gem dependencies
|
23
|
-
s.add_runtime_dependency 'logstash-core-plugin-api',
|
24
|
-
s.add_runtime_dependency '
|
21
|
+
s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.0'
|
22
|
+
s.add_runtime_dependency 'logstash-mixin-http_client', '~> 5.2'
|
23
|
+
s.add_runtime_dependency 'lru_redux', '~> 1.1'
|
25
24
|
|
26
25
|
s.add_development_dependency 'logstash-devutils', '1.3.3'
|
27
26
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-accesswatch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benoît Fleury
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-07-
|
11
|
+
date: 2017-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -29,15 +29,29 @@ dependencies:
|
|
29
29
|
requirements:
|
30
30
|
- - "~>"
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
33
|
-
name:
|
32
|
+
version: '5.2'
|
33
|
+
name: logstash-mixin-http_client
|
34
34
|
prerelease: false
|
35
35
|
type: :runtime
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: '5.2'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '1.1'
|
47
|
+
name: lru_redux
|
48
|
+
prerelease: false
|
49
|
+
type: :runtime
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.1'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
requirement: !ruby/object:Gem::Requirement
|
43
57
|
requirements:
|
@@ -53,7 +67,7 @@ dependencies:
|
|
53
67
|
- !ruby/object:Gem::Version
|
54
68
|
version: 1.3.3
|
55
69
|
description: The Access Watch filter adds information about robots visiting your website
|
56
|
-
based on data from our
|
70
|
+
based on data from our robot database.
|
57
71
|
email: benoit@access.watch
|
58
72
|
executables: []
|
59
73
|
extensions: []
|