logstash-filter-accesswatch 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/logstash/filters/accesswatch.rb +113 -96
- data/logstash-filter-accesswatch.gemspec +5 -6
- metadata +20 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2f75068c27fcfbe8f6630eeb56055d994e30fda75f0cf342fd94cb84ad2c06fc
|
4
|
+
data.tar.gz: 72df9db9018bd46970ac20827b17b26cc2ac11fbaee8e8348eaf57e67198888a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d6831752d2352e47e7613ecf1c1edd72f82bd904fb4475e19d8ef509cc8fcbe6475fc6aef8d49be1736262cfa81dd5790cf40a24b337137caa4db3d28891553
|
7
|
+
data.tar.gz: 61d7c93786d2590784452dafba8833733079f72e4bbe3f40a97bd12e412dfbd208f2d7a61117bf65b86f6e1650d1565ece9dd48fd6c31227f41f1a3b56bb27a2
|
@@ -1,130 +1,147 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require "logstash/filters/base"
|
3
3
|
require "logstash/namespace"
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require 'digest'
|
4
|
+
require "logstash/plugin_mixins/http_client"
|
5
|
+
require "json"
|
6
|
+
require "digest"
|
7
|
+
require "lru_redux"
|
9
8
|
|
10
9
|
# The Access Watch filter adds information about robots visiting
|
11
10
|
# your website based on data from our robots database.
|
12
|
-
#
|
13
|
-
# The following fields might be created:
|
14
|
-
# [identity][type] "robot" If the visitor is a robot.
|
15
|
-
# [reputation][status] string The reputation of the visitor (see below).
|
16
|
-
# [robot][id] number A unique robot identifier
|
17
|
-
# [robot][name] string A robot's name to display to the user.
|
18
|
-
# [robot][url] string A link to the robot's page on the Access Watch database.
|
19
|
-
#
|
20
|
-
# Access Watch defines the following reputation statuses:
|
21
|
-
#
|
22
|
-
# nice perfect, as far as we know you can trust this entity
|
23
|
-
# ok all right, so far no reason to worry about this entity
|
24
|
-
# suspicious warning, nothing really bad, but the entity is on our radar
|
25
|
-
# bad danger, there is good reasons to watch or block this entity
|
26
|
-
#
|
27
|
-
# This filter requires the Access Watch `robots.json` file to run.
|
28
|
-
#
|
29
11
|
|
30
12
|
class LogStash::Filters::Accesswatch < LogStash::Filters::Base
|
31
13
|
|
14
|
+
include LogStash::PluginMixins::HttpClient
|
15
|
+
|
32
16
|
config_name "accesswatch"
|
33
17
|
|
34
|
-
#
|
35
|
-
|
36
|
-
|
37
|
-
#
|
38
|
-
config :
|
18
|
+
# Your API Key
|
19
|
+
config :api_key, :validate => :string, :required => true
|
20
|
+
|
21
|
+
# The size of the local cache, 0 to deactivate
|
22
|
+
config :cache_size, :validate => :number, :default => 10000
|
39
23
|
|
40
24
|
# The field containing the IP address.
|
41
25
|
config :ip_source, :validate => :string, :required => true
|
42
26
|
|
43
27
|
# The field containing the User-Agent string.
|
44
|
-
config :
|
45
|
-
|
46
|
-
#
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
28
|
+
config :user_agent_source, :validate => :string, :required => true
|
29
|
+
|
30
|
+
# The destination field for address data
|
31
|
+
config :address_destination, :validate => :string
|
32
|
+
|
33
|
+
# The destination field for user-agent data
|
34
|
+
config :user_agent_destination, :validate => :string
|
35
|
+
|
36
|
+
# The destination field for robot data
|
37
|
+
config :robot_destination, :validate => :string
|
38
|
+
|
39
|
+
# The destination field for reputation data
|
40
|
+
config :reputation_destination, :validate => :string
|
41
|
+
|
42
|
+
@@address_keys = ["value", "hostname", "country_code", "flags"]
|
43
|
+
@@robot_keys = ["id", "name", "url"]
|
44
|
+
|
45
|
+
public
|
46
|
+
# Prepares the optional local response cache.
#
# A positive `cache_size` creates a thread-safe LRU cache holding at most
# that many entries; with 0 (or a negative value) no cache is created and
# `@cache` stays unset, which `with_cache` treats as "caching disabled".
def register
  return unless @cache_size > 0

  @cache = LruRedux::ThreadSafeCache.new(@cache_size)
end
|
54
51
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
52
|
+
# Converts an HTTP response into a status hash.
#
# response - object responding to `code` (HTTP status) and `body` (String).
#
# Returns
#   {:status => :success, :data => <parsed JSON>}        for a 200 with a JSON body
#   {:status => :error, :code => ..., :message => ...}   otherwise
#
# Never raises on a malformed body: an unparsable or missing payload (for
# example an HTML error page returned by a proxy) is reported as an :error
# result carrying the HTTP status code, so one bad response cannot abort
# event processing.
def handle_response(response)
  data = begin
    JSON.parse(response.body)
  rescue JSON::ParserError, TypeError
    # ParserError: body is not JSON; TypeError: body is nil.
    nil
  end

  if response.code == 200 && !data.nil?
    {:status => :success,
     :data => data}
  elsif data.is_a?(Hash)
    # Error payload produced by the API itself.
    {:status => :error,
     :code => data["code"],
     :message => data["message"]}
  else
    {:status => :error,
     :code => response.code,
     :message => "Unexpected response from the Access Watch API"}
  end
end
|
63
|
+
|
64
|
+
# Builds the absolute API URL for the given request path.
#
# path - request path, expected to start with "/" (e.g. "/1.1/identity").
#
# NOTE(review): the base URL uses plain HTTP while requests carry the
# `Api-Key` header — confirm whether the service should be reached over HTTPS.
def url(path)
  api_root = "http://api.access.watch"
  "#{api_root}#{path}"
end
|
67
|
+
|
68
|
+
# Issues a GET request against the API and returns the status hash
# produced by `handle_response`.
#
# path - request path, passed through `url`.
#
# `client` is presumably supplied by the HTTP client mixin included in
# this class — verify against the mixin's documentation.
def get_json(path)
  request_headers = {
    "Api-Key" => @api_key,
    "Accept" => "application/json",
    "User-Agent" => "Access Watch Logstash Plugin/0.2.0"
  }
  handle_response(client.get(url(path), headers: request_headers))
end
|
75
|
+
|
76
|
+
# Issues a POST request with a JSON body against the API and returns the
# status hash produced by `handle_response`.
#
# path - request path, passed through `url`.
# data - object serialised with JSON.generate and sent as the request body.
#
# `client` is presumably supplied by the HTTP client mixin included in
# this class — verify against the mixin's documentation.
def post_json(path, data)
  request_headers = {
    "Api-Key" => @api_key,
    "Accept" => "application/json",
    "Content-Type" => "application/json",
    "User-Agent" => "Access Watch Logstash Plugin/0.2.0"
  }
  payload = JSON.generate(data)
  handle_response(client.post(url(path), headers: request_headers, body: payload))
end
|
85
|
+
|
86
|
+
# Returns the cached value for +id+, or runs the block to compute it.
#
# id    - cache key.
# block - computes the value on a cache miss.
#
# When no cache is configured (`@cache` unset) the block is simply called.
#
# Failed lookups are deliberately NOT stored: a result hash whose :status
# is :error (and a nil result) is returned to the caller but left out of
# the cache, so a transient API failure is retried on the next event
# instead of being served from the cache until the entry is evicted.
def with_cache(id, &block)
  return block.call unless @cache

  hit = @cache[id]
  return hit unless hit.nil?

  result = block.call
  failed = result.nil? || (result.is_a?(Hash) && result[:status] == :error)
  @cache[id] = result unless failed
  result
end
|
93
|
+
|
94
|
+
# Looks up address information for +ip+, going through the local cache.
#
# ip - value interpolated into both the cache key and the request path.
#
# Returns the status hash produced by `get_json`.
def fetch_address(ip)
  cache_key = "ip-#{ip}"
  with_cache(cache_key) { get_json("/1.1/address/#{ip}") }
end
|
68
99
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
robots = JSON.parse(file)
|
73
|
-
robots.each {|robot|
|
74
|
-
if !robot['cidrs'].nil?
|
75
|
-
robot['cidrs'] = robot['cidrs'].collect {|cidr| cidr2range(cidr)}
|
76
|
-
end
|
100
|
+
# Looks up User-Agent information, going through the local cache.
#
# user_agent - the User-Agent string; nil is treated as an empty string,
#              matching `fetch_identity`, because Digest::MD5.hexdigest
#              raises TypeError on nil.
#
# The cache key is the MD5 hex digest of the string.
#
# Returns the status hash produced by `post_json`.
def fetch_user_agent(user_agent)
  user_agent = user_agent || ""
  with_cache("ua-#{Digest::MD5.hexdigest(user_agent)}") do
    post_json("/1.1/user-agent", {:value => user_agent})
  end
end
|
83
105
|
|
84
|
-
|
85
|
-
|
86
|
-
|
106
|
+
# Looks up the identity for an (IP address, User-Agent) pair, going
# through the local cache.
#
# ip, user_agent - nil (or false) is replaced by an empty string before
#                  hashing and before being sent.
#
# The cache key combines the MD5 hex digests of both values.
#
# Returns the status hash produced by `post_json`.
def fetch_identity(ip, user_agent)
  ip ||= ""
  user_agent ||= ""
  ip_digest = Digest::MD5.hexdigest(ip)
  ua_digest = Digest::MD5.hexdigest(user_agent)
  with_cache("identity-#{ip_digest}-#{ua_digest}") do
    post_json("/1.1/identity", {:address => ip, :user_agent => user_agent})
  end
end
|
88
113
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
if ip
|
96
|
-
i = ip.ipv4? ? ip.ipv4_mapped.to_i : ip.to_i # convert IP to arbitrary length integer
|
97
|
-
ip_candidates = @ip2robots[i]
|
98
|
-
cidrs = @ip2cidrs.search(i)
|
99
|
-
cidr_candidates = cidrs.collect {|cidr| @cidr2robots[cidr]}.reduce([], :concat) unless cidrs.nil?
|
100
|
-
end
|
101
|
-
# Look for robots with the same User-Agent
|
102
|
-
ua_candidates = []
|
103
|
-
if ua
|
104
|
-
ua_candidates = @ua2robots[Digest::MD5.hexdigest(ua)]
|
105
|
-
end
|
106
|
-
# Make a final decision
|
107
|
-
robots = ((ip_candidates | cidr_candidates) & ua_candidates)
|
108
|
-
if !robots.empty?
|
109
|
-
robot = robots[0]
|
110
|
-
url = "https://access.watch/database/robots/#{robot['reputation']}/#{robot['urlid'] or robot['id']}"
|
111
|
-
{'identity' => {'type' => 'robot'},
|
112
|
-
'robot' => {'id' => robot['id'],
|
113
|
-
'name' => robot['name'],
|
114
|
-
'url' => url},
|
115
|
-
'reputation' => {'status' => robot['reputation']}}
|
114
|
+
# Copies selected entries of +data+ into the event under +destination+.
#
# event       - object responding to `set(field, value)`.
# destination - target field name; nothing is written when nil.
# data        - Hash of attributes; nothing is written when it is nil or
#               not a Hash.
# keys        - optional whitelist of keys to keep (nil keeps every key).
#
# Entries whose value is nil or an empty string/collection are dropped.
# Values that do not respond to `empty?` (numbers, booleans) are kept;
# calling `empty?` on them unconditionally would raise NoMethodError.
def augment(event, destination, data, keys=nil)
  return unless destination && data.is_a?(Hash)

  selected = data.select do |key, value|
    wanted = keys.nil? || keys.include?(key)
    blank = value.nil? || (value.respond_to?(:empty?) && value.empty?)
    wanted && !blank
  end
  event.set(destination, selected)
end
|
118
122
|
|
119
123
|
public
|
120
124
|
# Enriches +event+ with data looked up from the configured source fields,
# then calls `filter_matched`.
#
# Lookup strategy, chosen from which source settings are configured:
#   both ip_source and user_agent_source -> identity lookup (address,
#                                           robot and reputation data)
#   only ip_source                       -> address lookup
#   otherwise                            -> user-agent lookup
#
# Data is written only when the lookup reports :success and the matching
# destination setting is configured (see `augment`).
#
# NOTE(review): both source settings are declared as required, so the
# first branch appears to be the only reachable one — confirm whether the
# other two are still needed. The identity branch also never writes to
# `user_agent_destination`.
def filter(event)
  ip = event.get(@ip_source)
  user_agent = event.get(@user_agent_source)

  if @ip_source && @user_agent_source
    identity = fetch_identity(ip, user_agent)
    if identity[:status] == :success
      payload = identity[:data]
      augment(event, @address_destination, payload["address"], @@address_keys)
      augment(event, @robot_destination, payload["robot"], @@robot_keys)
      augment(event, @reputation_destination, payload["reputation"])
    end
  elsif @ip_source
    address = fetch_address(ip)
    augment(event, @address_destination, address[:data], @@address_keys) if address[:status] == :success
  else
    agent = fetch_user_agent(user_agent)
    augment(event, @user_agent_destination, agent[:data]) if agent[:status] == :success
  end

  filter_matched(event)
end
|
@@ -1,10 +1,10 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
Gem::Specification.new do |s|
|
3
3
|
s.name = 'logstash-filter-accesswatch'
|
4
|
-
s.version = '0.
|
4
|
+
s.version = '0.2.0'
|
5
5
|
s.licenses = ['Apache-2.0']
|
6
6
|
s.summary = 'The Logstash filter plugin for Access Watch (http://access.watch).'
|
7
|
-
s.description = 'The Access Watch filter adds information about robots visiting your website based on data from our
|
7
|
+
s.description = 'The Access Watch filter adds information about robots visiting your website based on data from our robot database.'
|
8
8
|
s.homepage = 'http://access.watch'
|
9
9
|
s.authors = ['Benoît Fleury']
|
10
10
|
s.email = 'benoit@access.watch'
|
@@ -12,16 +12,15 @@ Gem::Specification.new do |s|
|
|
12
12
|
|
13
13
|
# Files
|
14
14
|
s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','Gemfile','LICENSE']
|
15
|
-
# Tests
|
16
|
-
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
17
15
|
|
18
16
|
# Special flag to let us know this is actually a logstash plugin
|
19
17
|
s.metadata = { 'logstash_plugin' => 'true',
|
20
18
|
'logstash_group' => 'filter' }
|
21
19
|
|
22
20
|
# Gem dependencies
|
23
|
-
s.add_runtime_dependency 'logstash-core-plugin-api',
|
24
|
-
s.add_runtime_dependency '
|
21
|
+
s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.0'
|
22
|
+
s.add_runtime_dependency 'logstash-mixin-http_client', '~> 5.2'
|
23
|
+
s.add_runtime_dependency 'lru_redux', '~> 1.1'
|
25
24
|
|
26
25
|
s.add_development_dependency 'logstash-devutils', '1.3.3'
|
27
26
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-accesswatch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benoît Fleury
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-07-
|
11
|
+
date: 2017-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -29,15 +29,29 @@ dependencies:
|
|
29
29
|
requirements:
|
30
30
|
- - "~>"
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
33
|
-
name:
|
32
|
+
version: '5.2'
|
33
|
+
name: logstash-mixin-http_client
|
34
34
|
prerelease: false
|
35
35
|
type: :runtime
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: '5.2'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '1.1'
|
47
|
+
name: lru_redux
|
48
|
+
prerelease: false
|
49
|
+
type: :runtime
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.1'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
requirement: !ruby/object:Gem::Requirement
|
43
57
|
requirements:
|
@@ -53,7 +67,7 @@ dependencies:
|
|
53
67
|
- !ruby/object:Gem::Version
|
54
68
|
version: 1.3.3
|
55
69
|
description: The Access Watch filter adds information about robots visiting your website
|
56
|
-
based on data from our
|
70
|
+
based on data from our robot database.
|
57
71
|
email: benoit@access.watch
|
58
72
|
executables: []
|
59
73
|
extensions: []
|