logstash-filter-accesswatch 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 34157f7c1c0392ebb20368b31fbf0007b79df4109c016e77a107732b4b41fbe2
4
- data.tar.gz: cf687eb6c068bf62d241b838429ad1972a742e5318e41a27492b7c44d2a97078
3
+ metadata.gz: 2f75068c27fcfbe8f6630eeb56055d994e30fda75f0cf342fd94cb84ad2c06fc
4
+ data.tar.gz: 72df9db9018bd46970ac20827b17b26cc2ac11fbaee8e8348eaf57e67198888a
5
5
  SHA512:
6
- metadata.gz: b280d78583d33c7768afc4ec852f3bc26da8979c1713df177cb746978d765f59ef361985d5a5e47f3cbc26088f9abd6eacdf1ca5479d7c53e8b488fafc903e68
7
- data.tar.gz: c5717ab2193781ee5024f5a28f6757b330c1c631f64c11c56192e32107a5bc739efaa3e8cd7a8e6bf71b726b9a2e180d5cc7a98ebf8224dc3145f69cf05ff4fe
6
+ metadata.gz: 5d6831752d2352e47e7613ecf1c1edd72f82bd904fb4475e19d8ef509cc8fcbe6475fc6aef8d49be1736262cfa81dd5790cf40a24b337137caa4db3d28891553
7
+ data.tar.gz: 61d7c93786d2590784452dafba8833733079f72e4bbe3f40a97bd12e412dfbd208f2d7a61117bf65b86f6e1650d1565ece9dd48fd6c31227f41f1a3b56bb27a2
@@ -1,130 +1,147 @@
1
1
  # encoding: utf-8
2
2
  require "logstash/filters/base"
3
3
  require "logstash/namespace"
4
- require 'json'
5
- require 'set'
6
- require 'ipaddr'
7
- require 'interval_tree'
8
- require 'digest'
4
+ require "logstash/plugin_mixins/http_client"
5
+ require "json"
6
+ require "digest"
7
+ require "lru_redux"
9
8
 
10
9
  # The Access Watch filter adds information about robots visiting
11
10
  # your website based on data from our robots database.
12
- #
13
- # The following fields might be created:
14
- # [identity][type] "robot" If the visitor is a robot.
15
- # [reputation][status] string The reputation of the visitor (see below).
16
- # [robot][id] number A unique robot identifier
17
- # [robot][name] string A robot's name to display to the user.
18
- # [robot][url] string A link to the robot's page on the Access Watch database.
19
- #
20
- # Access Watch defines the following reputation statuses:
21
- #
22
- # nice perfect, as far as we know you can trust this entity
23
- # ok all right, so far no reason to worry about this entity
24
- # suspicious warning, nothing really bad, but the entity is on our radar
25
- # bad danger, there is good reasons to watch or block this entity
26
- #
27
- # This filter requires the Access Watch `robots.json` file to run.
28
- #
29
11
 
30
12
  class LogStash::Filters::Accesswatch < LogStash::Filters::Base
31
13
 
14
+ include LogStash::PluginMixins::HttpClient
15
+
32
16
  config_name "accesswatch"
33
17
 
34
- # The path to the Access Watch database file.
35
- #
36
- # If not specified, this will default to './robots.json'.
37
- #
38
- config :db_path, :validate => :path, :default => "./robots.json"
18
+ # Your API Key
19
+ config :api_key, :validate => :string, :required => true
20
+
21
+ # The size of the local cache, 0 to deactivate
22
+ config :cache_size, :validate => :number, :default => 10000
39
23
 
40
24
  # The field containing the IP address.
41
25
  config :ip_source, :validate => :string, :required => true
42
26
 
43
27
  # The field containing the User-Agent string.
44
- config :ua_source, :validate => :string, :required => true
45
-
46
- # Transform a CIDR described as a 2-array [start size]
47
- # into a Ruby 3-dotted range.
48
- private
49
- def cidr2range(cidr)
50
- first = cidr[0]
51
- last = first + cidr[1]
52
- (first...last)
28
+ config :user_agent_source, :validate => :string, :required => true
29
+
30
+ # The destination field for address data
31
+ config :address_destination, :validate => :string
32
+
33
+ # The destination field for user-agent data
34
+ config :user_agent_destination, :validate => :string
35
+
36
+ # The destination field for robot data
37
+ config :robot_destination, :validate => :string
38
+
39
+ # The destination field for reputation data
40
+ config :reputation_destination, :validate => :string
41
+
42
+ @@address_keys = ["value", "hostname", "country_code", "flags"]
43
+ @@robot_keys = ["id", "name", "url"]
44
+
45
+ public
46
+ def register
47
+ if @cache_size > 0
48
+ @cache = LruRedux::ThreadSafeCache.new(@cache_size)
49
+ end
53
50
  end
54
51
 
55
- # Group elements of a collection by each value of a multi-valued attribute
56
- private
57
- def group_by_multi(coll, key)
58
- res = Hash.new {|hash, key| hash[key] = Array.new}
59
- coll.each {|el|
60
- if !el[key].nil?
61
- el[key].each {|val|
62
- res[val].push(el)
63
- }
64
- end
52
+ def handle_response(response)
53
+ data = JSON.parse(response.body)
54
+ if response.code == 200
55
+ {:status => :success,
56
+ :data => data}
57
+ else
58
+ {:status => :error,
59
+ :code => data["code"],
60
+ :message => data["message"]}
61
+ end
62
+ end
63
+
64
+ def url(path)
65
+ "http://api.access.watch#{path}"
66
+ end
67
+
68
+ def get_json(path)
69
+ response = self.client.get(self.url(path),
70
+ headers: {"Api-Key" => @api_key,
71
+ "Accept" => "application/json",
72
+ "User-Agent" => "Access Watch Logstash Plugin/0.2.0"})
73
+ self.handle_response(response)
74
+ end
75
+
76
+ def post_json(path, data)
77
+ response = self.client.post(self.url(path),
78
+ headers: {"Api-Key" => @api_key,
79
+ "Accept" => "application/json",
80
+ "Content-Type" => "application/json",
81
+ "User-Agent" => "Access Watch Logstash Plugin/0.2.0"},
82
+ body: JSON.generate(data))
83
+ self.handle_response(response)
84
+ end
85
+
86
+ def with_cache(id, &block)
87
+ if @cache
88
+ @cache.getset(id) { block.call }
89
+ else
90
+ block.call
91
+ end
92
+ end
93
+
94
+ def fetch_address(ip)
95
+ self.with_cache("ip-#{ip}") {
96
+ self.get_json("/1.1/address/#{ip}")
65
97
  }
66
- return res
67
98
  end
68
99
 
69
- private
70
- def build_indices(filename)
71
- file = File.read(filename)
72
- robots = JSON.parse(file)
73
- robots.each {|robot|
74
- if !robot['cidrs'].nil?
75
- robot['cidrs'] = robot['cidrs'].collect {|cidr| cidr2range(cidr)}
76
- end
100
+ def fetch_user_agent(user_agent)
101
+ self.with_cache("ua-#{Digest::MD5.hexdigest(user_agent)}") {
102
+ self.post_json("/1.1/user-agent", {:value => user_agent})
77
103
  }
78
- @ip2robots = group_by_multi(robots, 'ips')
79
- @cidr2robots = group_by_multi(robots, 'cidrs')
80
- @ip2cidrs = IntervalTree::Tree.new(@cidr2robots.keys)
81
- @ua2robots = group_by_multi(robots, 'uas')
82
104
  end
83
105
 
84
- public
85
- def register
86
- build_indices(@db_path)
106
+ def fetch_identity(ip, user_agent)
107
+ ip = ip || ""
108
+ user_agent = user_agent || ""
109
+ self.with_cache("identity-#{Digest::MD5.hexdigest(ip)}-#{Digest::MD5.hexdigest(user_agent)}") {
110
+ self.post_json("/1.1/identity", {:address => ip, :user_agent => user_agent})
111
+ }
87
112
  end
88
113
 
89
- # Take a User-Agent string and an IP address and return a robot description, or nil.
90
- private
91
- def detect(ua, ip)
92
- # Look for robots with the same IP addressor CIDR
93
- ip_candidates = []
94
- cidr_candidates = []
95
- if ip
96
- i = ip.ipv4? ? ip.ipv4_mapped.to_i : ip.to_i # convert IP to arbitrary length integer
97
- ip_candidates = @ip2robots[i]
98
- cidrs = @ip2cidrs.search(i)
99
- cidr_candidates = cidrs.collect {|cidr| @cidr2robots[cidr]}.reduce([], :concat) unless cidrs.nil?
100
- end
101
- # Look for robots with the same User-Agent
102
- ua_candidates = []
103
- if ua
104
- ua_candidates = @ua2robots[Digest::MD5.hexdigest(ua)]
105
- end
106
- # Make a final decision
107
- robots = ((ip_candidates | cidr_candidates) & ua_candidates)
108
- if !robots.empty?
109
- robot = robots[0]
110
- url = "https://access.watch/database/robots/#{robot['reputation']}/#{robot['urlid'] or robot['id']}"
111
- {'identity' => {'type' => 'robot'},
112
- 'robot' => {'id' => robot['id'],
113
- 'name' => robot['name'],
114
- 'url' => url},
115
- 'reputation' => {'status' => robot['reputation']}}
114
+ def augment(event, destination, data, keys=nil)
115
+ if destination && data
116
+ event.set(destination,
117
+ data.select {|k, v|
118
+ (keys.nil? or keys.include?(k)) && !(v.nil? || v.empty?)
119
+ })
116
120
  end
117
121
  end
118
122
 
119
123
  public
120
124
  def filter(event)
121
- ip_s = event.get(@ip_source)
122
- ip = IPAddr.new ip_s unless ip_s.nil?
123
- robot = detect(event.get(@ua_source), ip)
124
- if robot
125
- event.set('identity', robot['identity']) unless robot['identity'].nil?
126
- event.set('robot', robot['robot']) unless robot['robot'].nil?
127
- event.set('reputation', robot['reputation']) unless robot['reputation'].nil?
125
+ ip = event.get(@ip_source)
126
+ user_agent = event.get(@user_agent_source)
127
+ if @ip_source and @user_agent_source
128
+ response = self.fetch_identity(ip, user_agent)
129
+ if response[:status] == :success
130
+ data = response[:data]
131
+ self.augment(event, @address_destination, data["address"], @@address_keys)
132
+ self.augment(event, @robot_destination, data["robot"], @@robot_keys)
133
+ self.augment(event, @reputation_destination, data["reputation"])
134
+ end
135
+ elsif @ip_source
136
+ response = self.fetch_address(ip)
137
+ if response[:status] == :success
138
+ self.augment(event, @address_destination, response[:data], @@address_keys)
139
+ end
140
+ else
141
+ response = self.fetch_user_agent(user_agent)
142
+ if response[:status] == :success
143
+ self.augment(event, @user_agent_destination, response[:data])
144
+ end
128
145
  end
129
146
  filter_matched(event)
130
147
  end
@@ -1,10 +1,10 @@
1
1
  # coding: utf-8
2
2
  Gem::Specification.new do |s|
3
3
  s.name = 'logstash-filter-accesswatch'
4
- s.version = '0.1.0'
4
+ s.version = '0.2.0'
5
5
  s.licenses = ['Apache-2.0']
6
6
  s.summary = 'The Logstash filter plugin for Access Watch (http://access.watch).'
7
- s.description = 'The Access Watch filter adds information about robots visiting your website based on data from our robots database.'
7
+ s.description = 'The Access Watch filter adds information about robots visiting your website based on data from our robot database.'
8
8
  s.homepage = 'http://access.watch'
9
9
  s.authors = ['Benoît Fleury']
10
10
  s.email = 'benoit@access.watch'
@@ -12,16 +12,15 @@ Gem::Specification.new do |s|
12
12
 
13
13
  # Files
14
14
  s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','Gemfile','LICENSE']
15
- # Tests
16
- s.test_files = s.files.grep(%r{^(test|spec|features)/})
17
15
 
18
16
  # Special flag to let us know this is actually a logstash plugin
19
17
  s.metadata = { 'logstash_plugin' => 'true',
20
18
  'logstash_group' => 'filter' }
21
19
 
22
20
  # Gem dependencies
23
- s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.0'
24
- s.add_runtime_dependency 'augmented_interval_tree', '~> 0.1.1'
21
+ s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.0'
22
+ s.add_runtime_dependency 'logstash-mixin-http_client', '~> 5.2'
23
+ s.add_runtime_dependency 'lru_redux', '~> 1.1'
25
24
 
26
25
  s.add_development_dependency 'logstash-devutils', '1.3.3'
27
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-accesswatch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Benoît Fleury
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-07-18 00:00:00.000000000 Z
11
+ date: 2017-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -29,15 +29,29 @@ dependencies:
29
29
  requirements:
30
30
  - - "~>"
31
31
  - !ruby/object:Gem::Version
32
- version: 0.1.1
33
- name: augmented_interval_tree
32
+ version: '5.2'
33
+ name: logstash-mixin-http_client
34
34
  prerelease: false
35
35
  type: :runtime
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.1.1
40
+ version: '5.2'
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.1'
47
+ name: lru_redux
48
+ prerelease: false
49
+ type: :runtime
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.1'
41
55
  - !ruby/object:Gem::Dependency
42
56
  requirement: !ruby/object:Gem::Requirement
43
57
  requirements:
@@ -53,7 +67,7 @@ dependencies:
53
67
  - !ruby/object:Gem::Version
54
68
  version: 1.3.3
55
69
  description: The Access Watch filter adds information about robots visiting your website
56
- based on data from our robots database.
70
+ based on data from our robot database.
57
71
  email: benoit@access.watch
58
72
  executables: []
59
73
  extensions: []