logstash-filter-accesswatch 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 34157f7c1c0392ebb20368b31fbf0007b79df4109c016e77a107732b4b41fbe2
4
- data.tar.gz: cf687eb6c068bf62d241b838429ad1972a742e5318e41a27492b7c44d2a97078
3
+ metadata.gz: 2f75068c27fcfbe8f6630eeb56055d994e30fda75f0cf342fd94cb84ad2c06fc
4
+ data.tar.gz: 72df9db9018bd46970ac20827b17b26cc2ac11fbaee8e8348eaf57e67198888a
5
5
  SHA512:
6
- metadata.gz: b280d78583d33c7768afc4ec852f3bc26da8979c1713df177cb746978d765f59ef361985d5a5e47f3cbc26088f9abd6eacdf1ca5479d7c53e8b488fafc903e68
7
- data.tar.gz: c5717ab2193781ee5024f5a28f6757b330c1c631f64c11c56192e32107a5bc739efaa3e8cd7a8e6bf71b726b9a2e180d5cc7a98ebf8224dc3145f69cf05ff4fe
6
+ metadata.gz: 5d6831752d2352e47e7613ecf1c1edd72f82bd904fb4475e19d8ef509cc8fcbe6475fc6aef8d49be1736262cfa81dd5790cf40a24b337137caa4db3d28891553
7
+ data.tar.gz: 61d7c93786d2590784452dafba8833733079f72e4bbe3f40a97bd12e412dfbd208f2d7a61117bf65b86f6e1650d1565ece9dd48fd6c31227f41f1a3b56bb27a2
@@ -1,130 +1,147 @@
1
1
  # encoding: utf-8
2
2
  require "logstash/filters/base"
3
3
  require "logstash/namespace"
4
- require 'json'
5
- require 'set'
6
- require 'ipaddr'
7
- require 'interval_tree'
8
- require 'digest'
4
+ require "logstash/plugin_mixins/http_client"
5
+ require "json"
6
+ require "digest"
7
+ require "lru_redux"
9
8
 
10
9
  # The Access Watch filter adds information about robots visiting
11
10
  # your website based on data from our robots database.
12
- #
13
- # The following fields might be created:
14
- # [identity][type] "robot" If the visitor is a robot.
15
- # [reputation][status] string The reputation of the visitor (see below).
16
- # [robot][id] number A unique robot identifier
17
- # [robot][name] string A robot's name to display to the user.
18
- # [robot][url] string A link to the robot's page on the Access Watch database.
19
- #
20
- # Access Watch defines the following reputation statuses:
21
- #
22
- # nice perfect, as far as we know you can trust this entity
23
- # ok all right, so far no reason to worry about this entity
24
- # suspicious warning, nothing really bad, but the entity is on our radar
25
- # bad danger, there are good reasons to watch or block this entity
26
- #
27
- # This filter requires the Access Watch `robots.json` file to run.
28
- #
29
11
 
30
12
  class LogStash::Filters::Accesswatch < LogStash::Filters::Base
31
13
 
14
+ include LogStash::PluginMixins::HttpClient
15
+
32
16
  config_name "accesswatch"
33
17
 
34
- # The path to the Access Watch database file.
35
- #
36
- # If not specified, this will default to './robots.json'.
37
- #
38
- config :db_path, :validate => :path, :default => "./robots.json"
18
+ # Your API Key
19
+ config :api_key, :validate => :string, :required => true
20
+
21
+ # The size of the local cache, 0 to deactivate
22
+ config :cache_size, :validate => :number, :default => 10000
39
23
 
40
24
  # The field containing the IP address.
41
25
  config :ip_source, :validate => :string, :required => true
42
26
 
43
27
  # The field containing the User-Agent string.
44
- config :ua_source, :validate => :string, :required => true
45
-
46
- # Transform a CIDR described as a 2-array [start size]
47
- # into a Ruby 3-dotted range.
48
- private
49
- def cidr2range(cidr)
50
- first = cidr[0]
51
- last = first + cidr[1]
52
- (first...last)
28
+ config :user_agent_source, :validate => :string, :required => true
29
+
30
+ # The destination field for address data
31
+ config :address_destination, :validate => :string
32
+
33
+ # The destination field for user-agent data
34
+ config :user_agent_destination, :validate => :string
35
+
36
+ # The destination field for robot data
37
+ config :robot_destination, :validate => :string
38
+
39
+ # The destination field for reputation data
40
+ config :reputation_destination, :validate => :string
41
+
42
+ @@address_keys = ["value", "hostname", "country_code", "flags"]
43
+ @@robot_keys = ["id", "name", "url"]
44
+
45
+ public
46
+ def register
47
+ if @cache_size > 0
48
+ @cache = LruRedux::ThreadSafeCache.new(@cache_size)
49
+ end
53
50
  end
54
51
 
55
- # Group elements of a collection by each value of a multi-valued attribute
56
- private
57
- def group_by_multi(coll, key)
58
- res = Hash.new {|hash, key| hash[key] = Array.new}
59
- coll.each {|el|
60
- if !el[key].nil?
61
- el[key].each {|val|
62
- res[val].push(el)
63
- }
64
- end
52
+ def handle_response(response)
53
+ data = JSON.parse(response.body)
54
+ if response.code == 200
55
+ {:status => :success,
56
+ :data => data}
57
+ else
58
+ {:status => :error,
59
+ :code => data["code"],
60
+ :message => data["message"]}
61
+ end
62
+ end
63
+
64
+ def url(path)
65
+ "http://api.access.watch#{path}"
66
+ end
67
+
68
+ def get_json(path)
69
+ response = self.client.get(self.url(path),
70
+ headers: {"Api-Key" => @api_key,
71
+ "Accept" => "application/json",
72
+ "User-Agent" => "Access Watch Logstash Plugin/0.2.0"})
73
+ self.handle_response(response)
74
+ end
75
+
76
+ def post_json(path, data)
77
+ response = self.client.post(self.url(path),
78
+ headers: {"Api-Key" => @api_key,
79
+ "Accept" => "application/json",
80
+ "Content-Type" => "application/json",
81
+ "User-Agent" => "Access Watch Logstash Plugin/0.2.0"},
82
+ body: JSON.generate(data))
83
+ self.handle_response(response)
84
+ end
85
+
86
+ def with_cache(id, &block)
87
+ if @cache
88
+ @cache.getset(id) { block.call }
89
+ else
90
+ block.call
91
+ end
92
+ end
93
+
94
+ def fetch_address(ip)
95
+ self.with_cache("ip-#{ip}") {
96
+ self.get_json("/1.1/address/#{ip}")
65
97
  }
66
- return res
67
98
  end
68
99
 
69
- private
70
- def build_indices(filename)
71
- file = File.read(filename)
72
- robots = JSON.parse(file)
73
- robots.each {|robot|
74
- if !robot['cidrs'].nil?
75
- robot['cidrs'] = robot['cidrs'].collect {|cidr| cidr2range(cidr)}
76
- end
100
+ def fetch_user_agent(user_agent)
101
+ self.with_cache("ua-#{Digest::MD5.hexdigest(user_agent)}") {
102
+ self.post_json("/1.1/user-agent", {:value => user_agent})
77
103
  }
78
- @ip2robots = group_by_multi(robots, 'ips')
79
- @cidr2robots = group_by_multi(robots, 'cidrs')
80
- @ip2cidrs = IntervalTree::Tree.new(@cidr2robots.keys)
81
- @ua2robots = group_by_multi(robots, 'uas')
82
104
  end
83
105
 
84
- public
85
- def register
86
- build_indices(@db_path)
106
+ def fetch_identity(ip, user_agent)
107
+ ip = ip || ""
108
+ user_agent = user_agent || ""
109
+ self.with_cache("identity-#{Digest::MD5.hexdigest(ip)}-#{Digest::MD5.hexdigest(user_agent)}") {
110
+ self.post_json("/1.1/identity", {:address => ip, :user_agent => user_agent})
111
+ }
87
112
  end
88
113
 
89
- # Take a User-Agent string and an IP address and return a robot description, or nil.
90
- private
91
- def detect(ua, ip)
92
- # Look for robots with the same IP address or CIDR
93
- ip_candidates = []
94
- cidr_candidates = []
95
- if ip
96
- i = ip.ipv4? ? ip.ipv4_mapped.to_i : ip.to_i # convert IP to arbitrary length integer
97
- ip_candidates = @ip2robots[i]
98
- cidrs = @ip2cidrs.search(i)
99
- cidr_candidates = cidrs.collect {|cidr| @cidr2robots[cidr]}.reduce([], :concat) unless cidrs.nil?
100
- end
101
- # Look for robots with the same User-Agent
102
- ua_candidates = []
103
- if ua
104
- ua_candidates = @ua2robots[Digest::MD5.hexdigest(ua)]
105
- end
106
- # Make a final decision
107
- robots = ((ip_candidates | cidr_candidates) & ua_candidates)
108
- if !robots.empty?
109
- robot = robots[0]
110
- url = "https://access.watch/database/robots/#{robot['reputation']}/#{robot['urlid'] or robot['id']}"
111
- {'identity' => {'type' => 'robot'},
112
- 'robot' => {'id' => robot['id'],
113
- 'name' => robot['name'],
114
- 'url' => url},
115
- 'reputation' => {'status' => robot['reputation']}}
114
+ def augment(event, destination, data, keys=nil)
115
+ if destination && data
116
+ event.set(destination,
117
+ data.select {|k, v|
118
+ (keys.nil? or keys.include?(k)) && !(v.nil? || v.empty?)
119
+ })
116
120
  end
117
121
  end
118
122
 
119
123
  public
120
124
  def filter(event)
121
- ip_s = event.get(@ip_source)
122
- ip = IPAddr.new ip_s unless ip_s.nil?
123
- robot = detect(event.get(@ua_source), ip)
124
- if robot
125
- event.set('identity', robot['identity']) unless robot['identity'].nil?
126
- event.set('robot', robot['robot']) unless robot['robot'].nil?
127
- event.set('reputation', robot['reputation']) unless robot['reputation'].nil?
125
+ ip = event.get(@ip_source)
126
+ user_agent = event.get(@user_agent_source)
127
+ if @ip_source and @user_agent_source
128
+ response = self.fetch_identity(ip, user_agent)
129
+ if response[:status] == :success
130
+ data = response[:data]
131
+ self.augment(event, @address_destination, data["address"], @@address_keys)
132
+ self.augment(event, @robot_destination, data["robot"], @@robot_keys)
133
+ self.augment(event, @reputation_destination, data["reputation"])
134
+ end
135
+ elsif @ip_source
136
+ response = self.fetch_address(ip)
137
+ if response[:status] == :success
138
+ self.augment(event, @address_destination, response[:data], @@address_keys)
139
+ end
140
+ else
141
+ response = self.fetch_user_agent(user_agent)
142
+ if response[:status] == :success
143
+ self.augment(event, @user_agent_destination, response[:data])
144
+ end
128
145
  end
129
146
  filter_matched(event)
130
147
  end
@@ -1,10 +1,10 @@
1
1
  # coding: utf-8
2
2
  Gem::Specification.new do |s|
3
3
  s.name = 'logstash-filter-accesswatch'
4
- s.version = '0.1.0'
4
+ s.version = '0.2.0'
5
5
  s.licenses = ['Apache-2.0']
6
6
  s.summary = 'The Logstash filter plugin for Access Watch (http://access.watch).'
7
- s.description = 'The Access Watch filter adds information about robots visiting your website based on data from our robots database.'
7
+ s.description = 'The Access Watch filter adds information about robots visiting your website based on data from our robot database.'
8
8
  s.homepage = 'http://access.watch'
9
9
  s.authors = ['Benoît Fleury']
10
10
  s.email = 'benoit@access.watch'
@@ -12,16 +12,15 @@ Gem::Specification.new do |s|
12
12
 
13
13
  # Files
14
14
  s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','Gemfile','LICENSE']
15
- # Tests
16
- s.test_files = s.files.grep(%r{^(test|spec|features)/})
17
15
 
18
16
  # Special flag to let us know this is actually a logstash plugin
19
17
  s.metadata = { 'logstash_plugin' => 'true',
20
18
  'logstash_group' => 'filter' }
21
19
 
22
20
  # Gem dependencies
23
- s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.0'
24
- s.add_runtime_dependency 'augmented_interval_tree', '~> 0.1.1'
21
+ s.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.0'
22
+ s.add_runtime_dependency 'logstash-mixin-http_client', '~> 5.2'
23
+ s.add_runtime_dependency 'lru_redux', '~> 1.1'
25
24
 
26
25
  s.add_development_dependency 'logstash-devutils', '1.3.3'
27
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-accesswatch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Benoît Fleury
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-07-18 00:00:00.000000000 Z
11
+ date: 2017-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -29,15 +29,29 @@ dependencies:
29
29
  requirements:
30
30
  - - "~>"
31
31
  - !ruby/object:Gem::Version
32
- version: 0.1.1
33
- name: augmented_interval_tree
32
+ version: '5.2'
33
+ name: logstash-mixin-http_client
34
34
  prerelease: false
35
35
  type: :runtime
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.1.1
40
+ version: '5.2'
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.1'
47
+ name: lru_redux
48
+ prerelease: false
49
+ type: :runtime
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.1'
41
55
  - !ruby/object:Gem::Dependency
42
56
  requirement: !ruby/object:Gem::Requirement
43
57
  requirements:
@@ -53,7 +67,7 @@ dependencies:
53
67
  - !ruby/object:Gem::Version
54
68
  version: 1.3.3
55
69
  description: The Access Watch filter adds information about robots visiting your website
56
- based on data from our robots database.
70
+ based on data from our robot database.
57
71
  email: benoit@access.watch
58
72
  executables: []
59
73
  extensions: []