logstash-filter-ezproxy 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTORS +1 -1
- data/lib/logstash/filters/ezproxy.rb +48 -65
- data/lib/logstash/filters/ezproxy_providers.rb +1 -0
- data/lib/logstash/filters/{cambridge.rb → ezproxy_providers/cambridge.rb} +5 -3
- data/lib/logstash/filters/{dawsonera.rb → ezproxy_providers/dawsonera.rb} +3 -2
- data/lib/logstash/filters/{proquest.rb → ezproxy_providers/ebookcentral.rb} +4 -2
- data/lib/logstash/filters/{ebscohost.rb → ezproxy_providers/ebscohost.rb} +3 -2
- data/lib/logstash/filters/ezproxy_providers/elsevier.rb +18 -0
- data/lib/logstash/filters/{emerald.rb → ezproxy_providers/emerald.rb} +4 -2
- data/lib/logstash/filters/{ft.rb → ezproxy_providers/ft.rb} +3 -3
- data/lib/logstash/filters/{gale.rb → ezproxy_providers/gale.rb} +4 -2
- data/lib/logstash/filters/{heinonline.rb → ezproxy_providers/heinonline.rb} +3 -2
- data/lib/logstash/filters/{ieee.rb → ezproxy_providers/ieee.rb} +3 -4
- data/lib/logstash/filters/{jstor.rb → ezproxy_providers/jstor.rb} +3 -2
- data/lib/logstash/filters/{lexis_webanalytics.rb → ezproxy_providers/lexis_webanalytics.rb} +3 -2
- data/lib/logstash/filters/{lexisnexis.rb → ezproxy_providers/lexisnexis.rb} +3 -3
- data/lib/logstash/filters/{myilibrary.rb → ezproxy_providers/myilibrary.rb} +7 -3
- data/lib/logstash/filters/{oxford.rb → ezproxy_providers/oxford.rb} +5 -3
- data/lib/logstash/filters/ezproxy_providers/proquest-search.rb +27 -0
- data/lib/logstash/filters/{sage.rb → ezproxy_providers/sage.rb} +3 -3
- data/lib/logstash/filters/{sciencedirect.rb → ezproxy_providers/sciencedirect.rb} +3 -4
- data/lib/logstash/filters/{scopus.rb → ezproxy_providers/scopus.rb} +3 -2
- data/lib/logstash/filters/{springer.rb → ezproxy_providers/springer.rb} +3 -3
- data/lib/logstash/filters/{tandf.rb → ezproxy_providers/tandf.rb} +3 -5
- data/lib/logstash/filters/{webofknowledge.rb → ezproxy_providers/webofknowledge.rb} +4 -2
- data/lib/logstash/filters/ezproxy_providers/westlaw.rb +18 -0
- data/lib/logstash/filters/{wiley.rb → ezproxy_providers/wiley.rb} +3 -3
- data/lib/logstash/helpers/param_helper.rb +0 -8
- data/logstash-filter-ezproxy.gemspec +2 -2
- data/spec/filters/cambridge/cambridge_spec.rb +3 -3
- data/spec/filters/dawsonera/dawsonera_spec.rb +3 -3
- data/spec/filters/{proquest/proquest.2018-02-09.csv → ebookcentral/ebookcentral.2018-02-09.csv} +0 -0
- data/spec/filters/{proquest/proquest_spec.rb → ebookcentral/ebookcentral.rb} +4 -4
- data/spec/filters/ebscohost/ebscohost_spec.rb +3 -3
- data/spec/filters/elsevier/elsevier.2018-02-20.csv +3 -0
- data/spec/filters/elsevier/elsevier_spec.rb +18 -0
- data/spec/filters/emerald/emerald_spec.rb +3 -3
- data/spec/filters/ft/ft_spec.rb +3 -3
- data/spec/filters/gale/gale_spec.rb +3 -3
- data/spec/filters/heinonline/heinonline_spec.rb +3 -3
- data/spec/filters/ieee/ieee_spec.rb +3 -3
- data/spec/filters/jstor/jstor_spec.rb +3 -3
- data/spec/filters/lexis_webanalytics/lexis_webanalytics_spec.rb +3 -3
- data/spec/filters/lexisnexis/lexisnexis_spec.rb +3 -3
- data/spec/filters/myilibrary/myilibrary_spec.rb +3 -3
- data/spec/filters/oxford/oxford_spec.rb +3 -3
- data/spec/filters/proquest-search/proquest-search.2018-02-19.csv +4 -0
- data/spec/filters/proquest-search/proquest-search_spec.rb +19 -0
- data/spec/filters/sage/sage_spec.rb +3 -3
- data/spec/filters/sciencedirect/sciencedirect_spec.rb +3 -3
- data/spec/filters/scopus/scopus_spec.rb +3 -3
- data/spec/filters/springer/springer_spec.rb +3 -3
- data/spec/filters/tandf/tandf_spec.rb +3 -3
- data/spec/filters/webofknowledge/webofknowledge_spec.rb +3 -3
- data/spec/filters/westlaw/westlaw.2018-02-20.csv +3 -0
- data/spec/filters/westlaw/westlaw_spec.rb +18 -0
- data/spec/filters/wiley/wiley_spec.rb +4 -4
- metadata +44 -28
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8a86f6f54351a599519f27e04c0b1ec9bbe9a6a8c802dac09634d86476c212ef
|
4
|
+
data.tar.gz: 8fea5f5da0e5da09b6b7c750f81fec7347708e994748b0a72361e8791ff0fe56
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2fd7d975bce60293bf1ede3d0c78f294fd25e94836d5a29d36c63d8e5eb83f81ad6137e88a9c018054f97814a1da0da48fa675092175c87ee5820d3c2ca0d62d
|
7
|
+
data.tar.gz: 6889ce78ced9789480e1d2962743c74cca3b26b5cebfbc693b40e91ddf2821c3d14f98c90f62666c9f9007a7e43da72ed223778be68f5c44c50840bb5ae5f14e
|
data/CONTRIBUTORS
CHANGED
@@ -2,7 +2,7 @@ The following is a list of people who have contributed ideas, code, bug
|
|
2
2
|
reports, or in general have helped logstash along its way.
|
3
3
|
|
4
4
|
Contributors:
|
5
|
-
* Dom Belcher -
|
5
|
+
* Dom Belcher - d.belcher@lancaster.ac.uk
|
6
6
|
|
7
7
|
Note: If you've sent us patches, bug reports, or otherwise contributed to
|
8
8
|
Logstash, and you aren't on the list above and want to be, please let us know
|
@@ -1,32 +1,12 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require "logstash/filters/base"
|
3
3
|
require "logstash/namespace"
|
4
|
-
require_relative '../helpers/url_parser'
|
5
|
-
require_relative '../helpers/param_helper'
|
6
|
-
require_relative '../helpers/mime_helper'
|
7
|
-
require_relative "./cambridge"
|
8
|
-
require_relative "./dawsonera"
|
9
|
-
require_relative "./ebscohost"
|
10
|
-
require_relative "./emerald"
|
11
|
-
require_relative "./ft"
|
12
|
-
require_relative "./gale"
|
13
|
-
require_relative "./heinonline"
|
14
|
-
require_relative "./ieee"
|
15
|
-
require_relative "./jstor"
|
16
|
-
require_relative "./lexis_webanalytics"
|
17
|
-
require_relative "./lexisnexis"
|
18
|
-
require_relative "./myilibrary"
|
19
|
-
require_relative "./proquest"
|
20
|
-
require_relative "./sage"
|
21
|
-
require_relative "./sciencedirect"
|
22
|
-
require_relative "./scopus"
|
23
|
-
require_relative "./springer"
|
24
|
-
require_relative "./tandf"
|
25
|
-
require_relative "./webofknowledge"
|
26
|
-
require_relative "./wiley"
|
27
4
|
require 'uri'
|
28
5
|
require 'cgi'
|
29
6
|
|
7
|
+
Dir[File.dirname(__FILE__) + '/../helpers/*.rb'].each { |file| require file }
|
8
|
+
Dir[File.dirname(__FILE__) + '/./ezproxy_providers/*.rb'].each { |file| require file }
|
9
|
+
|
30
10
|
# This filter will replace the contents of the default
|
31
11
|
# message field with whatever you specify in the configuration.
|
32
12
|
#
|
@@ -38,7 +18,11 @@ class LogStash::Filters::Ezproxy < LogStash::Filters::Base
|
|
38
18
|
#
|
39
19
|
# filter {
|
40
20
|
# {
|
41
|
-
#
|
21
|
+
# url => "Field containing url to parse"
|
22
|
+
# target => "Field to output metadata to - default is request_metadata"
|
23
|
+
# mime_tag => true|false set initial mime types for all requests based on file extensions - default false
|
24
|
+
# doc_id_tag => true|false set unit and title IDs from docID-like params - default false
|
25
|
+
# path_tag => true|false add request path to metadata - default false
|
42
26
|
# }
|
43
27
|
# }
|
44
28
|
#
|
@@ -46,38 +30,46 @@ class LogStash::Filters::Ezproxy < LogStash::Filters::Base
|
|
46
30
|
|
47
31
|
# The url to be parsed by the filter
|
48
32
|
config :url, :validate => :string, :required => true
|
33
|
+
config :target, :validate => :string, :default => "request_metadata"
|
34
|
+
config :mime_tag, :validate => :boolean, :default => false
|
35
|
+
config :doc_id_tag, :validate => :boolean, :default => false
|
36
|
+
config :path_tag, :validate => :boolean, :default => false
|
49
37
|
|
50
38
|
@@hosts = {
|
51
|
-
"www.cambridge.org" => lambda { |path, params, uri| Cambridge
|
52
|
-
"www.dawsonera.com" => lambda { |path, params, uri| DawsonEra::parse(path, params) },
|
53
|
-
"
|
54
|
-
"
|
55
|
-
"
|
56
|
-
"
|
57
|
-
"
|
58
|
-
"
|
59
|
-
"
|
60
|
-
"
|
61
|
-
"
|
62
|
-
"
|
63
|
-
"
|
64
|
-
"
|
39
|
+
"www.cambridge.org" => lambda { |path, params, uri| EzproxyProviders::Cambridge.parse(path, params, uri) },
|
40
|
+
"www.dawsonera.com" => lambda { |path, params, uri| EzproxyProviders::DawsonEra::parse(path, params) },
|
41
|
+
"ebookcentral.proquest.com" => lambda { |path, params, uri| EzproxyProviders::EBookCentral::parse(path, params) },
|
42
|
+
"ebscohost.com" => lambda { |path, params, uri| EzproxyProviders::Ebscohost::parse(path, params, uri) },
|
43
|
+
"elsevierelibrary.co.uk" => lambda { |path, params, uri| EzproxyProviders::Elsevier::parse(path, params) },
|
44
|
+
"emeraldinsight.com" => lambda { |path, params, uri| EzproxyProviders::Emerald::parse(path, params) },
|
45
|
+
"www.ft.com" => lambda { |path, params, uri| EzproxyProviders::FT::parse(path, params) },
|
46
|
+
"galegroup.com" => lambda { |path, params, uri| EzproxyProviders::Gale::parse(path, params) },
|
47
|
+
"heinonline.org" => lambda { |path, params, uri| EzproxyProviders::HeinOnline::parse(path, params) },
|
48
|
+
"ieee.org" => lambda { |path, params, uri| EzproxyProviders::IEEE::parse(path, params) },
|
49
|
+
"www.jstor.org" => lambda { |path, params, uri| EzproxyProviders::Jstor::parse(path, params) },
|
50
|
+
"www.lexisnexis.com" => lambda { |path, params, uri| EzproxyProviders::LexisNexis::parse(path, params) },
|
51
|
+
"webanalytics.lexisnexis.com" => lambda { |path, params, uri| EzproxyProviders::LexisWebAnalytics::parse(path, params, uri) },
|
52
|
+
"lib.myilibrary.com" => lambda { |path, params, uri| EzproxyProviders::MyILibrary::parse(path, params) },
|
53
|
+
"www.oxfordscholarship.com" => lambda { |path, params, uri| EzproxyProviders::Oxford::parse(path, params) },
|
54
|
+
"search.proquest.com" => lambda { |path, params, uri| EzproxyProviders::ProquestSearch::parse(path, params) },
|
55
|
+
"journals.sagepub.com" => lambda { |path, params, uri| EzproxyProviders::Sage::parse(path, params) },
|
65
56
|
"els-cdn.com" => lambda { |path, params, uri|
|
66
|
-
return ScienceDirect::parse(path, params).merge!({ 'provider' => 'elsevier-cdn'})
|
57
|
+
return EzproxyProviders::ScienceDirect::parse(path, params).merge!({ 'provider' => 'elsevier-cdn' })
|
67
58
|
},
|
68
59
|
"sciencedirect.com" => lambda { |path, params, uri|
|
69
|
-
return ScienceDirect::parse(path, params).merge!({ 'provider' => 'sciencedirect'})
|
60
|
+
return EzproxyProviders::ScienceDirect::parse(path, params).merge!({ 'provider' => 'sciencedirect' })
|
70
61
|
},
|
71
|
-
"scopus.com" => lambda { |path, params, uri| Scopus::parse(path, params) },
|
72
|
-
"springer.com" => lambda { |path, params, uri| Springer::parse(path, params) },
|
73
|
-
"www.tandfonline.com" => lambda { |path, params, uri| TandF::parse(path, params)},
|
62
|
+
"scopus.com" => lambda { |path, params, uri| EzproxyProviders::Scopus::parse(path, params) },
|
63
|
+
"springer.com" => lambda { |path, params, uri| EzproxyProviders::Springer::parse(path, params) },
|
64
|
+
"www.tandfonline.com" => lambda { |path, params, uri| EzproxyProviders::TandF::parse(path, params) },
|
74
65
|
"thomsonreuters.com" => lambda { |path, params, uri|
|
75
|
-
return WebOfKnowledge::parse(path, params).merge!({ 'provider' => 'thomsonreuters'})
|
66
|
+
return EzproxyProviders::WebOfKnowledge::parse(path, params).merge!({ 'provider' => 'thomsonreuters' })
|
76
67
|
},
|
77
68
|
"webofknowledge.com" => lambda { |path, params, uri|
|
78
|
-
return WebOfKnowledge::parse(path, params).merge!({ 'provider' => 'webofknowledge'})
|
69
|
+
return EzproxyProviders::WebOfKnowledge::parse(path, params).merge!({ 'provider' => 'webofknowledge' })
|
79
70
|
},
|
80
|
-
"
|
71
|
+
"westlaw.co.uk" => lambda { |path, params, uri| EzproxyProviders::Westlaw::parse(path, params) },
|
72
|
+
"wiley.com" => lambda { |path, params, uri| EzproxyProviders::Wiley::parse(path, params) }
|
81
73
|
}
|
82
74
|
|
83
75
|
|
@@ -106,26 +98,15 @@ class LogStash::Filters::Ezproxy < LogStash::Filters::Base
|
|
106
98
|
path = parsed_url['path']
|
107
99
|
params = parsed_url['params']
|
108
100
|
|
109
|
-
# if (uri.host == "ezproxy.lancs.ac.uk")
|
110
|
-
# if (uri.query)
|
111
|
-
# puts uri
|
112
|
-
# params = CGI::parse(uri.query)
|
113
|
-
# if params.key?('url')
|
114
|
-
# uri = URI(params['url'][0])
|
115
|
-
# elsif params.key?('qurl')
|
116
|
-
# uri = URI(params['qurl'][0])
|
117
|
-
# end
|
118
|
-
# event.tag("requested_host_ezproxy")
|
119
|
-
# event.set("requested_host", uri.host)
|
120
|
-
# end
|
121
|
-
# end
|
122
|
-
|
123
101
|
unless uri == nil
|
124
102
|
|
125
|
-
|
126
103
|
begin
|
127
|
-
|
128
|
-
|
104
|
+
if @mime_tag
|
105
|
+
data.merge!(Mime::parse(path))
|
106
|
+
end
|
107
|
+
if @doc_id_tag
|
108
|
+
data.merge!(ParamHelper::parse(params))
|
109
|
+
end
|
129
110
|
|
130
111
|
@@hosts.each do |key, value|
|
131
112
|
if uri.host.include?(key)
|
@@ -142,8 +123,10 @@ class LogStash::Filters::Ezproxy < LogStash::Filters::Base
|
|
142
123
|
puts uri
|
143
124
|
event.tag("ezproxy_parse_failure")
|
144
125
|
end
|
145
|
-
|
146
|
-
|
126
|
+
if @path_tag
|
127
|
+
data['path'] = path
|
128
|
+
end
|
129
|
+
event.set(@target, data)
|
147
130
|
else
|
148
131
|
event.tag("ezproxy_parse_failure")
|
149
132
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
module EzproxyProviders end
|
@@ -1,5 +1,7 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative '../ezproxy_providers'
|
2
|
+
|
3
|
+
class EzproxyProviders::Cambridge
|
4
|
+
def self.parse (path, params, uri)
|
3
5
|
|
4
6
|
url = uri.to_s
|
5
7
|
|
@@ -120,4 +122,4 @@ module Cambridge
|
|
120
122
|
return data
|
121
123
|
|
122
124
|
end
|
123
|
-
end
|
125
|
+
end
|
@@ -1,8 +1,9 @@
|
|
1
1
|
|
2
2
|
require 'uri'
|
3
3
|
require 'cgi'
|
4
|
+
require_relative '../ezproxy_providers'
|
4
5
|
|
5
|
-
|
6
|
+
class EzproxyProviders::Ebscohost
|
6
7
|
|
7
8
|
@openUrlFields = {
|
8
9
|
'issn' => 'print_identifier',
|
@@ -14,7 +15,7 @@ module Ebscohost
|
|
14
15
|
'id' => 'unit_id'
|
15
16
|
}
|
16
17
|
|
17
|
-
def
|
18
|
+
def self.parse (path, params, uri)
|
18
19
|
|
19
20
|
data = {
|
20
21
|
"provider" => "ebscohost"
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require_relative '../ezproxy_providers'
|
2
|
+
|
3
|
+
class EzproxyProviders::Elsevier
|
4
|
+
def self.parse (path, params)
|
5
|
+
|
6
|
+
data = {
|
7
|
+
"provider" => "elsevier"
|
8
|
+
}
|
9
|
+
|
10
|
+
if match = /\/(product|pdfreader)\/([a-z0-9\-]+)/i.match(path)
|
11
|
+
data['unit_id'] = match[2]
|
12
|
+
data['title_id'] = match[2]
|
13
|
+
end
|
14
|
+
|
15
|
+
return data
|
16
|
+
|
17
|
+
end
|
18
|
+
end
|
@@ -1,5 +1,7 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative '../ezproxy_providers'
|
2
|
+
|
3
|
+
class EzproxyProviders::MyILibrary
|
4
|
+
def self.parse (path, params)
|
3
5
|
|
4
6
|
data = {
|
5
7
|
"provider" => "myilibrary"
|
@@ -15,7 +17,9 @@ module MyILibrary
|
|
15
17
|
|
16
18
|
elsif (/\/Viewer\/getImage\_Servlet\.aspx/i.match(path))
|
17
19
|
data['rtype'] = 'BOOK_PAGE'
|
18
|
-
|
20
|
+
if params.key?('codec')
|
21
|
+
data['mime'] = params['codec'][0].upcase
|
22
|
+
end
|
19
23
|
|
20
24
|
elsif (/\/Viewer\/get[DP]MP\_Servlet\.aspx/i.match(path))
|
21
25
|
data['rtype'] = 'BOOK_PAGE'
|
@@ -1,5 +1,7 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative '../ezproxy_providers'
|
2
|
+
|
3
|
+
class EzproxyProviders::Oxford
|
4
|
+
def self.parse (path, params)
|
3
5
|
|
4
6
|
data = {
|
5
7
|
"provider" => "oxford"
|
@@ -25,7 +27,7 @@ module Oxford
|
|
25
27
|
data['rtype'] = 'BOOK'
|
26
28
|
end
|
27
29
|
|
28
|
-
if params.key?('print')
|
30
|
+
if params.key?('print') && !params['print'][0].nil?
|
29
31
|
data['mime'] = params['print'][0].upcase
|
30
32
|
end
|
31
33
|
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require_relative '../ezproxy_providers'
|
2
|
+
|
3
|
+
class EzproxyProviders::ProquestSearch
|
4
|
+
def self.parse (path, params)
|
5
|
+
|
6
|
+
data = {
|
7
|
+
"provider" => "proquest"
|
8
|
+
}
|
9
|
+
|
10
|
+
if params.key?('t:ac')
|
11
|
+
data['unit_id'] = params['t:ac'][0]
|
12
|
+
data['title_id'] = params['t:ac'][0]
|
13
|
+
end
|
14
|
+
|
15
|
+
if (match = /\/docview\/([0-9]+)\//i.match(path))
|
16
|
+
data['unit_id'] = match[1]
|
17
|
+
data['title_id'] = match[1]
|
18
|
+
|
19
|
+
if /fulltextpdf/i.match(path)
|
20
|
+
data['mime'] = 'PDF'
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
return data
|
25
|
+
|
26
|
+
end
|
27
|
+
end
|