logstash-filter-ezproxy 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTORS +1 -1
- data/lib/logstash/filters/ezproxy.rb +48 -65
- data/lib/logstash/filters/ezproxy_providers.rb +1 -0
- data/lib/logstash/filters/{cambridge.rb → ezproxy_providers/cambridge.rb} +5 -3
- data/lib/logstash/filters/{dawsonera.rb → ezproxy_providers/dawsonera.rb} +3 -2
- data/lib/logstash/filters/{proquest.rb → ezproxy_providers/ebookcentral.rb} +4 -2
- data/lib/logstash/filters/{ebscohost.rb → ezproxy_providers/ebscohost.rb} +3 -2
- data/lib/logstash/filters/ezproxy_providers/elsevier.rb +18 -0
- data/lib/logstash/filters/{emerald.rb → ezproxy_providers/emerald.rb} +4 -2
- data/lib/logstash/filters/{ft.rb → ezproxy_providers/ft.rb} +3 -3
- data/lib/logstash/filters/{gale.rb → ezproxy_providers/gale.rb} +4 -2
- data/lib/logstash/filters/{heinonline.rb → ezproxy_providers/heinonline.rb} +3 -2
- data/lib/logstash/filters/{ieee.rb → ezproxy_providers/ieee.rb} +3 -4
- data/lib/logstash/filters/{jstor.rb → ezproxy_providers/jstor.rb} +3 -2
- data/lib/logstash/filters/{lexis_webanalytics.rb → ezproxy_providers/lexis_webanalytics.rb} +3 -2
- data/lib/logstash/filters/{lexisnexis.rb → ezproxy_providers/lexisnexis.rb} +3 -3
- data/lib/logstash/filters/{myilibrary.rb → ezproxy_providers/myilibrary.rb} +7 -3
- data/lib/logstash/filters/{oxford.rb → ezproxy_providers/oxford.rb} +5 -3
- data/lib/logstash/filters/ezproxy_providers/proquest-search.rb +27 -0
- data/lib/logstash/filters/{sage.rb → ezproxy_providers/sage.rb} +3 -3
- data/lib/logstash/filters/{sciencedirect.rb → ezproxy_providers/sciencedirect.rb} +3 -4
- data/lib/logstash/filters/{scopus.rb → ezproxy_providers/scopus.rb} +3 -2
- data/lib/logstash/filters/{springer.rb → ezproxy_providers/springer.rb} +3 -3
- data/lib/logstash/filters/{tandf.rb → ezproxy_providers/tandf.rb} +3 -5
- data/lib/logstash/filters/{webofknowledge.rb → ezproxy_providers/webofknowledge.rb} +4 -2
- data/lib/logstash/filters/ezproxy_providers/westlaw.rb +18 -0
- data/lib/logstash/filters/{wiley.rb → ezproxy_providers/wiley.rb} +3 -3
- data/lib/logstash/helpers/param_helper.rb +0 -8
- data/logstash-filter-ezproxy.gemspec +2 -2
- data/spec/filters/cambridge/cambridge_spec.rb +3 -3
- data/spec/filters/dawsonera/dawsonera_spec.rb +3 -3
- data/spec/filters/{proquest/proquest.2018-02-09.csv → ebookcentral/ebookcentral.2018-02-09.csv} +0 -0
- data/spec/filters/{proquest/proquest_spec.rb → ebookcentral/ebookcentral.rb} +4 -4
- data/spec/filters/ebscohost/ebscohost_spec.rb +3 -3
- data/spec/filters/elsevier/elsevier.2018-02-20.csv +3 -0
- data/spec/filters/elsevier/elsevier_spec.rb +18 -0
- data/spec/filters/emerald/emerald_spec.rb +3 -3
- data/spec/filters/ft/ft_spec.rb +3 -3
- data/spec/filters/gale/gale_spec.rb +3 -3
- data/spec/filters/heinonline/heinonline_spec.rb +3 -3
- data/spec/filters/ieee/ieee_spec.rb +3 -3
- data/spec/filters/jstor/jstor_spec.rb +3 -3
- data/spec/filters/lexis_webanalytics/lexis_webanalytics_spec.rb +3 -3
- data/spec/filters/lexisnexis/lexisnexis_spec.rb +3 -3
- data/spec/filters/myilibrary/myilibrary_spec.rb +3 -3
- data/spec/filters/oxford/oxford_spec.rb +3 -3
- data/spec/filters/proquest-search/proquest-search.2018-02-19.csv +4 -0
- data/spec/filters/proquest-search/proquest-search_spec.rb +19 -0
- data/spec/filters/sage/sage_spec.rb +3 -3
- data/spec/filters/sciencedirect/sciencedirect_spec.rb +3 -3
- data/spec/filters/scopus/scopus_spec.rb +3 -3
- data/spec/filters/springer/springer_spec.rb +3 -3
- data/spec/filters/tandf/tandf_spec.rb +3 -3
- data/spec/filters/webofknowledge/webofknowledge_spec.rb +3 -3
- data/spec/filters/westlaw/westlaw.2018-02-20.csv +3 -0
- data/spec/filters/westlaw/westlaw_spec.rb +18 -0
- data/spec/filters/wiley/wiley_spec.rb +4 -4
- metadata +44 -28
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8a86f6f54351a599519f27e04c0b1ec9bbe9a6a8c802dac09634d86476c212ef
|
4
|
+
data.tar.gz: 8fea5f5da0e5da09b6b7c750f81fec7347708e994748b0a72361e8791ff0fe56
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2fd7d975bce60293bf1ede3d0c78f294fd25e94836d5a29d36c63d8e5eb83f81ad6137e88a9c018054f97814a1da0da48fa675092175c87ee5820d3c2ca0d62d
|
7
|
+
data.tar.gz: 6889ce78ced9789480e1d2962743c74cca3b26b5cebfbc693b40e91ddf2821c3d14f98c90f62666c9f9007a7e43da72ed223778be68f5c44c50840bb5ae5f14e
|
data/CONTRIBUTORS
CHANGED
@@ -2,7 +2,7 @@ The following is a list of people who have contributed ideas, code, bug
|
|
2
2
|
reports, or in general have helped logstash along its way.
|
3
3
|
|
4
4
|
Contributors:
|
5
|
-
* Dom Belcher -
|
5
|
+
* Dom Belcher - d.belcher@lancaster.ac.uk
|
6
6
|
|
7
7
|
Note: If you've sent us patches, bug reports, or otherwise contributed to
|
8
8
|
Logstash, and you aren't on the list above and want to be, please let us know
|
@@ -1,32 +1,12 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require "logstash/filters/base"
|
3
3
|
require "logstash/namespace"
|
4
|
-
require_relative '../helpers/url_parser'
|
5
|
-
require_relative '../helpers/param_helper'
|
6
|
-
require_relative '../helpers/mime_helper'
|
7
|
-
require_relative "./cambridge"
|
8
|
-
require_relative "./dawsonera"
|
9
|
-
require_relative "./ebscohost"
|
10
|
-
require_relative "./emerald"
|
11
|
-
require_relative "./ft"
|
12
|
-
require_relative "./gale"
|
13
|
-
require_relative "./heinonline"
|
14
|
-
require_relative "./ieee"
|
15
|
-
require_relative "./jstor"
|
16
|
-
require_relative "./lexis_webanalytics"
|
17
|
-
require_relative "./lexisnexis"
|
18
|
-
require_relative "./myilibrary"
|
19
|
-
require_relative "./proquest"
|
20
|
-
require_relative "./sage"
|
21
|
-
require_relative "./sciencedirect"
|
22
|
-
require_relative "./scopus"
|
23
|
-
require_relative "./springer"
|
24
|
-
require_relative "./tandf"
|
25
|
-
require_relative "./webofknowledge"
|
26
|
-
require_relative "./wiley"
|
27
4
|
require 'uri'
|
28
5
|
require 'cgi'
|
29
6
|
|
7
|
+
Dir[File.dirname(__FILE__) + '/../helpers/*.rb'].each { |file| require file }
|
8
|
+
Dir[File.dirname(__FILE__) + '/./ezproxy_providers/*.rb'].each { |file| require file }
|
9
|
+
|
30
10
|
# This filter will replace the contents of the default
|
31
11
|
# message field with whatever you specify in the configuration.
|
32
12
|
#
|
@@ -38,7 +18,11 @@ class LogStash::Filters::Ezproxy < LogStash::Filters::Base
|
|
38
18
|
#
|
39
19
|
# filter {
|
40
20
|
# {
|
41
|
-
#
|
21
|
+
# url => "Field containing url to parse"
|
22
|
+
# target => "Field to output metadata to - default is request_metadata"
|
23
|
+
# mime_tag => true|false set initial mime types for all requests based on file extensions - default false
|
24
|
+
# doc_id_tag => true|false set unit and title id from docID like params - default false
|
25
|
+
# path_tag => true|false add request path to metadata - default false
|
42
26
|
# }
|
43
27
|
# }
|
44
28
|
#
|
@@ -46,38 +30,46 @@ class LogStash::Filters::Ezproxy < LogStash::Filters::Base
|
|
46
30
|
|
47
31
|
# The url to be parsed by the filter
|
48
32
|
config :url, :validate => :string, :required => true
|
33
|
+
config :target, :validate => :string, :default => "request_metadata"
|
34
|
+
config :mime_tag, :validate => :boolean, :default => false
|
35
|
+
config :doc_id_tag, :validate => :boolean, :default => false
|
36
|
+
config :path_tag, :validate => :boolean, :default => false
|
49
37
|
|
50
38
|
@@hosts = {
|
51
|
-
"www.cambridge.org" => lambda { |path, params, uri| Cambridge
|
52
|
-
"www.dawsonera.com" => lambda { |path, params, uri| DawsonEra::parse(path, params) },
|
53
|
-
"
|
54
|
-
"
|
55
|
-
"
|
56
|
-
"
|
57
|
-
"
|
58
|
-
"
|
59
|
-
"
|
60
|
-
"
|
61
|
-
"
|
62
|
-
"
|
63
|
-
"
|
64
|
-
"
|
39
|
+
"www.cambridge.org" => lambda { |path, params, uri| EzproxyProviders::Cambridge.parse(path, params, uri) },
|
40
|
+
"www.dawsonera.com" => lambda { |path, params, uri| EzproxyProviders::DawsonEra::parse(path, params) },
|
41
|
+
"ebookcentral.proquest.com" => lambda { |path, params, uri| EzproxyProviders::EBookCentral::parse(path, params) },
|
42
|
+
"ebscohost.com" => lambda { |path, params, uri| EzproxyProviders::Ebscohost::parse(path, params, uri) },
|
43
|
+
"elsevierelibrary.co.uk" => lambda { |path, params, uri| EzproxyProviders::Elsevier::parse(path, params) },
|
44
|
+
"emeraldinsight.com" => lambda { |path, params, uri| EzproxyProviders::Emerald::parse(path, params) },
|
45
|
+
"www.ft.com" => lambda { |path, params, uri| EzproxyProviders::FT::parse(path, params) },
|
46
|
+
"galegroup.com" => lambda { |path, params, uri| EzproxyProviders::Gale::parse(path, params) },
|
47
|
+
"heinonline.org" => lambda { |path, params, uri| EzproxyProviders::HeinOnline::parse(path, params) },
|
48
|
+
"ieee.org" => lambda { |path, params, uri| EzproxyProviders::IEEE::parse(path, params) },
|
49
|
+
"www.jstor.org" => lambda { |path, params, uri| EzproxyProviders::Jstor::parse(path, params) },
|
50
|
+
"www.lexisnexis.com" => lambda { |path, params, uri| EzproxyProviders::LexisNexis::parse(path, params) },
|
51
|
+
"webanalytics.lexisnexis.com" => lambda { |path, params, uri| EzproxyProviders::LexisWebAnalytics::parse(path, params, uri) },
|
52
|
+
"lib.myilibrary.com" => lambda { |path, params, uri| EzproxyProviders::MyILibrary::parse(path, params) },
|
53
|
+
"www.oxfordscholarship.com" => lambda { |path, params, uri| EzproxyProviders::Oxford::parse(path, params) },
|
54
|
+
"search.proquest.com" => lambda { |path, params, uri| EzproxyProviders::ProquestSearch::parse(path, params) },
|
55
|
+
"journals.sagepub.com" => lambda { |path, params, uri| EzproxyProviders::Sage::parse(path, params) },
|
65
56
|
"els-cdn.com" => lambda { |path, params, uri|
|
66
|
-
return ScienceDirect::parse(path, params).merge!({ 'provider' => 'elsevier-cdn'})
|
57
|
+
return EzproxyProviders::ScienceDirect::parse(path, params).merge!({ 'provider' => 'elsevier-cdn' })
|
67
58
|
},
|
68
59
|
"sciencedirect.com" => lambda { |path, params, uri|
|
69
|
-
return ScienceDirect::parse(path, params).merge!({ 'provider' => 'sciencedirect'})
|
60
|
+
return EzproxyProviders::ScienceDirect::parse(path, params).merge!({ 'provider' => 'sciencedirect' })
|
70
61
|
},
|
71
|
-
"scopus.com" => lambda { |path, params, uri| Scopus::parse(path, params) },
|
72
|
-
"springer.com" => lambda { |path, params, uri| Springer::parse(path, params) },
|
73
|
-
"www.tandfonline.com" => lambda { |path, params, uri| TandF::parse(path, params)},
|
62
|
+
"scopus.com" => lambda { |path, params, uri| EzproxyProviders::Scopus::parse(path, params) },
|
63
|
+
"springer.com" => lambda { |path, params, uri| EzproxyProviders::Springer::parse(path, params) },
|
64
|
+
"www.tandfonline.com" => lambda { |path, params, uri| EzproxyProviders::TandF::parse(path, params) },
|
74
65
|
"thomsonreuters.com" => lambda { |path, params, uri|
|
75
|
-
return WebOfKnowledge::parse(path, params).merge!({ 'provider' => 'thomsonreuters'})
|
66
|
+
return EzproxyProviders::WebOfKnowledge::parse(path, params).merge!({ 'provider' => 'thomsonreuters' })
|
76
67
|
},
|
77
68
|
"webofknowledge.com" => lambda { |path, params, uri|
|
78
|
-
return WebOfKnowledge::parse(path, params).merge!({ 'provider' => 'webofknowledge'})
|
69
|
+
return EzproxyProviders::WebOfKnowledge::parse(path, params).merge!({ 'provider' => 'webofknowledge' })
|
79
70
|
},
|
80
|
-
"
|
71
|
+
"westlaw.co.uk" => lambda { |path, params, uri| EzproxyProviders::Westlaw::parse(path, params) },
|
72
|
+
"wiley.com" => lambda { |path, params, uri| EzproxyProviders::Wiley::parse(path, params) }
|
81
73
|
}
|
82
74
|
|
83
75
|
|
@@ -106,26 +98,15 @@ class LogStash::Filters::Ezproxy < LogStash::Filters::Base
|
|
106
98
|
path = parsed_url['path']
|
107
99
|
params = parsed_url['params']
|
108
100
|
|
109
|
-
# if (uri.host == "ezproxy.lancs.ac.uk")
|
110
|
-
# if (uri.query)
|
111
|
-
# puts uri
|
112
|
-
# params = CGI::parse(uri.query)
|
113
|
-
# if params.key?('url')
|
114
|
-
# uri = URI(params['url'][0])
|
115
|
-
# elsif params.key?('qurl')
|
116
|
-
# uri = URI(params['qurl'][0])
|
117
|
-
# end
|
118
|
-
# event.tag("requested_host_ezproxy")
|
119
|
-
# event.set("requested_host", uri.host)
|
120
|
-
# end
|
121
|
-
# end
|
122
|
-
|
123
101
|
unless uri == nil
|
124
102
|
|
125
|
-
|
126
103
|
begin
|
127
|
-
|
128
|
-
|
104
|
+
if @mime_tag
|
105
|
+
data.merge!(Mime::parse(path))
|
106
|
+
end
|
107
|
+
if @doc_id_tag
|
108
|
+
data.merge!(ParamHelper::parse(params))
|
109
|
+
end
|
129
110
|
|
130
111
|
@@hosts.each do |key, value|
|
131
112
|
if uri.host.include?(key)
|
@@ -142,8 +123,10 @@ class LogStash::Filters::Ezproxy < LogStash::Filters::Base
|
|
142
123
|
puts uri
|
143
124
|
event.tag("ezproxy_parse_failure")
|
144
125
|
end
|
145
|
-
|
146
|
-
|
126
|
+
if @path_tag
|
127
|
+
data['path'] = path
|
128
|
+
end
|
129
|
+
event.set(@target, data)
|
147
130
|
else
|
148
131
|
event.tag("ezproxy_parse_failure")
|
149
132
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
module EzproxyProviders end
|
@@ -1,5 +1,7 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative '../ezproxy_providers'
|
2
|
+
|
3
|
+
class EzproxyProviders::Cambridge
|
4
|
+
def self.parse (path, params, uri)
|
3
5
|
|
4
6
|
url = uri.to_s
|
5
7
|
|
@@ -120,4 +122,4 @@ module Cambridge
|
|
120
122
|
return data
|
121
123
|
|
122
124
|
end
|
123
|
-
end
|
125
|
+
end
|
@@ -1,8 +1,9 @@
|
|
1
1
|
|
2
2
|
require 'uri'
|
3
3
|
require 'cgi'
|
4
|
+
require_relative '../ezproxy_providers'
|
4
5
|
|
5
|
-
|
6
|
+
class EzproxyProviders::Ebscohost
|
6
7
|
|
7
8
|
@openUrlFields = {
|
8
9
|
'issn' => 'print_identifier',
|
@@ -14,7 +15,7 @@ module Ebscohost
|
|
14
15
|
'id' => 'unit_id'
|
15
16
|
}
|
16
17
|
|
17
|
-
def
|
18
|
+
def self.parse (path, params, uri)
|
18
19
|
|
19
20
|
data = {
|
20
21
|
"provider" => "ebscohost"
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require_relative '../ezproxy_providers'
|
2
|
+
|
3
|
+
class EzproxyProviders::Elsevier
|
4
|
+
def self.parse (path, params)
|
5
|
+
|
6
|
+
data = {
|
7
|
+
"provider" => "elsevier"
|
8
|
+
}
|
9
|
+
|
10
|
+
if match = /\/(product|pdfreader)\/([a-z0-9\-]+)/i.match(path)
|
11
|
+
data['unit_id'] = match[2]
|
12
|
+
data['title_id'] = match[2]
|
13
|
+
end
|
14
|
+
|
15
|
+
return data
|
16
|
+
|
17
|
+
end
|
18
|
+
end
|
@@ -1,5 +1,7 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative '../ezproxy_providers'
|
2
|
+
|
3
|
+
class EzproxyProviders::MyILibrary
|
4
|
+
def self.parse (path, params)
|
3
5
|
|
4
6
|
data = {
|
5
7
|
"provider" => "myilibrary"
|
@@ -15,7 +17,9 @@ module MyILibrary
|
|
15
17
|
|
16
18
|
elsif (/\/Viewer\/getImage\_Servlet\.aspx/i.match(path))
|
17
19
|
data['rtype'] = 'BOOK_PAGE'
|
18
|
-
|
20
|
+
if params.key?('codec')
|
21
|
+
data['mime'] = params['codec'][0].upcase
|
22
|
+
end
|
19
23
|
|
20
24
|
elsif (/\/Viewer\/get[DP]MP\_Servlet\.aspx/i.match(path))
|
21
25
|
data['rtype'] = 'BOOK_PAGE'
|
@@ -1,5 +1,7 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative '../ezproxy_providers'
|
2
|
+
|
3
|
+
class EzproxyProviders::Oxford
|
4
|
+
def self.parse (path, params)
|
3
5
|
|
4
6
|
data = {
|
5
7
|
"provider" => "oxford"
|
@@ -25,7 +27,7 @@ module Oxford
|
|
25
27
|
data['rtype'] = 'BOOK'
|
26
28
|
end
|
27
29
|
|
28
|
-
if params.key?('print')
|
30
|
+
if params.key?('print') && !params['print'][0].nil?
|
29
31
|
data['mime'] = params['print'][0].upcase
|
30
32
|
end
|
31
33
|
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require_relative '../ezproxy_providers'
|
2
|
+
|
3
|
+
class EzproxyProviders::ProquestSearch
|
4
|
+
def self.parse (path, params)
|
5
|
+
|
6
|
+
data = {
|
7
|
+
"provider" => "proquest"
|
8
|
+
}
|
9
|
+
|
10
|
+
if params.key?('t:ac')
|
11
|
+
data['unit_id'] = params['t:ac'][0]
|
12
|
+
data['title_id'] = params['t:ac'][0]
|
13
|
+
end
|
14
|
+
|
15
|
+
if (match = /\/docview\/([0-9]+)\//i.match(path))
|
16
|
+
data['unit_id'] = match[1]
|
17
|
+
data['title_id'] = match[1]
|
18
|
+
|
19
|
+
if /fulltextpdf/i.match(path)
|
20
|
+
data['mime'] = 'PDF'
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
return data
|
25
|
+
|
26
|
+
end
|
27
|
+
end
|