logstash-filter-ezproxy 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/lib/logstash/filters/cambridge.rb +123 -0
- data/lib/logstash/filters/dawsonera.rb +0 -5
- data/lib/logstash/filters/ebscohost.rb +26 -0
- data/lib/logstash/filters/emerald.rb +2 -6
- data/lib/logstash/filters/ezproxy.rb +79 -40
- data/lib/logstash/filters/gale.rb +21 -10
- data/lib/logstash/filters/heinonline.rb +44 -0
- data/lib/logstash/filters/lexis_webanalytics.rb +81 -0
- data/lib/logstash/filters/myilibrary.rb +29 -0
- data/lib/logstash/filters/oxford.rb +38 -0
- data/lib/logstash/filters/proquest.rb +42 -0
- data/lib/logstash/filters/sciencedirect.rb +1 -3
- data/lib/logstash/filters/scopus.rb +57 -0
- data/lib/logstash/filters/springer.rb +1 -1
- data/lib/logstash/filters/webofknowledge.rb +85 -0
- data/lib/logstash/filters/wiley.rb +154 -116
- data/lib/logstash/helpers/mime_helper.rb +38 -0
- data/lib/logstash/helpers/param_helper.rb +32 -0
- data/lib/logstash/helpers/url_parser.rb +2 -2
- data/logstash-filter-ezproxy.gemspec +2 -2
- data/spec/filters/cambridge/cambridge.2013-10-28.csv +13 -0
- data/spec/filters/cambridge/cambridge_spec.rb +27 -0
- data/spec/filters/ebscohost/ebscohost.2014-08-21.csv +5 -0
- data/spec/filters/ezproxy_spec.rb +1 -1
- data/spec/filters/gale/gale_spec.rb +0 -2
- data/spec/filters/heinonline/heinonline.2015-05-18.csv +12 -0
- data/spec/filters/heinonline/heinonline_spec.rb +20 -0
- data/spec/filters/lexis_webanalytics/lexis360.2017-04-28.csv +9 -0
- data/spec/filters/lexis_webanalytics/lexis_webanalytics_spec.rb +21 -0
- data/spec/filters/myilibrary/myilibrary.2018-02-09.csv +6 -0
- data/spec/filters/myilibrary/myilibrary_spec.rb +20 -0
- data/spec/filters/oxford/oxford.2018-02-15.csv +5 -0
- data/spec/filters/oxford/oxford_spec.rb +21 -0
- data/spec/filters/proquest/proquest.2018-02-09.csv +6 -0
- data/spec/filters/proquest/proquest_spec.rb +21 -0
- data/spec/filters/scopus/scopus.2016-07-18.csv +7 -0
- data/spec/filters/scopus/scopus_spec.rb +19 -0
- data/spec/filters/webofknowledge/webofknowledge_spec.rb +21 -0
- data/spec/filters/webofknowledge/wos.2017-01-13.csv +11 -0
- data/spec/filters/wiley/wiley.2018-02-07.csv +9 -0
- data/spec/filters/wiley/wiley_spec.rb +19 -0
- metadata +50 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eebd1de06645fbe8aeab87e450179bfd036476a62a9bbad3eb41dc0a68005551
|
4
|
+
data.tar.gz: 2e40d938da326ffb9b0c7b277f4052a5c57ed752173b91ad7c2c789961833a04
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9a622e16fff02eaed6b7267a047092d95a5d9788fc0705bdd63336859b9a8cddbb75b79e4c551d1fae90de6740a6b69f7f537ea8f48a589dfb2db5c3be378677
|
7
|
+
data.tar.gz: 9ba08935fa09b47a6da50d16f6acf3d66c4987f7bc09a2dff5889603286ecd3faf2b4fa77071c544ccdf560fa6de5d8b210dfd07b4904e02691ac16b12f4a9f9
|
data/Gemfile
CHANGED
@@ -0,0 +1,123 @@
|
|
1
|
+
# Extracts request metadata from Cambridge URLs.
#
# Two URL families are recognised:
# * legacy "/action/<name>" URLs, where the identifiers travel in the
#   query string (jid, aid, iid, fileId, volumeId, issueId, pdftype);
# * "/core/..." URLs, where the identifiers are embedded in the path.
module Cambridge
  # Builds the metadata hash for one request.
  #
  # @param path [String] the path component of the requested URL
  # @param params [Hash{String => Array<String>}] query parameters, each key
  #   mapping to the list of values given for it (the code only ever reads
  #   the first value)
  # @param uri [#to_s] the full requested URL; used for the "/action/..."
  #   lookup unless a "url" query parameter overrides it
  # @return [Hash] always contains "provider" => "cambridge"; the other keys
  #   (rtype, mime, unit_id, title_id, volume, issue, doi, pii,
  #   print_identifier, online_identifier) are set depending on the URL.
  #   A key whose source parameter is absent is set to nil rather than
  #   raising.
  def self.parse(path, params, uri)
    url = uri.to_s

    data = {
      "provider" => "cambridge"
    }

    # A "url" query parameter, when present, replaces the request URL for
    # the "/action/..." detection below.
    url = first_param(params, 'url') if params.key?('url')

    file_id = first_param(params, 'fileId')
    if file_id && !file_id.include?('|')
      # Characters 1-4 and 5-8 of the fileId are joined as "XXXX-XXXX".
      # String#[] returns nil when the start offset is past the end of the
      # string, so a too-short fileId simply yields no print_identifier
      # instead of raising on nil concatenation.
      issn_head = file_id[1, 4]
      issn_tail = file_id[5, 4]
      data['print_identifier'] = "#{issn_head}-#{issn_tail}" if issn_head && issn_tail
      data['unit_id'] = file_id
    end

    data['title_id'] = first_param(params, 'jid') if params.key?('jid')

    match = /\/action\/([a-z]+)/i.match(url)

    if match
      case match[1]
      when 'displayJournal'
        data['unit_id'] = first_param(params, 'jid')
        data['rtype'] = 'TOC'
        data['mime'] = 'MISC'

      when 'displayJournalTab'
        data['rtype'] = 'TOC'
        data['mime'] = 'MISC'

      when 'displayIssue'
        data['rtype'] = 'TOC'
        data['mime'] = 'MISC'
        data['unit_id'] = first_param(params, 'iid')
        data['volume'] = first_param(params, 'volumeId')
        data['issue'] = first_param(params, 'issueId')

      when 'displayFulltext'
        if params.key?('pdftype')
          data['unit_id'] = first_param(params, 'aid')
          data['rtype'] = 'ARTICLE'
          data['mime'] = 'PDF'
          data['volume'] = first_param(params, 'volumeId')
          data['issue'] = first_param(params, 'issueId')
        else
          # Keep the unit_id derived from fileId above, if there was one.
          data['unit_id'] = first_param(params, 'aid') unless data.key?('unit_id')

          data['rtype'] = 'ARTICLE'
          data['mime'] = 'HTML'
          data['volume'] = first_param(params, 'volumeId')
          data['issue'] = first_param(params, 'issueId')

          doi_suffix = first_param(params, 'fileId')
          data['doi'] = '10.1017/' + doi_suffix if doi_suffix
        end

      when 'displayAbstract'
        data['rtype'] = 'ABS'
        data['mime'] = 'HTML'
        doi_suffix = first_param(params, 'fileId')
        data['doi'] = '10.1017/' + doi_suffix if doi_suffix

      else
        # Unknown action: explicitly clear any title_id taken from "jid".
        data['title_id'] = nil
      end

    elsif (match = /^\/core\/services\/aop-cambridge-core\/content\/view\/[a-z0-9]+\/(([SB]?[0-9]+)[a-z0-9._-]+)\.pdf\//i.match(path))
      data['mime'] = 'PDF'
      data['unit_id'] = match[1]

      # An identifier starting with "S" is reported as an article (pii);
      # anything else is reported as a book section.
      if /^S/i.match(match[2])
        data['rtype'] = 'ARTICLE'
        data['pii'] = match[2]
      else
        data['rtype'] = 'BOOK_SECTION'
        data['online_identifier'] = match[2]
      end

    elsif (match = /^\/core\/journals\/([a-z-]+)\/(article|issue)\/([a-z0-9-]+)/i.match(path))
      case match[2]
      when 'article'
        data['mime'] = 'HTML'
        data['rtype'] = 'ARTICLE'
        # The third capture cannot contain "/", so it is already the slug.
        data['unit_id'] = match[3]
        data['title_id'] = match[1]

      when 'issue'
        data['mime'] = 'MISC'
        data['rtype'] = 'TOC'
        data['unit_id'] = match[1] + '/issue/'
        data['title_id'] = match[1]
      end

    elsif (match = /^\/core\/books\/([a-z0-9-]+)\/[a-z0-9]+$/i.match(path))
      data['rtype'] = 'TOC'
      data['mime'] = 'MISC'
      data['unit_id'] = match[1]
      data['title_id'] = match[1]

    elsif (match = /^\/core\/books\/(([a-z0-9-]+)\/[a-z0-9-]+)\/[a-z0-9]+\/core-reader$/i.match(path))
      data['rtype'] = 'BOOK_SECTION'
      data['mime'] = 'HTML'
      data['unit_id'] = match[1]
      data['title_id'] = match[2]
    else
      data['title_id'] = nil
    end

    data
  end

  # Returns the first value stored under +name+, or nil when the key is
  # missing or holds no value. Never raises on an absent key.
  def self.first_param(params, name)
    values = params[name]
    values && values[0]
  end
  private_class_method :first_param
end
|
@@ -1,7 +1,4 @@
|
|
1
1
|
|
2
|
-
require 'uri'
|
3
|
-
require 'cgi'
|
4
|
-
|
5
2
|
module DawsonEra
|
6
3
|
def DawsonEra.parse (path, params)
|
7
4
|
|
@@ -9,8 +6,6 @@ module DawsonEra
|
|
9
6
|
"provider" => "dawsonera"
|
10
7
|
}
|
11
8
|
|
12
|
-
params = {}
|
13
|
-
|
14
9
|
if (match = /^(\/abstract\/([0-9]+))$/.match(path))
|
15
10
|
data['rtype'] = 'ABS'
|
16
11
|
data['mime'] = 'MISC'
|
@@ -20,6 +20,31 @@ module Ebscohost
|
|
20
20
|
"provider" => "ebscohost"
|
21
21
|
}
|
22
22
|
|
23
|
+
if (/^\/ebscoviewerservice/i.match(path))
|
24
|
+
data['rtype'] = 'EBOOK'
|
25
|
+
end
|
26
|
+
|
27
|
+
if (params.key?('an'))
|
28
|
+
data['unit_id'] = params['an'][0]
|
29
|
+
data['title_id'] = params['an'][0]
|
30
|
+
end
|
31
|
+
|
32
|
+
if (params.key?('retrievalFormat'))
|
33
|
+
data['mime'] = params['retrievalFormat'][0]
|
34
|
+
end
|
35
|
+
|
36
|
+
artifact_params = %w(artifactId theDoid doid)
|
37
|
+
|
38
|
+
artifact_params.each do |p|
|
39
|
+
if params.key?(p)
|
40
|
+
if (param_match = /(.+)\$(.+)\$([a-zA-Z]+)/.match(params[p][0]))
|
41
|
+
data['unit_id'] = param_match[2]
|
42
|
+
data['title_id'] = param_match[2]
|
43
|
+
data['mime'] = param_match[3]
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
23
48
|
if ((match = /^\/(ehost|eds)\/([a-z]+)(?:\/[a-z]+)?$/i.match(path)))
|
24
49
|
category = match[2].downcase
|
25
50
|
|
@@ -100,6 +125,7 @@ module Ebscohost
|
|
100
125
|
if (data['unit_id'] && data['unit_id'].downcase.start_with?('doi:'))
|
101
126
|
data['doi'] = data['unit_id'] = data['unit_id'][4..-1]
|
102
127
|
end
|
128
|
+
|
103
129
|
end
|
104
130
|
|
105
131
|
return data
|
@@ -1,7 +1,3 @@
|
|
1
|
-
|
2
|
-
require 'uri'
|
3
|
-
require 'cgi'
|
4
|
-
|
5
1
|
module Emerald
|
6
2
|
def Emerald.parse (path, params)
|
7
3
|
|
@@ -40,7 +36,7 @@ module Emerald
|
|
40
36
|
|
41
37
|
data['mime'] = 'MISC'
|
42
38
|
data['title_id'] = match[1]
|
43
|
-
data['unit_id'] = 'loi/' +match[1]
|
39
|
+
data['unit_id'] = 'loi/' + match[1]
|
44
40
|
elsif ((match = /^\/toc\/([a-z]+)\/([0-9]+)\/([0-9]+)/.match(path)))
|
45
41
|
data['rtype'] = 'TOC'
|
46
42
|
data['mime'] = 'MISC'
|
@@ -62,7 +58,7 @@ module Emerald
|
|
62
58
|
end
|
63
59
|
|
64
60
|
data['title_id'] = match[4]
|
65
|
-
data['unit_id'] =data['doi'] = match[2] + '/' + match[3]
|
61
|
+
data['unit_id'] = data['doi'] = match[2] + '/' + match[3]
|
66
62
|
elsif ((match = /^\/doi\/([a-z]+)\/([0-9]{2}\.[0-9]{4,5})\/([0-9]+)$/.match(path)))
|
67
63
|
|
68
64
|
if (match[1] === 'abs')
|
@@ -2,19 +2,28 @@
|
|
2
2
|
require "logstash/filters/base"
|
3
3
|
require "logstash/namespace"
|
4
4
|
require_relative '../helpers/url_parser'
|
5
|
+
require_relative '../helpers/param_helper'
|
6
|
+
require_relative '../helpers/mime_helper'
|
7
|
+
require_relative "./cambridge"
|
8
|
+
require_relative "./dawsonera"
|
9
|
+
require_relative "./ebscohost"
|
10
|
+
require_relative "./emerald"
|
11
|
+
require_relative "./ft"
|
12
|
+
require_relative "./gale"
|
13
|
+
require_relative "./heinonline"
|
14
|
+
require_relative "./ieee"
|
5
15
|
require_relative "./jstor"
|
16
|
+
require_relative "./lexis_webanalytics"
|
6
17
|
require_relative "./lexisnexis"
|
18
|
+
require_relative "./myilibrary"
|
19
|
+
require_relative "./proquest"
|
7
20
|
require_relative "./sage"
|
8
|
-
require_relative "./wiley"
|
9
21
|
require_relative "./sciencedirect"
|
10
|
-
require_relative "./
|
11
|
-
require_relative "./tandf"
|
12
|
-
require_relative "./emerald"
|
13
|
-
require_relative "./ebscohost"
|
14
|
-
require_relative "./gale"
|
15
|
-
require_relative "./ft"
|
22
|
+
require_relative "./scopus"
|
16
23
|
require_relative "./springer"
|
17
|
-
require_relative "./
|
24
|
+
require_relative "./tandf"
|
25
|
+
require_relative "./webofknowledge"
|
26
|
+
require_relative "./wiley"
|
18
27
|
require 'uri'
|
19
28
|
require 'cgi'
|
20
29
|
|
@@ -38,9 +47,38 @@ class LogStash::Filters::Ezproxy < LogStash::Filters::Base
|
|
38
47
|
# The url to be parsed by the filter
|
39
48
|
config :url, :validate => :string, :required => true
|
40
49
|
|
41
|
-
|
42
|
-
|
43
|
-
|
50
|
+
@@hosts = {
|
51
|
+
"www.cambridge.org" => lambda { |path, params, uri| Cambridge::parse(path, params, uri) },
|
52
|
+
"www.dawsonera.com" => lambda { |path, params, uri| DawsonEra::parse(path, params) },
|
53
|
+
"ebscohost.com" => lambda { |path, params, uri| Ebscohost::parse(path, params, uri) },
|
54
|
+
"emeraldinsight.com" => lambda { |path, params, uri| Emerald::parse(path, params) },
|
55
|
+
"www.ft.com" => lambda { |path, params, uri| FT::parse(path, params) },
|
56
|
+
"galegroup.com" => lambda { |path, params, uri| Gale::parse(path, params) },
|
57
|
+
"heinonline.org" => lambda { |path, params, uri| HeinOnline::parse(path, params) },
|
58
|
+
"ieee.org" => lambda { |path, params, uri| IEEE::parse(path, params) },
|
59
|
+
"www.jstor.org" => lambda { |path, params, uri| Jstor::parse(path, params)},
|
60
|
+
"www.lexisnexis.com" => lambda { |path, params, uri| LexisNexis::parse(path, params) },
|
61
|
+
"webanalytics.lexisnexis.com" => lambda { |path, params, uri| LexisWebAnalytics::parse(path, params, uri) },
|
62
|
+
"lib.myilibrary.com" => lambda { |path, params, uri| MyILibrary::parse(path, params) },
|
63
|
+
"ebookcentral.proquest.com" => lambda { |path, params, uri| Proquest::parse(path, params) },
|
64
|
+
"journals.sagepub.com" => lambda { |path, params, uri| Sage::parse(path, params) },
|
65
|
+
"els-cdn.com" => lambda { |path, params, uri|
|
66
|
+
return ScienceDirect::parse(path, params).merge!({ 'provider' => 'elsevier-cdn'})
|
67
|
+
},
|
68
|
+
"sciencedirect.com" => lambda { |path, params, uri|
|
69
|
+
return ScienceDirect::parse(path, params).merge!({ 'provider' => 'sciencedirect'})
|
70
|
+
},
|
71
|
+
"scopus.com" => lambda { |path, params, uri| Scopus::parse(path, params) },
|
72
|
+
"springer.com" => lambda { |path, params, uri| Springer::parse(path, params) },
|
73
|
+
"www.tandfonline.com" => lambda { |path, params, uri| TandF::parse(path, params)},
|
74
|
+
"thomsonreuters.com" => lambda { |path, params, uri|
|
75
|
+
return WebOfKnowledge::parse(path, params).merge!({ 'provider' => 'thomsonreuters'})
|
76
|
+
},
|
77
|
+
"webofknowledge.com" => lambda { |path, params, uri|
|
78
|
+
return WebOfKnowledge::parse(path, params).merge!({ 'provider' => 'webofknowledge'})
|
79
|
+
},
|
80
|
+
"wiley.com" => lambda { |path, params, uri| Wiley::parse(path, params)}
|
81
|
+
}
|
44
82
|
|
45
83
|
|
46
84
|
public
|
@@ -50,7 +88,16 @@ class LogStash::Filters::Ezproxy < LogStash::Filters::Base
|
|
50
88
|
|
51
89
|
public
|
52
90
|
def filter(event)
|
53
|
-
|
91
|
+
begin
|
92
|
+
input = URI::extract(event.get(@url))[0]
|
93
|
+
rescue => e
|
94
|
+
puts e.message
|
95
|
+
puts "at"
|
96
|
+
puts e.backtrace.inspect
|
97
|
+
puts "for"
|
98
|
+
puts @url
|
99
|
+
event.tag("ezproxy_parse_failure")
|
100
|
+
end
|
54
101
|
|
55
102
|
data = {}
|
56
103
|
|
@@ -75,36 +122,28 @@ class LogStash::Filters::Ezproxy < LogStash::Filters::Base
|
|
75
122
|
|
76
123
|
unless uri == nil
|
77
124
|
|
78
|
-
|
79
|
-
|
80
|
-
data =
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
when uri.host.include?("els-cdn.com")
|
98
|
-
data = ScienceDirect::parse(path, params)
|
99
|
-
when uri.host.include?("springer.com")
|
100
|
-
data = Springer::parse(path, params)
|
101
|
-
when uri.host.include?("galegroup.com")
|
102
|
-
data = Gale::parse(path, params)
|
103
|
-
when uri.host.include?("ieee.org")
|
104
|
-
data = IEEE::parse(path, params)
|
125
|
+
|
126
|
+
begin
|
127
|
+
data = Mime::parse(path)
|
128
|
+
data.merge!(ParamHelper::parse(params))
|
129
|
+
|
130
|
+
@@hosts.each do |key, value|
|
131
|
+
if uri.host.include?(key)
|
132
|
+
data.merge!(value.call(path, params, uri))
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
event.tag("ezproxy_parse_success")
|
137
|
+
rescue => e
|
138
|
+
puts e.message
|
139
|
+
puts "at"
|
140
|
+
puts e.backtrace.inspect
|
141
|
+
puts "for"
|
142
|
+
puts uri
|
143
|
+
event.tag("ezproxy_parse_failure")
|
105
144
|
end
|
145
|
+
data['path'] = path
|
106
146
|
event.set("request_metadata", data)
|
107
|
-
event.tag("ezproxy_parse_success")
|
108
147
|
else
|
109
148
|
event.tag("ezproxy_parse_failure")
|
110
149
|
end
|
@@ -1,6 +1,3 @@
|
|
1
|
-
require 'uri'
|
2
|
-
require 'cgi'
|
3
|
-
|
4
1
|
module Gale
|
5
2
|
def Gale.parse (path, params)
|
6
3
|
|
@@ -38,37 +35,51 @@ module Gale
|
|
38
35
|
elsif (/^\/ps\/pdfViewer$/i.match(path))
|
39
36
|
data['rtype'] = 'BOOK'
|
40
37
|
data['mime'] = 'PDF'
|
41
|
-
|
38
|
+
if params.key?('docId')
|
39
|
+
data['unit_id'] = params['docId'][0]
|
40
|
+
end
|
42
41
|
|
43
42
|
elsif (/^\/gdc-artemis\/bulkPdfDownload$/i.match(path))
|
44
43
|
data['rtype'] = 'ARTICLE'
|
45
44
|
data['mime'] = 'PDF'
|
46
|
-
|
45
|
+
if params.key?('file_name')
|
46
|
+
data['unit_id'] = params['file_name'][0]
|
47
|
+
end
|
47
48
|
|
48
49
|
elsif (/^\/gdc\/artemis\/ManuscriptsDetailsPage\/ManuscriptsDetailsWindow$/i.match(path))
|
49
50
|
data['rtype'] = 'ARTICLE'
|
50
51
|
data['mime'] = 'HTML'
|
51
|
-
|
52
|
+
if params.key?('documentId')
|
53
|
+
data['unit_id'] = params['documentId'][0]
|
54
|
+
end
|
52
55
|
|
53
56
|
elsif (/^\/gdsc\/retrieve.do$/i.match(path))
|
54
57
|
data['rtype'] = 'ARTICLE'
|
55
58
|
data['mime'] = 'HTML'
|
56
|
-
|
59
|
+
if params.key?('contentSet')
|
60
|
+
data['unit_id'] = params['contentSet'][0]
|
61
|
+
end
|
57
62
|
|
58
63
|
elsif (/^\/gdsc\/downloadDocument.do$/i.match(path))
|
59
64
|
data['rtype'] = 'ARTICLE'
|
60
65
|
data['mime'] = 'PDF'
|
61
|
-
|
66
|
+
if params.key?('docId')
|
67
|
+
data['unit_id'] = params['docId'][0]
|
68
|
+
end
|
62
69
|
|
63
70
|
elsif (/^\/([a-z]+)\/([a-z]+)\/MonographsDetailsPage\/MonographsDetailsWindow$/i.match(path))
|
64
71
|
data['rtype'] = 'ARTICLE'
|
65
72
|
data['mime'] = 'HTML'
|
66
|
-
|
73
|
+
if params.key?('documentId')
|
74
|
+
data['unit_id'] = params['documentId'][0]
|
75
|
+
end
|
67
76
|
|
68
77
|
elsif (/^\/([a-z]+)\/archive\/FeatureArticlesDetailsPage\/FeatureArticlesDetailsWindow$/i.match(path))
|
69
78
|
data['rtype'] = 'ENCYCLOPAEDIA_ENTRY'
|
70
79
|
data['mime'] = 'HTML'
|
71
|
-
|
80
|
+
if params.key?('documentId')
|
81
|
+
data['unit_id'] = params['documentId'][0]
|
82
|
+
end
|
72
83
|
end
|
73
84
|
|
74
85
|
return data
|
@@ -0,0 +1,44 @@
|
|
1
|
+
|
2
|
+
# Extracts request metadata from HeinOnline URLs (paths under "/HOL/").
module HeinOnline
  # Builds the metadata hash for one request.
  #
  # @param path [String] the path component of the requested URL
  # @param params [Hash{String => Array<String>}] query parameters, each key
  #   mapping to the list of values given for it (only the first value is
  #   read)
  # @return [Hash] always contains "provider" => "heinonline"; rtype, mime,
  #   title_id and unit_id are added when the path is recognised and the
  #   relevant parameters are present. Missing parameters never raise.
  def self.parse(path, params)
    data = {
      "provider" => "heinonline"
    }

    if /^\/HOL\/(P.*)/.match(path)
      data['rtype'] = 'ARTICLE'
      data['mime'] = 'PDF'
      apply_handle(data, params)

    elsif /^\/HOL\/Index/.match(path)
      data['rtype'] = 'TOC'
      data['mime'] = 'HTML'
      index = first_param(params, 'index')
      if index
        data['title_id'] = index.split('/')[1]
        data['unit_id'] = index
      end

    elsif /^\/HOL\/.*/.match(path)
      data['rtype'] = 'TOC'
      data['mime'] = 'HTML'
      apply_handle(data, params)
    end

    data
  end

  # Fills title_id and unit_id from the "handle" parameter.
  # title_id is the second "/"-separated segment of the handle; unit_id is
  # "<handle>/<id>" (with an empty id when none is given), followed by
  # "/<number_of_pages>" when that parameter has a value.
  def self.apply_handle(data, params)
    handle = first_param(params, 'handle')
    return unless handle

    data['title_id'] = handle.split('/')[1]
    data['unit_id'] = handle + '/' + (first_param(params, 'id') || "")

    page_count = first_param(params, 'number_of_pages')
    data['unit_id'] += '/' + page_count if page_count
  end
  private_class_method :apply_handle

  # Returns the first value stored under +name+, or nil when the key is
  # missing or holds no value. Never raises on an absent key.
  def self.first_param(params, name)
    values = params[name]
    values && values[0]
  end
  private_class_method :first_param
end
|