logstash-filter-ezproxy 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +2 -0
- data/CONTRIBUTORS +10 -0
- data/DEVELOPER.md +2 -0
- data/Gemfile +5 -0
- data/LICENSE +7 -0
- data/README.md +86 -0
- data/lib/logstash/filters/dawsonera.rb +41 -0
- data/lib/logstash/filters/ebscohost.rb +116 -0
- data/lib/logstash/filters/emerald.rb +96 -0
- data/lib/logstash/filters/ezproxy.rb +93 -0
- data/lib/logstash/filters/jstor.rb +112 -0
- data/lib/logstash/filters/lexisnexis.rb +37 -0
- data/lib/logstash/filters/sage.rb +39 -0
- data/lib/logstash/filters/sciencedirect.rb +171 -0
- data/lib/logstash/filters/tandf.rb +55 -0
- data/lib/logstash/filters/wiley.rb +202 -0
- data/logstash-filter-ezproxy.gemspec +21 -0
- data/spec/filters/dawsonera/dawsonera.2014-09-03.csv +4 -0
- data/spec/filters/dawsonera/dawsonera_spec.rb +15 -0
- data/spec/filters/ebscohost/ebscohost.2014-08-21.csv +13 -0
- data/spec/filters/ebscohost/ebscohost_spec.rb +22 -0
- data/spec/filters/emerald/emerald.2015-08-11.csv +15 -0
- data/spec/filters/emerald/emerald_spec.rb +17 -0
- data/spec/filters/ezproxy_spec.rb +53 -0
- data/spec/filters/jstor/jstor.2013-10-03.csv +18 -0
- data/spec/filters/jstor/jstor_spec.rb +20 -0
- data/spec/filters/lexisnexis/lexisnexis.2013-05-17.csv +2 -0
- data/spec/filters/lexisnexis/lexisnexis_spec.rb +15 -0
- data/spec/filters/sage/sage_spec.rb +16 -0
- data/spec/filters/sage/sagej.2016-12-05.csv +6 -0
- data/spec/filters/sciencedirect/sciencedirect_spec.rb +17 -0
- data/spec/filters/sciencedirect/sd.2013-01-09.csv +28 -0
- data/spec/filters/tandf/tandf.2015-03-25.csv +9 -0
- data/spec/filters/tandf/tandf_spec.rb +17 -0
- data/spec/filters/wiley/wiley.2013-04-15.csv +28 -0
- data/spec/filters/wiley/wiley_spec.rb +19 -0
- data/spec/spec_helper.rb +2 -0
- metadata +130 -0
@@ -0,0 +1,112 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'cgi'
|
3
|
+
|
4
|
+
module Jstor
|
5
|
+
def Jstor.parse (input)
|
6
|
+
|
7
|
+
uri = URI(URI.unescape(input))
|
8
|
+
|
9
|
+
url = uri.path
|
10
|
+
params = {}
|
11
|
+
if (uri.query)
|
12
|
+
params = CGI::parse(uri.query)
|
13
|
+
end
|
14
|
+
|
15
|
+
data = {
|
16
|
+
"provider" => "jstor"
|
17
|
+
}
|
18
|
+
doi_prefix = "10.2307"
|
19
|
+
|
20
|
+
|
21
|
+
if (match = /^\/journal\/([a-z0-9]+)$/i.match(url))
|
22
|
+
data["rtype"] = "TOC"
|
23
|
+
data["mime"] = "MISC"
|
24
|
+
data["unit_id"] = match[1]
|
25
|
+
data["title_id"] = match[1]
|
26
|
+
|
27
|
+
elsif (match = /^\/stable\/10\.[0-9]+\/(([a-z]+)\.([0-9]+)\.([0-9]+)\.issue-([0-9]+))$/i.match(url))
|
28
|
+
data["rtype"] = "TOC"
|
29
|
+
data["mime"] = "MISC"
|
30
|
+
data["unit_id"] = match[1]
|
31
|
+
data["title_id"] = match[2]
|
32
|
+
data["issue"] = match[5]
|
33
|
+
|
34
|
+
if match[3].length >= 4
|
35
|
+
data["publication_date"] = match[3]
|
36
|
+
data["vol"] = match[4]
|
37
|
+
else
|
38
|
+
data["vol"] = match[3]
|
39
|
+
end
|
40
|
+
|
41
|
+
elsif (match = /^\/stable\/((10\.[0-9]+\/)?([a-z0-9]+))$/i.match(url))
|
42
|
+
data["rtype"] = "TOC"
|
43
|
+
data["mime"] = "MISC"
|
44
|
+
data["unit_id"] = match[3]
|
45
|
+
data["title_id"] = match[3]
|
46
|
+
|
47
|
+
if match[2]
|
48
|
+
data["doi"] = match[1]
|
49
|
+
end
|
50
|
+
|
51
|
+
elsif (match = /^\/stable\/(i[0-9]+)$/i.match(url))
|
52
|
+
data["rtype"] = "TOC"
|
53
|
+
data["mime"] = "MISC"
|
54
|
+
data["unit_id"] = match[1]
|
55
|
+
data["title_id"] = match[1]
|
56
|
+
|
57
|
+
elsif (/^\/action\/showPublication$/i.match(url))
|
58
|
+
if (params["journalCode"])
|
59
|
+
data["title_id"] = params["journalCode"][0]
|
60
|
+
data["unit_id"] = params["journalCode"][0]
|
61
|
+
data["rtype"] = 'TOC'
|
62
|
+
data["mime"] = 'MISC'
|
63
|
+
end
|
64
|
+
|
65
|
+
elsif (match = /^\/stable\/(get_image|pdf|pdfplus)\/((10\.[0-9]+\/)?([a-z0-9.]+?))(?:\.pdf)?$/i.match(url))
|
66
|
+
data["unit_id"] = match[4]
|
67
|
+
data["doi"] = match[3] ? match[2] : doi_prefix + "/" + match[2]
|
68
|
+
|
69
|
+
case match[1]
|
70
|
+
when 'get_image'
|
71
|
+
data["rtype"] = "ARTICLE_SECTION"
|
72
|
+
data["mime"] = "GIF"
|
73
|
+
when 'pdf'
|
74
|
+
data["rtype"] = "ARTICLE"
|
75
|
+
data["mime"] = "PDF"
|
76
|
+
when 'pdfplus'
|
77
|
+
data["rtype"] = "ARTICLE"
|
78
|
+
data["mime"] = "PDFPLUS"
|
79
|
+
end
|
80
|
+
|
81
|
+
idPattern = /^([a-z0-9]+)((?:\.(\d+))?\.(\d+)\.(\d+)\.(\w+))?/.match(match[4]) || [];
|
82
|
+
|
83
|
+
data["title_id"] = idPattern[1]
|
84
|
+
data["publication_date"] = idPattern[3]
|
85
|
+
data["vol"] = idPattern[4]
|
86
|
+
data["issue"] = idPattern[5]
|
87
|
+
|
88
|
+
if (idPattern[6] == 'cover')
|
89
|
+
data["rtype"] = 'COVER'
|
90
|
+
elsif (idPattern[6] == 'toc')
|
91
|
+
data["rtype"] = 'TOC'
|
92
|
+
else
|
93
|
+
if (idPattern[6] != nil)
|
94
|
+
first_page = idPattern[6].to_i
|
95
|
+
|
96
|
+
unless (first_page.to_f.nan?)
|
97
|
+
data["first_page"] = first_page.to_s
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
elsif (match = /^\/stable\/(info|view)\/([0-9]+)$/i.match(url))
|
103
|
+
data["rtype"] = match[1] === 'info' ? "ABS" : "PREVIEW"
|
104
|
+
data["mime"] = "MISC"
|
105
|
+
data["unit_id"] = match[2]
|
106
|
+
data["title_id"] = match[2]
|
107
|
+
data["issue"] = match[5]
|
108
|
+
end
|
109
|
+
|
110
|
+
return data
|
111
|
+
end
|
112
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module LexisNexis
|
4
|
+
def LexisNexis.parse (input)
|
5
|
+
uri = URI(URI.unescape(input))
|
6
|
+
|
7
|
+
path = uri.path
|
8
|
+
|
9
|
+
data = {
|
10
|
+
"provider" => "lexisnexis"
|
11
|
+
}
|
12
|
+
|
13
|
+
if (/\/droit\/results\/docview\/docview/.match(path))
|
14
|
+
if (uri.query)
|
15
|
+
params = CGI::parse(uri.query)
|
16
|
+
|
17
|
+
if (params["risb"])
|
18
|
+
data["title_id"] = params["risb"][0]
|
19
|
+
data["unit_id"] = params["risb"][0]
|
20
|
+
end
|
21
|
+
|
22
|
+
if (params["format"])
|
23
|
+
case params["format"][0]
|
24
|
+
when 'GNBFULL'
|
25
|
+
data["rtype"] = 'ARTICLE'
|
26
|
+
data["mime"] = 'HTML'
|
27
|
+
when 'AUTRECAS'
|
28
|
+
data["rtype"] = 'ARTICLE'
|
29
|
+
data["mime"] = 'HTML'
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
return data
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require "uri"
|
2
|
+
|
3
|
+
module Sage
  # Extracts resource metadata from a SAGE Journals URL.
  #
  # Two path layouts are recognised: /loi/... and /toc/... (tables of
  # contents) and /doi/<kind>/<doi> (articles, PDFs and figures).
  #
  # @param input [String] the requested URL, possibly percent-encoded
  # @return [Hash{String => String}] always holds "provider" => "sage"; may
  #   also hold "rtype", "mime", "title_id", "unit_id" and "doi"
  # @raise [URI::InvalidURIError] when the decoded input is not a valid URI
  def self.parse(input)
    # URI.unescape was removed in Ruby 3.0; the default parser exposes the
    # same decoder.
    uri = URI(URI::DEFAULT_PARSER.unescape(input))
    path = uri.path

    data = { "provider" => "sage" }

    if (match = %r{\A/(loi|toc)/(([a-z]+)/?([0-9]+)?/?([0-9]+)?)\z}i.match(path))
      data["rtype"] = "TOC"
      data["mime"] = "MISC"
      data["title_id"] = match[3]
      # A unit id is only recorded when a number follows the journal code.
      data["unit_id"] = match[2] if match[4]

    elsif (match = %r{\A/doi/([a-z]+)/(([0-9]{2})\.([0-9]{4})/([0-9]+))\z}i.match(path))
      data["rtype"] = "ARTICLE"
      data["mime"] = "HTML"
      data["doi"] = match[2]
      data["unit_id"] = match[5]

      # The pattern is case-insensitive, so normalise before comparing.
      case match[1].downcase
      when "pdf"
        data["mime"] = "PDF"
      when "figure"
        data["rtype"] = "FIGURE"
      end
    end

    data
  end
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module ScienceDirect
|
4
|
+
def ScienceDirect.parse (input)
|
5
|
+
uri = URI(URI.unescape(input))
|
6
|
+
|
7
|
+
path = uri.path
|
8
|
+
params = {}
|
9
|
+
|
10
|
+
if (uri.query)
|
11
|
+
params = CGI::parse(uri.query)
|
12
|
+
end
|
13
|
+
|
14
|
+
data = {
|
15
|
+
"provider" => "sciencedirect"
|
16
|
+
}
|
17
|
+
|
18
|
+
if (params.key?("_ob"))
|
19
|
+
if (params['_cdi'])
|
20
|
+
data['title_id'] = params['_cdi'][0]
|
21
|
+
end
|
22
|
+
|
23
|
+
case (params["_ob"][0])
|
24
|
+
when 'PdfDownloadURL'
|
25
|
+
data['mime'] = 'PDF'
|
26
|
+
|
27
|
+
data['rtype'] = 'ARTICLES_BUNDLE'
|
28
|
+
|
29
|
+
data['unit_id'] = params['_hubEid'][0]
|
30
|
+
data["pii"] = (params['_hubEid'][0] || '').split('-')[2]
|
31
|
+
|
32
|
+
|
33
|
+
if (params.key?("_isbn")|| params.key?('isBook'))
|
34
|
+
data['rtype'] = 'CHAPTERS_BUNDLE'
|
35
|
+
data['print_identifier'] = params['_isbn'][0]
|
36
|
+
data['title_id'] = params['_isbn'][0]
|
37
|
+
data['unit_id'] = params['_isbn'][0]
|
38
|
+
end
|
39
|
+
|
40
|
+
if (data["pii"])
|
41
|
+
data['title_id'] = data["pii"][1, 8]
|
42
|
+
data['print_identifier'] = data["pii"][1, 4] + "-" + data["pii"][5, 4]
|
43
|
+
end
|
44
|
+
|
45
|
+
when 'IssueURL'
|
46
|
+
data['title_id'] = (params['_tockey'][0] || '').split('#')[2]
|
47
|
+
data['rtype'] = 'TOC'
|
48
|
+
data['mime'] = 'MISC'
|
49
|
+
|
50
|
+
when 'ArticleURL'
|
51
|
+
case (params['_fmt'][0])
|
52
|
+
when 'summary'
|
53
|
+
data['rtype'] = 'ABS'
|
54
|
+
data['mime'] = 'MISC'
|
55
|
+
when 'full'
|
56
|
+
data['rtype'] = 'ARTICLE'
|
57
|
+
data['mime'] = 'HTML'
|
58
|
+
end
|
59
|
+
|
60
|
+
when 'MImg'
|
61
|
+
data['rtype'] = 'ARTICLE'
|
62
|
+
data['mime'] = 'PDF'
|
63
|
+
|
64
|
+
when 'MiamiImageURL'
|
65
|
+
if (params['_pii'])
|
66
|
+
|
67
|
+
data["pii"] = params['_pii'][0]
|
68
|
+
data['unit_id'] = params['_pii'][0]
|
69
|
+
|
70
|
+
if (params['_pii'][0][0] === 'B')
|
71
|
+
data['print_identifier'] = params['_pii'][0][1, 13]
|
72
|
+
data['title_id'] = data['print_identifier']
|
73
|
+
data['rtype'] = 'BOOK_SECTION'
|
74
|
+
data['mime'] = 'PDF'
|
75
|
+
|
76
|
+
else
|
77
|
+
data['print_identifier'] = params['_pii'][0][1, 4] + "-" + params['_pii'][0][5, 4]
|
78
|
+
data['title_id'] = params['_pii'][0][1, 8]
|
79
|
+
data['rtype'] = 'ARTICLE'
|
80
|
+
data['mime'] = 'PDF'
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
when 'PdfExcerptURL'
|
85
|
+
data['rtype'] = 'PREVIEW'
|
86
|
+
data['mime'] = 'PDF'
|
87
|
+
|
88
|
+
if (params['_imagekey'][0] && params['_piikey'][0])
|
89
|
+
data["pii"] = params['_piikey'][0]
|
90
|
+
if (match = /.?-[^-]+-([0-9]{4})([0-9]{3}[0-9Xx])([0-9A-Za-z]*)-main.pdf$/.match(params['_imagekey'][0]))
|
91
|
+
data['unit_id'] = params['_piikey'][0]
|
92
|
+
data['title_id'] = match[1] + match[2]
|
93
|
+
data['print_identifier'] = match[1] + '-' + match[2]
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
elsif (match = /^\/science\/article\/pii\/(([SB])?([0-9]{7}(?:[0-9]{5})?[0-9Xx])[0-9A-Za-z]*)(\/pdf(?:ft)?)?$/.match(path))
|
99
|
+
|
100
|
+
data["pii"] = match[1]
|
101
|
+
data['unit_id'] = match[1]
|
102
|
+
data['mime'] = match[4] ? 'PDF' : 'HTML'
|
103
|
+
|
104
|
+
if (match[2] == 'B')
|
105
|
+
data['rtype'] = 'BOOK_SECTION'
|
106
|
+
data['title_id'] = match[3]
|
107
|
+
data['print_identifier'] = match[3]
|
108
|
+
else
|
109
|
+
data['rtype'] = 'ARTICLE'
|
110
|
+
data['title_id'] = match[3][0, 8]
|
111
|
+
data['print_identifier'] = match[3][0, 4] + "-" + match[3][4, 4]
|
112
|
+
end
|
113
|
+
|
114
|
+
|
115
|
+
elsif ((match = /^\/science\/(journal|bookseries|handbooks|handbooks|book)\/([0-9Xx]{8,})(\/[0-9]+)?(\/[0-9]+)?$/.match(path)))
|
116
|
+
|
117
|
+
data['rtype'] = 'TOC'
|
118
|
+
data['mime'] = 'MISC'
|
119
|
+
data['unit_id'] = match[2]
|
120
|
+
data['title_id'] = match[2]
|
121
|
+
|
122
|
+
if (match[3])
|
123
|
+
data['unit_id'] += match[3]
|
124
|
+
end
|
125
|
+
|
126
|
+
case (match[1])
|
127
|
+
when 'journal', 'handbooks', 'bookseries'
|
128
|
+
data['print_identifier'] = match[2][0, 4] + "-" + match[2][4, 4]
|
129
|
+
when 'book'
|
130
|
+
data['print_identifier'] = match[2]
|
131
|
+
end
|
132
|
+
|
133
|
+
elsif (match = /^\/science\/MiamiMultiMediaURL\/[^\/]+(S([0-9]{4})([0-9]{3}[0-9Xx])[a-zA-Z0-9]*).*\.pdf$/.match(path))
|
134
|
+
|
135
|
+
data["pii"] = match[1]
|
136
|
+
data['unit_id'] = match[1]
|
137
|
+
data['title_id'] = match[2] + match[3]
|
138
|
+
data['print_identifier'] = match[2] + "-" + match[3]
|
139
|
+
data['rtype'] = 'ARTICLE'
|
140
|
+
data['mime'] = 'PDF'
|
141
|
+
|
142
|
+
elsif (match = /^\/(([SB])?([0-9]{7}(?:[0-9]{5})?[0-9Xx])[0-9A-Za-z]*)\/[0-9A-Za-z\-.]*-main\.pdf$/.match(path))
|
143
|
+
|
144
|
+
data["pii"] = match[1]
|
145
|
+
data['unit_id'] = match[1]
|
146
|
+
data['mime'] = 'PDF'
|
147
|
+
|
148
|
+
if (match[2] === 'B')
|
149
|
+
data['rtype'] = 'BOOK_SECTION'
|
150
|
+
data['title_id'] = match[3]
|
151
|
+
data['print_identifier'] = match[3]
|
152
|
+
else
|
153
|
+
data['rtype'] = 'ARTICLE'
|
154
|
+
data['title_id'] = match[3][0, 8]
|
155
|
+
data['print_identifier'] = match[3][0, 4] + "-" + match[3][4, 4]
|
156
|
+
end
|
157
|
+
|
158
|
+
elsif (path == '/science/publication')
|
159
|
+
data['rtype'] = 'TOC'
|
160
|
+
data['mime'] = 'MISC'
|
161
|
+
|
162
|
+
if (params['issn'])
|
163
|
+
data['print_identifier'] = params['issn'][0][0, 4] + "-" + params['issn'][0][4, 4]
|
164
|
+
data['unit_id'] = data['print_identifier']
|
165
|
+
data['title_id'] = params['issn'][0]
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
return data
|
170
|
+
end
|
171
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
|
2
|
+
require 'uri'
|
3
|
+
require 'cgi'
|
4
|
+
|
5
|
+
module TandF
  # Extracts resource metadata from a Taylor & Francis Online URL.
  #
  # Recognised paths are /doi/(full|pdf|abs)/<doi>, /toc/<code>/current and
  # /loi/<code>. The query string plays no part in the classification.
  #
  # @param input [String] the requested URL, possibly percent-encoded
  # @return [Hash{String => String}] always holds "provider" => "tandf"; may
  #   also hold "doi", "unit_id", "title_id", "print_identifier", "rtype" and
  #   "mime"
  # @raise [URI::InvalidURIError] when the decoded input is not a valid URI
  def self.parse(input)
    # URI.unescape was removed in Ruby 3.0; the default parser exposes the
    # same decoder.
    path = URI(URI::DEFAULT_PARSER.unescape(input)).path

    data = { "provider" => "tandf" }

    if (match = %r{\A/doi/(full|pdf|abs)/([0-9.]+/([0-9a-z.]+))\z}.match(path))
      data["doi"] = match[2]
      data["unit_id"] = match[3]

      # A DOI suffix that opens with eight digits starts with the ISSN.
      if /\A[0-9]{8}/.match(match[3])
        data["print_identifier"] = "#{match[3][0, 4]}-#{match[3][4, 4]}"
        data["title_id"] = data["print_identifier"]
      end

      case match[1]
      when "full"
        data["rtype"] = "ARTICLE"
        data["mime"] = "HTML"
      when "pdf"
        data["rtype"] = "ARTICLE"
        data["mime"] = "PDF"
      when "abs"
        data["rtype"] = "ABS"
        data["mime"] = "HTML"
      end

    elsif (match = %r{\A/toc/([a-zA-Z0-9]+)/current\z}.match(path) ||
                   %r{\A/loi/([a-zA-Z0-9]+)\z}.match(path))
      # Current-issue and list-of-issues pages are both tables of contents
      # keyed by the journal code.
      data["rtype"] = "TOC"
      data["mime"] = "HTML"
      data["title_id"] = match[1]
      data["unit_id"] = match[1]
    end

    data
  end
end
|
@@ -0,0 +1,202 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module Wiley
|
4
|
+
def Wiley.parse (input)
|
5
|
+
uri = URI(URI.unescape(input))
|
6
|
+
|
7
|
+
path = uri.path
|
8
|
+
|
9
|
+
data = {
|
10
|
+
"provider" => "wiley"
|
11
|
+
}
|
12
|
+
|
13
|
+
#!/usr/bin/env node
|
14
|
+
|
15
|
+
if ((match = /\/journal\/(10\.[0-9]+\/(\(ISSN\)([0-9]{4}-[0-9]{3}[0-9xX])))/i.match(path)))
|
16
|
+
data['doi'] = match[1];
|
17
|
+
data['unit_id'] = match[2];
|
18
|
+
data['rtype'] = 'TOC';
|
19
|
+
data['mime'] = 'MISC';
|
20
|
+
|
21
|
+
data['online_identifier'] = match[3];
|
22
|
+
|
23
|
+
elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.([0-9]{4})\.[^.]+\.[^.]+))\/issuetoc$/i.match(path)))
|
24
|
+
data['doi'] = match[1];
|
25
|
+
data['unit_id'] = match[2];
|
26
|
+
data['title_id'] = match[3].upcase;
|
27
|
+
data['rtype'] = 'TOC';
|
28
|
+
data['mime'] = 'MISC';
|
29
|
+
|
30
|
+
data['publication_date'] = match[4];
|
31
|
+
|
32
|
+
elsif ((match = /^\/doi\/(10\.[0-9]+\/(j\.([0-9]{4}-[0-9]{3}[0-9xX])\.([0-9]{4})\.[^.]+\.[^.]+))\/abstract$/i.match(path)))
|
33
|
+
data['doi'] = match[1];
|
34
|
+
data['unit_id'] = match[2];
|
35
|
+
data['rtype'] = 'ABS';
|
36
|
+
data['mime'] = 'MISC';
|
37
|
+
|
38
|
+
data['online_identifier'] = match[3];
|
39
|
+
data['publication_date'] = match[4];
|
40
|
+
|
41
|
+
elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.([0-9]{4})[0-9]+))\/abstract$/i.match(path)))
|
42
|
+
data['doi'] = match[1];
|
43
|
+
data['unit_id'] = match[2];
|
44
|
+
data['title_id'] = match[3].upcase;
|
45
|
+
data['rtype'] = 'ABS';
|
46
|
+
data['mime'] = 'MISC';
|
47
|
+
|
48
|
+
data['publication_date'] = match[4];
|
49
|
+
|
50
|
+
elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.[0-9]+))\/full$/i.match(path)))
|
51
|
+
data['doi'] = match[1];
|
52
|
+
data['unit_id'] = match[2];
|
53
|
+
data['title_id'] = match[3].upcase;
|
54
|
+
data['rtype'] = 'ARTICLE';
|
55
|
+
data['mime'] = 'HTML';
|
56
|
+
|
57
|
+
elsif ((match = /^\/doi\/(10\.[0-9]+\/(j\.([0-9]{4}-[0-9]{3}[0-9xX])\.([0-9]{4})\.[^.]+\.[^.]+))\/pdf$/i.match(path)))
|
58
|
+
data['doi'] = match[1];
|
59
|
+
data['unit_id'] = match[2];
|
60
|
+
data['rtype'] = 'ARTICLE';
|
61
|
+
data['mime'] = 'PDF';
|
62
|
+
|
63
|
+
data['online_identifier'] = match[3];
|
64
|
+
data['publication_date'] = match[4];
|
65
|
+
|
66
|
+
elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.[0-9]+))\/pdf$/i.match(path)))
|
67
|
+
data['doi'] = match[1];
|
68
|
+
data['unit_id'] = match[2];
|
69
|
+
data['title_id'] = match[3].upcase;
|
70
|
+
data['rtype'] = 'ARTICLE';
|
71
|
+
data['mime'] = 'PDF';
|
72
|
+
|
73
|
+
elsif ((match = /^\/book\/(10\.[0-9]+\/([0-9]+))$/i.match(path)))
|
74
|
+
data['doi'] = match[1];
|
75
|
+
data['unit_id'] = match[2];
|
76
|
+
data['title_id'] = match[2].upcase;
|
77
|
+
data['rtype'] = 'TOC';
|
78
|
+
data['mime'] = 'MISC';
|
79
|
+
|
80
|
+
data['print_identifier'] = match[2];
|
81
|
+
|
82
|
+
elsif (match = /^\/doi\/(10\.[0-9]+\/(([0-9]+)\.[^.]+))\/pdf$/i.match(path))
|
83
|
+
data['doi'] = match[1];
|
84
|
+
data['unit_id'] = match[2];
|
85
|
+
data['title_id'] = match[3].upcase;
|
86
|
+
data['rtype'] = 'BOOK_SECTION';
|
87
|
+
data['mime'] = 'PDF';
|
88
|
+
|
89
|
+
data['print_identifier'] = match[3];
|
90
|
+
|
91
|
+
elsif (match = /^\/enhanced\/doi\/(10\.[0-9]+\/(([^.]+)\.[^\/]+))\/?$/i.match(path))
|
92
|
+
data['doi'] = match[1];
|
93
|
+
data['unit_id'] = match[2];
|
94
|
+
data['title_id'] = match[3].upcase;
|
95
|
+
data['rtype'] = 'ARTICLE';
|
96
|
+
data['mime'] = 'HTML';
|
97
|
+
|
98
|
+
elsif ((match = /^\/enhanced\/doi\/(10\.[0-9]+\/(([0-9]{4})([a-z0-9]{2})[a-z0-9]+))\/?$/i.match(path)))
|
99
|
+
|
100
|
+
data['doi'] = match[1];
|
101
|
+
data['unit_id'] = match[2];
|
102
|
+
data['title_id'] = match[4].upcase;
|
103
|
+
data['rtype'] = 'ARTICLE';
|
104
|
+
data['mime'] = 'HTML';
|
105
|
+
|
106
|
+
data['publication_date'] = match[3];
|
107
|
+
|
108
|
+
elsif ((match = /^\/agu\/issue\/(10\.[0-9]+\/(([^.]+)\.[^\/]+))\/?$/i.match(path)))
|
109
|
+
data['doi'] = match[1];
|
110
|
+
data['unit_id'] = match[2];
|
111
|
+
data['title_id'] = match[3].upcase;
|
112
|
+
data['rtype'] = 'TOC';
|
113
|
+
data['mime'] = 'HTML';
|
114
|
+
|
115
|
+
elsif (/^\/readcube$/i.match(path))
|
116
|
+
data['rtype'] = 'ARTICLE';
|
117
|
+
data['mime'] = 'READCUBE';
|
118
|
+
|
119
|
+
if (uri.query)
|
120
|
+
params = CGI::parse(uri.query)
|
121
|
+
|
122
|
+
if (params["resource"])
|
123
|
+
|
124
|
+
data['doi'] = params["resource"][0];
|
125
|
+
data['unit_id'] = params["resource"][0].split('/')[1];
|
126
|
+
if ((match = /(10\.[0-9]+)\/([0-9]{4})([a-z0-9]{2})([^\/]+)$/i.match(params['resource'][0])))
|
127
|
+
data['title_id'] = match[3].upcase;
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
|
133
|
+
elsif ((match = /^\/doi\/(10\.[0-9]+\/([a-z]{1}[0-9]{8}([0-9]{2})[a-z0-9]+))\/pdf$/i.match(path)))
|
134
|
+
data['doi'] = match[1];
|
135
|
+
data['unit_id'] = match[2];
|
136
|
+
data['title_id'] = match[2].upcase;
|
137
|
+
data['rtype'] = 'ARTICLE';
|
138
|
+
data['mime'] = 'PDF';
|
139
|
+
|
140
|
+
data['publication_date'] = '20' + match[3];
|
141
|
+
|
142
|
+
elsif ((match = /^\/iucr\/(10\.[0-9]+\/([a-z]{1}[0-9]{8}([0-9]{2})[0-9a-z]+))/i.match(path)))
|
143
|
+
data['doi'] = match[1];
|
144
|
+
data['unit_id'] = match[2] ;
|
145
|
+
data['title_id'] = match[2].upcase;
|
146
|
+
data['rtype'] = 'ARTICLE';
|
147
|
+
data['mime'] = 'HTML';
|
148
|
+
|
149
|
+
data['publication_date'] = '20' + match[3];
|
150
|
+
|
151
|
+
elsif ((match = /^\/doi\/(10\.[0-9]+\/([a-z]{1}[0-9]{8}([0-9]{2})[0-9a-z]+))\/([a-z]+)$/i.match(path)))
|
152
|
+
data['doi'] = match[1];
|
153
|
+
data['unit_id'] = match[2];
|
154
|
+
data['title_id'] = match[2].upcase;
|
155
|
+
|
156
|
+
data['publication_date'] = '20' + match[3];
|
157
|
+
|
158
|
+
case (match[4])
|
159
|
+
when 'abstract'
|
160
|
+
data['rtype'] = 'ABS';
|
161
|
+
data['mime'] = 'MISC';
|
162
|
+
when 'pdf'
|
163
|
+
data['rtype'] = 'ARTICLE';
|
164
|
+
data['mime'] = 'PDF';
|
165
|
+
when 'full'
|
166
|
+
data['mime'] = 'PDF';
|
167
|
+
data['rtype'] = 'HTML';
|
168
|
+
end
|
169
|
+
|
170
|
+
elsif ((match = /^\/store\/(10\.[0-9]+\/(([a-z]+)\.([0-9]{4})[0-9]+))\/asset\/[a-z]+[0-9]+.pdf$/i.match(path)))
|
171
|
+
data['doi'] = match[1];
|
172
|
+
data['unit_id'] = match[2];
|
173
|
+
data['title_id'] = match[3].upcase;
|
174
|
+
data['mime'] = 'PDF';
|
175
|
+
|
176
|
+
data['publication_date'] = match[4];
|
177
|
+
|
178
|
+
elsif ((match = /^\/doi\/(10\.[0-9]+\/(([0-9]{2,4})([a-z]+)[0-9]+))\/(pdf|full)$/i.match(path)))
|
179
|
+
data['doi'] = match[1];
|
180
|
+
data['unit_id'] = match[2];
|
181
|
+
data['title_id'] = match[4].upcase;
|
182
|
+
data['rtype'] = 'ARTICLE';
|
183
|
+
data['mime'] = match[5] == 'pdf' ? 'PDF' : 'HTML';
|
184
|
+
|
185
|
+
data['publication_date'] = match[3];
|
186
|
+
|
187
|
+
if (match[3].length === 2)
|
188
|
+
data['publication_date'] = '19' + match[3];
|
189
|
+
end
|
190
|
+
|
191
|
+
elsif ((match = /^\/doi\/(10\.[0-9]+\/([^.]+))\/(pdf|full)$/i.match(path)))
|
192
|
+
data['doi'] = match[1];
|
193
|
+
data['unit_id'] = match[2];
|
194
|
+
data['rtype'] = 'ARTICLE';
|
195
|
+
data['mime'] = match[3] === 'pdf' ? 'PDF' : 'HTML';
|
196
|
+
end
|
197
|
+
|
198
|
+
return data
|
199
|
+
|
200
|
+
end
|
201
|
+
|
202
|
+
end
|