logstash-filter-ezproxy 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/lib/logstash/filters/cambridge.rb +123 -0
- data/lib/logstash/filters/dawsonera.rb +0 -5
- data/lib/logstash/filters/ebscohost.rb +26 -0
- data/lib/logstash/filters/emerald.rb +2 -6
- data/lib/logstash/filters/ezproxy.rb +79 -40
- data/lib/logstash/filters/gale.rb +21 -10
- data/lib/logstash/filters/heinonline.rb +44 -0
- data/lib/logstash/filters/lexis_webanalytics.rb +81 -0
- data/lib/logstash/filters/myilibrary.rb +29 -0
- data/lib/logstash/filters/oxford.rb +38 -0
- data/lib/logstash/filters/proquest.rb +42 -0
- data/lib/logstash/filters/sciencedirect.rb +1 -3
- data/lib/logstash/filters/scopus.rb +57 -0
- data/lib/logstash/filters/springer.rb +1 -1
- data/lib/logstash/filters/webofknowledge.rb +85 -0
- data/lib/logstash/filters/wiley.rb +154 -116
- data/lib/logstash/helpers/mime_helper.rb +38 -0
- data/lib/logstash/helpers/param_helper.rb +32 -0
- data/lib/logstash/helpers/url_parser.rb +2 -2
- data/logstash-filter-ezproxy.gemspec +2 -2
- data/spec/filters/cambridge/cambridge.2013-10-28.csv +13 -0
- data/spec/filters/cambridge/cambridge_spec.rb +27 -0
- data/spec/filters/ebscohost/ebscohost.2014-08-21.csv +5 -0
- data/spec/filters/ezproxy_spec.rb +1 -1
- data/spec/filters/gale/gale_spec.rb +0 -2
- data/spec/filters/heinonline/heinonline.2015-05-18.csv +12 -0
- data/spec/filters/heinonline/heinonline_spec.rb +20 -0
- data/spec/filters/lexis_webanalytics/lexis360.2017-04-28.csv +9 -0
- data/spec/filters/lexis_webanalytics/lexis_webanalytics_spec.rb +21 -0
- data/spec/filters/myilibrary/myilibrary.2018-02-09.csv +6 -0
- data/spec/filters/myilibrary/myilibrary_spec.rb +20 -0
- data/spec/filters/oxford/oxford.2018-02-15.csv +5 -0
- data/spec/filters/oxford/oxford_spec.rb +21 -0
- data/spec/filters/proquest/proquest.2018-02-09.csv +6 -0
- data/spec/filters/proquest/proquest_spec.rb +21 -0
- data/spec/filters/scopus/scopus.2016-07-18.csv +7 -0
- data/spec/filters/scopus/scopus_spec.rb +19 -0
- data/spec/filters/webofknowledge/webofknowledge_spec.rb +21 -0
- data/spec/filters/webofknowledge/wos.2017-01-13.csv +11 -0
- data/spec/filters/wiley/wiley.2018-02-07.csv +9 -0
- data/spec/filters/wiley/wiley_spec.rb +19 -0
- metadata +50 -6
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# Parser for LexisNexis web-analytics URLs.
module LexisWebAnalytics
  # Builds a hash describing the resource requested by a LexisNexis URL.
  #
  # path   - the URL path (String).
  # params - the parsed query string; values are expected to be arrays of
  #          strings (only index 0 is read).
  # uri    - the full URL; it is converted with #to_s and scanned for the
  #          wa_* tracking parameters.
  #
  # Returns a Hash that always contains "provider" and, depending on the URL,
  # 'rtype', 'mime', 'unit_id' and 'title_id'. Fields whose source parameter is
  # missing or unparsable are left out instead of raising.
  def LexisWebAnalytics.parse(path, params, uri)
    raw_url = uri.to_s

    data = {
      "provider" => "lexisnexis_webanalytics"
    }

    # True when the URL carries a wa_UserAction that represents a document view.
    viewing_document = lambda do
      action = /&wa_UserAction=([a-zA-Z]+)&/i.match(raw_url)
      !action.nil? && (action[1] == 'ViewDoc' || action[1] == 'ChangeToc')
    end

    if (match = /^\/Document\/([\w]+)\/([\w-]+)$/i.match(path))
      data['rtype'] = 'TOC'
      data['mime'] = 'HTML'

      # rndNum is optional: only record it when it was actually supplied.
      rnd_num = params['rndNum'] && params['rndNum'][0]
      data['unit_id'] = rnd_num unless rnd_num.nil?

      if (title = /([a-z_]+)_(\d+_\w+_\d+)_n_(\d+)/i.match(match[1]))
        data['title_id'] = title[1]
      end

    elsif /^\/Docview\.aspx$/i.match(path)
      data['rtype'] = 'ARTICLE'
      data['mime'] = 'HTML'

      # citationData is a JSON document; tolerate it being absent or malformed.
      citation_json = params['citationData'] && params['citationData'][0]
      citation = begin
        citation_json ? JSON.parse(citation_json) : nil
      rescue JSON::ParserError
        nil
      end

      doc_id = citation.is_a?(Hash) ? citation['docId'] : nil
      unless doc_id.nil?
        data['unit_id'] = doc_id
        data['title_id'] = doc_id.split('_')[1] if doc_id.is_a?(String)
      end

    elsif /^\/wa_k4c\.watag$/i.match(path)
      if (doc_id = /&wa_DocId=([0-9a-zA-Z_-]+)&/i.match(raw_url))
        data['unit_id'] = doc_id[1]
        if (title = /PS_([A-Z]+)/.match(data['unit_id']))
          data['title_id'] = title[1]
        end
      end

      doc_source_type = nil
      if (source = /&wa_DocSourceType=([0-9a-z%é_]+)&/i.match(raw_url))
        doc_source_type = source[1]

        case doc_source_type
        when 'FicheMethodo', 'FicheRevision'
          data['rtype'] = 'ENCYCLOPAEDIA_ENTRY'
          data['mime'] = 'HTML'
        when 'PresseSommaire'
          data['rtype'] = 'TOC'
          data['mime'] = 'HTML'
        when 'Presse'
          data['rtype'] = 'ARTICLE'
          data['mime'] = 'HTML'
        when 'En_eFascicule'
          if viewing_document.call
            data['rtype'] = 'ENCYCLOPAEDIA_ENTRY'
            data['mime'] = 'HTML'
          end
        end
      end

      # The character classes accept both the literal "é" and its
      # percent-encoded form (%C3%A9) in the source-type value.
      if doc_source_type && /L[é%C3A9]+gislationconsolid[é%C3A9]+e/.match(doc_source_type)
        if viewing_document.call
          data['rtype'] = 'CODES'
          data['mime'] = 'HTML'
        end
      end
    end

    return data
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Parser for MyiLibrary URLs.
module MyILibrary
  # Builds a hash describing the resource requested by a MyiLibrary URL.
  #
  # path   - the URL path (String).
  # params - the parsed query string; values are expected to be arrays of
  #          strings (only index 0 is read).
  #
  # Returns a Hash that always contains "provider" and, when the path is
  # recognised, 'rtype', 'mime', 'title_id' and 'unit_id'.
  def MyILibrary.parse(path, params)
    data = {
      "provider" => "myilibrary"
    }

    if /\/Open\.aspx/.match(path) || /\/browse\/open\.asp/i.match(path) || /^\/$/.match(path)
      # These entry points only identify a book when an id is supplied.
      if params.key?('id')
        book_id = params['id'][0]
        data['title_id'] = book_id
        data['unit_id'] = book_id
        data['rtype'] = 'BOOK'
        data['mime'] = 'MISC'
      end

    elsif /\/Viewer\/getImage\_Servlet\.aspx/i.match(path)
      data['rtype'] = 'BOOK_PAGE'
      data['mime'] = 'JPG'

    elsif /\/Viewer\/get[DP]MP\_Servlet\.aspx/i.match(path)
      data['rtype'] = 'BOOK_PAGE'
      # The 't' parameter supplies the mime value; leave it unset rather than
      # raising when the parameter is missing.
      mime = params['t'] && params['t'][0]
      data['mime'] = mime unless mime.nil?
    end

    return data
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# Parser for Oxford URLs.
module Oxford
  # Builds a hash describing the resource requested by an Oxford URL.
  #
  # path   - the URL path (String).
  # params - the parsed query string; values are expected to be arrays of
  #          strings (only index 0 is read).
  #
  # Returns a Hash that always contains "provider" and, when the path is
  # recognised, 'doi', 'title_id', 'unit_id', 'rtype' and 'mime'.
  def Oxford.parse(path, params)
    data = {
      "provider" => "oxford"
    }

    if (match = /\/view\/([0-9\.]+\/[a-z0-9\.\/\:]+)\/(.+?)(-([a-z][\-a-z0-9]+))?$/i.match(path))
      data['doi'] = match[1]
      data['title_id'] = match[2]
      data['unit_id'] = match[2]

      if match[4]
        # The trailing "-<word>-..." suffix names the part of the book viewed.
        section = match[4].downcase.split("-")[0]

        case section
        when "chapter"
          data['rtype'] = "BOOK_CHAPTER"
        when "bibliography"
          data['rtype'] = "BIBLIOGRAPHY"
        when "indexlist"
          data['rtype'] = 'TOC'
        end
      else
        data['rtype'] = 'BOOK'
      end

      # "print" may be present without a value (e.g. "?print"), in which case
      # there is nothing to upcase and 'mime' is left unset.
      print_format = params['print'] && params['print'][0]
      data['mime'] = print_format.upcase unless print_format.nil?

    elsif (match = /\/doc\/([0-9\.]+\/[a-z0-9\.\/\:]+)\/.+/i.match(path))
      data['doi'] = match[1]
    end

    return data
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Parser for ProQuest URLs under the /lib/lancaster/ site path.
module Proquest
  # Builds a hash describing the resource requested by a ProQuest URL.
  #
  # path   - the URL path (String).
  # params - the parsed query string; values are expected to be arrays of
  #          strings (only index 0 is read).
  #
  # Returns a Hash that always contains "provider" and, when the path is
  # recognised, 'rtype' and 'mime' plus whichever of 'page', 'remote_id',
  # 'title_id' and 'unit_id' could be read from params. A parameter that is
  # missing is skipped instead of raising.
  def Proquest.parse(path, params)
    data = {
      "provider" => "proquest"
    }

    # Copies the first value of a query parameter into data, if there is one.
    copy_param = lambda do |field, name|
      value = params[name] && params[name][0]
      data[field] = value unless value.nil?
    end

    if /\/lib\/lancaster\/remoteDocServer\.api/i.match(path)
      data['rtype'] = 'BOOK_PAGE'
      data['mime'] = 'MISC'
      copy_param.call('page', 'pageNum')
      copy_param.call('remote_id', 'remote_id')

    elsif /\/lib\/lancaster\/detail\.action/i.match(path)
      data['rtype'] = 'TOC'
      data['mime'] = 'HTML'
      copy_param.call('title_id', 'docID')
      copy_param.call('unit_id', 'docID')

    elsif /\/lib\/lancaster\/reader\.action/i.match(path)
      data['rtype'] = 'BOOK'
      data['mime'] = 'MISC'
      copy_param.call('title_id', 'docID')
      copy_param.call('unit_id', 'docID')

    elsif /\/lib\/lancaster\/doc(?:Access|Search)\.api/i.match(path)
      # docAccess.api and docSearch.api are reported identically.
      # Note the lower-case "docid" here, unlike "docID" above.
      data['rtype'] = 'TOC'
      data['mime'] = 'JSON'
      copy_param.call('title_id', 'docid')
      copy_param.call('unit_id', 'docid')
      copy_param.call('remote_id', 'remote_id')
    end

    return data
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
|
2
|
+
# Parser for Scopus URLs.
module Scopus
  # Builds a hash describing the resource requested by a Scopus URL.
  #
  # path   - the URL path (String).
  # params - the parsed query string; values are expected to be arrays of
  #          strings (only index 0 is read).
  #
  # Returns a Hash that always contains "provider" and, when the path is
  # recognised, 'mime', 'rtype' and (if the identifying parameter is present)
  # 'unit_id'. A missing parameter never raises; 'unit_id' is simply left out.
  def Scopus.parse(path, params)
    data = {
      "provider" => "scopus"
    }

    # Stores the first value of the named query parameter as the unit id.
    unit_from = lambda do |name|
      value = params[name] && params[name][0]
      data['unit_id'] = value unless value.nil?
    end

    if /^\/results\/citedbyresults\.ur[il]$/i.match(path)
      data['mime'] = 'HTML'
      data['rtype'] = 'REF'
      unit_from.call('cite')

    elsif (match = /^\/record\/([a-z]+)\.ur[il]$/i.match(path))
      case match[1]
      when 'display'
        data['mime'] = 'HTML'
        data['rtype'] = 'ABS'
        unit_from.call('eid')

      when 'references'
        data['mime'] = 'HTML'
        data['rtype'] = 'REF'
        unit_from.call('currentRecordPageEID')

      when 'detail'
        data['mime'] = 'HTML'
        data['rtype'] = 'BIO'
        unit_from.call('authorId')

      when 'pdfdownload'
        data['rtype'] = 'REF'
        data['mime'] = 'PDF'
        unit_from.call('eid')
      end

    elsif /^\/authid\/detail\.ur[il]$/i.match(path)
      data['mime'] = 'HTML'
      data['rtype'] = 'BIO'
      unit_from.call('authorId')

    elsif /^\/citation\/print\.ur[il]$/i.match(path)
      data['mime'] = 'PRINT'
      data['rtype'] = 'REF'
      unit_from.call('eid')
    end

    return data
  end
end
|
@@ -115,7 +115,7 @@ module Springer
|
|
115
115
|
|
116
116
|
|
117
117
|
elsif ((match = /^\/(download|static)\/([a-z]+)\/(([0-9.]*)\/([^\/]*)).epub/.match(path)))
|
118
|
-
if (/([0-9]+)\.([0-9]+)/.
|
118
|
+
if (/([0-9]+)\.([0-9]+)/.match(match[4]))
|
119
119
|
data['doi'] = match[3]
|
120
120
|
data['unit_id'] = match[5] + '.epub'
|
121
121
|
data['print_identifier'] = match[5]
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# Parser for Web of Knowledge URLs.
module WebOfKnowledge
  # Builds a hash describing the resource requested by a Web of Knowledge URL.
  #
  # path   - the URL path (String).
  # params - the parsed query string; values are expected to be arrays (only
  #          index 0 is read; a nested array under 'product' is unwrapped).
  #
  # Returns a Hash containing "provider" and, when the path is recognised,
  # 'rtype', 'mime' and any of 'title_id', 'unit_id', 'publication_title'.
  # An unrecognised "*.action" path returns an empty Hash.
  def WebOfKnowledge.parse(path, params)
    # NOTE(review): this previously reported "dawsonera", which looks like a
    # copy/paste slip from another parser; "webofknowledge" is chosen to match
    # the module name - confirm against downstream consumers.
    data = {
      "provider" => "webofknowledge"
    }

    if (match = /^\/([a-z_]+)\.do$/i.match(path))
      product_id = nil
      if params.key?('product')
        product = params['product'][0]
        product_id = product.kind_of?(Array) ? product[0] : product
      end

      # Map the servlet name to a [rtype, mime] pair; nil when unrecognised.
      kind =
        case match[1]
        when 'Search', 'InterService'
          ['TOC', 'HTML']
        when 'full_record'
          ['REF', 'HTML']
        when 'CitationReport'
          ['ANALYSIS', 'MISC']
        else
          /^([a-z]+)_GeneralSearch_input/i.match(match[1]) ? ['SEARCH', 'HTML'] : nil
        end

      if kind
        data['rtype'] = kind[0]
        data['mime'] = kind[1]
        data['title_id'] = product_id if product_id
      end

    elsif (match = /^\/([a-z_]*)\.action$/i.match(path))
      case match[1]
      when 'JCRJournalHomeAction'
        data['rtype'] = 'TOC'
        data['mime'] = 'HTML'

      when 'JCRJournalProfileAction'
        data['rtype'] = 'TABLE'
        data['mime'] = 'HTML'

        if params.key?('journalTitle')
          data['publication_title'] = params['journalTitle'][0]
        end

        if params.key?('journal')
          journal = params['journal'][0]
          data['title_id'] = journal

          # The unit id needs both the journal and the year; skip it when
          # either is missing rather than failing on the concatenation.
          year = params['year'] && params['year'][0]
          if journal && year
            data['unit_id'] = "impact/" + journal + "/" + year
          end
        end

      when 'IndicatorsAction'
        data['rtype'] = 'MAP'
        data['mime'] = 'MISC'

      when 'DocumentsAction'
        data['rtype'] = 'GRAPH'
        data['mime'] = 'MISC'

      else
        return {}
      end

    elsif /^\/([a-z]{2,3})\/analyze\.do$/i.match(path)
      data['rtype'] = 'ANALYSIS'
      data['mime'] = 'MISC'
    end

    return data
  end
end
|
@@ -6,185 +6,223 @@ module Wiley
|
|
6
6
|
data = {
|
7
7
|
"provider" => "wiley"
|
8
8
|
}
|
9
|
-
|
10
|
-
if ((match = /\/journal\/(10\.[0-9]+\/(\(ISSN\)([0-9]{4}-[0-9]{3}[0-9xX])))/i.match(path)))
|
11
|
-
data['doi'] = match[1];
|
12
|
-
data['unit_id'] = match[2];
|
13
|
-
data['rtype'] = 'TOC';
|
14
|
-
data['mime'] = 'MISC';
|
15
9
|
|
16
|
-
|
10
|
+
if ((match = /^\/pdf\/(10\.[0-9]+\/([0-9x]+))(\.ch[0-9]+)$/i.match(path)))
|
11
|
+
data['rtype'] = 'BOOK_SECTION'
|
12
|
+
data['mime'] = 'PDF'
|
13
|
+
data['doi'] = match[1]
|
14
|
+
data['unit_id'] = match[2] + match[3]
|
15
|
+
data['online_identifier'] = match[2]
|
16
|
+
|
17
|
+
elsif ((match = /^\/doi(\/[a-z]+)?\/(10\.[0-9]+\/([a-z0-9._-]+))$/i.match(path)))
|
18
|
+
data['doi'] = match[2]
|
19
|
+
data['unit_id'] = match[3]
|
20
|
+
|
21
|
+
case (match[1])
|
22
|
+
when '/pdf', '/epdf'
|
23
|
+
data['rtype'] = 'ARTICLE'
|
24
|
+
data['mime'] = 'PDF'
|
25
|
+
|
26
|
+
when '/full'
|
27
|
+
data['rtype'] = 'ARTICLE'
|
28
|
+
data['mime'] = 'HTML'
|
29
|
+
|
30
|
+
when '/abs'
|
31
|
+
data['rtype'] = 'ABS'
|
32
|
+
data['mime'] = 'HTML'
|
33
|
+
|
34
|
+
else
|
35
|
+
data['rtype'] = 'ARTICLE'
|
36
|
+
data['mime'] = 'HTML'
|
37
|
+
end
|
38
|
+
|
39
|
+
|
40
|
+
elsif ((match = /^\/toc\/toc\/(([0-9]+)\/([0-9]+)\/([0-9]+))$/i.match(path)))
|
41
|
+
data['rtype'] = 'TOC'
|
42
|
+
data['mime'] = 'MISC'
|
43
|
+
data['unit_id'] = match[1]
|
44
|
+
data['title_id'] = match[2]
|
45
|
+
data['vol'] = match[3]
|
46
|
+
data['issue'] = match[4]
|
47
|
+
|
48
|
+
elsif ((match = /^\/journal\/([0-9]+)$/i.match(path)))
|
49
|
+
data['rtype'] = 'TOC'
|
50
|
+
data['mime'] = 'MISC'
|
51
|
+
data['title_id'] = match[1]
|
52
|
+
|
53
|
+
elsif ((match = /\/journal\/(10\.[0-9]+\/(\(ISSN\)([0-9]{4}-[0-9]{3}[0-9xX])))/i.match(path)))
|
54
|
+
data['doi'] = match[1]
|
55
|
+
data['unit_id'] = match[2]
|
56
|
+
data['rtype'] = 'TOC'
|
57
|
+
data['mime'] = 'MISC'
|
58
|
+
data['online_identifier'] = match[3]
|
17
59
|
|
18
60
|
elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.([0-9]{4})\.[^.]+\.[^.]+))\/issuetoc$/i.match(path)))
|
19
|
-
data['doi'] = match[1]
|
20
|
-
data['unit_id'] = match[2]
|
21
|
-
data['title_id'] = match[3].upcase
|
22
|
-
data['rtype'] = 'TOC'
|
23
|
-
data['mime'] = 'MISC'
|
24
|
-
|
25
|
-
data['publication_date'] = match[4];
|
61
|
+
data['doi'] = match[1]
|
62
|
+
data['unit_id'] = match[2]
|
63
|
+
data['title_id'] = match[3].upcase
|
64
|
+
data['rtype'] = 'TOC'
|
65
|
+
data['mime'] = 'MISC'
|
66
|
+
data['publication_date'] = match[4]
|
26
67
|
|
27
68
|
elsif ((match = /^\/doi\/(10\.[0-9]+\/(j\.([0-9]{4}-[0-9]{3}[0-9xX])\.([0-9]{4})\.[^.]+\.[^.]+))\/abstract$/i.match(path)))
|
28
|
-
data['doi'] = match[1]
|
29
|
-
data['unit_id'] = match[2]
|
30
|
-
data['rtype'] = 'ABS'
|
31
|
-
data['mime'] = 'MISC'
|
32
|
-
|
33
|
-
data['
|
34
|
-
data['publication_date'] = match[4];
|
69
|
+
data['doi'] = match[1]
|
70
|
+
data['unit_id'] = match[2]
|
71
|
+
data['rtype'] = 'ABS'
|
72
|
+
data['mime'] = 'MISC'
|
73
|
+
data['online_identifier'] = match[3]
|
74
|
+
data['publication_date'] = match[4]
|
35
75
|
|
36
76
|
elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.([0-9]{4})[0-9]+))\/abstract$/i.match(path)))
|
37
|
-
data['doi'] = match[1]
|
38
|
-
data['unit_id'] = match[2]
|
39
|
-
data['title_id'] = match[3].upcase
|
40
|
-
data['rtype'] = 'ABS'
|
41
|
-
data['mime'] = 'MISC'
|
42
|
-
|
43
|
-
data['publication_date'] = match[4];
|
77
|
+
data['doi'] = match[1]
|
78
|
+
data['unit_id'] = match[2]
|
79
|
+
data['title_id'] = match[3].upcase
|
80
|
+
data['rtype'] = 'ABS'
|
81
|
+
data['mime'] = 'MISC'
|
82
|
+
data['publication_date'] = match[4]
|
44
83
|
|
45
84
|
elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.[0-9]+))\/full$/i.match(path)))
|
46
|
-
data['doi'] = match[1]
|
47
|
-
data['unit_id'] = match[2]
|
48
|
-
data['title_id'] = match[3].upcase
|
49
|
-
data['rtype'] = 'ARTICLE'
|
50
|
-
data['mime'] = 'HTML'
|
85
|
+
data['doi'] = match[1]
|
86
|
+
data['unit_id'] = match[2]
|
87
|
+
data['title_id'] = match[3].upcase
|
88
|
+
data['rtype'] = 'ARTICLE'
|
89
|
+
data['mime'] = 'HTML'
|
51
90
|
|
52
91
|
elsif ((match = /^\/doi\/(10\.[0-9]+\/(j\.([0-9]{4}-[0-9]{3}[0-9xX])\.([0-9]{4})\.[^.]+\.[^.]+))\/pdf$/i.match(path)))
|
53
|
-
data['doi'] = match[1]
|
54
|
-
data['unit_id'] = match[2]
|
55
|
-
data['rtype'] = 'ARTICLE'
|
56
|
-
data['mime'] = 'PDF'
|
57
|
-
|
58
|
-
data['
|
59
|
-
data['publication_date'] = match[4];
|
92
|
+
data['doi'] = match[1]
|
93
|
+
data['unit_id'] = match[2]
|
94
|
+
data['rtype'] = 'ARTICLE'
|
95
|
+
data['mime'] = 'PDF'
|
96
|
+
data['online_identifier'] = match[3]
|
97
|
+
data['publication_date'] = match[4]
|
60
98
|
|
61
99
|
elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.[0-9]+))\/pdf$/i.match(path)))
|
62
|
-
data['doi'] = match[1]
|
63
|
-
data['unit_id'] = match[2]
|
64
|
-
data['title_id'] = match[3].upcase
|
65
|
-
data['rtype'] = 'ARTICLE'
|
66
|
-
data['mime'] = 'PDF'
|
100
|
+
data['doi'] = match[1]
|
101
|
+
data['unit_id'] = match[2]
|
102
|
+
data['title_id'] = match[3].upcase
|
103
|
+
data['rtype'] = 'ARTICLE'
|
104
|
+
data['mime'] = 'PDF'
|
67
105
|
|
68
106
|
elsif ((match = /^\/book\/(10\.[0-9]+\/([0-9]+))$/i.match(path)))
|
69
|
-
data['doi'] = match[1]
|
70
|
-
data['unit_id'] = match[2]
|
71
|
-
data['title_id'] = match[2].upcase
|
72
|
-
data['rtype'] = 'TOC'
|
73
|
-
data['mime'] = 'MISC'
|
107
|
+
data['doi'] = match[1]
|
108
|
+
data['unit_id'] = match[2]
|
109
|
+
data['title_id'] = match[2].upcase
|
110
|
+
data['rtype'] = 'TOC'
|
111
|
+
data['mime'] = 'MISC'
|
74
112
|
|
75
|
-
data['print_identifier'] = match[2]
|
113
|
+
data['print_identifier'] = match[2]
|
76
114
|
|
77
115
|
elsif (match = /^\/doi\/(10\.[0-9]+\/(([0-9]+)\.[^.]+))\/pdf$/i.match(path))
|
78
|
-
data['doi'] = match[1]
|
79
|
-
data['unit_id'] = match[2]
|
80
|
-
data['title_id'] = match[3].upcase
|
81
|
-
data['rtype'] = 'BOOK_SECTION'
|
82
|
-
data['mime'] = 'PDF'
|
116
|
+
data['doi'] = match[1]
|
117
|
+
data['unit_id'] = match[2]
|
118
|
+
data['title_id'] = match[3].upcase
|
119
|
+
data['rtype'] = 'BOOK_SECTION'
|
120
|
+
data['mime'] = 'PDF'
|
83
121
|
|
84
|
-
data['print_identifier'] = match[3]
|
122
|
+
data['print_identifier'] = match[3]
|
85
123
|
|
86
124
|
elsif (match = /^\/enhanced\/doi\/(10\.[0-9]+\/(([^.]+)\.[^\/]+))\/?$/i.match(path))
|
87
|
-
data['doi'] = match[1]
|
88
|
-
data['unit_id'] = match[2]
|
89
|
-
data['title_id'] = match[3].upcase
|
90
|
-
data['rtype'] = 'ARTICLE'
|
91
|
-
data['mime'] = 'HTML'
|
125
|
+
data['doi'] = match[1]
|
126
|
+
data['unit_id'] = match[2]
|
127
|
+
data['title_id'] = match[3].upcase
|
128
|
+
data['rtype'] = 'ARTICLE'
|
129
|
+
data['mime'] = 'HTML'
|
92
130
|
|
93
131
|
elsif ((match = /^\/enhanced\/doi\/(10\.[0-9]+\/(([0-9]{4})([a-z0-9]{2})[a-z0-9]+))\/?$/i.match(path)))
|
94
132
|
|
95
|
-
data['doi'] = match[1]
|
96
|
-
data['unit_id'] = match[2]
|
97
|
-
data['title_id'] = match[4].upcase
|
98
|
-
data['rtype'] = 'ARTICLE'
|
99
|
-
data['mime'] = 'HTML'
|
133
|
+
data['doi'] = match[1]
|
134
|
+
data['unit_id'] = match[2]
|
135
|
+
data['title_id'] = match[4].upcase
|
136
|
+
data['rtype'] = 'ARTICLE'
|
137
|
+
data['mime'] = 'HTML'
|
100
138
|
|
101
|
-
data['publication_date'] = match[3]
|
139
|
+
data['publication_date'] = match[3]
|
102
140
|
|
103
141
|
elsif ((match = /^\/agu\/issue\/(10\.[0-9]+\/(([^.]+)\.[^\/]+))\/?$/i.match(path)))
|
104
|
-
data['doi'] = match[1]
|
105
|
-
data['unit_id'] = match[2]
|
106
|
-
data['title_id'] = match[3].upcase
|
107
|
-
data['rtype'] = 'TOC'
|
108
|
-
data['mime'] = 'HTML'
|
142
|
+
data['doi'] = match[1]
|
143
|
+
data['unit_id'] = match[2]
|
144
|
+
data['title_id'] = match[3].upcase
|
145
|
+
data['rtype'] = 'TOC'
|
146
|
+
data['mime'] = 'HTML'
|
109
147
|
|
110
148
|
elsif (/^\/readcube$/i.match(path))
|
111
|
-
data['rtype'] = 'ARTICLE'
|
112
|
-
data['mime'] = 'READCUBE'
|
149
|
+
data['rtype'] = 'ARTICLE'
|
150
|
+
data['mime'] = 'READCUBE'
|
113
151
|
|
114
152
|
if (params.key?("resource"))
|
115
153
|
|
116
|
-
data['doi'] = params["resource"][0]
|
117
|
-
data['unit_id'] = params["resource"][0].split('/')[1]
|
154
|
+
data['doi'] = params["resource"][0]
|
155
|
+
data['unit_id'] = params["resource"][0].split('/')[1]
|
118
156
|
if ((match = /(10\.[0-9]+)\/([0-9]{4})([a-z0-9]{2})([^\/]+)$/i.match(params['resource'][0])))
|
119
|
-
data['title_id'] = match[3].upcase
|
157
|
+
data['title_id'] = match[3].upcase
|
120
158
|
end
|
121
159
|
end
|
122
160
|
|
123
161
|
|
124
162
|
|
125
163
|
elsif ((match = /^\/doi\/(10\.[0-9]+\/([a-z]{1}[0-9]{8}([0-9]{2})[a-z0-9]+))\/pdf$/i.match(path)))
|
126
|
-
data['doi'] = match[1]
|
127
|
-
data['unit_id'] = match[2]
|
128
|
-
data['title_id'] = match[2].upcase
|
129
|
-
data['rtype'] = 'ARTICLE'
|
130
|
-
data['mime'] = 'PDF'
|
164
|
+
data['doi'] = match[1]
|
165
|
+
data['unit_id'] = match[2]
|
166
|
+
data['title_id'] = match[2].upcase
|
167
|
+
data['rtype'] = 'ARTICLE'
|
168
|
+
data['mime'] = 'PDF'
|
131
169
|
|
132
|
-
data['publication_date'] = '20' + match[3]
|
170
|
+
data['publication_date'] = '20' + match[3]
|
133
171
|
|
134
172
|
elsif ((match = /^\/iucr\/(10\.[0-9]+\/([a-z]{1}[0-9]{8}([0-9]{2})[0-9a-z]+))/i.match(path)))
|
135
|
-
data['doi'] = match[1]
|
136
|
-
data['unit_id'] = match[2]
|
137
|
-
data['title_id'] = match[2].upcase
|
138
|
-
data['rtype'] = 'ARTICLE'
|
139
|
-
data['mime'] = 'HTML'
|
173
|
+
data['doi'] = match[1]
|
174
|
+
data['unit_id'] = match[2]
|
175
|
+
data['title_id'] = match[2].upcase
|
176
|
+
data['rtype'] = 'ARTICLE'
|
177
|
+
data['mime'] = 'HTML'
|
140
178
|
|
141
|
-
data['publication_date'] = '20' + match[3]
|
179
|
+
data['publication_date'] = '20' + match[3]
|
142
180
|
|
143
181
|
elsif ((match = /^\/doi\/(10\.[0-9]+\/([a-z]{1}[0-9]{8}([0-9]{2})[0-9a-z]+))\/([a-z]+)$/i.match(path)))
|
144
|
-
data['doi'] = match[1]
|
145
|
-
data['unit_id'] = match[2]
|
146
|
-
data['title_id'] = match[2].upcase
|
182
|
+
data['doi'] = match[1]
|
183
|
+
data['unit_id'] = match[2]
|
184
|
+
data['title_id'] = match[2].upcase
|
147
185
|
|
148
|
-
data['publication_date'] = '20' + match[3]
|
186
|
+
data['publication_date'] = '20' + match[3]
|
149
187
|
|
150
188
|
case (match[4])
|
151
189
|
when 'abstract'
|
152
|
-
data['rtype'] = 'ABS'
|
153
|
-
data['mime'] = 'MISC'
|
190
|
+
data['rtype'] = 'ABS'
|
191
|
+
data['mime'] = 'MISC'
|
154
192
|
when 'pdf'
|
155
|
-
data['rtype'] = 'ARTICLE'
|
156
|
-
data['mime'] = 'PDF'
|
193
|
+
data['rtype'] = 'ARTICLE'
|
194
|
+
data['mime'] = 'PDF'
|
157
195
|
when 'full'
|
158
|
-
data['mime'] = 'PDF'
|
159
|
-
data['rtype'] = 'HTML'
|
196
|
+
data['mime'] = 'PDF'
|
197
|
+
data['rtype'] = 'HTML'
|
160
198
|
end
|
161
199
|
|
162
200
|
elsif ((match = /^\/store\/(10\.[0-9]+\/(([a-z]+)\.([0-9]{4})[0-9]+))\/asset\/[a-z]+[0-9]+.pdf$/i.match(path)))
|
163
|
-
data['doi'] = match[1]
|
164
|
-
data['unit_id'] = match[2]
|
165
|
-
data['title_id'] = match[3].upcase
|
166
|
-
data['mime'] = 'PDF'
|
201
|
+
data['doi'] = match[1]
|
202
|
+
data['unit_id'] = match[2]
|
203
|
+
data['title_id'] = match[3].upcase
|
204
|
+
data['mime'] = 'PDF'
|
167
205
|
|
168
|
-
data['publication_date'] = match[4]
|
206
|
+
data['publication_date'] = match[4]
|
169
207
|
|
170
208
|
elsif ((match = /^\/doi\/(10\.[0-9]+\/(([0-9]{2,4})([a-z]+)[0-9]+))\/(pdf|full)$/i.match(path)))
|
171
|
-
data['doi'] = match[1]
|
172
|
-
data['unit_id'] = match[2]
|
173
|
-
data['title_id'] = match[4].upcase
|
174
|
-
data['rtype'] = 'ARTICLE'
|
175
|
-
data['mime'] = match[5] == 'pdf' ? 'PDF' : 'HTML'
|
209
|
+
data['doi'] = match[1]
|
210
|
+
data['unit_id'] = match[2]
|
211
|
+
data['title_id'] = match[4].upcase
|
212
|
+
data['rtype'] = 'ARTICLE'
|
213
|
+
data['mime'] = match[5] == 'pdf' ? 'PDF' : 'HTML'
|
176
214
|
|
177
|
-
data['publication_date'] = match[3]
|
215
|
+
data['publication_date'] = match[3]
|
178
216
|
|
179
217
|
if (match[3].length === 2)
|
180
|
-
data['publication_date'] = '19' + match[3]
|
218
|
+
data['publication_date'] = '19' + match[3]
|
181
219
|
end
|
182
220
|
|
183
221
|
elsif ((match = /^\/doi\/(10\.[0-9]+\/([^.]+))\/(pdf|full)$/i.match(path)))
|
184
|
-
data['doi'] = match[1]
|
185
|
-
data['unit_id'] = match[2]
|
186
|
-
data['rtype'] = 'ARTICLE'
|
187
|
-
data['mime'] = match[3] === 'pdf' ? 'PDF' : 'HTML'
|
222
|
+
data['doi'] = match[1]
|
223
|
+
data['unit_id'] = match[2]
|
224
|
+
data['rtype'] = 'ARTICLE'
|
225
|
+
data['mime'] = match[3] === 'pdf' ? 'PDF' : 'HTML'
|
188
226
|
end
|
189
227
|
|
190
228
|
return data
|