logstash-filter-ezproxy 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -1
  3. data/lib/logstash/filters/cambridge.rb +123 -0
  4. data/lib/logstash/filters/dawsonera.rb +0 -5
  5. data/lib/logstash/filters/ebscohost.rb +26 -0
  6. data/lib/logstash/filters/emerald.rb +2 -6
  7. data/lib/logstash/filters/ezproxy.rb +79 -40
  8. data/lib/logstash/filters/gale.rb +21 -10
  9. data/lib/logstash/filters/heinonline.rb +44 -0
  10. data/lib/logstash/filters/lexis_webanalytics.rb +81 -0
  11. data/lib/logstash/filters/myilibrary.rb +29 -0
  12. data/lib/logstash/filters/oxford.rb +38 -0
  13. data/lib/logstash/filters/proquest.rb +42 -0
  14. data/lib/logstash/filters/sciencedirect.rb +1 -3
  15. data/lib/logstash/filters/scopus.rb +57 -0
  16. data/lib/logstash/filters/springer.rb +1 -1
  17. data/lib/logstash/filters/webofknowledge.rb +85 -0
  18. data/lib/logstash/filters/wiley.rb +154 -116
  19. data/lib/logstash/helpers/mime_helper.rb +38 -0
  20. data/lib/logstash/helpers/param_helper.rb +32 -0
  21. data/lib/logstash/helpers/url_parser.rb +2 -2
  22. data/logstash-filter-ezproxy.gemspec +2 -2
  23. data/spec/filters/cambridge/cambridge.2013-10-28.csv +13 -0
  24. data/spec/filters/cambridge/cambridge_spec.rb +27 -0
  25. data/spec/filters/ebscohost/ebscohost.2014-08-21.csv +5 -0
  26. data/spec/filters/ezproxy_spec.rb +1 -1
  27. data/spec/filters/gale/gale_spec.rb +0 -2
  28. data/spec/filters/heinonline/heinonline.2015-05-18.csv +12 -0
  29. data/spec/filters/heinonline/heinonline_spec.rb +20 -0
  30. data/spec/filters/lexis_webanalytics/lexis360.2017-04-28.csv +9 -0
  31. data/spec/filters/lexis_webanalytics/lexis_webanalytics_spec.rb +21 -0
  32. data/spec/filters/myilibrary/myilibrary.2018-02-09.csv +6 -0
  33. data/spec/filters/myilibrary/myilibrary_spec.rb +20 -0
  34. data/spec/filters/oxford/oxford.2018-02-15.csv +5 -0
  35. data/spec/filters/oxford/oxford_spec.rb +21 -0
  36. data/spec/filters/proquest/proquest.2018-02-09.csv +6 -0
  37. data/spec/filters/proquest/proquest_spec.rb +21 -0
  38. data/spec/filters/scopus/scopus.2016-07-18.csv +7 -0
  39. data/spec/filters/scopus/scopus_spec.rb +19 -0
  40. data/spec/filters/webofknowledge/webofknowledge_spec.rb +21 -0
  41. data/spec/filters/webofknowledge/wos.2017-01-13.csv +11 -0
  42. data/spec/filters/wiley/wiley.2018-02-07.csv +9 -0
  43. data/spec/filters/wiley/wiley_spec.rb +19 -0
  44. metadata +50 -6
@@ -0,0 +1,81 @@
1
+ require 'json'
2
+
3
# Parser for LexisNexis web-analytics request URLs.
module LexisWebAnalytics
  # Builds a metadata hash describing the resource a request points at.
  #
  # path   - request path, matched against the known URL shapes
  # params - query parameters; values are read by taking their first element,
  #          so presumably key => Array-of-values as CGI.parse produces
  #          (confirm against the caller)
  # uri    - full request URI; only its to_s form is inspected
  #
  # Returns a Hash that always holds "provider" and, depending on the URL
  # shape, 'rtype', 'mime', 'unit_id' and 'title_id'. Missing or malformed
  # query data never raises; the affected keys are left nil or unset.
  def self.parse(path, params, uri)
    raw_url = uri.to_s
    data = { "provider" => "lexisnexis_webanalytics" }

    if (document = /^\/Document\/([\w]+)\/([\w-]+)$/i.match(path))
      data['rtype'] = 'TOC'
      data['mime'] = 'HTML'
      # Array() keeps an absent rndNum from raising; unit_id is then nil.
      data['unit_id'] = Array(params['rndNum']).first

      if (title = /([a-z_]+)_(\d+_\w+_\d+)_n_(\d+)/i.match(document[1]))
        data['title_id'] = title[1]
      end

    elsif /^\/Docview\.aspx$/i.match(path)
      data['rtype'] = 'ARTICLE'
      data['mime'] = 'HTML'

      # citationData is expected to be a JSON object carrying 'docId'.
      # Absent or unparsable payloads are ignored instead of aborting.
      citation = begin
        JSON.parse(Array(params['citationData']).first)
      rescue JSON::ParserError, TypeError
        nil
      end

      doc_id = citation.is_a?(Hash) ? citation['docId'] : nil
      unless doc_id.nil?
        data['unit_id'] = doc_id
        # The title id is the second underscore-separated segment of docId.
        data['title_id'] = doc_id.to_s.split('_')[1]
      end

    elsif /^\/wa_k4c\.watag$/i.match(path)
      # Tracking-tag URL: everything is read from the raw query string.
      if (doc = /&wa_DocId=([0-9a-zA-Z_-]+)&/i.match(raw_url))
        data['unit_id'] = doc[1]
        if (title = /PS_([A-Z]+)/.match(data['unit_id']))
          data['title_id'] = title[1]
        end
      end

      source = /&wa_DocSourceType=([0-9a-z%é_]+)&/i.match(raw_url)
      doc_source_type = source && source[1]

      # Some source types only count when the user action is a view/TOC change.
      action = /&wa_UserAction=([a-zA-Z]+)&/i.match(raw_url)
      viewing = !action.nil? && ['ViewDoc', 'ChangeToc'].include?(action[1])

      case doc_source_type
      when 'FicheMethodo', 'FicheRevision'
        data['rtype'] = 'ENCYCLOPAEDIA_ENTRY'
        data['mime'] = 'HTML'
      when 'PresseSommaire'
        data['rtype'] = 'TOC'
        data['mime'] = 'HTML'
      when 'Presse'
        data['rtype'] = 'ARTICLE'
        data['mime'] = 'HTML'
      when 'En_eFascicule'
        if viewing
          data['rtype'] = 'ENCYCLOPAEDIA_ENTRY'
          data['mime'] = 'HTML'
        end
      end

      # The character class accepts both the literal and the percent-encoded
      # accented letter; the pattern is kept exactly as shipped.
      if doc_source_type && /L[é%C3A9]+gislationconsolid[é%C3A9]+e/.match(doc_source_type) && viewing
        data['rtype'] = 'CODES'
        data['mime'] = 'HTML'
      end
    end

    data
  end
end
@@ -0,0 +1,29 @@
1
# Parser for MyiLibrary request URLs.
module MyILibrary
  # Classifies a request by its path.
  #
  # path   - request path
  # params - query parameters; values are indexed with [0], so presumably
  #          key => Array-of-values (confirm against the caller)
  #
  # Returns a Hash with "provider" plus, for recognised paths, 'rtype' and
  # 'mime' (and 'title_id'/'unit_id' for book openings that carry an 'id').
  def self.parse(path, params)
    result = { "provider" => "myilibrary" }

    case path
    when /\/Open\.aspx/, /\/browse\/open\.asp/i, /^\/$/
      # Book landing pages are only reported when an id is present.
      if params.key?('id')
        book_id = params['id'][0]
        result['title_id'] = book_id
        result['unit_id'] = book_id
        result['rtype'] = 'BOOK'
        result['mime'] = 'MISC'
      end
    when /\/Viewer\/getImage\_Servlet\.aspx/i
      result['rtype'] = 'BOOK_PAGE'
      result['mime'] = 'JPG'
    when /\/Viewer\/get[DP]MP\_Servlet\.aspx/i
      result['rtype'] = 'BOOK_PAGE'
      # The page format is taken verbatim from the 't' parameter.
      result['mime'] = params['t'][0]
    end

    result
  end
end
@@ -0,0 +1,38 @@
1
# Parser for Oxford platform request URLs.
module Oxford
  # Derives DOI and resource information from a request path.
  #
  # path   - request path; '/view/<doi>/<id>[-<suffix>]' and '/doc/<doi>/...'
  #          shapes are recognised
  # params - query parameters; 'print' is indexed with [0], so presumably
  #          key => Array-of-values (confirm against the caller)
  #
  # Returns a Hash with "provider" plus whatever could be extracted:
  # 'doi', 'title_id', 'unit_id', 'rtype' and 'mime'.
  def self.parse(path, params)
    info = { "provider" => "oxford" }

    view = /\/view\/([0-9\.]+\/[a-z0-9\.\/\:]+)\/(.+?)(-([a-z][\-a-z0-9]+))?$/i.match(path)
    if view
      info['doi'] = view[1]
      info['title_id'] = view[2]
      info['unit_id'] = view[2]

      suffix = view[4]
      if suffix
        # Only the first dash-separated word of the suffix decides the type;
        # unrecognised words leave 'rtype' unset.
        kind = suffix.downcase.split("-")[0]
        if kind == "chapter"
          info['rtype'] = "BOOK_CHAPTER"
        elsif kind == "bibliography"
          info['rtype'] = "BIBLIOGRAPHY"
        elsif kind == "indexlist"
          info['rtype'] = 'TOC'
        end
      else
        info['rtype'] = 'BOOK'
      end

      info['mime'] = params['print'][0].upcase if params.key?('print')
      return info
    end

    doc = /\/doc\/([0-9\.]+\/[a-z0-9\.\/\:]+)\/.+/i.match(path)
    info['doi'] = doc[1] if doc

    info
  end
end
@@ -0,0 +1,42 @@
1
# Parser for ProQuest ebook request URLs under /lib/lancaster/.
module Proquest
  # Classifies a request by its path and copies identifying parameters.
  #
  # path   - request path
  # params - query parameters; values are indexed with [0], so presumably
  #          key => Array-of-values (confirm against the caller)
  #
  # Returns a Hash with "provider" and, for recognised endpoints, 'rtype',
  # 'mime' and the ids read from the query ('page', 'remote_id',
  # 'title_id', 'unit_id').
  def self.parse(path, params)
    out = { "provider" => "proquest" }
    first = lambda { |name| params[name][0] }

    case path
    when /\/lib\/lancaster\/remoteDocServer\.api/i
      out['rtype'] = 'BOOK_PAGE'
      out['mime'] = 'MISC'
      out['page'] = first.call('pageNum')
      out['remote_id'] = first.call('remote_id')

    when /\/lib\/lancaster\/detail\.action/i
      out['rtype'] = 'TOC'
      out['mime'] = 'HTML'
      out['title_id'] = first.call('docID')
      out['unit_id'] = first.call('docID')

    when /\/lib\/lancaster\/reader\.action/i
      out['rtype'] = 'BOOK'
      out['mime'] = 'MISC'
      out['title_id'] = first.call('docID')
      out['unit_id'] = first.call('docID')

    when /\/lib\/lancaster\/docAccess\.api/i, /\/lib\/lancaster\/docSearch\.api/i
      # Both JSON endpoints use the lower-case 'docid' parameter.
      out['rtype'] = 'TOC'
      out['mime'] = 'JSON'
      out['title_id'] = first.call('docid')
      out['unit_id'] = first.call('docid')
      out['remote_id'] = first.call('remote_id')
    end

    out
  end
end
@@ -4,9 +4,7 @@ require 'cgi'
4
4
  module ScienceDirect
5
5
  def ScienceDirect.parse (path, params)
6
6
 
7
- data = {
8
- "provider" => "sciencedirect"
9
- }
7
+ data = {}
10
8
 
11
9
  if (params.key?("_ob"))
12
10
  if (params['_cdi'])
@@ -0,0 +1,57 @@
1
+
2
# Parser for Scopus request URLs.
module Scopus
  # Classifies a request by its path and picks the matching record id.
  #
  # path   - request path ending in '.uri' or '.url'
  # params - query parameters; values are read by taking their first element,
  #          so presumably key => Array-of-values as CGI.parse produces
  #          (confirm against the caller)
  #
  # Returns a Hash with "provider" and, for recognised paths, 'mime',
  # 'rtype' and 'unit_id'. An absent id parameter never raises: branches that
  # previously read the parameter unguarded now yield a nil 'unit_id'.
  def self.parse(path, params)
    data = { "provider" => "scopus" }

    # Array() tolerates an absent key on a plain Hash instead of raising.
    first = lambda { |name| Array(params[name]).first }

    if /^\/results\/citedbyresults\.ur[il]$/i.match(path)
      data['mime'] = 'HTML'
      data['rtype'] = 'REF'
      data['unit_id'] = first.call('cite')

    elsif (record = /^\/record\/([a-z]+)\.ur[il]$/i.match(path))
      # The page name is compared case-sensitively, as before.
      case record[1]
      when 'display'
        data['mime'] = 'HTML'
        data['rtype'] = 'ABS'
        data['unit_id'] = first.call('eid')

      when 'references'
        data['mime'] = 'HTML'
        data['rtype'] = 'REF'
        data['unit_id'] = first.call('currentRecordPageEID')

      when 'detail'
        data['mime'] = 'HTML'
        data['rtype'] = 'BIO'
        data['unit_id'] = first.call('authorId') if params.key?('authorId')

      when 'pdfdownload'
        data['rtype'] = 'REF'
        data['mime'] = 'PDF'
        data['unit_id'] = first.call('eid')
      end

    elsif /^\/authid\/detail\.ur[il]$/i.match(path)
      data['mime'] = 'HTML'
      data['rtype'] = 'BIO'
      data['unit_id'] = first.call('authorId') if params.key?('authorId')

    elsif /^\/citation\/print\.ur[il]$/i.match(path)
      data['mime'] = 'PRINT'
      data['rtype'] = 'REF'
      data['unit_id'] = first.call('eid') if params.key?('eid')
    end

    data
  end
end
@@ -115,7 +115,7 @@ module Springer
115
115
 
116
116
 
117
117
  elsif ((match = /^\/(download|static)\/([a-z]+)\/(([0-9.]*)\/([^\/]*)).epub/.match(path)))
118
- if (/([0-9]+)\.([0-9]+)/.test(match[4]))
118
+ if (/([0-9]+)\.([0-9]+)/.match(match[4]))
119
119
  data['doi'] = match[3]
120
120
  data['unit_id'] = match[5] + '.epub'
121
121
  data['print_identifier'] = match[5]
@@ -0,0 +1,85 @@
1
# Parser for Web of Knowledge / Journal Citation Reports request URLs.
module WebOfKnowledge
  # Classifies a request by its path.
  #
  # path   - request path; '/<name>.do', '/<name>.action' and
  #          '/<xx>/analyze.do' shapes are recognised
  # params - query parameters; values are read by taking their first element,
  #          so presumably key => Array-of-values as CGI.parse produces
  #          (confirm against the caller)
  #
  # Returns a Hash with "provider" and, for recognised pages, 'rtype', 'mime'
  # and available ids. An unrecognised '.action' page returns an empty Hash
  # (kept from the original behaviour).
  def self.parse(path, params)
    # The provider previously carried another parser's name ("dawsonera").
    data = { "provider" => "webofknowledge" }

    if (page = /^\/([a-z_]+)\.do$/i.match(path))
      product_id = nil
      if params.key?('product')
        head = params['product'][0]
        # The first value may itself be an array; unwrap one level.
        product_id = head.kind_of?(Array) ? head[0] : head
      end

      case page[1]
      when 'Search', 'InterService'
        data['rtype'] = 'TOC'
        data['mime'] = 'HTML'
      when 'full_record'
        data['rtype'] = 'REF'
        data['mime'] = 'HTML'
      when 'CitationReport'
        data['rtype'] = 'ANALYSIS'
        data['mime'] = 'MISC'
      end

      if /^([a-z]+)_GeneralSearch_input/i.match(page[1])
        data['rtype'] = 'SEARCH'
        data['mime'] = 'HTML'
      end

      # The product id is only reported for pages that were classified above.
      data['title_id'] = product_id if product_id && data.key?('rtype')

    elsif (page = /^\/([a-z_]*)\.action$/i.match(path))
      # The class used to read [a-zA-z_], whose A-z range also admitted
      # punctuation such as '[', '\', ']', '^' and '`'.
      case page[1]
      when 'JCRJournalHomeAction'
        data['rtype'] = 'TOC'
        data['mime'] = 'HTML'

      when 'JCRJournalProfileAction'
        data['rtype'] = 'TABLE'
        data['mime'] = 'HTML'

        if params.key?('journalTitle')
          data['publication_title'] = params['journalTitle'][0]
        end

        if params.key?('journal')
          journal = params['journal'][0]
          year = Array(params['year']).first
          data['title_id'] = journal
          # Only build the composite id when both parts exist; concatenating
          # a missing year used to raise TypeError.
          data['unit_id'] = "impact/" + journal + "/" + year if journal && year
        end

      when 'IndicatorsAction'
        data['rtype'] = 'MAP'
        data['mime'] = 'MISC'

      when 'DocumentsAction'
        data['rtype'] = 'GRAPH'
        data['mime'] = 'MISC'

      else
        return {}
      end

    elsif /^\/([a-z]{2,3})\/analyze\.do$/i.match(path)
      data['rtype'] = 'ANALYSIS'
      data['mime'] = 'MISC'
    end

    data
  end
end
@@ -6,185 +6,223 @@ module Wiley
6
6
  data = {
7
7
  "provider" => "wiley"
8
8
  }
9
-
10
- if ((match = /\/journal\/(10\.[0-9]+\/(\(ISSN\)([0-9]{4}-[0-9]{3}[0-9xX])))/i.match(path)))
11
- data['doi'] = match[1];
12
- data['unit_id'] = match[2];
13
- data['rtype'] = 'TOC';
14
- data['mime'] = 'MISC';
15
9
 
16
- data['online_identifier'] = match[3];
10
+ if ((match = /^\/pdf\/(10\.[0-9]+\/([0-9x]+))(\.ch[0-9]+)$/i.match(path)))
11
+ data['rtype'] = 'BOOK_SECTION'
12
+ data['mime'] = 'PDF'
13
+ data['doi'] = match[1]
14
+ data['unit_id'] = match[2] + match[3]
15
+ data['online_identifier'] = match[2]
16
+
17
+ elsif ((match = /^\/doi(\/[a-z]+)?\/(10\.[0-9]+\/([a-z0-9._-]+))$/i.match(path)))
18
+ data['doi'] = match[2]
19
+ data['unit_id'] = match[3]
20
+
21
+ case (match[1])
22
+ when '/pdf', '/epdf'
23
+ data['rtype'] = 'ARTICLE'
24
+ data['mime'] = 'PDF'
25
+
26
+ when '/full'
27
+ data['rtype'] = 'ARTICLE'
28
+ data['mime'] = 'HTML'
29
+
30
+ when '/abs'
31
+ data['rtype'] = 'ABS'
32
+ data['mime'] = 'HTML'
33
+
34
+ else
35
+ data['rtype'] = 'ARTICLE'
36
+ data['mime'] = 'HTML'
37
+ end
38
+
39
+
40
+ elsif ((match = /^\/toc\/toc\/(([0-9]+)\/([0-9]+)\/([0-9]+))$/i.match(path)))
41
+ data['rtype'] = 'TOC'
42
+ data['mime'] = 'MISC'
43
+ data['unit_id'] = match[1]
44
+ data['title_id'] = match[2]
45
+ data['vol'] = match[3]
46
+ data['issue'] = match[4]
47
+
48
+ elsif ((match = /^\/journal\/([0-9]+)$/i.match(path)))
49
+ data['rtype'] = 'TOC'
50
+ data['mime'] = 'MISC'
51
+ data['title_id'] = match[1]
52
+
53
+ elsif ((match = /\/journal\/(10\.[0-9]+\/(\(ISSN\)([0-9]{4}-[0-9]{3}[0-9xX])))/i.match(path)))
54
+ data['doi'] = match[1]
55
+ data['unit_id'] = match[2]
56
+ data['rtype'] = 'TOC'
57
+ data['mime'] = 'MISC'
58
+ data['online_identifier'] = match[3]
17
59
 
18
60
  elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.([0-9]{4})\.[^.]+\.[^.]+))\/issuetoc$/i.match(path)))
19
- data['doi'] = match[1];
20
- data['unit_id'] = match[2];
21
- data['title_id'] = match[3].upcase;
22
- data['rtype'] = 'TOC';
23
- data['mime'] = 'MISC';
24
-
25
- data['publication_date'] = match[4];
61
+ data['doi'] = match[1]
62
+ data['unit_id'] = match[2]
63
+ data['title_id'] = match[3].upcase
64
+ data['rtype'] = 'TOC'
65
+ data['mime'] = 'MISC'
66
+ data['publication_date'] = match[4]
26
67
 
27
68
  elsif ((match = /^\/doi\/(10\.[0-9]+\/(j\.([0-9]{4}-[0-9]{3}[0-9xX])\.([0-9]{4})\.[^.]+\.[^.]+))\/abstract$/i.match(path)))
28
- data['doi'] = match[1];
29
- data['unit_id'] = match[2];
30
- data['rtype'] = 'ABS';
31
- data['mime'] = 'MISC';
32
-
33
- data['online_identifier'] = match[3];
34
- data['publication_date'] = match[4];
69
+ data['doi'] = match[1]
70
+ data['unit_id'] = match[2]
71
+ data['rtype'] = 'ABS'
72
+ data['mime'] = 'MISC'
73
+ data['online_identifier'] = match[3]
74
+ data['publication_date'] = match[4]
35
75
 
36
76
  elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.([0-9]{4})[0-9]+))\/abstract$/i.match(path)))
37
- data['doi'] = match[1];
38
- data['unit_id'] = match[2];
39
- data['title_id'] = match[3].upcase;
40
- data['rtype'] = 'ABS';
41
- data['mime'] = 'MISC';
42
-
43
- data['publication_date'] = match[4];
77
+ data['doi'] = match[1]
78
+ data['unit_id'] = match[2]
79
+ data['title_id'] = match[3].upcase
80
+ data['rtype'] = 'ABS'
81
+ data['mime'] = 'MISC'
82
+ data['publication_date'] = match[4]
44
83
 
45
84
  elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.[0-9]+))\/full$/i.match(path)))
46
- data['doi'] = match[1];
47
- data['unit_id'] = match[2];
48
- data['title_id'] = match[3].upcase;
49
- data['rtype'] = 'ARTICLE';
50
- data['mime'] = 'HTML';
85
+ data['doi'] = match[1]
86
+ data['unit_id'] = match[2]
87
+ data['title_id'] = match[3].upcase
88
+ data['rtype'] = 'ARTICLE'
89
+ data['mime'] = 'HTML'
51
90
 
52
91
  elsif ((match = /^\/doi\/(10\.[0-9]+\/(j\.([0-9]{4}-[0-9]{3}[0-9xX])\.([0-9]{4})\.[^.]+\.[^.]+))\/pdf$/i.match(path)))
53
- data['doi'] = match[1];
54
- data['unit_id'] = match[2];
55
- data['rtype'] = 'ARTICLE';
56
- data['mime'] = 'PDF';
57
-
58
- data['online_identifier'] = match[3];
59
- data['publication_date'] = match[4];
92
+ data['doi'] = match[1]
93
+ data['unit_id'] = match[2]
94
+ data['rtype'] = 'ARTICLE'
95
+ data['mime'] = 'PDF'
96
+ data['online_identifier'] = match[3]
97
+ data['publication_date'] = match[4]
60
98
 
61
99
  elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.[0-9]+))\/pdf$/i.match(path)))
62
- data['doi'] = match[1];
63
- data['unit_id'] = match[2];
64
- data['title_id'] = match[3].upcase;
65
- data['rtype'] = 'ARTICLE';
66
- data['mime'] = 'PDF';
100
+ data['doi'] = match[1]
101
+ data['unit_id'] = match[2]
102
+ data['title_id'] = match[3].upcase
103
+ data['rtype'] = 'ARTICLE'
104
+ data['mime'] = 'PDF'
67
105
 
68
106
  elsif ((match = /^\/book\/(10\.[0-9]+\/([0-9]+))$/i.match(path)))
69
- data['doi'] = match[1];
70
- data['unit_id'] = match[2];
71
- data['title_id'] = match[2].upcase;
72
- data['rtype'] = 'TOC';
73
- data['mime'] = 'MISC';
107
+ data['doi'] = match[1]
108
+ data['unit_id'] = match[2]
109
+ data['title_id'] = match[2].upcase
110
+ data['rtype'] = 'TOC'
111
+ data['mime'] = 'MISC'
74
112
 
75
- data['print_identifier'] = match[2];
113
+ data['print_identifier'] = match[2]
76
114
 
77
115
  elsif (match = /^\/doi\/(10\.[0-9]+\/(([0-9]+)\.[^.]+))\/pdf$/i.match(path))
78
- data['doi'] = match[1];
79
- data['unit_id'] = match[2];
80
- data['title_id'] = match[3].upcase;
81
- data['rtype'] = 'BOOK_SECTION';
82
- data['mime'] = 'PDF';
116
+ data['doi'] = match[1]
117
+ data['unit_id'] = match[2]
118
+ data['title_id'] = match[3].upcase
119
+ data['rtype'] = 'BOOK_SECTION'
120
+ data['mime'] = 'PDF'
83
121
 
84
- data['print_identifier'] = match[3];
122
+ data['print_identifier'] = match[3]
85
123
 
86
124
  elsif (match = /^\/enhanced\/doi\/(10\.[0-9]+\/(([^.]+)\.[^\/]+))\/?$/i.match(path))
87
- data['doi'] = match[1];
88
- data['unit_id'] = match[2];
89
- data['title_id'] = match[3].upcase;
90
- data['rtype'] = 'ARTICLE';
91
- data['mime'] = 'HTML';
125
+ data['doi'] = match[1]
126
+ data['unit_id'] = match[2]
127
+ data['title_id'] = match[3].upcase
128
+ data['rtype'] = 'ARTICLE'
129
+ data['mime'] = 'HTML'
92
130
 
93
131
  elsif ((match = /^\/enhanced\/doi\/(10\.[0-9]+\/(([0-9]{4})([a-z0-9]{2})[a-z0-9]+))\/?$/i.match(path)))
94
132
 
95
- data['doi'] = match[1];
96
- data['unit_id'] = match[2];
97
- data['title_id'] = match[4].upcase;
98
- data['rtype'] = 'ARTICLE';
99
- data['mime'] = 'HTML';
133
+ data['doi'] = match[1]
134
+ data['unit_id'] = match[2]
135
+ data['title_id'] = match[4].upcase
136
+ data['rtype'] = 'ARTICLE'
137
+ data['mime'] = 'HTML'
100
138
 
101
- data['publication_date'] = match[3];
139
+ data['publication_date'] = match[3]
102
140
 
103
141
  elsif ((match = /^\/agu\/issue\/(10\.[0-9]+\/(([^.]+)\.[^\/]+))\/?$/i.match(path)))
104
- data['doi'] = match[1];
105
- data['unit_id'] = match[2];
106
- data['title_id'] = match[3].upcase;
107
- data['rtype'] = 'TOC';
108
- data['mime'] = 'HTML';
142
+ data['doi'] = match[1]
143
+ data['unit_id'] = match[2]
144
+ data['title_id'] = match[3].upcase
145
+ data['rtype'] = 'TOC'
146
+ data['mime'] = 'HTML'
109
147
 
110
148
  elsif (/^\/readcube$/i.match(path))
111
- data['rtype'] = 'ARTICLE';
112
- data['mime'] = 'READCUBE';
149
+ data['rtype'] = 'ARTICLE'
150
+ data['mime'] = 'READCUBE'
113
151
 
114
152
  if (params.key?("resource"))
115
153
 
116
- data['doi'] = params["resource"][0];
117
- data['unit_id'] = params["resource"][0].split('/')[1];
154
+ data['doi'] = params["resource"][0]
155
+ data['unit_id'] = params["resource"][0].split('/')[1]
118
156
  if ((match = /(10\.[0-9]+)\/([0-9]{4})([a-z0-9]{2})([^\/]+)$/i.match(params['resource'][0])))
119
- data['title_id'] = match[3].upcase;
157
+ data['title_id'] = match[3].upcase
120
158
  end
121
159
  end
122
160
 
123
161
 
124
162
 
125
163
  elsif ((match = /^\/doi\/(10\.[0-9]+\/([a-z]{1}[0-9]{8}([0-9]{2})[a-z0-9]+))\/pdf$/i.match(path)))
126
- data['doi'] = match[1];
127
- data['unit_id'] = match[2];
128
- data['title_id'] = match[2].upcase;
129
- data['rtype'] = 'ARTICLE';
130
- data['mime'] = 'PDF';
164
+ data['doi'] = match[1]
165
+ data['unit_id'] = match[2]
166
+ data['title_id'] = match[2].upcase
167
+ data['rtype'] = 'ARTICLE'
168
+ data['mime'] = 'PDF'
131
169
 
132
- data['publication_date'] = '20' + match[3];
170
+ data['publication_date'] = '20' + match[3]
133
171
 
134
172
  elsif ((match = /^\/iucr\/(10\.[0-9]+\/([a-z]{1}[0-9]{8}([0-9]{2})[0-9a-z]+))/i.match(path)))
135
- data['doi'] = match[1];
136
- data['unit_id'] = match[2] ;
137
- data['title_id'] = match[2].upcase;
138
- data['rtype'] = 'ARTICLE';
139
- data['mime'] = 'HTML';
173
+ data['doi'] = match[1]
174
+ data['unit_id'] = match[2]
175
+ data['title_id'] = match[2].upcase
176
+ data['rtype'] = 'ARTICLE'
177
+ data['mime'] = 'HTML'
140
178
 
141
- data['publication_date'] = '20' + match[3];
179
+ data['publication_date'] = '20' + match[3]
142
180
 
143
181
  elsif ((match = /^\/doi\/(10\.[0-9]+\/([a-z]{1}[0-9]{8}([0-9]{2})[0-9a-z]+))\/([a-z]+)$/i.match(path)))
144
- data['doi'] = match[1];
145
- data['unit_id'] = match[2];
146
- data['title_id'] = match[2].upcase;
182
+ data['doi'] = match[1]
183
+ data['unit_id'] = match[2]
184
+ data['title_id'] = match[2].upcase
147
185
 
148
- data['publication_date'] = '20' + match[3];
186
+ data['publication_date'] = '20' + match[3]
149
187
 
150
188
  case (match[4])
151
189
  when 'abstract'
152
- data['rtype'] = 'ABS';
153
- data['mime'] = 'MISC';
190
+ data['rtype'] = 'ABS'
191
+ data['mime'] = 'MISC'
154
192
  when 'pdf'
155
- data['rtype'] = 'ARTICLE';
156
- data['mime'] = 'PDF';
193
+ data['rtype'] = 'ARTICLE'
194
+ data['mime'] = 'PDF'
157
195
  when 'full'
158
- data['mime'] = 'PDF';
159
- data['rtype'] = 'HTML';
196
+ data['mime'] = 'PDF'
197
+ data['rtype'] = 'HTML'
160
198
  end
161
199
 
162
200
  elsif ((match = /^\/store\/(10\.[0-9]+\/(([a-z]+)\.([0-9]{4})[0-9]+))\/asset\/[a-z]+[0-9]+.pdf$/i.match(path)))
163
- data['doi'] = match[1];
164
- data['unit_id'] = match[2];
165
- data['title_id'] = match[3].upcase;
166
- data['mime'] = 'PDF';
201
+ data['doi'] = match[1]
202
+ data['unit_id'] = match[2]
203
+ data['title_id'] = match[3].upcase
204
+ data['mime'] = 'PDF'
167
205
 
168
- data['publication_date'] = match[4];
206
+ data['publication_date'] = match[4]
169
207
 
170
208
  elsif ((match = /^\/doi\/(10\.[0-9]+\/(([0-9]{2,4})([a-z]+)[0-9]+))\/(pdf|full)$/i.match(path)))
171
- data['doi'] = match[1];
172
- data['unit_id'] = match[2];
173
- data['title_id'] = match[4].upcase;
174
- data['rtype'] = 'ARTICLE';
175
- data['mime'] = match[5] == 'pdf' ? 'PDF' : 'HTML';
209
+ data['doi'] = match[1]
210
+ data['unit_id'] = match[2]
211
+ data['title_id'] = match[4].upcase
212
+ data['rtype'] = 'ARTICLE'
213
+ data['mime'] = match[5] == 'pdf' ? 'PDF' : 'HTML'
176
214
 
177
- data['publication_date'] = match[3];
215
+ data['publication_date'] = match[3]
178
216
 
179
217
  if (match[3].length === 2)
180
- data['publication_date'] = '19' + match[3];
218
+ data['publication_date'] = '19' + match[3]
181
219
  end
182
220
 
183
221
  elsif ((match = /^\/doi\/(10\.[0-9]+\/([^.]+))\/(pdf|full)$/i.match(path)))
184
- data['doi'] = match[1];
185
- data['unit_id'] = match[2];
186
- data['rtype'] = 'ARTICLE';
187
- data['mime'] = match[3] === 'pdf' ? 'PDF' : 'HTML';
222
+ data['doi'] = match[1]
223
+ data['unit_id'] = match[2]
224
+ data['rtype'] = 'ARTICLE'
225
+ data['mime'] = match[3] === 'pdf' ? 'PDF' : 'HTML'
188
226
  end
189
227
 
190
228
  return data