logstash-filter-ezproxy 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +2 -0
  3. data/CONTRIBUTORS +10 -0
  4. data/DEVELOPER.md +2 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE +7 -0
  7. data/README.md +86 -0
  8. data/lib/logstash/filters/dawsonera.rb +41 -0
  9. data/lib/logstash/filters/ebscohost.rb +116 -0
  10. data/lib/logstash/filters/emerald.rb +96 -0
  11. data/lib/logstash/filters/ezproxy.rb +93 -0
  12. data/lib/logstash/filters/jstor.rb +112 -0
  13. data/lib/logstash/filters/lexisnexis.rb +37 -0
  14. data/lib/logstash/filters/sage.rb +39 -0
  15. data/lib/logstash/filters/sciencedirect.rb +171 -0
  16. data/lib/logstash/filters/tandf.rb +55 -0
  17. data/lib/logstash/filters/wiley.rb +202 -0
  18. data/logstash-filter-ezproxy.gemspec +21 -0
  19. data/spec/filters/dawsonera/dawsonera.2014-09-03.csv +4 -0
  20. data/spec/filters/dawsonera/dawsonera_spec.rb +15 -0
  21. data/spec/filters/ebscohost/ebscohost.2014-08-21.csv +13 -0
  22. data/spec/filters/ebscohost/ebscohost_spec.rb +22 -0
  23. data/spec/filters/emerald/emerald.2015-08-11.csv +15 -0
  24. data/spec/filters/emerald/emerald_spec.rb +17 -0
  25. data/spec/filters/ezproxy_spec.rb +53 -0
  26. data/spec/filters/jstor/jstor.2013-10-03.csv +18 -0
  27. data/spec/filters/jstor/jstor_spec.rb +20 -0
  28. data/spec/filters/lexisnexis/lexisnexis.2013-05-17.csv +2 -0
  29. data/spec/filters/lexisnexis/lexisnexis_spec.rb +15 -0
  30. data/spec/filters/sage/sage_spec.rb +16 -0
  31. data/spec/filters/sage/sagej.2016-12-05.csv +6 -0
  32. data/spec/filters/sciencedirect/sciencedirect_spec.rb +17 -0
  33. data/spec/filters/sciencedirect/sd.2013-01-09.csv +28 -0
  34. data/spec/filters/tandf/tandf.2015-03-25.csv +9 -0
  35. data/spec/filters/tandf/tandf_spec.rb +17 -0
  36. data/spec/filters/wiley/wiley.2013-04-15.csv +28 -0
  37. data/spec/filters/wiley/wiley_spec.rb +19 -0
  38. data/spec/spec_helper.rb +2 -0
  39. metadata +130 -0
@@ -0,0 +1,112 @@
1
+ require 'uri'
2
+ require 'cgi'
3
+
4
+ module Jstor
5
+ def Jstor.parse (input)
6
+
7
+ uri = URI(URI.unescape(input))
8
+
9
+ url = uri.path
10
+ params = {}
11
+ if (uri.query)
12
+ params = CGI::parse(uri.query)
13
+ end
14
+
15
+ data = {
16
+ "provider" => "jstor"
17
+ }
18
+ doi_prefix = "10.2307"
19
+
20
+
21
+ if (match = /^\/journal\/([a-z0-9]+)$/i.match(url))
22
+ data["rtype"] = "TOC"
23
+ data["mime"] = "MISC"
24
+ data["unit_id"] = match[1]
25
+ data["title_id"] = match[1]
26
+
27
+ elsif (match = /^\/stable\/10\.[0-9]+\/(([a-z]+)\.([0-9]+)\.([0-9]+)\.issue-([0-9]+))$/i.match(url))
28
+ data["rtype"] = "TOC"
29
+ data["mime"] = "MISC"
30
+ data["unit_id"] = match[1]
31
+ data["title_id"] = match[2]
32
+ data["issue"] = match[5]
33
+
34
+ if match[3].length >= 4
35
+ data["publication_date"] = match[3]
36
+ data["vol"] = match[4]
37
+ else
38
+ data["vol"] = match[3]
39
+ end
40
+
41
+ elsif (match = /^\/stable\/((10\.[0-9]+\/)?([a-z0-9]+))$/i.match(url))
42
+ data["rtype"] = "TOC"
43
+ data["mime"] = "MISC"
44
+ data["unit_id"] = match[3]
45
+ data["title_id"] = match[3]
46
+
47
+ if match[2]
48
+ data["doi"] = match[1]
49
+ end
50
+
51
+ elsif (match = /^\/stable\/(i[0-9]+)$/i.match(url))
52
+ data["rtype"] = "TOC"
53
+ data["mime"] = "MISC"
54
+ data["unit_id"] = match[1]
55
+ data["title_id"] = match[1]
56
+
57
+ elsif (/^\/action\/showPublication$/i.match(url))
58
+ if (params["journalCode"])
59
+ data["title_id"] = params["journalCode"][0]
60
+ data["unit_id"] = params["journalCode"][0]
61
+ data["rtype"] = 'TOC'
62
+ data["mime"] = 'MISC'
63
+ end
64
+
65
+ elsif (match = /^\/stable\/(get_image|pdf|pdfplus)\/((10\.[0-9]+\/)?([a-z0-9.]+?))(?:\.pdf)?$/i.match(url))
66
+ data["unit_id"] = match[4]
67
+ data["doi"] = match[3] ? match[2] : doi_prefix + "/" + match[2]
68
+
69
+ case match[1]
70
+ when 'get_image'
71
+ data["rtype"] = "ARTICLE_SECTION"
72
+ data["mime"] = "GIF"
73
+ when 'pdf'
74
+ data["rtype"] = "ARTICLE"
75
+ data["mime"] = "PDF"
76
+ when 'pdfplus'
77
+ data["rtype"] = "ARTICLE"
78
+ data["mime"] = "PDFPLUS"
79
+ end
80
+
81
+ idPattern = /^([a-z0-9]+)((?:\.(\d+))?\.(\d+)\.(\d+)\.(\w+))?/.match(match[4]) || [];
82
+
83
+ data["title_id"] = idPattern[1]
84
+ data["publication_date"] = idPattern[3]
85
+ data["vol"] = idPattern[4]
86
+ data["issue"] = idPattern[5]
87
+
88
+ if (idPattern[6] == 'cover')
89
+ data["rtype"] = 'COVER'
90
+ elsif (idPattern[6] == 'toc')
91
+ data["rtype"] = 'TOC'
92
+ else
93
+ if (idPattern[6] != nil)
94
+ first_page = idPattern[6].to_i
95
+
96
+ unless (first_page.to_f.nan?)
97
+ data["first_page"] = first_page.to_s
98
+ end
99
+ end
100
+ end
101
+
102
+ elsif (match = /^\/stable\/(info|view)\/([0-9]+)$/i.match(url))
103
+ data["rtype"] = match[1] === 'info' ? "ABS" : "PREVIEW"
104
+ data["mime"] = "MISC"
105
+ data["unit_id"] = match[2]
106
+ data["title_id"] = match[2]
107
+ data["issue"] = match[5]
108
+ end
109
+
110
+ return data
111
+ end
112
+ end
@@ -0,0 +1,37 @@
1
+ require 'uri'
2
+
3
+ module LexisNexis
4
+ def LexisNexis.parse (input)
5
+ uri = URI(URI.unescape(input))
6
+
7
+ path = uri.path
8
+
9
+ data = {
10
+ "provider" => "lexisnexis"
11
+ }
12
+
13
+ if (/\/droit\/results\/docview\/docview/.match(path))
14
+ if (uri.query)
15
+ params = CGI::parse(uri.query)
16
+
17
+ if (params["risb"])
18
+ data["title_id"] = params["risb"][0]
19
+ data["unit_id"] = params["risb"][0]
20
+ end
21
+
22
+ if (params["format"])
23
+ case params["format"][0]
24
+ when 'GNBFULL'
25
+ data["rtype"] = 'ARTICLE'
26
+ data["mime"] = 'HTML'
27
+ when 'AUTRECAS'
28
+ data["rtype"] = 'ARTICLE'
29
+ data["mime"] = 'HTML'
30
+ end
31
+ end
32
+ end
33
+ end
34
+
35
+ return data
36
+ end
37
+ end
@@ -0,0 +1,39 @@
1
+ require "uri"
2
+
3
# Extracts resource metadata from SAGE URLs.
module Sage
  # Classifies a SAGE URL from its path.
  #
  # input - String URL. The whole string is percent-decoded before it is
  #         parsed.
  #
  # Returns a Hash with String keys. "provider" => "sage" is always present.
  # "/loi/..." and "/toc/..." paths add "rtype", "mime", "title_id" and, when
  # a number follows the journal code, "unit_id". "/doi/<kind>/<doi>" paths
  # add "rtype", "mime", "doi" and "unit_id".
  # Raises URI::InvalidURIError when the decoded input is not a valid URI.
  def self.parse(input)
    # URI.unescape was removed in Ruby 3.0; the default parser exposes the
    # same decoder on old and new Ruby versions alike.
    uri = URI(URI::DEFAULT_PARSER.unescape(input))
    path = uri.path

    data = {
      "provider" => "sage"
    }

    if (match = /^\/(loi|toc)\/(([a-z]+)\/?([0-9]+)?\/?([0-9]+)?)$/i.match(path))
      data["rtype"] = "TOC"
      data["mime"] = "MISC"
      data["title_id"] = match[3]
      # A unit id is only meaningful once a number narrows the journal code.
      data["unit_id"] = match[2] if match[4]

    elsif (match = /^\/doi\/([a-z]+)\/(([0-9]{2})\.([0-9]{4})\/([0-9]+))$/i.match(path))
      # Default classification; the kinds handled below refine it.
      data["rtype"] = "ARTICLE"
      data["mime"] = "HTML"
      data["doi"] = match[2]
      data["unit_id"] = match[5]

      # The pattern is case-insensitive, so normalise before comparing.
      case match[1].downcase
      when "pdf"
        data["mime"] = "PDF"
      when "figure"
        data["rtype"] = "FIGURE"
      end
    end

    data
  end
end
@@ -0,0 +1,171 @@
1
+ require 'uri'
2
+
3
+ module ScienceDirect
4
+ def ScienceDirect.parse (input)
5
+ uri = URI(URI.unescape(input))
6
+
7
+ path = uri.path
8
+ params = {}
9
+
10
+ if (uri.query)
11
+ params = CGI::parse(uri.query)
12
+ end
13
+
14
+ data = {
15
+ "provider" => "sciencedirect"
16
+ }
17
+
18
+ if (params.key?("_ob"))
19
+ if (params['_cdi'])
20
+ data['title_id'] = params['_cdi'][0]
21
+ end
22
+
23
+ case (params["_ob"][0])
24
+ when 'PdfDownloadURL'
25
+ data['mime'] = 'PDF'
26
+
27
+ data['rtype'] = 'ARTICLES_BUNDLE'
28
+
29
+ data['unit_id'] = params['_hubEid'][0]
30
+ data["pii"] = (params['_hubEid'][0] || '').split('-')[2]
31
+
32
+
33
+ if (params.key?("_isbn")|| params.key?('isBook'))
34
+ data['rtype'] = 'CHAPTERS_BUNDLE'
35
+ data['print_identifier'] = params['_isbn'][0]
36
+ data['title_id'] = params['_isbn'][0]
37
+ data['unit_id'] = params['_isbn'][0]
38
+ end
39
+
40
+ if (data["pii"])
41
+ data['title_id'] = data["pii"][1, 8]
42
+ data['print_identifier'] = data["pii"][1, 4] + "-" + data["pii"][5, 4]
43
+ end
44
+
45
+ when 'IssueURL'
46
+ data['title_id'] = (params['_tockey'][0] || '').split('#')[2]
47
+ data['rtype'] = 'TOC'
48
+ data['mime'] = 'MISC'
49
+
50
+ when 'ArticleURL'
51
+ case (params['_fmt'][0])
52
+ when 'summary'
53
+ data['rtype'] = 'ABS'
54
+ data['mime'] = 'MISC'
55
+ when 'full'
56
+ data['rtype'] = 'ARTICLE'
57
+ data['mime'] = 'HTML'
58
+ end
59
+
60
+ when 'MImg'
61
+ data['rtype'] = 'ARTICLE'
62
+ data['mime'] = 'PDF'
63
+
64
+ when 'MiamiImageURL'
65
+ if (params['_pii'])
66
+
67
+ data["pii"] = params['_pii'][0]
68
+ data['unit_id'] = params['_pii'][0]
69
+
70
+ if (params['_pii'][0][0] === 'B')
71
+ data['print_identifier'] = params['_pii'][0][1, 13]
72
+ data['title_id'] = data['print_identifier']
73
+ data['rtype'] = 'BOOK_SECTION'
74
+ data['mime'] = 'PDF'
75
+
76
+ else
77
+ data['print_identifier'] = params['_pii'][0][1, 4] + "-" + params['_pii'][0][5, 4]
78
+ data['title_id'] = params['_pii'][0][1, 8]
79
+ data['rtype'] = 'ARTICLE'
80
+ data['mime'] = 'PDF'
81
+ end
82
+ end
83
+
84
+ when 'PdfExcerptURL'
85
+ data['rtype'] = 'PREVIEW'
86
+ data['mime'] = 'PDF'
87
+
88
+ if (params['_imagekey'][0] && params['_piikey'][0])
89
+ data["pii"] = params['_piikey'][0]
90
+ if (match = /.?-[^-]+-([0-9]{4})([0-9]{3}[0-9Xx])([0-9A-Za-z]*)-main.pdf$/.match(params['_imagekey'][0]))
91
+ data['unit_id'] = params['_piikey'][0]
92
+ data['title_id'] = match[1] + match[2]
93
+ data['print_identifier'] = match[1] + '-' + match[2]
94
+ end
95
+ end
96
+ end
97
+
98
+ elsif (match = /^\/science\/article\/pii\/(([SB])?([0-9]{7}(?:[0-9]{5})?[0-9Xx])[0-9A-Za-z]*)(\/pdf(?:ft)?)?$/.match(path))
99
+
100
+ data["pii"] = match[1]
101
+ data['unit_id'] = match[1]
102
+ data['mime'] = match[4] ? 'PDF' : 'HTML'
103
+
104
+ if (match[2] == 'B')
105
+ data['rtype'] = 'BOOK_SECTION'
106
+ data['title_id'] = match[3]
107
+ data['print_identifier'] = match[3]
108
+ else
109
+ data['rtype'] = 'ARTICLE'
110
+ data['title_id'] = match[3][0, 8]
111
+ data['print_identifier'] = match[3][0, 4] + "-" + match[3][4, 4]
112
+ end
113
+
114
+
115
+ elsif ((match = /^\/science\/(journal|bookseries|handbooks|handbooks|book)\/([0-9Xx]{8,})(\/[0-9]+)?(\/[0-9]+)?$/.match(path)))
116
+
117
+ data['rtype'] = 'TOC'
118
+ data['mime'] = 'MISC'
119
+ data['unit_id'] = match[2]
120
+ data['title_id'] = match[2]
121
+
122
+ if (match[3])
123
+ data['unit_id'] += match[3]
124
+ end
125
+
126
+ case (match[1])
127
+ when 'journal', 'handbooks', 'bookseries'
128
+ data['print_identifier'] = match[2][0, 4] + "-" + match[2][4, 4]
129
+ when 'book'
130
+ data['print_identifier'] = match[2]
131
+ end
132
+
133
+ elsif (match = /^\/science\/MiamiMultiMediaURL\/[^\/]+(S([0-9]{4})([0-9]{3}[0-9Xx])[a-zA-Z0-9]*).*\.pdf$/.match(path))
134
+
135
+ data["pii"] = match[1]
136
+ data['unit_id'] = match[1]
137
+ data['title_id'] = match[2] + match[3]
138
+ data['print_identifier'] = match[2] + "-" + match[3]
139
+ data['rtype'] = 'ARTICLE'
140
+ data['mime'] = 'PDF'
141
+
142
+ elsif (match = /^\/(([SB])?([0-9]{7}(?:[0-9]{5})?[0-9Xx])[0-9A-Za-z]*)\/[0-9A-Za-z\-.]*-main\.pdf$/.match(path))
143
+
144
+ data["pii"] = match[1]
145
+ data['unit_id'] = match[1]
146
+ data['mime'] = 'PDF'
147
+
148
+ if (match[2] === 'B')
149
+ data['rtype'] = 'BOOK_SECTION'
150
+ data['title_id'] = match[3]
151
+ data['print_identifier'] = match[3]
152
+ else
153
+ data['rtype'] = 'ARTICLE'
154
+ data['title_id'] = match[3][0, 8]
155
+ data['print_identifier'] = match[3][0, 4] + "-" + match[3][4, 4]
156
+ end
157
+
158
+ elsif (path == '/science/publication')
159
+ data['rtype'] = 'TOC'
160
+ data['mime'] = 'MISC'
161
+
162
+ if (params['issn'])
163
+ data['print_identifier'] = params['issn'][0][0, 4] + "-" + params['issn'][0][4, 4]
164
+ data['unit_id'] = data['print_identifier']
165
+ data['title_id'] = params['issn'][0]
166
+ end
167
+ end
168
+
169
+ return data
170
+ end
171
+ end
@@ -0,0 +1,55 @@
1
+
2
+ require 'uri'
3
+ require 'cgi'
4
+
5
# Extracts resource metadata from Taylor & Francis ("tandf") URLs.
module TandF
  # Classifies a URL from its path.
  #
  # input - String URL. The whole string is percent-decoded before it is
  #         parsed.
  #
  # Returns a Hash with String keys. "provider" => "tandf" is always present.
  # "/doi/(full|pdf|abs)/<doi>" paths add "doi", "unit_id", "rtype", "mime"
  # and, when the DOI suffix starts with eight digits, "print_identifier" and
  # "title_id". "/toc/<code>/current" and "/loi/<code>" paths add "rtype",
  # "mime", "title_id" and "unit_id".
  # Raises URI::InvalidURIError when the decoded input is not a valid URI.
  def self.parse(input)
    # URI.unescape was removed in Ruby 3.0; the default parser exposes the
    # same decoder on old and new Ruby versions alike.
    uri = URI(URI::DEFAULT_PARSER.unescape(input))
    path = uri.path

    data = {
      "provider" => "tandf"
    }

    if (match = /^\/doi\/(full|pdf|abs)\/([0-9.]+\/([0-9a-z.]+))$/.match(path))
      data['doi'] = match[2]
      data['unit_id'] = match[3]

      # Eight leading digits in the DOI suffix are reformatted as NNNN-NNNN.
      if (/^[0-9]{8}/.match(match[3]))
        data['print_identifier'] = match[3][0, 4] + '-' + match[3][4, 4]
        data['title_id'] = data['print_identifier']
      end

      case match[1]
      when 'full'
        data['rtype'] = 'ARTICLE'
        data['mime'] = 'HTML'
      when 'pdf'
        data['rtype'] = 'ARTICLE'
        data['mime'] = 'PDF'
      when 'abs'
        data['rtype'] = 'ABS'
        data['mime'] = 'HTML'
      end

    elsif (match = /^\/toc\/([a-zA-Z0-9]+)\/current$/.match(path))
      data['rtype'] = 'TOC'
      data['mime'] = 'HTML'
      data['title_id'] = match[1]
      data['unit_id'] = match[1]

    elsif (match = /^\/loi\/([a-zA-Z0-9]+)$/.match(path))
      data['rtype'] = 'TOC'
      data['mime'] = 'HTML'
      data['title_id'] = match[1]
      data['unit_id'] = match[1]
    end

    data
  end
end
@@ -0,0 +1,202 @@
1
+ require 'uri'
2
+
3
+ module Wiley
4
+ def Wiley.parse (input)
5
+ uri = URI(URI.unescape(input))
6
+
7
+ path = uri.path
8
+
9
+ data = {
10
+ "provider" => "wiley"
11
+ }
12
+
13
+ #!/usr/bin/env node
14
+
15
+ if ((match = /\/journal\/(10\.[0-9]+\/(\(ISSN\)([0-9]{4}-[0-9]{3}[0-9xX])))/i.match(path)))
16
+ data['doi'] = match[1];
17
+ data['unit_id'] = match[2];
18
+ data['rtype'] = 'TOC';
19
+ data['mime'] = 'MISC';
20
+
21
+ data['online_identifier'] = match[3];
22
+
23
+ elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.([0-9]{4})\.[^.]+\.[^.]+))\/issuetoc$/i.match(path)))
24
+ data['doi'] = match[1];
25
+ data['unit_id'] = match[2];
26
+ data['title_id'] = match[3].upcase;
27
+ data['rtype'] = 'TOC';
28
+ data['mime'] = 'MISC';
29
+
30
+ data['publication_date'] = match[4];
31
+
32
+ elsif ((match = /^\/doi\/(10\.[0-9]+\/(j\.([0-9]{4}-[0-9]{3}[0-9xX])\.([0-9]{4})\.[^.]+\.[^.]+))\/abstract$/i.match(path)))
33
+ data['doi'] = match[1];
34
+ data['unit_id'] = match[2];
35
+ data['rtype'] = 'ABS';
36
+ data['mime'] = 'MISC';
37
+
38
+ data['online_identifier'] = match[3];
39
+ data['publication_date'] = match[4];
40
+
41
+ elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.([0-9]{4})[0-9]+))\/abstract$/i.match(path)))
42
+ data['doi'] = match[1];
43
+ data['unit_id'] = match[2];
44
+ data['title_id'] = match[3].upcase;
45
+ data['rtype'] = 'ABS';
46
+ data['mime'] = 'MISC';
47
+
48
+ data['publication_date'] = match[4];
49
+
50
+ elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.[0-9]+))\/full$/i.match(path)))
51
+ data['doi'] = match[1];
52
+ data['unit_id'] = match[2];
53
+ data['title_id'] = match[3].upcase;
54
+ data['rtype'] = 'ARTICLE';
55
+ data['mime'] = 'HTML';
56
+
57
+ elsif ((match = /^\/doi\/(10\.[0-9]+\/(j\.([0-9]{4}-[0-9]{3}[0-9xX])\.([0-9]{4})\.[^.]+\.[^.]+))\/pdf$/i.match(path)))
58
+ data['doi'] = match[1];
59
+ data['unit_id'] = match[2];
60
+ data['rtype'] = 'ARTICLE';
61
+ data['mime'] = 'PDF';
62
+
63
+ data['online_identifier'] = match[3];
64
+ data['publication_date'] = match[4];
65
+
66
+ elsif ((match = /^\/doi\/(10\.[0-9]+\/(([^.]+)\.[0-9]+))\/pdf$/i.match(path)))
67
+ data['doi'] = match[1];
68
+ data['unit_id'] = match[2];
69
+ data['title_id'] = match[3].upcase;
70
+ data['rtype'] = 'ARTICLE';
71
+ data['mime'] = 'PDF';
72
+
73
+ elsif ((match = /^\/book\/(10\.[0-9]+\/([0-9]+))$/i.match(path)))
74
+ data['doi'] = match[1];
75
+ data['unit_id'] = match[2];
76
+ data['title_id'] = match[2].upcase;
77
+ data['rtype'] = 'TOC';
78
+ data['mime'] = 'MISC';
79
+
80
+ data['print_identifier'] = match[2];
81
+
82
+ elsif (match = /^\/doi\/(10\.[0-9]+\/(([0-9]+)\.[^.]+))\/pdf$/i.match(path))
83
+ data['doi'] = match[1];
84
+ data['unit_id'] = match[2];
85
+ data['title_id'] = match[3].upcase;
86
+ data['rtype'] = 'BOOK_SECTION';
87
+ data['mime'] = 'PDF';
88
+
89
+ data['print_identifier'] = match[3];
90
+
91
+ elsif (match = /^\/enhanced\/doi\/(10\.[0-9]+\/(([^.]+)\.[^\/]+))\/?$/i.match(path))
92
+ data['doi'] = match[1];
93
+ data['unit_id'] = match[2];
94
+ data['title_id'] = match[3].upcase;
95
+ data['rtype'] = 'ARTICLE';
96
+ data['mime'] = 'HTML';
97
+
98
+ elsif ((match = /^\/enhanced\/doi\/(10\.[0-9]+\/(([0-9]{4})([a-z0-9]{2})[a-z0-9]+))\/?$/i.match(path)))
99
+
100
+ data['doi'] = match[1];
101
+ data['unit_id'] = match[2];
102
+ data['title_id'] = match[4].upcase;
103
+ data['rtype'] = 'ARTICLE';
104
+ data['mime'] = 'HTML';
105
+
106
+ data['publication_date'] = match[3];
107
+
108
+ elsif ((match = /^\/agu\/issue\/(10\.[0-9]+\/(([^.]+)\.[^\/]+))\/?$/i.match(path)))
109
+ data['doi'] = match[1];
110
+ data['unit_id'] = match[2];
111
+ data['title_id'] = match[3].upcase;
112
+ data['rtype'] = 'TOC';
113
+ data['mime'] = 'HTML';
114
+
115
+ elsif (/^\/readcube$/i.match(path))
116
+ data['rtype'] = 'ARTICLE';
117
+ data['mime'] = 'READCUBE';
118
+
119
+ if (uri.query)
120
+ params = CGI::parse(uri.query)
121
+
122
+ if (params["resource"])
123
+
124
+ data['doi'] = params["resource"][0];
125
+ data['unit_id'] = params["resource"][0].split('/')[1];
126
+ if ((match = /(10\.[0-9]+)\/([0-9]{4})([a-z0-9]{2})([^\/]+)$/i.match(params['resource'][0])))
127
+ data['title_id'] = match[3].upcase;
128
+ end
129
+ end
130
+ end
131
+
132
+
133
+ elsif ((match = /^\/doi\/(10\.[0-9]+\/([a-z]{1}[0-9]{8}([0-9]{2})[a-z0-9]+))\/pdf$/i.match(path)))
134
+ data['doi'] = match[1];
135
+ data['unit_id'] = match[2];
136
+ data['title_id'] = match[2].upcase;
137
+ data['rtype'] = 'ARTICLE';
138
+ data['mime'] = 'PDF';
139
+
140
+ data['publication_date'] = '20' + match[3];
141
+
142
+ elsif ((match = /^\/iucr\/(10\.[0-9]+\/([a-z]{1}[0-9]{8}([0-9]{2})[0-9a-z]+))/i.match(path)))
143
+ data['doi'] = match[1];
144
+ data['unit_id'] = match[2] ;
145
+ data['title_id'] = match[2].upcase;
146
+ data['rtype'] = 'ARTICLE';
147
+ data['mime'] = 'HTML';
148
+
149
+ data['publication_date'] = '20' + match[3];
150
+
151
+ elsif ((match = /^\/doi\/(10\.[0-9]+\/([a-z]{1}[0-9]{8}([0-9]{2})[0-9a-z]+))\/([a-z]+)$/i.match(path)))
152
+ data['doi'] = match[1];
153
+ data['unit_id'] = match[2];
154
+ data['title_id'] = match[2].upcase;
155
+
156
+ data['publication_date'] = '20' + match[3];
157
+
158
+ case (match[4])
159
+ when 'abstract'
160
+ data['rtype'] = 'ABS';
161
+ data['mime'] = 'MISC';
162
+ when 'pdf'
163
+ data['rtype'] = 'ARTICLE';
164
+ data['mime'] = 'PDF';
165
+ when 'full'
166
+ data['mime'] = 'PDF';
167
+ data['rtype'] = 'HTML';
168
+ end
169
+
170
+ elsif ((match = /^\/store\/(10\.[0-9]+\/(([a-z]+)\.([0-9]{4})[0-9]+))\/asset\/[a-z]+[0-9]+.pdf$/i.match(path)))
171
+ data['doi'] = match[1];
172
+ data['unit_id'] = match[2];
173
+ data['title_id'] = match[3].upcase;
174
+ data['mime'] = 'PDF';
175
+
176
+ data['publication_date'] = match[4];
177
+
178
+ elsif ((match = /^\/doi\/(10\.[0-9]+\/(([0-9]{2,4})([a-z]+)[0-9]+))\/(pdf|full)$/i.match(path)))
179
+ data['doi'] = match[1];
180
+ data['unit_id'] = match[2];
181
+ data['title_id'] = match[4].upcase;
182
+ data['rtype'] = 'ARTICLE';
183
+ data['mime'] = match[5] == 'pdf' ? 'PDF' : 'HTML';
184
+
185
+ data['publication_date'] = match[3];
186
+
187
+ if (match[3].length === 2)
188
+ data['publication_date'] = '19' + match[3];
189
+ end
190
+
191
+ elsif ((match = /^\/doi\/(10\.[0-9]+\/([^.]+))\/(pdf|full)$/i.match(path)))
192
+ data['doi'] = match[1];
193
+ data['unit_id'] = match[2];
194
+ data['rtype'] = 'ARTICLE';
195
+ data['mime'] = match[3] === 'pdf' ? 'PDF' : 'HTML';
196
+ end
197
+
198
+ return data
199
+
200
+ end
201
+
202
+ end