serrano 0.1.4.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -2
- data/CHANGELOG.md +7 -0
- data/Gemfile.lock +2 -2
- data/README.md +4 -0
- data/lib/serrano/cnrequest.rb +6 -1
- data/lib/serrano/cursor_testing.rb +52 -0
- data/lib/serrano/error.rb +22 -0
- data/lib/serrano/faraday.rb +71 -0
- data/lib/serrano/filterhandler.rb +53 -47
- data/lib/serrano/request.rb +10 -12
- data/lib/serrano/request_cursor.rb +138 -0
- data/lib/serrano/utils.rb +5 -0
- data/lib/serrano/version.rb +1 -1
- data/lib/serrano.rb +127 -35
- data/serrano.gemspec +1 -1
- metadata +7 -3
- data/lib/serrano/errors.rb +0 -67
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ce96cb27ec2070ee612d0b85d29949b441d552a4
|
4
|
+
data.tar.gz: 90791e525778c083fb7e57d445a5596a2ffbf7a4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dc0937a45f90e0c3d01581149fa09a21cb0a3f4c669181404b0317a553d184e8d4d0a816cb7a1b30e9a373ff1eefea1082a531308f863b3f09f17644d8dcaa27
|
7
|
+
data.tar.gz: e36b25922967b65c48bd92656c8db58359987dfae0380025043201edb8527e56098f44e3c4f3c78048276780abda2a7b276f4c860580d34774d4b598528a5961
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
## 0.2.0 (2016-03-07)
|
2
|
+
|
3
|
+
* Added error classes to fail more gracefully, adapted from instagram gem (#4)
|
4
|
+
* Added support for the cursor feature in the Crossref API for deep paging (#14)
|
5
|
+
* Added disclaimer to docs that full text/abstracts aren't searched (#24)
|
6
|
+
* Now passing user agent string with serrano version in each request (#25)
|
7
|
+
|
1
8
|
## 0.1.4 (2015-12-04)
|
2
9
|
|
3
10
|
* Added `csl_styles()` method to get CSL styles info (#23)
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
serrano (0.
|
4
|
+
serrano (0.2.0)
|
5
5
|
faraday (~> 0.9.1)
|
6
6
|
faraday_middleware (~> 0.10.0)
|
7
7
|
multi_json (~> 1.0)
|
@@ -15,7 +15,7 @@ GEM
|
|
15
15
|
simplecov
|
16
16
|
url
|
17
17
|
docile (1.1.5)
|
18
|
-
faraday (0.9.
|
18
|
+
faraday (0.9.2)
|
19
19
|
multipart-post (>= 1.2, < 3)
|
20
20
|
faraday_middleware (0.10.0)
|
21
21
|
faraday (>= 0.7.4, < 0.10)
|
data/README.md
CHANGED
@@ -41,6 +41,10 @@ Other methods:
|
|
41
41
|
* [Citation count][ccount] - `Serrano.citation_count()`
|
42
42
|
* [get CSL styles][csl] - `Serrano.csl_styles()`
|
43
43
|
|
44
|
+
Note about searching:
|
45
|
+
|
46
|
+
You are using the Crossref search API described at https://github.com/CrossRef/rest-api-doc/blob/master/rest_api.md. When you search with query terms, Crossref's servers do not search the full text, or even the abstracts, of articles; they search only the metadata that is returned to you, that is, article titles, authors, etc. For some discussion on this, see https://github.com/CrossRef/rest-api-doc/issues/101
|
47
|
+
|
44
48
|
## Install
|
45
49
|
|
46
50
|
### Release version
|
data/lib/serrano/cnrequest.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
require "faraday"
|
2
2
|
require "faraday_middleware"
|
3
3
|
require "multi_json"
|
4
|
-
require "serrano/
|
4
|
+
require "serrano/error"
|
5
5
|
require "serrano/constants"
|
6
|
+
require 'serrano/utils'
|
6
7
|
require 'serrano/helpers/configuration'
|
7
8
|
|
8
9
|
##
|
@@ -53,6 +54,8 @@ def make_request(ids, format, style, locale)
|
|
53
54
|
if format == "citeproc-json"
|
54
55
|
endpt = "http://api.crossref.org/works/" + ids + "/" + type
|
55
56
|
cr_works = Faraday.new(:url => endpt)
|
57
|
+
cr_works.headers[:user_agent] = make_ua
|
58
|
+
cr_works.headers["X-USER-AGENT"] = make_ua
|
56
59
|
res = cr_works.get
|
57
60
|
else
|
58
61
|
if format == "text"
|
@@ -62,6 +65,8 @@ def make_request(ids, format, style, locale)
|
|
62
65
|
res = $conn.get do |req|
|
63
66
|
req.url ids
|
64
67
|
req.headers['Accept'] = type
|
68
|
+
req.headers[:user_agent] = make_ua
|
69
|
+
req.headers["X-USER-AGENT"] = make_ua
|
65
70
|
end
|
66
71
|
end
|
67
72
|
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'faraday'
|
2
|
+
require "multi_json"
|
3
|
+
|
4
|
+
query = "widget"
|
5
|
+
cursor = "*"
|
6
|
+
limit = 100
|
7
|
+
cursor_max = 500
|
8
|
+
rows = limit
|
9
|
+
|
10
|
+
filter = nil
|
11
|
+
offset = nil
|
12
|
+
sample = nil
|
13
|
+
sort = nil
|
14
|
+
order = nil
|
15
|
+
facet = nil
|
16
|
+
|
17
|
+
args = { query: query, filter: filter, offset: offset,
|
18
|
+
rows: limit, sample: sample, sort: sort,
|
19
|
+
order: order, facet: facet, cursor: cursor }
|
20
|
+
opts = args.delete_if { |k, v| v.nil? }
|
21
|
+
|
22
|
+
conn = Faraday.new(:url => "http://api.crossref.org/", :request => nil)
|
23
|
+
|
24
|
+
def _req(conn, path, opts)
|
25
|
+
res = conn.get path, opts
|
26
|
+
return MultiJson.load(res.body)
|
27
|
+
end
|
28
|
+
|
29
|
+
def _redo_req(conn, path, js, opts, cu, max_avail, cursor_max)
|
30
|
+
if !cu.nil? and cursor_max > js['message']['items'].length
|
31
|
+
res = [js]
|
32
|
+
total = js['message']['items'].length
|
33
|
+
while (!cu.nil? and cursor_max > total and total < max_avail)
|
34
|
+
opts[:cursor] = cu
|
35
|
+
out = _req(conn, path, opts)
|
36
|
+
cu = out['message']['next-cursor']
|
37
|
+
res << out
|
38
|
+
total = res.collect { |x| x['message']['items'].length }.inject(:+)
|
39
|
+
end
|
40
|
+
return res
|
41
|
+
else
|
42
|
+
return js
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
path = 'works'
|
47
|
+
|
48
|
+
js = _req(conn, path, opts)
|
49
|
+
cu = js['message']['next-cursor']
|
50
|
+
max_avail = js['message']['total-results']
|
51
|
+
res = _redo_req(conn, path, js, opts, cu, max_avail, cursor_max)
|
52
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Serrano
|
2
|
+
# Custom error class for rescuing from all Serrano errors
|
3
|
+
class Error < StandardError; end
|
4
|
+
|
5
|
+
# Raised when Crossref returns the HTTP status code 400
|
6
|
+
class BadRequest < Error; end
|
7
|
+
|
8
|
+
# Raised when Crossref returns the HTTP status code 404
|
9
|
+
class NotFound < Error; end
|
10
|
+
|
11
|
+
# Raised when Crossref returns the HTTP status code 500
|
12
|
+
class InternalServerError < Error; end
|
13
|
+
|
14
|
+
# Raised when Crossref returns the HTTP status code 502
|
15
|
+
class BadGateway < Error; end
|
16
|
+
|
17
|
+
# Raised when Crossref returns the HTTP status code 503
|
18
|
+
class ServiceUnavailable < Error; end
|
19
|
+
|
20
|
+
# Raised when Crossref returns the HTTP status code 504
|
21
|
+
class GatewayTimeout < Error; end
|
22
|
+
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'faraday'
|
2
|
+
require 'multi_json'
|
3
|
+
|
4
|
+
# @private
|
5
|
+
module FaradayMiddleware
  # Faraday middleware that turns selected HTTP error statuses into
  # Serrano exceptions once the response has completed.
  #
  # Statuses handled: 400, 404, 500, 502, 503 and 504. Any other status is
  # passed through untouched.
  #
  # @private
  class RaiseHttpException < Faraday::Middleware
    # Runs the rest of the middleware stack and inspects the finished response.
    #
    # @param env [Hash-like] the Faraday request environment
    # @raise [Serrano::BadRequest] on HTTP 400
    # @raise [Serrano::NotFound] on HTTP 404
    # @raise [Serrano::InternalServerError] on HTTP 500
    # @raise [Serrano::BadGateway] on HTTP 502
    # @raise [Serrano::ServiceUnavailable] on HTTP 503
    # @raise [Serrano::GatewayTimeout] on HTTP 504
    def call(env)
      @app.call(env).on_complete do |response|
        case response[:status].to_i
        when 400
          raise Serrano::BadRequest, error_message_400(response)
        when 404
          raise Serrano::NotFound, error_message_400(response)
        when 500
          raise Serrano::InternalServerError, error_message_500(response, "Something is technically wrong.")
        when 502
          raise Serrano::BadGateway, error_message_500(response, "The server returned an invalid or incomplete response.")
        when 503
          raise Serrano::ServiceUnavailable, error_message_500(response, "Crossref is rate limiting your requests.")
        when 504
          raise Serrano::GatewayTimeout, error_message_500(response, "504 Gateway Time-out")
        end
      end
    end

    def initialize(app)
      super app
      @parser = nil
    end

    private

    # Builds the message used for 4xx errors: method, URL, status and, when
    # one can be extracted, the detail taken from the response body.
    def error_message_400(response)
      "\n #{response[:method].to_s.upcase} #{response[:url].to_s}\n Status #{response[:status]}#{error_body(response[:body])}"
    end

    # Extracts a human-readable detail string from a response body.
    #
    # A non-empty String body that parses as a JSON object is reduced to its
    # 'message' entry: nil when absent, the entry itself when it is not an
    # Array or holds a single element, otherwise the 'message' of every
    # element joined with '; '. Any other body is used as-is.
    #
    # @param body [String, Object, nil] raw response body
    # @return [String, nil] ": <detail>", or nil when there is nothing to add
    def error_body(body)
      if body.kind_of?(String) && !body.empty?
        parsed = parse_json(body)
        # Only a JSON object can carry a 'message' key; leave scalars/arrays
        # (and bodies that are not JSON at all) as the raw text.
        body = message_from(parsed) if parsed.kind_of?(Hash)
      end

      body.nil? ? nil : ": #{body}"
    end

    # Reduces the 'message' entry of a parsed JSON error document.
    def message_from(parsed)
      message = parsed['message']
      if message.nil?
        nil
      elsif !message.kind_of?(Array) || message.length == 1
        message
      else
        message.collect { |x| x['message'] }.join('; ')
      end
    end

    # Builds the message used for 5xx errors: method, URL, status and an
    # optional fixed explanation.
    def error_message_500(response, body=nil)
      "#{response[:method].to_s.upcase} #{response[:url].to_s}: #{[response[:status].to_s + ':', body].compact.join(' ')}"
    end

    # Parses +string+ as JSON, returning nil when it is not valid JSON so the
    # body only has to be parsed once.
    def parse_json(string)
      ::MultiJson.load(string)
    rescue ::MultiJson::ParseError
      nil
    end

  end
end
|
@@ -1,58 +1,64 @@
|
|
1
1
|
# helper functions
|
2
|
-
module
|
3
|
-
class Request #:nodoc:
|
2
|
+
module Helpers
|
4
3
|
|
5
|
-
|
6
|
-
|
7
|
-
$others = ['license_url','license_version','license_delay','full_text_version','full_text_type',
|
8
|
-
'award_number','award_funder']
|
9
|
-
|
10
|
-
def filter_handler(x = nil)
|
11
|
-
if x.nil?
|
12
|
-
nil
|
13
|
-
else
|
14
|
-
x = stringify(x)
|
15
|
-
nn = x.keys.collect{ |x| x.to_s }
|
16
|
-
if nn.collect{ |x| $others.include? x }.any?
|
17
|
-
nn = nn.collect{ |x|
|
18
|
-
if $others.include? x
|
19
|
-
case x
|
20
|
-
when 'license_url'
|
21
|
-
'license.url'
|
22
|
-
when 'license_version'
|
23
|
-
'license.version'
|
24
|
-
when 'license_delay'
|
25
|
-
'license.delay'
|
26
|
-
when 'full_text_version'
|
27
|
-
'full-text.version'
|
28
|
-
when 'full_text_type'
|
29
|
-
'full-text.type'
|
30
|
-
when 'award_number'
|
31
|
-
'award.number'
|
32
|
-
when 'award_funder'
|
33
|
-
'award.funder'
|
34
|
-
end
|
35
|
-
else
|
36
|
-
x
|
37
|
-
end
|
38
|
-
}
|
39
|
-
end
|
4
|
+
$others = ['license_url','license_version','license_delay','full_text_version','full_text_type',
|
5
|
+
'award_number','award_funder']
|
40
6
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
7
|
+
def filter_handler(x = nil)
|
8
|
+
if x.nil?
|
9
|
+
nil
|
10
|
+
else
|
11
|
+
x = stringify(x)
|
12
|
+
nn = x.keys.collect{ |x| x.to_s }
|
13
|
+
if nn.collect{ |x| $others.include? x }.any?
|
14
|
+
nn = nn.collect{ |x|
|
15
|
+
if $others.include? x
|
16
|
+
case x
|
17
|
+
when 'license_url'
|
18
|
+
'license.url'
|
19
|
+
when 'license_version'
|
20
|
+
'license.version'
|
21
|
+
when 'license_delay'
|
22
|
+
'license.delay'
|
23
|
+
when 'full_text_version'
|
24
|
+
'full-text.version'
|
25
|
+
when 'full_text_type'
|
26
|
+
'full-text.type'
|
27
|
+
when 'award_number'
|
28
|
+
'award.number'
|
29
|
+
when 'award_funder'
|
30
|
+
'award.funder'
|
31
|
+
end
|
32
|
+
else
|
33
|
+
x
|
34
|
+
end
|
35
|
+
}
|
45
36
|
end
|
46
|
-
end
|
47
37
|
|
48
|
-
|
49
|
-
(x
|
38
|
+
newnn = nn.collect{ |x| x.gsub("_", "-") }
|
39
|
+
x = rename_keys(x, newnn)
|
40
|
+
x = x.collect{ |k,v| [k, v].join(":") }.join(',')
|
41
|
+
return x
|
50
42
|
end
|
43
|
+
end
|
51
44
|
|
52
|
-
|
53
|
-
|
54
|
-
|
45
|
+
def stringify(x)
|
46
|
+
(x.keys.map{ |k,v| k.to_s }.zip x.values).to_h
|
47
|
+
end
|
55
48
|
|
49
|
+
def rename_keys(x, y)
|
50
|
+
(y.zip x.values).to_h
|
56
51
|
end
|
57
52
|
|
58
53
|
end
|
54
|
+
|
55
|
+
module Serrano
|
56
|
+
class Request #:nodoc:
|
57
|
+
include Helpers
|
58
|
+
end
|
59
|
+
|
60
|
+
class RequestCursor #:nodoc:
|
61
|
+
include Helpers
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
data/lib/serrano/request.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
require "faraday"
|
2
2
|
require "multi_json"
|
3
|
-
require "serrano/
|
3
|
+
require "serrano/error"
|
4
4
|
require "serrano/constants"
|
5
|
+
require 'serrano/utils'
|
5
6
|
require 'serrano/helpers/configuration'
|
6
7
|
|
7
8
|
##
|
@@ -58,18 +59,21 @@ module Serrano
|
|
58
59
|
conn = Faraday.new(:url => Serrano.base_url, :request => options) do |f|
|
59
60
|
f.response :logger
|
60
61
|
f.adapter Faraday.default_adapter
|
62
|
+
f.use FaradayMiddleware::RaiseHttpException
|
61
63
|
end
|
62
64
|
else
|
63
|
-
conn = Faraday.new(:url => Serrano.base_url, :request => options)
|
65
|
+
conn = Faraday.new(:url => Serrano.base_url, :request => options) do |f|
|
66
|
+
f.adapter Faraday.default_adapter
|
67
|
+
f.use FaradayMiddleware::RaiseHttpException
|
68
|
+
end
|
64
69
|
end
|
65
70
|
|
71
|
+
conn.headers[:user_agent] = make_ua
|
72
|
+
conn.headers["X-USER-AGENT"] = make_ua
|
73
|
+
|
66
74
|
if self.id.nil?
|
67
|
-
# begin
|
68
75
|
res = conn.get self.endpt, opts
|
69
76
|
return MultiJson.load(res.body)
|
70
|
-
# rescue *NETWORKABLE_EXCEPTIONS => e
|
71
|
-
# rescue_faraday_error(endpt, e)
|
72
|
-
# end
|
73
77
|
else
|
74
78
|
coll = []
|
75
79
|
Array(self.id).each do |x|
|
@@ -85,12 +89,6 @@ module Serrano
|
|
85
89
|
|
86
90
|
res = conn.get endpt, opts
|
87
91
|
coll << MultiJson.load(res.body)
|
88
|
-
# begin
|
89
|
-
# res = conn.get endpt, opts
|
90
|
-
# coll << MultiJson.load(res.body)
|
91
|
-
# rescue *NETWORKABLE_EXCEPTIONS => e
|
92
|
-
# rescue_faraday_error(endpt, e)
|
93
|
-
# end
|
94
92
|
end
|
95
93
|
return coll
|
96
94
|
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require "faraday"
|
2
|
+
require 'faraday_middleware'
|
3
|
+
require "multi_json"
|
4
|
+
require "serrano/error"
|
5
|
+
require "serrano/constants"
|
6
|
+
require 'serrano/helpers/configuration'
|
7
|
+
require 'serrano/filterhandler'
|
8
|
+
require 'serrano/error'
|
9
|
+
require 'serrano/faraday'
|
10
|
+
|
11
|
+
##
|
12
|
+
# Serrano::RequestCursor
|
13
|
+
#
|
14
|
+
# Class to perform HTTP requests to the Crossref API
|
15
|
+
module Serrano
  # Performs HTTP GET requests against the Crossref API and, when the first
  # response carries a `next-cursor`, keeps requesting further pages until
  # `cursor_max` items have been collected, the reported `total-results` is
  # reached, the API stops returning a cursor, or a page comes back empty.
  class RequestCursor #:nodoc:

    attr_accessor :endpt
    attr_accessor :id
    attr_accessor :query
    attr_accessor :filter
    attr_accessor :offset
    attr_accessor :limit
    attr_accessor :sample
    attr_accessor :sort
    attr_accessor :order
    attr_accessor :facet
    attr_accessor :works
    attr_accessor :agency
    attr_accessor :options
    attr_accessor :verbose
    attr_accessor :cursor
    attr_accessor :cursor_max

    def initialize(endpt, id, query, filter, offset,
      limit, sample, sort, order, facet, works, agency,
      options, verbose, cursor, cursor_max)

      self.endpt = endpt
      self.id = id
      self.query = query
      self.filter = filter
      self.offset = offset
      self.limit = limit
      self.sample = sample
      self.sort = sort
      self.order = order
      self.facet = facet
      self.works = works
      self.agency = agency
      self.options = options
      self.verbose = verbose
      self.cursor = cursor
      self.cursor_max = cursor_max
    end

    # Builds the query, opens the connection and runs the request(s).
    #
    # @return [Hash, Array] with no ids: the parsed response, or an Array of
    #   parsed pages when cursor paging took place; with ids: an Array holding
    #   one such result per id
    # @raise [RuntimeError] when cursor_max is not an Integer
    def perform
      filt = filter_handler(self.filter)

      # Integer rather than Fixnum: Fixnum was folded into Integer in Ruby 2.4
      # and the constant no longer exists in current Rubies.
      unless self.cursor_max.is_a?(Integer)
        raise "cursor_max must be of class int"
      end

      args = { query: self.query, filter: filt, offset: self.offset,
              rows: self.limit, sample: self.sample, sort: self.sort,
              order: self.order, facet: self.facet, cursor: self.cursor }
      opts = args.delete_if { |k, v| v.nil? }

      # Kept on the instance instead of in a global so that concurrent or
      # interleaved requests cannot overwrite each other's connection.
      @conn = Faraday.new(:url => Serrano.base_url, :request => options) do |f|
        f.response :logger if verbose
        f.use FaradayMiddleware::RaiseHttpException
        # The adapter performs the request, so it is registered last.
        f.adapter Faraday.default_adapter
      end

      @conn.headers[:user_agent] = make_ua
      @conn.headers["X-USER-AGENT"] = make_ua

      return fetch_with_cursor(self.endpt, opts) if self.id.nil?

      Array(self.id).collect { |x| fetch_with_cursor(route_for(x), opts) }
    end

    # Follows `next-cursor` values starting from an already fetched page.
    #
    # @param js [Hash] first parsed response
    # @param opts [Hash] query parameters of the first request (not modified)
    # @param cu [String, nil] `next-cursor` value taken from +js+
    # @param max_avail [Integer, nil] `total-results` value taken from +js+
    # @param path [String] route to request further pages from; defaults to
    #   the endpoint this object was created with
    # @return [Hash, Array<Hash>] +js+ itself when no paging applies,
    #   otherwise every page fetched (including +js+) in request order
    def _redo_req(js, opts, cu, max_avail, path = self.endpt)
      # Without a cursor the message may not even contain 'items', so bail
      # out before touching it.
      return js if cu.nil?

      total = js['message']['items'].length
      return js unless self.cursor_max > total

      res = [js]
      while !cu.nil? && self.cursor_max > total && (max_avail.nil? || total < max_avail)
        # merge, not assignment: the caller reuses opts for the next id and
        # must still start that one from the original cursor.
        out = _req(path, opts.merge(cursor: cu))
        cu = out['message']['next-cursor']
        res << out
        fetched = out['message']['items'].length
        # An empty page means nothing is left, even if a cursor is returned;
        # stopping here prevents requesting forever.
        break if fetched.zero?
        total += fetched
      end
      res
    end

    # Issues one GET request and parses the JSON body.
    #
    # @param path [String] route relative to the connection's base URL
    # @param opts [Hash] query parameters
    # @return [Object] parsed JSON body
    def _req(path, opts)
      res = @conn.get path, opts
      return MultiJson.load(res.body)
    end

    private

    # Requests the first page of +path+ and pages on from there when the
    # response carries a cursor.
    def fetch_with_cursor(path, opts)
      js = _req(path, opts)
      cu = js['message']['next-cursor']
      max_avail = js['message']['total-results']
      _redo_req(js, opts, cu, max_avail, path)
    end

    # Route for a single identifier, optionally scoped to its works or agency.
    def route_for(x)
      if self.works
        self.endpt + '/' + x.to_s + "/works"
      elsif self.agency
        self.endpt + '/' + x.to_s + "/agency"
      else
        self.endpt + '/' + x.to_s
      end
    end

  end
end
|
data/lib/serrano/version.rb
CHANGED
data/lib/serrano.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require "serrano/version"
|
2
2
|
require "serrano/request"
|
3
|
+
require "serrano/request_cursor"
|
3
4
|
require "serrano/filterhandler"
|
4
5
|
require "serrano/cnrequest"
|
5
6
|
require "serrano/filters"
|
@@ -12,29 +13,40 @@ require 'rexml/xpath'
|
|
12
13
|
# @param offset [Fixnum] Number of record to start at, from 1 to infinity.
|
13
14
|
# @param limit [Fixnum] Number of results to return. Not relevant when searching with specific dois. Default: 20. Max: 1000
|
14
15
|
# @param sample [Fixnum] Number of random results to return. when you use the sample parameter,
|
15
|
-
#
|
16
|
+
# the limit and offset parameters are ignored. This parameter only used when works requested.
|
16
17
|
# @param sort [String] Field to sort on, one of score, relevance,
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
18
|
+
# updated (date of most recent change to metadata - currently the same as deposited),
|
19
|
+
# deposited (time of most recent deposit), indexed (time of most recent index), or
|
20
|
+
# published (publication date). Note: If the API call includes a query, then the sort
|
21
|
+
# order will be by the relevance score. If no query is included, then the sort order
|
22
|
+
# will be by DOI update date.
|
22
23
|
# @param order [String] Sort order, one of 'asc' or 'desc'
|
23
24
|
# @param facet [Boolean] Include facet results. Default: false
|
24
25
|
# @param verbose [Boolean] Print request headers to stdout. Default: false
|
25
26
|
|
27
|
+
# @!macro cursor_params
|
28
|
+
# @param cursor [String] Cursor character string to do deep paging. Default is `nil`.
|
29
|
+
# Pass in '*' to start deep paging. Any combination of query, filters and facets may be
|
30
|
+
# used with deep paging cursors. While limit may be specified along with cursor, offset
|
31
|
+
# and sample cannot be used. See
|
32
|
+
# https://github.com/CrossRef/rest-api-doc/blob/master/rest_api.md#deep-paging-with-cursors
|
33
|
+
# @param cursor_max [Fixnum] Max records to retrieve. Only used when cursor
|
34
|
+
# param used. Because deep paging can result in continuous requests until all
|
35
|
+
# are retrieved, use this parameter to set a maximum number of records. Of course,
|
36
|
+
# if there are fewer records found than this value, you will get only those found.
|
37
|
+
|
26
38
|
# @!macro serrano_options
|
27
39
|
# @param options [Hash] Hash of options for configuring the request, passed on to Faraday.new
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
37
|
-
#
|
40
|
+
# - timeout [Fixnum] open/read timeout Integer in seconds
|
41
|
+
# - open_timeout [Fixnum] connection open timeout Integer in seconds
|
42
|
+
# - proxy [Hash] hash of proxy options
|
43
|
+
# - uri [String] Proxy Server URI
|
44
|
+
# - user [String] Proxy server username
|
45
|
+
# - password [String] Proxy server password
|
46
|
+
# - params_encoder [Object] object responding to encode/decode that Faraday uses to serialize query parameters
|
47
|
+
# - bind [Hash] A hash with host and port values
|
48
|
+
# - boundary [String] of the boundary value
|
49
|
+
# - oauth [Hash] A hash with OAuth details
|
38
50
|
|
39
51
|
##
|
40
52
|
# Serrano - The top level module for using methods
|
@@ -61,6 +73,17 @@ require 'rexml/xpath'
|
|
61
73
|
#
|
62
74
|
# @see https://github.com/CrossRef/rest-api-doc/blob/master/rest_api.md for
|
63
75
|
# detailed description of the Crossref API
|
76
|
+
#
|
77
|
+
# What am I actually searching when using the Crossref search API?
|
78
|
+
#
|
79
|
+
# You are using the Crossref search API described at
|
80
|
+
# https://github.com/CrossRef/rest-api-doc/blob/master/rest_api.md.
|
81
|
+
# When you search with query terms, on Crossref servers they are not
|
82
|
+
# searching full text, or even abstracts of articles, but only what is
|
83
|
+
# available in the data that is returned to you. That is, they search
|
84
|
+
# article titles, authors, etc. For some discussion on this, see
|
85
|
+
# https://github.com/CrossRef/rest-api-doc/issues/101
|
86
|
+
|
64
87
|
module Serrano
|
65
88
|
extend Configuration
|
66
89
|
|
@@ -73,6 +96,7 @@ module Serrano
|
|
73
96
|
#
|
74
97
|
# @!macro serrano_params
|
75
98
|
# @!macro serrano_options
|
99
|
+
# @!macro cursor_params
|
76
100
|
# @param ids [Array] DOIs (digital object identifier) or other identifiers
|
77
101
|
# @param query [String] A query string
|
78
102
|
# @param filter [Hash] Filter options. See ...
|
@@ -101,12 +125,30 @@ module Serrano
|
|
101
125
|
# Serrano.works(ids: '10.1371/journal.pone.0033693', options: {timeout: 3, open_timeout: 2})
|
102
126
|
# ## log request details - uses Faraday middleware
|
103
127
|
# Serrano.works(ids: '10.1371/journal.pone.0033693', verbose: true)
|
128
|
+
#
|
129
|
+
# # facets
|
130
|
+
# Serrano.works(facet: 'license:*', limit: 0, filter: {has_full_text: true})
|
131
|
+
#
|
132
|
+
# # sample
|
133
|
+
# Serrano.works(sample: 2)
|
134
|
+
#
|
135
|
+
# # cursor for deep paging
|
136
|
+
# Serrano.works(query: "widget", cursor: "*", limit: 100)
|
137
|
+
# # another query, more results this time
|
138
|
+
# res = Serrano.works(query: "science", cursor: "*", limit: 250, cursor_max: 1000);
|
139
|
+
# res.collect { |x| x['message']['items'].length }.reduce(0, :+)
|
140
|
+
# # another query
|
141
|
+
# res = Serrano.works(query: "ecology", cursor: "*", limit: 1000, cursor_max: 10000);
|
142
|
+
# res.collect { |x| x['message']['items'].length }.reduce(0, :+)
|
143
|
+
# items = res.collect {|x| x['message']['items']}.flatten
|
144
|
+
# items.collect { |x| x['DOI'] }[0,20]
|
104
145
|
def self.works(ids: nil, query: nil, filter: nil, offset: nil,
|
105
146
|
limit: nil, sample: nil, sort: nil, order: nil, facet: nil,
|
106
|
-
options: nil, verbose: false)
|
147
|
+
options: nil, verbose: false, cursor: nil, cursor_max: 5000)
|
107
148
|
|
108
|
-
|
109
|
-
limit, sample, sort, order, facet, nil, nil, options,
|
149
|
+
RequestCursor.new('works', ids, query, filter, offset,
|
150
|
+
limit, sample, sort, order, facet, nil, nil, options,
|
151
|
+
verbose, cursor, cursor_max).perform
|
110
152
|
end
|
111
153
|
|
112
154
|
##
|
@@ -114,6 +156,7 @@ module Serrano
|
|
114
156
|
#
|
115
157
|
# @!macro serrano_params
|
116
158
|
# @!macro serrano_options
|
159
|
+
# @!macro cursor_params
|
117
160
|
# @param ids [Array] DOIs (digital object identifier) or other identifiers
|
118
161
|
# @param query [String] A query string
|
119
162
|
# @param filter [Hash] Filter options. See ...
|
@@ -133,12 +176,26 @@ module Serrano
|
|
133
176
|
# Serrano.members(query: "ecology", order: "asc")
|
134
177
|
# # Works
|
135
178
|
# Serrano.members(ids: 98, works: true)
|
179
|
+
#
|
180
|
+
# # cursor - deep paging
|
181
|
+
# res = Serrano.members(ids: 98, works: true, cursor: "*", cursor_max: 1000);
|
182
|
+
# res[0].collect { |x| x['message']['items'].length }.reduce(0, :+)
|
183
|
+
# items = res[0].collect { |x| x['message']['items'] }.flatten
|
184
|
+
# items.collect{ |z| z['DOI'] }[0,50]
|
185
|
+
#
|
186
|
+
# # multiple ids with cursor
|
187
|
+
# res = Serrano.members(ids: [98, 340], works: true, cursor: "*", cursor_max: 300);
|
188
|
+
# res[0].collect { |x| x['message']['items'].length }.reduce(0, :+)
|
189
|
+
# items = res[0].collect { |x| x['message']['items'] }.flatten
|
190
|
+
# items.collect{ |z| z['DOI'] }[0,50]
|
136
191
|
def self.members(ids: nil, query: nil, filter: nil, offset: nil,
|
137
192
|
limit: nil, sample: nil, sort: nil, order: nil, facet: nil,
|
138
|
-
works: false, options: nil, verbose: false
|
193
|
+
works: false, options: nil, verbose: false,
|
194
|
+
cursor: nil, cursor_max: 5000)
|
139
195
|
|
140
|
-
|
141
|
-
limit, sample, sort, order, facet, works, nil, options,
|
196
|
+
RequestCursor.new('members', ids, query, filter, offset,
|
197
|
+
limit, sample, sort, order, facet, works, nil, options,
|
198
|
+
verbose, cursor, cursor_max).perform
|
142
199
|
end
|
143
200
|
|
144
201
|
##
|
@@ -146,6 +203,7 @@ module Serrano
|
|
146
203
|
#
|
147
204
|
# @!macro serrano_params
|
148
205
|
# @!macro serrano_options
|
206
|
+
# @!macro cursor_params
|
149
207
|
# @param ids [Array] DOIs (digital object identifier) or other identifiers
|
150
208
|
# @param filter [Hash] Filter options. See ...
|
151
209
|
# @param works [Boolean] If true, works returned as well. Default: false
|
@@ -162,12 +220,20 @@ module Serrano
|
|
162
220
|
# Serrano.prefixes(ids: "10.1016", works: true, limit: 3)
|
163
221
|
# # Sort and order
|
164
222
|
# Serrano.prefixes(ids: "10.1016", works: true, sort: 'relevance', order: "asc")
|
223
|
+
#
|
224
|
+
# # cursor - deep paging
|
225
|
+
# res = Serrano.prefixes(ids: "10.1016", works: true, cursor: "*", cursor_max: 1000);
|
226
|
+
# res[0].collect { |x| x['message']['items'].length }.reduce(0, :+)
|
227
|
+
# items = res[0].collect { |x| x['message']['items'] }.flatten;
|
228
|
+
# items.collect{ |z| z['DOI'] }[0,50]
|
165
229
|
def self.prefixes(ids:, filter: nil, offset: nil,
|
166
230
|
limit: nil, sample: nil, sort: nil, order: nil, facet: nil,
|
167
|
-
works: false, options: nil, verbose: false
|
231
|
+
works: false, options: nil, verbose: false,
|
232
|
+
cursor: nil, cursor_max: 5000)
|
168
233
|
|
169
|
-
|
170
|
-
limit, sample, sort, order, facet, works, nil, options,
|
234
|
+
RequestCursor.new('prefixes', ids, nil, filter, offset,
|
235
|
+
limit, sample, sort, order, facet, works, nil, options,
|
236
|
+
verbose, cursor, cursor_max).perform
|
171
237
|
end
|
172
238
|
|
173
239
|
##
|
@@ -175,6 +241,7 @@ module Serrano
|
|
175
241
|
#
|
176
242
|
# @!macro serrano_params
|
177
243
|
# @!macro serrano_options
|
244
|
+
# @!macro cursor_params
|
178
245
|
# @param ids [Array] DOIs (digital object identifier) or other identifiers
|
179
246
|
# @param query [String] A query string
|
180
247
|
# @param filter [Hash] Filter options. See ...
|
@@ -196,12 +263,20 @@ module Serrano
|
|
196
263
|
# Serrano.funders(ids: '10.13039/100000001', works: true, limit: 3)
|
197
264
|
# # Sort and order
|
198
265
|
# Serrano.funders(ids: "10.13039/100000001", works: true, sort: 'relevance', order: "asc")
|
266
|
+
#
|
267
|
+
# # cursor - deep paging
|
268
|
+
# res = Serrano.funders(ids: '10.13039/100000001', works: true, cursor: "*", cursor_max: 500);
|
269
|
+
# res[0].collect { |x| x['message']['items'].length }.reduce(0, :+)
|
270
|
+
# items = res[0].collect { |x| x['message']['items'] }.flatten;
|
271
|
+
# items.collect{ |z| z['DOI'] }[0,50]
|
199
272
|
def self.funders(ids: nil, query: nil, filter: nil, offset: nil,
|
200
273
|
limit: nil, sample: nil, sort: nil, order: nil, facet: nil,
|
201
|
-
works: false, options: nil, verbose: false
|
274
|
+
works: false, options: nil, verbose: false,
|
275
|
+
cursor: nil, cursor_max: 5000)
|
202
276
|
|
203
|
-
|
204
|
-
limit, sample, sort, order, facet, works, nil, options,
|
277
|
+
RequestCursor.new('funders', ids, query, filter, offset,
|
278
|
+
limit, sample, sort, order, facet, works, nil, options,
|
279
|
+
verbose, cursor, cursor_max).perform
|
205
280
|
end
|
206
281
|
|
207
282
|
##
|
@@ -209,6 +284,7 @@ module Serrano
|
|
209
284
|
#
|
210
285
|
# @!macro serrano_params
|
211
286
|
# @!macro serrano_options
|
287
|
+
# @!macro cursor_params
|
212
288
|
# @param ids [Array] DOIs (digital object identifier) or other identifiers
|
213
289
|
# @param query [String] A query string
|
214
290
|
# @param filter [Hash] Filter options. See ...
|
@@ -230,18 +306,27 @@ module Serrano
|
|
230
306
|
# Serrano.journals(ids: '1803-2427', works: true)
|
231
307
|
# Serrano.journals(limit: 2)
|
232
308
|
# Serrano.journals(sample: 2)
|
309
|
+
#
|
310
|
+
# # cursor - deep paging
|
311
|
+
# res = Serrano.journals(ids: "2167-8359", works: true, cursor: "*", cursor_max: 500);
|
312
|
+
# res[0].collect { |x| x['message']['items'].length }.reduce(0, :+)
|
313
|
+
# items = res[0].collect { |x| x['message']['items'] }.flatten;
|
314
|
+
# items.collect{ |z| z['DOI'] }[0,50]
|
233
315
|
def self.journals(ids: nil, query: nil, filter: nil, offset: nil,
|
234
316
|
limit: nil, sample: nil, sort: nil, order: nil, facet: nil,
|
235
|
-
works: false, options: nil, verbose: false
|
317
|
+
works: false, options: nil, verbose: false,
|
318
|
+
cursor: nil, cursor_max: 5000)
|
236
319
|
|
237
|
-
|
238
|
-
limit, sample, sort, order, facet, works, nil, options,
|
320
|
+
RequestCursor.new('journals', ids, query, filter, offset,
|
321
|
+
limit, sample, sort, order, facet, works, nil, options,
|
322
|
+
verbose, cursor, cursor_max).perform
|
239
323
|
end
|
240
324
|
|
241
325
|
##
|
242
326
|
# Search the types route
|
243
327
|
#
|
244
328
|
# @!macro serrano_options
|
329
|
+
# @!macro cursor_params
|
245
330
|
# @param ids [Array] DOIs (digital object identifier) or other identifiers
|
246
331
|
# @param works [Boolean] If true, works returned as well. Default: false
|
247
332
|
# @return [Array] An array of hashes
|
@@ -252,11 +337,18 @@ module Serrano
|
|
252
337
|
# Serrano.types(ids: "journal")
|
253
338
|
# Serrano.types(ids: ["journal", "dissertation"])
|
254
339
|
# Serrano.types(ids: "journal", works: true)
|
255
|
-
|
256
|
-
|
340
|
+
#
|
341
|
+
# # cursor - deep paging
|
342
|
+
# res = Serrano.types(ids: "journal", works: true, cursor: "*", cursor_max: 500);
|
343
|
+
# res[0].collect { |x| x['message']['items'].length }.reduce(0, :+)
|
344
|
+
# items = res[0].collect { |x| x['message']['items'] }.flatten;
|
345
|
+
# items.collect{ |z| z['DOI'] }[0,50]
|
346
|
+
def self.types(ids: nil, offset: nil, limit: nil, works: false,
|
347
|
+
options: nil, verbose: false, cursor: nil, cursor_max: 5000)
|
257
348
|
|
258
|
-
|
259
|
-
limit, nil, nil, nil, nil, works, nil, options,
|
349
|
+
RequestCursor.new('types', ids, nil, nil, offset,
|
350
|
+
limit, nil, nil, nil, nil, works, nil, options,
|
351
|
+
verbose, cursor, cursor_max).perform
|
260
352
|
end
|
261
353
|
|
262
354
|
##
|
data/serrano.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |s|
|
|
8
8
|
s.version = Serrano::VERSION
|
9
9
|
s.platform = Gem::Platform::RUBY
|
10
10
|
s.required_ruby_version = '>= 2.0'
|
11
|
-
s.date = '
|
11
|
+
s.date = '2016-03-07'
|
12
12
|
s.summary = "Crossref Client"
|
13
13
|
s.description = "Low Level Ruby Client for the Crossref Search API"
|
14
14
|
s.authors = "Scott Chamberlain"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: serrano
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Scott Chamberlain
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-03-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -177,12 +177,16 @@ files:
|
|
177
177
|
- lib/serrano/cn.rb
|
178
178
|
- lib/serrano/cnrequest.rb
|
179
179
|
- lib/serrano/constants.rb
|
180
|
-
- lib/serrano/
|
180
|
+
- lib/serrano/cursor_testing.rb
|
181
|
+
- lib/serrano/error.rb
|
182
|
+
- lib/serrano/faraday.rb
|
181
183
|
- lib/serrano/filterhandler.rb
|
182
184
|
- lib/serrano/filters.rb
|
183
185
|
- lib/serrano/helpers/configuration.rb
|
184
186
|
- lib/serrano/request.rb
|
187
|
+
- lib/serrano/request_cursor.rb
|
185
188
|
- lib/serrano/styles.rb
|
189
|
+
- lib/serrano/utils.rb
|
186
190
|
- lib/serrano/version.rb
|
187
191
|
- serrano.gemspec
|
188
192
|
homepage: http://github.com/sckott/serrano
|
data/lib/serrano/errors.rb
DELETED
@@ -1,67 +0,0 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
|
3
|
-
def rescue_faraday_error(url, error, options={})
|
4
|
-
details = nil
|
5
|
-
headers = {}
|
6
|
-
|
7
|
-
if error.is_a?(Faraday::Error::TimeoutError)
|
8
|
-
status = 408
|
9
|
-
elsif error.respond_to?('status')
|
10
|
-
status = error[:status]
|
11
|
-
elsif error.respond_to?('response') && error.response.present?
|
12
|
-
status = error.response[:status]
|
13
|
-
details = error.response[:body]
|
14
|
-
headers = error.response[:headers]
|
15
|
-
else
|
16
|
-
status = 400
|
17
|
-
end
|
18
|
-
|
19
|
-
# Some sources use a different status for rate-limiting errors
|
20
|
-
status = 429 if status == 403 && details.include?("Excessive use detected")
|
21
|
-
|
22
|
-
if error.respond_to?('exception')
|
23
|
-
exception = error.exception
|
24
|
-
else
|
25
|
-
exception = ""
|
26
|
-
end
|
27
|
-
|
28
|
-
class_name = class_name_by_status(status) || error.class
|
29
|
-
|
30
|
-
message = parse_error_response(error.message)
|
31
|
-
message = "#{message} for #{url}"
|
32
|
-
message = "#{message} with rev #{options[:data][:rev]}" if class_name == Net::HTTPConflict
|
33
|
-
|
34
|
-
{ error: message, status: status }
|
35
|
-
end
|
36
|
-
|
37
|
-
def parse_error_response(string)
|
38
|
-
if is_json?(string)
|
39
|
-
string = MultiJson.load(string)
|
40
|
-
end
|
41
|
-
string = string['error'] if string.is_a?(Hash) && string['error']
|
42
|
-
string
|
43
|
-
end
|
44
|
-
|
45
|
-
def is_json?(string)
|
46
|
-
MultiJson.load(string)
|
47
|
-
rescue MultiJson::ParseError => e
|
48
|
-
e.data
|
49
|
-
e.cause
|
50
|
-
end
|
51
|
-
|
52
|
-
def class_name_by_status(status)
|
53
|
-
{ 400 => Net::HTTPBadRequest,
|
54
|
-
401 => Net::HTTPUnauthorized,
|
55
|
-
403 => Net::HTTPForbidden,
|
56
|
-
404 => Net::HTTPNotFound,
|
57
|
-
406 => Net::HTTPNotAcceptable,
|
58
|
-
408 => Net::HTTPRequestTimeOut,
|
59
|
-
409 => Net::HTTPConflict,
|
60
|
-
417 => Net::HTTPExpectationFailed,
|
61
|
-
429 => Net::HTTPTooManyRequests,
|
62
|
-
500 => Net::HTTPInternalServerError,
|
63
|
-
502 => Net::HTTPBadGateway,
|
64
|
-
503 => Net::HTTPServiceUnavailable,
|
65
|
-
504 => Net::HTTPGatewayTimeOut }.fetch(status, nil)
|
66
|
-
end
|
67
|
-
|