serrano 0.1.4.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ced6b8bd574ca8a373c61dc9f239499817e27bf8
4
- data.tar.gz: ab478119cc61771107057f55f07bea2653a611e0
3
+ metadata.gz: ce96cb27ec2070ee612d0b85d29949b441d552a4
4
+ data.tar.gz: 90791e525778c083fb7e57d445a5596a2ffbf7a4
5
5
  SHA512:
6
- metadata.gz: 029c66c10b76af77eeeba4a76d58552b8de60eb97b310eb0ee7610a9c47726ee9a6a403a15330ea983b4fed2d6396e50a57709445998d5c6a5cc661b61edba13
7
- data.tar.gz: fc940b4c0375aaac5d234aecf5e0939d8c017e8c05629f1f61db97c593ef619cf0b8967c98158b00dd78b029b737fc484e9bac72f2d80c510b468e2dedff5b33
6
+ metadata.gz: dc0937a45f90e0c3d01581149fa09a21cb0a3f4c669181404b0317a553d184e8d4d0a816cb7a1b30e9a373ff1eefea1082a531308f863b3f09f17644d8dcaa27
7
+ data.tar.gz: e36b25922967b65c48bd92656c8db58359987dfae0380025043201edb8527e56098f44e3c4f3c78048276780abda2a7b276f4c860580d34774d4b598528a5961
data/.travis.yml CHANGED
@@ -1,4 +1,4 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.1.7
4
- - 2.2.3
3
+ - 2.1.8
4
+ - 2.2.4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## 0.2.0 (2016-03-07)
2
+
3
+ * Added error classes to fail more gracefully, adapted from instagram gem (#4)
4
+ * Added support for the cursor feature in the Crossref API for deep paging (#14)
5
+ * Added disclaimer to docs that full text/abstracts aren't searched (#24)
6
+ * Now passing user agent string with serrano version in each request (#25)
7
+
1
8
  ## 0.1.4 (2015-12-04)
2
9
 
3
10
  * Added `csl_styles()` method to get CSL styles info (#23)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- serrano (0.1.4.1)
4
+ serrano (0.2.0)
5
5
  faraday (~> 0.9.1)
6
6
  faraday_middleware (~> 0.10.0)
7
7
  multi_json (~> 1.0)
@@ -15,7 +15,7 @@ GEM
15
15
  simplecov
16
16
  url
17
17
  docile (1.1.5)
18
- faraday (0.9.1)
18
+ faraday (0.9.2)
19
19
  multipart-post (>= 1.2, < 3)
20
20
  faraday_middleware (0.10.0)
21
21
  faraday (>= 0.7.4, < 0.10)
data/README.md CHANGED
@@ -41,6 +41,10 @@ Other methods:
41
41
  * [Citation count][ccount] - `Serrano.citation_count()`
42
42
  * [get CSL styles][csl] - `Serrano.csl_styles()`
43
43
 
44
+ Note about searching:
45
+
46
+ You are using the Crossref search API described at https://github.com/CrossRef/rest-api-doc/blob/master/rest_api.md. When you search with query terms, the Crossref servers do not search the full text, or even the abstracts, of articles; they search only the data that is returned to you, that is, article titles, authors, etc. For some discussion on this, see https://github.com/CrossRef/rest-api-doc/issues/101
47
+
44
48
  ## Install
45
49
 
46
50
  ### Release version
@@ -1,8 +1,9 @@
1
1
  require "faraday"
2
2
  require "faraday_middleware"
3
3
  require "multi_json"
4
- require "serrano/errors"
4
+ require "serrano/error"
5
5
  require "serrano/constants"
6
+ require 'serrano/utils'
6
7
  require 'serrano/helpers/configuration'
7
8
 
8
9
  ##
@@ -53,6 +54,8 @@ def make_request(ids, format, style, locale)
53
54
  if format == "citeproc-json"
54
55
  endpt = "http://api.crossref.org/works/" + ids + "/" + type
55
56
  cr_works = Faraday.new(:url => endpt)
57
+ cr_works.headers[:user_agent] = make_ua
58
+ cr_works.headers["X-USER-AGENT"] = make_ua
56
59
  res = cr_works.get
57
60
  else
58
61
  if format == "text"
@@ -62,6 +65,8 @@ def make_request(ids, format, style, locale)
62
65
  res = $conn.get do |req|
63
66
  req.url ids
64
67
  req.headers['Accept'] = type
68
+ req.headers[:user_agent] = make_ua
69
+ req.headers["X-USER-AGENT"] = make_ua
65
70
  end
66
71
  end
67
72
 
@@ -0,0 +1,52 @@
1
+ require 'faraday'
2
+ require "multi_json"
3
+
4
# Scratch script: exercises Crossref deep paging (cursors) with plain Faraday.
# Loading this file issues real HTTP requests against api.crossref.org.

# Search settings.
query = "widget"
cursor = "*"
limit = 100
cursor_max = 500
rows = limit

# Parameters left unset; they are stripped from the query below.
filter = nil
offset = nil
sample = nil
sort = nil
order = nil
facet = nil

args = {
  query: query, filter: filter, offset: offset,
  rows: limit, sample: sample, sort: sort,
  order: order, facet: facet, cursor: cursor
}
# delete_if mutates and returns args, so opts and args are the same Hash.
opts = args.delete_if { |_name, value| value.nil? }

conn = Faraday.new(:url => "http://api.crossref.org/", :request => nil)

# GET +path+ with the query parameters in +opts+ and return the parsed JSON body.
def _req(conn, path, opts)
  MultiJson.load(conn.get(path, opts).body)
end

# Follow the cursor starting from the first response +js+.
#
# Returns +js+ unchanged when there is no cursor (+cu+ is nil) or the first
# page already holds at least +cursor_max+ items; otherwise returns an Array
# of responses, requesting pages until the cursor is nil, +cursor_max+ items
# were collected, or +max_avail+ items were collected.
def _redo_req(conn, path, js, opts, cu, max_avail, cursor_max)
  return js if cu.nil?

  fetched = js['message']['items'].length
  return js unless cursor_max > fetched

  pages = [js]
  while !cu.nil? && cursor_max > fetched && fetched < max_avail
    opts[:cursor] = cu # the next request continues from the last cursor
    page = _req(conn, path, opts)
    cu = page['message']['next-cursor']
    pages << page
    fetched = pages.map { |x| x['message']['items'].length }.inject(:+)
  end
  pages
end

path = 'works'

# First page, then keep paging from its cursor.
js = _req(conn, path, opts)
cu = js['message']['next-cursor']
max_avail = js['message']['total-results']
res = _redo_req(conn, path, js, opts, cu, max_avail, cursor_max)
52
+
@@ -0,0 +1,22 @@
1
module Serrano
  # Base class of every Serrano error; rescue it to catch all of them.
  class Error < StandardError
  end

  # Crossref answered with HTTP status 400.
  class BadRequest < Error
  end

  # Crossref answered with HTTP status 404.
  class NotFound < Error
  end

  # Crossref answered with HTTP status 500.
  class InternalServerError < Error
  end

  # Crossref answered with HTTP status 502.
  class BadGateway < Error
  end

  # Crossref answered with HTTP status 503.
  class ServiceUnavailable < Error
  end

  # Crossref answered with HTTP status 504.
  class GatewayTimeout < Error
  end
end
@@ -0,0 +1,71 @@
1
+ require 'faraday'
2
+ require 'multi_json'
3
+
4
+ # @private
5
+ module FaradayMiddleware
6
+ # @private
7
+ class RaiseHttpException < Faraday::Middleware
8
+ def call(env)
9
+ @app.call(env).on_complete do |response|
10
+ case response[:status].to_i
11
+ when 400
12
+ raise Serrano::BadRequest, error_message_400(response)
13
+ when 404
14
+ raise Serrano::NotFound, error_message_400(response)
15
+ when 500
16
+ raise Serrano::InternalServerError, error_message_500(response, "Something is technically wrong.")
17
+ when 502
18
+ raise Serrano::BadGateway, error_message_500(response, "The server returned an invalid or incomplete response.")
19
+ when 503
20
+ raise Serrano::ServiceUnavailable, error_message_500(response, "Crossref is rate limiting your requests.")
21
+ when 504
22
+ raise Serrano::GatewayTimeout, error_message_500(response, "504 Gateway Time-out")
23
+ end
24
+ end
25
+ end
26
+
27
+ def initialize(app)
28
+ super app
29
+ @parser = nil
30
+ end
31
+
32
+ private
33
+
34
+ def error_message_400(response)
35
+ "\n #{response[:method].to_s.upcase} #{response[:url].to_s}\n Status #{response[:status]}#{error_body(response[:body])}"
36
+ end
37
+
38
+ def error_body(body)
39
+ if not body.nil? and not body.empty? and body.kind_of?(String)
40
+ if is_json?(body)
41
+ body = ::MultiJson.load(body)
42
+ if body['message'].nil?
43
+ body = nil
44
+ elseif body['message'].length == 1
45
+ body = body['message']
46
+ else
47
+ body = body['message'].collect { |x| x['message'] }.join('; ')
48
+ end
49
+ end
50
+ end
51
+
52
+ if body.nil?
53
+ nil
54
+ else
55
+ ": #{body}"
56
+ end
57
+ end
58
+
59
+ def error_message_500(response, body=nil)
60
+ "#{response[:method].to_s.upcase} #{response[:url].to_s}: #{[response[:status].to_s + ':', body].compact.join(' ')}"
61
+ end
62
+
63
+ def is_json?(string)
64
+ MultiJson.load(string)
65
+ return true
66
+ rescue MultiJson::ParseError => e
67
+ return false
68
+ end
69
+
70
+ end
71
+ end
@@ -1,58 +1,64 @@
1
1
  # helper functions
2
- module Serrano
3
- class Request #:nodoc:
2
+ module Helpers
4
3
 
5
- private
6
-
7
- $others = ['license_url','license_version','license_delay','full_text_version','full_text_type',
8
- 'award_number','award_funder']
9
-
10
- def filter_handler(x = nil)
11
- if x.nil?
12
- nil
13
- else
14
- x = stringify(x)
15
- nn = x.keys.collect{ |x| x.to_s }
16
- if nn.collect{ |x| $others.include? x }.any?
17
- nn = nn.collect{ |x|
18
- if $others.include? x
19
- case x
20
- when 'license_url'
21
- 'license.url'
22
- when 'license_version'
23
- 'license.version'
24
- when 'license_delay'
25
- 'license.delay'
26
- when 'full_text_version'
27
- 'full-text.version'
28
- when 'full_text_type'
29
- 'full-text.type'
30
- when 'award_number'
31
- 'award.number'
32
- when 'award_funder'
33
- 'award.funder'
34
- end
35
- else
36
- x
37
- end
38
- }
39
- end
4
+ $others = ['license_url','license_version','license_delay','full_text_version','full_text_type',
5
+ 'award_number','award_funder']
40
6
 
41
- newnn = nn.collect{ |x| x.gsub("_", "-") }
42
- x = rename_keys(x, newnn)
43
- x = x.collect{ |k,v| [k, v].join(":") }.join(',')
44
- return x
7
+ def filter_handler(x = nil)
8
+ if x.nil?
9
+ nil
10
+ else
11
+ x = stringify(x)
12
+ nn = x.keys.collect{ |x| x.to_s }
13
+ if nn.collect{ |x| $others.include? x }.any?
14
+ nn = nn.collect{ |x|
15
+ if $others.include? x
16
+ case x
17
+ when 'license_url'
18
+ 'license.url'
19
+ when 'license_version'
20
+ 'license.version'
21
+ when 'license_delay'
22
+ 'license.delay'
23
+ when 'full_text_version'
24
+ 'full-text.version'
25
+ when 'full_text_type'
26
+ 'full-text.type'
27
+ when 'award_number'
28
+ 'award.number'
29
+ when 'award_funder'
30
+ 'award.funder'
31
+ end
32
+ else
33
+ x
34
+ end
35
+ }
45
36
  end
46
- end
47
37
 
48
- def stringify(x)
49
- (x.keys.map{ |k,v| k.to_s }.zip x.values).to_h
38
+ newnn = nn.collect{ |x| x.gsub("_", "-") }
39
+ x = rename_keys(x, newnn)
40
+ x = x.collect{ |k,v| [k, v].join(":") }.join(',')
41
+ return x
50
42
  end
43
+ end
51
44
 
52
- def rename_keys(x, y)
53
- (y.zip x.values).to_h
54
- end
45
+ def stringify(x)
46
+ (x.keys.map{ |k,v| k.to_s }.zip x.values).to_h
47
+ end
55
48
 
49
+ def rename_keys(x, y)
50
+ (y.zip x.values).to_h
56
51
  end
57
52
 
58
53
  end
54
+
55
+ module Serrano
56
+ class Request #:nodoc:
57
+ include Helpers
58
+ end
59
+
60
+ class RequestCursor #:nodoc:
61
+ include Helpers
62
+ end
63
+ end
64
+
@@ -1,7 +1,8 @@
1
1
  require "faraday"
2
2
  require "multi_json"
3
- require "serrano/errors"
3
+ require "serrano/error"
4
4
  require "serrano/constants"
5
+ require 'serrano/utils'
5
6
  require 'serrano/helpers/configuration'
6
7
 
7
8
  ##
@@ -58,18 +59,21 @@ module Serrano
58
59
  conn = Faraday.new(:url => Serrano.base_url, :request => options) do |f|
59
60
  f.response :logger
60
61
  f.adapter Faraday.default_adapter
62
+ f.use FaradayMiddleware::RaiseHttpException
61
63
  end
62
64
  else
63
- conn = Faraday.new(:url => Serrano.base_url, :request => options)
65
+ conn = Faraday.new(:url => Serrano.base_url, :request => options) do |f|
66
+ f.adapter Faraday.default_adapter
67
+ f.use FaradayMiddleware::RaiseHttpException
68
+ end
64
69
  end
65
70
 
71
+ conn.headers[:user_agent] = make_ua
72
+ conn.headers["X-USER-AGENT"] = make_ua
73
+
66
74
  if self.id.nil?
67
- # begin
68
75
  res = conn.get self.endpt, opts
69
76
  return MultiJson.load(res.body)
70
- # rescue *NETWORKABLE_EXCEPTIONS => e
71
- # rescue_faraday_error(endpt, e)
72
- # end
73
77
  else
74
78
  coll = []
75
79
  Array(self.id).each do |x|
@@ -85,12 +89,6 @@ module Serrano
85
89
 
86
90
  res = conn.get endpt, opts
87
91
  coll << MultiJson.load(res.body)
88
- # begin
89
- # res = conn.get endpt, opts
90
- # coll << MultiJson.load(res.body)
91
- # rescue *NETWORKABLE_EXCEPTIONS => e
92
- # rescue_faraday_error(endpt, e)
93
- # end
94
92
  end
95
93
  return coll
96
94
  end
@@ -0,0 +1,138 @@
1
+ require "faraday"
2
+ require 'faraday_middleware'
3
+ require "multi_json"
4
+ require "serrano/error"
5
+ require "serrano/constants"
6
+ require 'serrano/helpers/configuration'
7
+ require 'serrano/filterhandler'
8
+ require 'serrano/error'
9
+ require 'serrano/faraday'
10
+
11
+ ##
12
+ # Serrano::RequestCursor
13
+ #
14
+ # Class to perform HTTP requests to the Crossref API
15
module Serrano
  # Performs HTTP requests against the Crossref API and, when a cursor is
  # supplied, keeps requesting pages until the cursor is exhausted or
  # +cursor_max+ records were collected.
  class RequestCursor #:nodoc:

    attr_accessor :endpt
    attr_accessor :id
    attr_accessor :query
    attr_accessor :filter
    attr_accessor :offset
    attr_accessor :limit
    attr_accessor :sample
    attr_accessor :sort
    attr_accessor :order
    attr_accessor :facet
    attr_accessor :works
    attr_accessor :agency
    attr_accessor :options
    attr_accessor :verbose
    attr_accessor :cursor
    attr_accessor :cursor_max

    def initialize(endpt, id, query, filter, offset,
      limit, sample, sort, order, facet, works, agency,
      options, verbose, cursor, cursor_max)

      self.endpt = endpt
      self.id = id
      self.query = query
      self.filter = filter
      self.offset = offset
      self.limit = limit
      self.sample = sample
      self.sort = sort
      self.order = order
      self.facet = facet
      self.works = works
      self.agency = agency
      self.options = options
      self.verbose = verbose
      self.cursor = cursor
      self.cursor_max = cursor_max
    end

    # Runs the request(s).
    #
    # Without ids a single route is queried and the result of #_redo_req is
    # returned. With ids, one result per id is collected into an Array.
    #
    # @raise [RuntimeError] when cursor_max is neither nil nor an Integer
    def perform
      filt = filter_handler(self.filter)
      validate_cursor_max!

      args = { query: self.query, filter: filt, offset: self.offset,
               rows: self.limit, sample: self.sample, sort: self.sort,
               order: self.order, facet: self.facet, cursor: self.cursor }
      opts = args.delete_if { |_k, v| v.nil? }

      # NOTE(review): the connection stays in the global $conn as before,
      # because code outside this class may read it - confirm before
      # replacing it with an instance variable.
      $conn = build_connection
      $conn.headers[:user_agent] = make_ua
      $conn.headers["X-USER-AGENT"] = make_ua

      return fetch_pages(self.endpt, opts) if self.id.nil?

      Array(self.id).map { |x| fetch_pages(path_for(x), opts) }
    end

    # Follows the cursor starting from the first response +js+.
    #
    # @param js [Hash] parsed first response
    # @param opts [Hash] query parameters of the first request (not modified)
    # @param cu [String, nil] cursor returned with +js+
    # @param max_avail [Integer] total number of results reported by the API
    # @param path [String] route to request further pages from; defaults to
    #   the route this object was created with
    # @return [Hash, Array<Hash>] +js+ itself when there is no cursor or the
    #   first page already reaches cursor_max, otherwise every page fetched
    def _redo_req(js, opts, cu, max_avail, path = self.endpt)
      return js if cu.nil?

      # A nil cursor_max means "no cap": paging then stops on total-results.
      cap = self.cursor_max || Float::INFINITY
      total = js['message']['items'].length
      return js unless cap > total

      res = [js]
      while !cu.nil? && cap > total && total < max_avail
        # Work on a copy so the caller's opts keep their starting cursor and
        # can be reused for the next id.
        out = self._req(path, opts.merge(cursor: cu))
        cu = out['message']['next-cursor']
        res << out
        fetched = out['message']['items'].length
        # An empty page means nothing is left; without this check a cursor
        # that never becomes nil would keep the loop running forever.
        break if fetched.zero?
        total += fetched
      end
      res
    end

    # GETs +path+ with +opts+ as query parameters and returns the parsed body.
    def _req(path, opts)
      res = $conn.get path, opts
      return MultiJson.load(res.body)
    end

    private

    # cursor_max may be left unset (nil) but must otherwise be an Integer.
    def validate_cursor_max!
      return if self.cursor_max.nil? || self.cursor_max.is_a?(Integer)

      raise "cursor_max must be of class int"
    end

    # Builds the Faraday connection. Middleware is registered before the
    # adapter, which Faraday expects to be the last entry of the stack.
    def build_connection
      Faraday.new(:url => Serrano.base_url, :request => options) do |f|
        f.response :logger if verbose
        f.use FaradayMiddleware::RaiseHttpException
        f.adapter Faraday.default_adapter
      end
    end

    # Route for a single id, optionally narrowed to its works or agency.
    def path_for(x)
      base = self.endpt + '/' + x.to_s
      if self.works
        base + "/works"
      elsif self.agency
        base + "/agency"
      else
        base
      end
    end

    # Requests the first page of +path+ and follows its cursor.
    def fetch_pages(path, opts)
      js = self._req(path, opts)
      cu = js['message']['next-cursor']
      max_avail = js['message']['total-results']
      self._redo_req(js, opts, cu, max_avail, path)
    end

  end
end
@@ -0,0 +1,5 @@
1
# Builds the user agent string sent with each request: the Faraday version
# followed by the Serrano version, separated by a single space.
def make_ua
  "Faraday/v#{Faraday::VERSION} Serrano/v#{Serrano::VERSION}"
end
@@ -1,3 +1,3 @@
1
1
  module Serrano
2
- VERSION = "0.1.4.1"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/serrano.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require "serrano/version"
2
2
  require "serrano/request"
3
+ require "serrano/request_cursor"
3
4
  require "serrano/filterhandler"
4
5
  require "serrano/cnrequest"
5
6
  require "serrano/filters"
@@ -12,29 +13,40 @@ require 'rexml/xpath'
12
13
  # @param offset [Fixnum] Number of the record to start at, from 1 to infinity.
13
14
  # @param limit [Fixnum] Number of results to return. Not relevant when searching with specific DOIs. Default: 20. Max: 1000
14
15
  # @param sample [Fixnum] Number of random results to return. when you use the sample parameter,
15
- # the limit and offset parameters are ignored. This parameter only used when works requested.
16
+ # the limit and offset parameters are ignored. This parameter only used when works requested.
16
17
  # @param sort [String] Field to sort on, one of score, relevance,
17
- # updated (date of most recent change to metadata. Currently the same as deposited),
18
- # deposited (time of most recent deposit), indexed (time of most recent index), or
19
- # published (publication date). Note: If the API call includes a query, then the sort
20
- # order will be by the relevance score. If no query is included, then the sort order
21
- # will be by DOI update date.
18
+ # updated (date of most recent change to metadata - currently the same as deposited),
19
+ # deposited (time of most recent deposit), indexed (time of most recent index), or
20
+ # published (publication date). Note: If the API call includes a query, then the sort
21
+ # order will be by the relevance score. If no query is included, then the sort order
22
+ # will be by DOI update date.
22
23
  # @param order [String] Sort order, one of 'asc' or 'desc'
23
24
  # @param facet [Boolean] Include facet results. Default: false
24
25
  # @param verbose [Boolean] Print request headers to stdout. Default: false
25
26
 
27
+ # @!macro cursor_params
28
+ # @param cursor [String] Cursor character string to do deep paging. Default is `nil`.
29
+ # Pass in '*' to start deep paging. Any combination of query, filters and facets may be
30
+ # used with deep paging cursors. While limit may be specified along with cursor, offset
31
+ # and sample cannot be used. See
32
+ # https://github.com/CrossRef/rest-api-doc/blob/master/rest_api.md#deep-paging-with-cursors
33
+ # @param cursor_max [Fixnum] Max records to retrieve. Only used when cursor
34
+ # param used. Because deep paging can result in continuous requests until all
35
+ # are retrieved, use this parameter to set a maximum number of records. Of course,
36
+ # if fewer records are found than this value, you will get only those found.
37
+
26
38
  # @!macro serrano_options
27
39
  # @param options [Hash] Hash of options for configuring the request, passed on to Faraday.new
28
- # :timeout - [Fixnum] open/read timeout Integer in seconds
29
- # :open_timeout - [Fixnum] read timeout Integer in seconds
30
- # :proxy - [Hash] hash of proxy options
31
- # :uri - [String] Proxy Server URI
32
- # :user - [String] Proxy server username
33
- # :password - [String] Proxy server password
34
- # :params_encoder - [Hash] not sure what this is
35
- # :bind - [Hash] A hash with host and port values
36
- # :boundary - [String] of the boundary value
37
- # :oauth - [Hash] A hash with OAuth details
40
+ # - timeout [Fixnum] open/read timeout Integer in seconds
41
+ # - open_timeout [Fixnum] connection open timeout Integer in seconds
42
+ # - proxy [Hash] hash of proxy options
43
+ # - uri [String] Proxy Server URI
44
+ # - user [String] Proxy server username
45
+ # - password [String] Proxy server password
46
+ # - params_encoder [Hash] not sure what this is
47
+ # - bind [Hash] A hash with host and port values
48
+ # - boundary [String] of the boundary value
49
+ # - oauth [Hash] A hash with OAuth details
38
50
 
39
51
  ##
40
52
  # Serrano - The top level module for using methods
@@ -61,6 +73,17 @@ require 'rexml/xpath'
61
73
  #
62
74
  # @see https://github.com/CrossRef/rest-api-doc/blob/master/rest_api.md for
63
75
  # detailed description of the Crossref API
76
+ #
77
+ # What am I actually searching when using the Crossref search API?
78
+ #
79
+ # You are using the Crossref search API described at
80
+ # https://github.com/CrossRef/rest-api-doc/blob/master/rest_api.md.
81
+ # When you search with query terms, on Crossref servers they are not
82
+ # searching full text, or even abstracts of articles, but only what is
83
+ # available in the data that is returned to you. That is, they search
84
+ # article titles, authors, etc. For some discussion on this, see
85
+ # https://github.com/CrossRef/rest-api-doc/issues/101
86
+
64
87
  module Serrano
65
88
  extend Configuration
66
89
 
@@ -73,6 +96,7 @@ module Serrano
73
96
  #
74
97
  # @!macro serrano_params
75
98
  # @!macro serrano_options
99
+ # @!macro cursor_params
76
100
  # @param ids [Array] DOIs (digital object identifier) or other identifiers
77
101
  # @param query [String] A query string
78
102
  # @param filter [Hash] Filter options. See ...
@@ -101,12 +125,30 @@ module Serrano
101
125
  # Serrano.works(ids: '10.1371/journal.pone.0033693', options: {timeout: 3, open_timeout: 2})
102
126
  # ## log request details - uses Faraday middleware
103
127
  # Serrano.works(ids: '10.1371/journal.pone.0033693', verbose: true)
128
+ #
129
+ # # facets
130
+ # Serrano.works(facet: 'license:*', limit: 0, filter: {has_full_text: true})
131
+ #
132
+ # # sample
133
+ # Serrano.works(sample: 2)
134
+ #
135
+ # # cursor for deep paging
136
+ # Serrano.works(query: "widget", cursor: "*", limit: 100)
137
+ # # another query, more results this time
138
+ # res = Serrano.works(query: "science", cursor: "*", limit: 250, cursor_max: 1000);
139
+ # res.collect { |x| x['message']['items'].length }.reduce(0, :+)
140
+ # # another query
141
+ # res = Serrano.works(query: "ecology", cursor: "*", limit: 1000, cursor_max: 10000);
142
+ # res.collect { |x| x['message']['items'].length }.reduce(0, :+)
143
+ # items = res.collect {|x| x['message']['items']}.flatten
144
+ # items.collect { |x| x['DOI'] }[0,20]
104
145
  def self.works(ids: nil, query: nil, filter: nil, offset: nil,
105
146
  limit: nil, sample: nil, sort: nil, order: nil, facet: nil,
106
- options: nil, verbose: false)
147
+ options: nil, verbose: false, cursor: nil, cursor_max: 5000)
107
148
 
108
- Request.new('works', ids, query, filter, offset,
109
- limit, sample, sort, order, facet, nil, nil, options, verbose).perform
149
+ RequestCursor.new('works', ids, query, filter, offset,
150
+ limit, sample, sort, order, facet, nil, nil, options,
151
+ verbose, cursor, cursor_max).perform
110
152
  end
111
153
 
112
154
  ##
@@ -114,6 +156,7 @@ module Serrano
114
156
  #
115
157
  # @!macro serrano_params
116
158
  # @!macro serrano_options
159
+ # @!macro cursor_params
117
160
  # @param ids [Array] DOIs (digital object identifier) or other identifiers
118
161
  # @param query [String] A query string
119
162
  # @param filter [Hash] Filter options. See ...
@@ -133,12 +176,26 @@ module Serrano
133
176
  # Serrano.members(query: "ecology", order: "asc")
134
177
  # # Works
135
178
  # Serrano.members(ids: 98, works: true)
179
+ #
180
+ # # cursor - deep paging
181
+ # res = Serrano.members(ids: 98, works: true, cursor: "*", cursor_max: 1000);
182
+ # res[0].collect { |x| x['message']['items'].length }.reduce(0, :+)
183
+ # items = res[0].collect { |x| x['message']['items'] }.flatten
184
+ # items.collect{ |z| z['DOI'] }[0,50]
185
+ #
186
+ # # multiple ids with cursor
187
+ # res = Serrano.members(ids: [98, 340], works: true, cursor: "*", cursor_max: 300);
188
+ # res[0].collect { |x| x['message']['items'].length }.reduce(0, :+)
189
+ # items = res[0].collect { |x| x['message']['items'] }.flatten
190
+ # items.collect{ |z| z['DOI'] }[0,50]
136
191
  def self.members(ids: nil, query: nil, filter: nil, offset: nil,
137
192
  limit: nil, sample: nil, sort: nil, order: nil, facet: nil,
138
- works: false, options: nil, verbose: false)
193
+ works: false, options: nil, verbose: false,
194
+ cursor: nil, cursor_max: 5000)
139
195
 
140
- Request.new('members', ids, query, filter, offset,
141
- limit, sample, sort, order, facet, works, nil, options, verbose).perform
196
+ RequestCursor.new('members', ids, query, filter, offset,
197
+ limit, sample, sort, order, facet, works, nil, options,
198
+ verbose, cursor, cursor_max).perform
142
199
  end
143
200
 
144
201
  ##
@@ -146,6 +203,7 @@ module Serrano
146
203
  #
147
204
  # @!macro serrano_params
148
205
  # @!macro serrano_options
206
+ # @!macro cursor_params
149
207
  # @param ids [Array] DOIs (digital object identifier) or other identifiers
150
208
  # @param filter [Hash] Filter options. See ...
151
209
  # @param works [Boolean] If true, works returned as well. Default: false
@@ -162,12 +220,20 @@ module Serrano
162
220
  # Serrano.prefixes(ids: "10.1016", works: true, limit: 3)
163
221
  # # Sort and order
164
222
  # Serrano.prefixes(ids: "10.1016", works: true, sort: 'relevance', order: "asc")
223
+ #
224
+ # # cursor - deep paging
225
+ # res = Serrano.prefixes(ids: "10.1016", works: true, cursor: "*", cursor_max: 1000);
226
+ # res[0].collect { |x| x['message']['items'].length }.reduce(0, :+)
227
+ # items = res[0].collect { |x| x['message']['items'] }.flatten;
228
+ # items.collect{ |z| z['DOI'] }[0,50]
165
229
  def self.prefixes(ids:, filter: nil, offset: nil,
166
230
  limit: nil, sample: nil, sort: nil, order: nil, facet: nil,
167
- works: false, options: nil, verbose: false)
231
+ works: false, options: nil, verbose: false,
232
+ cursor: nil, cursor_max: 5000)
168
233
 
169
- Request.new('prefixes', ids, nil, filter, offset,
170
- limit, sample, sort, order, facet, works, nil, options, verbose).perform
234
+ RequestCursor.new('prefixes', ids, nil, filter, offset,
235
+ limit, sample, sort, order, facet, works, nil, options,
236
+ verbose, cursor, cursor_max).perform
171
237
  end
172
238
 
173
239
  ##
@@ -175,6 +241,7 @@ module Serrano
175
241
  #
176
242
  # @!macro serrano_params
177
243
  # @!macro serrano_options
244
+ # @!macro cursor_params
178
245
  # @param ids [Array] DOIs (digital object identifier) or other identifiers
179
246
  # @param query [String] A query string
180
247
  # @param filter [Hash] Filter options. See ...
@@ -196,12 +263,20 @@ module Serrano
196
263
  # Serrano.funders(ids: '10.13039/100000001', works: true, limit: 3)
197
264
  # # Sort and order
198
265
  # Serrano.funders(ids: "10.13039/100000001", works: true, sort: 'relevance', order: "asc")
266
+ #
267
+ # # cursor - deep paging
268
+ # res = Serrano.funders(ids: '10.13039/100000001', works: true, cursor: "*", cursor_max: 500);
269
+ # res[0].collect { |x| x['message']['items'].length }.reduce(0, :+)
270
+ # items = res[0].collect { |x| x['message']['items'] }.flatten;
271
+ # items.collect{ |z| z['DOI'] }[0,50]
199
272
  def self.funders(ids: nil, query: nil, filter: nil, offset: nil,
200
273
  limit: nil, sample: nil, sort: nil, order: nil, facet: nil,
201
- works: false, options: nil, verbose: false)
274
+ works: false, options: nil, verbose: false,
275
+ cursor: nil, cursor_max: 5000)
202
276
 
203
- Request.new('funders', ids, query, filter, offset,
204
- limit, sample, sort, order, facet, works, nil, options, verbose).perform
277
+ RequestCursor.new('funders', ids, query, filter, offset,
278
+ limit, sample, sort, order, facet, works, nil, options,
279
+ verbose, cursor, cursor_max).perform
205
280
  end
206
281
 
207
282
  ##
@@ -209,6 +284,7 @@ module Serrano
209
284
  #
210
285
  # @!macro serrano_params
211
286
  # @!macro serrano_options
287
+ # @!macro cursor_params
212
288
  # @param ids [Array] DOIs (digital object identifier) or other identifiers
213
289
  # @param query [String] A query string
214
290
  # @param filter [Hash] Filter options. See ...
@@ -230,18 +306,27 @@ module Serrano
230
306
  # Serrano.journals(ids: '1803-2427', works: true)
231
307
  # Serrano.journals(limit: 2)
232
308
  # Serrano.journals(sample: 2)
309
+ #
310
+ # # cursor - deep paging
311
+ # res = Serrano.journals(ids: "2167-8359", works: true, cursor: "*", cursor_max: 500);
312
+ # res[0].collect { |x| x['message']['items'].length }.reduce(0, :+)
313
+ # items = res[0].collect { |x| x['message']['items'] }.flatten;
314
+ # items.collect{ |z| z['DOI'] }[0,50]
233
315
  def self.journals(ids: nil, query: nil, filter: nil, offset: nil,
234
316
  limit: nil, sample: nil, sort: nil, order: nil, facet: nil,
235
- works: false, options: nil, verbose: false)
317
+ works: false, options: nil, verbose: false,
318
+ cursor: nil, cursor_max: 5000)
236
319
 
237
- Request.new('journals', ids, query, filter, offset,
238
- limit, sample, sort, order, facet, works, nil, options, verbose).perform
320
+ RequestCursor.new('journals', ids, query, filter, offset,
321
+ limit, sample, sort, order, facet, works, nil, options,
322
+ verbose, cursor, cursor_max).perform
239
323
  end
240
324
 
241
325
  ##
242
326
  # Search the types route
243
327
  #
244
328
  # @!macro serrano_options
329
+ # @!macro cursor_params
245
330
  # @param ids [Array] DOIs (digital object identifier) or other identifiers
246
331
  # @param works [Boolean] If true, works returned as well. Default: false
247
332
  # @return [Array] An array of hashes
@@ -252,11 +337,18 @@ module Serrano
252
337
  # Serrano.types(ids: "journal")
253
338
  # Serrano.types(ids: ["journal", "dissertation"])
254
339
  # Serrano.types(ids: "journal", works: true)
255
- def self.types(ids: nil, offset: nil,
256
- limit: nil, works: false, options: nil, verbose: false)
340
+ #
341
+ # # cursor - deep paging
342
+ # res = Serrano.types(ids: "journal", works: true, cursor: "*", cursor_max: 500);
343
+ # res[0].collect { |x| x['message']['items'].length }.reduce(0, :+)
344
+ # items = res[0].collect { |x| x['message']['items'] }.flatten;
345
+ # items.collect{ |z| z['DOI'] }[0,50]
346
+ def self.types(ids: nil, offset: nil, limit: nil, works: false,
347
+ options: nil, verbose: false, cursor: nil, cursor_max: 5000)
257
348
 
258
- Request.new('types', ids, nil, nil, offset,
259
- limit, nil, nil, nil, nil, works, nil, options, verbose).perform
349
+ RequestCursor.new('types', ids, nil, nil, offset,
350
+ limit, nil, nil, nil, nil, works, nil, options,
351
+ verbose, cursor, cursor_max).perform
260
352
  end
261
353
 
262
354
  ##
data/serrano.gemspec CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |s|
8
8
  s.version = Serrano::VERSION
9
9
  s.platform = Gem::Platform::RUBY
10
10
  s.required_ruby_version = '>= 2.0'
11
- s.date = '2015-12-04'
11
+ s.date = '2016-03-07'
12
12
  s.summary = "Crossref Client"
13
13
  s.description = "Low Level Ruby Client for the Crossref Search API"
14
14
  s.authors = "Scott Chamberlain"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: serrano
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Scott Chamberlain
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-04 00:00:00.000000000 Z
11
+ date: 2016-03-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -177,12 +177,16 @@ files:
177
177
  - lib/serrano/cn.rb
178
178
  - lib/serrano/cnrequest.rb
179
179
  - lib/serrano/constants.rb
180
- - lib/serrano/errors.rb
180
+ - lib/serrano/cursor_testing.rb
181
+ - lib/serrano/error.rb
182
+ - lib/serrano/faraday.rb
181
183
  - lib/serrano/filterhandler.rb
182
184
  - lib/serrano/filters.rb
183
185
  - lib/serrano/helpers/configuration.rb
184
186
  - lib/serrano/request.rb
187
+ - lib/serrano/request_cursor.rb
185
188
  - lib/serrano/styles.rb
189
+ - lib/serrano/utils.rb
186
190
  - lib/serrano/version.rb
187
191
  - serrano.gemspec
188
192
  homepage: http://github.com/sckott/serrano
@@ -1,67 +0,0 @@
1
- require 'net/http'
2
-
3
- def rescue_faraday_error(url, error, options={})
4
- details = nil
5
- headers = {}
6
-
7
- if error.is_a?(Faraday::Error::TimeoutError)
8
- status = 408
9
- elsif error.respond_to?('status')
10
- status = error[:status]
11
- elsif error.respond_to?('response') && error.response.present?
12
- status = error.response[:status]
13
- details = error.response[:body]
14
- headers = error.response[:headers]
15
- else
16
- status = 400
17
- end
18
-
19
- # Some sources use a different status for rate-limiting errors
20
- status = 429 if status == 403 && details.include?("Excessive use detected")
21
-
22
- if error.respond_to?('exception')
23
- exception = error.exception
24
- else
25
- exception = ""
26
- end
27
-
28
- class_name = class_name_by_status(status) || error.class
29
-
30
- message = parse_error_response(error.message)
31
- message = "#{message} for #{url}"
32
- message = "#{message} with rev #{options[:data][:rev]}" if class_name == Net::HTTPConflict
33
-
34
- { error: message, status: status }
35
- end
36
-
37
- def parse_error_response(string)
38
- if is_json?(string)
39
- string = MultiJson.load(string)
40
- end
41
- string = string['error'] if string.is_a?(Hash) && string['error']
42
- string
43
- end
44
-
45
- def is_json?(string)
46
- MultiJson.load(string)
47
- rescue MultiJson::ParseError => e
48
- e.data
49
- e.cause
50
- end
51
-
52
- def class_name_by_status(status)
53
- { 400 => Net::HTTPBadRequest,
54
- 401 => Net::HTTPUnauthorized,
55
- 403 => Net::HTTPForbidden,
56
- 404 => Net::HTTPNotFound,
57
- 406 => Net::HTTPNotAcceptable,
58
- 408 => Net::HTTPRequestTimeOut,
59
- 409 => Net::HTTPConflict,
60
- 417 => Net::HTTPExpectationFailed,
61
- 429 => Net::HTTPTooManyRequests,
62
- 500 => Net::HTTPInternalServerError,
63
- 502 => Net::HTTPBadGateway,
64
- 503 => Net::HTTPServiceUnavailable,
65
- 504 => Net::HTTPGatewayTimeOut }.fetch(status, nil)
66
- end
67
-