serrano 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
data/lib/serrano/cn.rb ADDED
@@ -0,0 +1,30 @@
1
+ require "serrano/version"
2
+ require "serrano/cnrequest"
3
+
4
+ ##
5
+ # ContentNegotiation - Content Negotiation class
6
+ #
7
+ # @see http://www.crosscite.org/cn/ for details
8
module Serrano
  # Holds the arguments of a content-negotiation lookup and hands them to
  # Serrano::CNRequest when #cn is called.
  class ContentNegotiation
    attr_accessor :ids, :format, :style, :locale

    # @param ids identifier(s) to look up; passed through unchanged
    # @param format [String] output format name (default "bibtex")
    # @param style [String] citation style name (default "apa")
    # @param locale [String] locale name (default "en-US")
    def initialize(ids, format = "bibtex", style = "apa", locale = "en-US")
      @ids = ids
      @format = format
      @style = style
      @locale = locale
    end

    # Builds a CNRequest from the current attribute values and runs it.
    #
    # @return whatever CNRequest#perform returns
    def cn
      request = CNRequest.new(ids, format, style, locale)
      request.perform
    end
  end
end
@@ -0,0 +1,83 @@
1
+ require "faraday"
2
+ require "faraday_middleware"
3
+ require "multi_json"
4
+ require "serrano/errors"
5
+ require "serrano/constants"
6
+ require 'serrano/helpers/configuration'
7
+
8
+ ##
9
+ # Serrano::CNRequest
10
+ #
11
+ # Class to perform HTTP requests to the Crossref API
12
module Serrano
  # Runs content-negotiation requests for one or more identifiers.
  #
  # #perform validates the requested format against $cn_formats, stores a
  # Faraday connection to http://dx.doi.org/ in the global $conn, and then
  # delegates each identifier to make_request (defined at file level, outside
  # this class).
  class CNRequest #:nodoc:

    attr_accessor :ids
    attr_accessor :format
    attr_accessor :style
    attr_accessor :locale

    # @param ids [String, Array<String>] identifier(s) to request
    # @param format [String] one of the names listed in $cn_formats
    # @param style [String] citation style, forwarded to make_request
    # @param locale [String] locale, forwarded to make_request
    def initialize(ids, format, style, locale)
      self.ids = ids
      self.format = format
      self.style = style
      self.locale = locale
    end

    # Performs the request(s).
    #
    # @return the single make_request result when `ids.length == 1`,
    #   otherwise an Array with one result per identifier
    # @raise [RuntimeError] when format is not in $cn_formats
    def perform
      unless $cn_formats.include? self.format
        raise "format not one of accepted types"
      end

      # make_request reads this global for every non citeproc-json format.
      $conn = Faraday.new "http://dx.doi.org/" do |c|
        c.use FaradayMiddleware::FollowRedirects
        c.adapter :net_http
      end

      if self.ids.length == 1
        # A one-element Array must be unwrapped: make_request concatenates the
        # identifier into a URL and so needs the String itself, not the Array.
        single = self.ids.is_a?(Array) ? self.ids.first : self.ids
        make_request(single, self.format, self.style, self.locale)
      else
        Array(self.ids).map do |x|
          make_request(x, self.format, self.style, self.locale)
        end
      end
    end
  end
end
49
+
50
# Fetches one identifier in the requested format and returns the response body.
#
# The Accept/media type is the first value in $cn_format_headers whose key
# contains `format` as a substring. For "citeproc-json" the identifier and
# that type are appended to http://api.crossref.org/works/ and fetched with a
# fresh Faraday client; every other format goes through the global $conn with
# the type sent as the Accept header ("text" additionally appends the style
# and locale to it).
#
# @param ids [String] a single identifier
# @param format [String] format name
# @param style [String] citation style (only used for "text")
# @param locale [String] locale (only used for "text")
# @return the body of the HTTP response
def make_request(ids, format, style, locale)
  matching = $cn_format_headers.select { |key, _| key.include? format }
  type = matching.values[0]

  if format == "citeproc-json"
    endpt = "http://api.crossref.org/works/" + ids + "/" + type
    return Faraday.new(:url => endpt).get.body
  end

  type = type + "; style = " + style + "; locale = " + locale if format == "text"

  response = $conn.get do |req|
    req.url ids
    req.headers['Accept'] = type
  end
  response.body
end
70
+
71
+ # parser <- cn_types[[self.format]]
72
+ # if (raw) {
73
+ # content(response, "text")
74
+ # } else {
75
+ # out <- content(response, "parsed", parser, "UTF-8")
76
+ # if (format == "text") {
77
+ # out <- gsub("\n", "", out)
78
+ # }
79
+ # if (format == "bibentry") {
80
+ # out <- parse_bibtex(out)
81
+ # }
82
+ # out
83
+ # }
@@ -0,0 +1,36 @@
1
+ require 'net/http'
2
+
3
# Exception classes grouped under one name.
# NOTE(review): presumably the set rescued around HTTP calls — no use of this
# constant is visible here, confirm against the callers.
NETWORKABLE_EXCEPTIONS = [
  Faraday::Error::ClientError,
  URI::InvalidURIError,
  Encoding::UndefinedConversionError,
  ArgumentError,
  NoMethodError,
  TypeError
]
9
+
10
# Format names accepted for content negotiation.
$cn_formats = %w[
  rdf-xml turtle citeproc-json citeproc-json-ish text ris bibtex
  crossref-xml datacite-xml bibentry crossref-tdm
]
14
+
15
# Maps each content-negotiation format name to the media type requested for it.
#
# "citeproc-json-ish" is listed in $cn_formats, so it needs an entry here;
# without one the request is sent with a nil Accept header. It is placed
# directly after "citeproc-json" so that a substring lookup for
# "citeproc-json" still finds the original entry first.
$cn_format_headers = {
  "rdf-xml"           => "application/rdf+xml",
  "turtle"            => "text/turtle",
  "citeproc-json"     => "transform/application/vnd.citationstyles.csl+json",
  "citeproc-json-ish" => "application/vnd.citationstyles.csl+json",
  "text"              => "text/x-bibliography",
  "ris"               => "application/x-research-info-systems",
  "bibtex"            => "application/x-bibtex",
  "crossref-xml"      => "application/vnd.crossref.unixref+xml",
  "datacite-xml"      => "application/vnd.datacite.datacite+xml",
  "bibentry"          => "application/x-bibtex",
  "crossref-tdm"      => "application/vnd.crossref.unixsd+xml"
}
25
+
26
# Maps each content-negotiation format name to a content type string.
# NOTE(review): presumably the type used to parse the response body (the
# commented-out R reference code indexes cn_types by format) — nothing in the
# visible Ruby reads this table, confirm before relying on it.
$cn_types = {
  "rdf-xml"           => "text/xml",
  "turtle"            => "text/plain",
  "citeproc-json"     => "application/json",
  "citeproc-json-ish" => "application/json",
  "text"              => "text/plain",
  "ris"               => "text/plain",
  "bibtex"            => "text/plain",
  "crossref-xml"      => "text/xml",
  "datacite-xml"      => "text/xml",
  "bibentry"          => "text/plain",
  "crossref-tdm"      => "text/xml"
}
@@ -0,0 +1,67 @@
1
+ require 'net/http'
2
+
3
# Converts an error raised during an HTTP call into a
# `{ error: message, status: code }` Hash.
#
# Status resolution, in order:
# * Faraday::Error::TimeoutError         -> 408
# * error responds to `status`           -> that status
# * error has a non-blank `response`     -> response[:status] (body kept as details)
# * anything else                        -> 400
# A 403 whose body mentions "Excessive use detected" is reported as 429.
#
# @param url the URL that was requested; appended to the message
# @param error the rescued error object; must respond to `message`
# @param options [Hash] optional; `options[:data][:rev]` is appended to the
#   message when the status maps to Net::HTTPConflict
# @return [Hash] `{ error: String, status: Integer }`
def rescue_faraday_error(url, error, options={})
  details = nil

  if error.is_a?(Faraday::Error::TimeoutError)
    status = 408
  elsif error.respond_to?('status')
    # Plain exception objects expose #status but not #[]; keep #[] for
    # objects that support it, fall back to the reader otherwise.
    status = error.respond_to?(:[]) ? error[:status] : error.status
  elsif error.respond_to?('response') && error.response.present?
    # NOTE(review): `present?` is not core Ruby — presumably ActiveSupport is
    # loaded wherever this runs; confirm.
    status = error.response[:status]
    details = error.response[:body]
  else
    status = 400
  end

  # Some sources use a different status for rate-limiting errors.
  # details is nil unless the response branch ran, so coerce before matching.
  status = 429 if status == 403 && details.to_s.include?("Excessive use detected")

  class_name = class_name_by_status(status) || error.class

  message = parse_error_response(error.message)
  message = "#{message} for #{url}"
  if class_name == Net::HTTPConflict
    # options may not carry :data; avoid calling [] on nil.
    rev = (options[:data] || {})[:rev]
    message = "#{message} with rev #{rev}"
  end

  { error: message, status: status }
end
36
+
37
# Extracts a readable error value from an error message that may be JSON.
#
# The text is parsed once. If it is JSON that decodes to a truthy value, the
# decoded value replaces the text; if that value is a Hash with a truthy
# 'error' entry, that entry is returned. Text that is not JSON is returned
# unchanged instead of raising a parse error.
#
# @param string [String] raw error message
# @return the 'error' entry, the decoded JSON value, or the original string
def parse_error_response(string)
  parsed = begin
    MultiJson.load(string)
  rescue MultiJson::ParseError
    nil
  end

  string = parsed if parsed
  string = string['error'] if string.is_a?(Hash) && string['error']
  string
end
44
+
45
# Reports whether `string` parses as JSON.
#
# The rescue branch returns `false` explicitly. Returning the parse error's
# cause (an exception object, hence truthy) would make invalid input look
# like valid JSON to callers that use this as a predicate.
#
# @param string [String] text to test
# @return [Boolean] true when MultiJson can load the text, false when it
#   raises MultiJson::ParseError
def is_json?(string)
  MultiJson.load(string)
  true
rescue MultiJson::ParseError
  false
end
51
+
52
# Looks up the Net::HTTP response class for a numeric HTTP status.
#
# @param status [Integer] HTTP status code
# @return [Class, nil] the matching Net::HTTP class, or nil when the status
#   is not in the table
def class_name_by_status(status)
  status_classes = {
    400 => Net::HTTPBadRequest,
    401 => Net::HTTPUnauthorized,
    403 => Net::HTTPForbidden,
    404 => Net::HTTPNotFound,
    406 => Net::HTTPNotAcceptable,
    408 => Net::HTTPRequestTimeOut,
    409 => Net::HTTPConflict,
    417 => Net::HTTPExpectationFailed,
    429 => Net::HTTPTooManyRequests,
    500 => Net::HTTPInternalServerError,
    502 => Net::HTTPBadGateway,
    503 => Net::HTTPServiceUnavailable,
    504 => Net::HTTPGatewayTimeOut
  }
  status_classes[status]
end
67
+
@@ -0,0 +1,58 @@
1
+ # helper functions
2
module Serrano
  class Request #:nodoc:
    private

    # Filter names whose API spelling uses a dot rather than a plain hyphen.
    $others = ['license_url', 'license_version', 'license_delay',
               'full_text_version', 'full_text_type',
               'award_number', 'award_funder']

    # Turns a Hash of filters into a single "name:value,name:value" String.
    #
    # Keys are stringified; keys listed in $others are replaced by their
    # dotted form; every remaining underscore in a key becomes a hyphen.
    #
    # @param x [Hash, nil] filter names mapped to values
    # @return [String, nil] nil when x is nil, otherwise the joined filters
    def filter_handler(x = nil)
      return nil if x.nil?

      dotted = {
        'license_url' => 'license.url',
        'license_version' => 'license.version',
        'license_delay' => 'license.delay',
        'full_text_version' => 'full-text.version',
        'full_text_type' => 'full-text.type',
        'award_number' => 'award.number',
        'award_funder' => 'award.funder'
      }

      filters = stringify(x)
      names = filters.keys.map { |key| key.to_s }
      if names.any? { |name| $others.include? name }
        names = names.map do |name|
          $others.include?(name) ? dotted[name] : name
        end
      end

      hyphenated = names.map { |name| name.gsub("_", "-") }
      renamed = rename_keys(filters, hyphenated)
      renamed.map { |k, v| [k, v].join(":") }.join(',')
    end

    # Returns a copy of x whose keys have been converted with to_s.
    def stringify(x)
      string_keys = x.keys.map { |key| key.to_s }
      string_keys.zip(x.values).to_h
    end

    # Pairs the names in y, in order, with the values of x.
    def rename_keys(x, y)
      pairs = y.zip(x.values)
      pairs.to_h
    end
  end
end
@@ -0,0 +1,84 @@
1
+ ##
2
+ # Serrano::Filters
3
+ #
4
+ # Information on Crossref API filters
5
+ #
6
+ # @example
7
+ # # List filter names
8
+ # Serrano::Filters.names
9
+ # # List filter values and description
10
+ # Serrano::Filters.filters
11
+ # Serrano::Filters.filters['has_funder']
12
+ # Serrano::Filters.filters['has_funder']['description']
13
module Serrano
  # Read-only access to the filter tables held in $filter_list and
  # $filter_details.
  module Filters
    class << self
      # @return the value of $filter_list (filter names)
      def names
        $filter_list
      end

      # @return the value of $filter_details (per-filter description table)
      def filters
        $filter_details
      end
    end
  end
end
24
+
25
# Filter names, in the order they are reported by Serrano::Filters.names.
$filter_list = %w[
  has_funder funder prefix member from_index_date until_index_date
  from_deposit_date until_deposit_date from_update_date until_update_date
  from_first_deposit_date until_first_deposit_date from_pub_date until_pub_date
  has_license license_url license_version license_delay has_full_text
  full_text_version full_text_type public_references has_references has_archive
  archive has_orcid orcid issn type directory doi updates is_update
  has_update_policy container_title publisher_name category_name type_name
  from_created_date until_created_date affiliation has_affiliation
  assertion_group assertion article_number alternative_id
]
36
+
37
# Per-filter help text: each filter name maps to a Hash with the accepted
# value placeholder ("possible_values", nil when none is listed) and a
# human-readable "description".
#
# The descriptions are shown to users, so garbled text has been repaired:
# stray underscores, the unclosed "(in days", "xxxx_xxxx", missing sentence
# breaks and misspellings. Keys and "possible_values" are unchanged.
$filter_details = {
  "has_funder" => { "possible_values" => nil, "description" => "metadata which includes one or more funder entry" },
  "funder" => { "possible_values" => "{funder_id}", "description" => "metadata which include the {funder_id} in FundRef data" },
  "prefix" => { "possible_values" => "{owner_prefix}", "description" => "metadata belonging to a DOI owner prefix {owner_prefix} (e.g. '10.1016' )" },
  "member" => { "possible_values" => "{member_id}", "description" => "metadata belonging to a CrossRef member" },
  "from_index_date" => { "possible_values" => "{date}", "description" => "metadata indexed since (inclusive) {date}" },
  "until_index_date" => { "possible_values" => "{date}", "description" => "metadata indexed before (inclusive) {date}" },
  "from_deposit_date" => { "possible_values" => "{date}", "description" => "metadata last (re)deposited since (inclusive) {date}" },
  "until_deposit_date" => { "possible_values" => "{date}", "description" => "metadata last (re)deposited before (inclusive) {date}" },
  "from_update_date" => { "possible_values" => "{date}", "description" => "Metadata updated since (inclusive) {date}. Currently the same as 'from_deposit_date'" },
  "until_update_date" => { "possible_values" => "{date}", "description" => "Metadata updated before (inclusive) {date}. Currently the same as 'until_deposit_date'" },
  "from_created_date" => { "possible_values" => "{date}", "description" => "metadata first deposited since (inclusive) {date}" },
  "until_created_date" => { "possible_values" => "{date}", "description" => "metadata first deposited before (inclusive) {date}" },
  "from_pub_date" => { "possible_values" => "{date}", "description" => "metadata where published date is since (inclusive) {date}" },
  "until_pub_date" => { "possible_values" => "{date}", "description" => "metadata where published date is before (inclusive) {date}" },
  "has_license" => { "possible_values" => nil, "description" => "metadata that includes any '<license_ref>' elements" },
  "license_url" => { "possible_values" => "{url}", "description" => "metadata where '<license_ref>' value equals {url}" },
  "license_version" => { "possible_values" => "{string}", "description" => "metadata where the '<license_ref>''s 'applies_to' attribute is '{string}'" },
  "license_delay" => { "possible_values" => "{integer}", "description" => "metadata where difference between publication date and the '<license_ref>''s 'start_date' attribute is <= '{integer}' (in days)" },
  "has_full_text" => { "possible_values" => nil, "description" => "metadata that includes any full text '<resource>' elements" },
  "full_text_version" => { "possible_values" => "{string}", "description" => "metadata where '<resource>' element's 'content_version' attribute is '{string}'" },
  "full_text_type" => { "possible_values" => "{mime_type}", "description" => "metadata where '<resource>' element's 'content_type' attribute is '{mime_type}' (e.g. 'application/pdf')" },
  "public_references" => { "possible_values" => nil, "description" => "metadata where publishers allow references to be distributed publicly" },
  "has_references" => { "possible_values" => nil, "description" => "metadata for works that have a list of references" },
  "has_archive" => { "possible_values" => nil, "description" => "metadata which include name of archive partner" },
  "archive" => { "possible_values" => "{string}", "description" => "metadata where value of archive partner is '{string}'" },
  "has_orcid" => { "possible_values" => nil, "description" => "metadata which includes one or more ORCIDs" },
  "orcid" => { "possible_values" => "{orcid}", "description" => "metadata where '<orcid>' element's value = '{orcid}'" },
  "issn" => { "possible_values" => "{issn}", "description" => "metadata where record has an ISSN = '{issn}'. Format is 'xxxx-xxxx'." },
  "type" => { "possible_values" => "{type}", "description" => "metadata records whose type = '{type}'. Type must be an ID value from the list of types returned by the '/types' resource" },
  "directory" => { "possible_values" => "{directory}", "description" => "metadata records whose article or serial are mentioned in the given '{directory}'. Currently the only supported value is 'doaj'" },
  "doi" => { "possible_values" => "{doi}", "description" => "metadata describing the DOI '{doi}'" },
  "updates" => { "possible_values" => "{doi}", "description" => "metadata for records that represent editorial updates to the DOI '{doi}'" },
  "is_update" => { "possible_values" => nil, "description" => "metadata for records that represent editorial updates" },
  "has_update_policy" => { "possible_values" => nil, "description" => "metadata for records that include a link to an editorial update policy" },
  "container_title" => { "possible_values" => nil, "description" => "metadata for records with an exact matching publication title" },
  "publisher_name" => { "possible_values" => nil, "description" => "metadata for records with an exact matching publisher name" },
  "category_name" => { "possible_values" => nil, "description" => "metadata for records with an exact matching category label" },
  "type_name" => { "possible_values" => nil, "description" => "metadata for records with an exactly matching type label" },
  "award_number" => { "possible_values" => "{award_number}", "description" => "metadata for records with a matching award number. Optionally combine with 'award_funder'" },
  "award_funder" => { "possible_values" => "{funder doi or id}", "description" => "metadata for records with an award with matching funder. Optionally combine with 'award_number'" },
  "assertion_group" => { "possible_values" => nil, "description" => "metadata for records with an assertion in a particular group" },
  "assertion" => { "possible_values" => nil, "description" => "metadata for records with a particular named assertion" },
  "affiliation" => { "possible_values" => nil, "description" => "metadata for records with at least one contributor with the given affiliation" },
  "has_affiliation" => { "possible_values" => nil, "description" => "metadata for records that have any affiliation information" },
  "alternative_id" => { "possible_values" => nil, "description" => "metadata for records with the given alternative ID, which may be a publisher-specific ID, or any other identifier a publisher may have provided" },
  "article_number" => { "possible_values" => nil, "description" => "metadata for records with a given article number" }
}
@@ -0,0 +1,26 @@
1
+ # taken from: https://viget.com/extend/easy-gem-configuration-variables-with-defaults
2
# Mixin that gives the extending module or class simple named settings.
# Each setting is stored in a class variable and exposed through a
# singleton reader and writer.
module Configuration
  # Yields the receiver so settings can be assigned inside a block.
  #
  # @return the value of the block
  def configuration
    yield self
  end

  # Declares a setting called `name`, initialised to `default`, and defines
  # `name` / `name=` singleton methods that read and write it.
  #
  # @param name [Symbol, String] setting name
  # @param default initial value (nil when omitted)
  def define_setting(name, default = nil)
    var_name = "@@#{name}"
    class_variable_set(var_name, default)

    define_class_method("#{name}=") { |value| class_variable_set(var_name, value) }
    define_class_method(name) { class_variable_get(var_name) }
  end

  private

  # Defines `name` on the receiver's singleton class with `block` as body.
  def define_class_method(name, &block)
    singleton_class.send(:define_method, name, &block)
  end
end
@@ -0,0 +1,65 @@
1
+ require 'nokogiri'
2
+ require 'uuidtools'
3
+
4
# Classifies a response by its content-type header.
#
# Only the media type is compared: parameters after ';' (for example
# "; charset=utf-8"), surrounding whitespace and letter case are ignored, so
# "text/xml; charset=UTF-8" is recognised as XML.
#
# @param x an object whose `headers['content-type']` holds the header value
# @return [String, nil] 'xml', 'plain' or 'pdf'; nil for anything else,
#   including a missing header
def detect_type(x)
  ctype = x.headers['content-type']
  media_type = ctype.to_s.split(';').first.to_s.strip.downcase

  case media_type
  when 'text/xml'
    'xml'
  when 'text/plain'
    'plain'
  when 'application/pdf'
    'pdf'
  end
end
15
+
16
# Maps a type label to a file extension.
#
# @param x [String] 'xml', 'plain' or 'pdf'
# @return [String, nil] 'xml', 'txt' or 'pdf'; nil for any other value
def make_ext(x)
  if 'xml' == x
    'xml'
  elsif 'plain' == x
    'txt'
  elsif 'pdf' == x
    'pdf'
  end
end
26
+
27
# Builds a file name of the form "<random uuid>.<extension>".
#
# The extension comes from make_ext(type); the UUID from UUIDTools.
#
# @param type [String] type label understood by make_ext
# @return [String] the generated file name
def make_path(type)
  extension = make_ext(type)
  UUIDTools::UUID.random_create.to_s + '.' + extension
end
36
+
37
# Writes a response body to `path` in binary mode.
#
# The block form of File.open closes the handle even when the write raises.
#
# @param res an object responding to `body`
# @param path [String] destination file path
# @return [nil]
def write_disk(res, path)
  File.open(path, "wb") { |f| f.write(res.body) }
  nil
end
42
+
43
# Reads the whole file at `path`.
#
# @param path [String] file to read
# @return [String] the file contents
def read_disk(path)
  File.read(path)
end
46
+
47
# Reads the file at path `x` and parses its contents with Nokogiri.
#
# @param x [String] path of the file to parse
# @return the result of Nokogiri.parse
def parse_xml(x)
  Nokogiri.parse(read_disk(x))
end
52
+
53
# Returns the contents of the file at path `x`, as read by read_disk.
#
# @param x [String] path of the file to read
# @return the value returned by read_disk
def parse_plain(x)
  read_disk(x)
end
57
+
58
# Placeholder for PDF parsing; always raises.
#
# @param x ignored
# @raise [RuntimeError] with the message "not ready yet"
def parse_pdf(x)
  raise RuntimeError, "not ready yet"
end
61
+
62
# Reports whether `x` contains the text 'elsevier' (case-sensitive).
#
# @param x [String] text to inspect
# @return [Boolean] true when `x.match('elsevier')` finds a match
def is_elsevier(x)
  !x.match('elsevier').nil?
end