serrano 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/serrano/cn.rb ADDED
@@ -0,0 +1,30 @@
1
+ require "serrano/version"
2
+ require "serrano/cnrequest"
3
+
4
+ ##
5
+ # ContentNegotiation - Content Negotiation class
6
+ #
7
+ # @see http://www.crosscite.org/cn/ for details
8
module Serrano
  # Bundles the arguments of a content negotiation lookup (identifiers,
  # output format, citation style, locale) and hands them to CNRequest.
  class ContentNegotiation
    attr_accessor :ids, :format, :style, :locale

    # Stores the four settings through their writers. Everything except
    # +ids+ is optional and falls back to "bibtex", "apa" and "en-US".
    def initialize(ids, format = "bibtex", style = "apa", locale = "en-US")
      self.ids, self.format, self.style, self.locale = ids, format, style, locale
    end

    # Builds a CNRequest from the current settings and returns whatever
    # its #perform returns.
    def cn
      request = CNRequest.new(ids, format, style, locale)
      request.perform
    end
  end
end
@@ -0,0 +1,83 @@
1
+ require "faraday"
2
+ require "faraday_middleware"
3
+ require "multi_json"
4
+ require "serrano/errors"
5
+ require "serrano/constants"
6
+ require 'serrano/helpers/configuration'
7
+
8
+ ##
9
+ # Serrano::CNRequest
10
+ #
11
+ # Class to perform HTTP requests to the Crossref API
12
module Serrano
  # Performs content negotiation requests for one or more identifiers.
  class CNRequest #:nodoc:

    attr_accessor :ids
    attr_accessor :format
    attr_accessor :style
    attr_accessor :locale

    # ids    - a single identifier or a collection of identifiers
    # format - one of the names listed in $cn_formats
    # style  - citation style, forwarded to make_request
    # locale - locale, forwarded to make_request
    def initialize(ids, format, style, locale)
      self.ids = ids
      self.format = format
      self.style = style
      self.locale = locale
    end

    # Validates the format, (re)creates the shared $conn connection that
    # make_request reads, and issues one request per identifier.
    #
    # Returns the single response body when +ids+ has length 1, otherwise
    # an Array with one body per identifier.
    # Raises RuntimeError when +format+ is not listed in $cn_formats.
    def perform
      unless $cn_formats.include?(self.format)
        raise "format not one of accepted types"
      end

      $conn = Faraday.new("http://dx.doi.org/") do |c|
        c.use FaradayMiddleware::FollowRedirects
        c.adapter :net_http
      end

      if self.ids.length == 1
        # A one-element Array used to be handed to make_request as-is, which
        # then failed because it needs a String. Unwrap the element instead.
        make_request(Array(self.ids).first, self.format, self.style, self.locale)
      else
        Array(self.ids).map do |id|
          make_request(id, self.format, self.style, self.locale)
        end
      end
    end
  end
end
49
+
50
# Fetches one identifier in the requested format and returns the response body.
#
# ids    - identifier String; appended to the request URL
# format - format name used to look up the media type in $cn_format_headers
# style  - citation style; only used when format is "text"
# locale - locale; only used when format is "text"
#
# "citeproc-json" goes to the api.crossref.org works route on a fresh
# connection; every other format is requested through the shared $conn
# connection with the media type sent as the Accept header.
def make_request(ids, format, style, locale)
  # Exact lookup. The previous substring match over the keys could pick the
  # header of a different format (e.g. "xml" silently selected "rdf-xml").
  type = $cn_format_headers[format]

  if format == "citeproc-json"
    endpt = "http://api.crossref.org/works/" + ids + "/" + type
    res = Faraday.new(:url => endpt).get
  else
    if format == "text"
      type = type + "; style = " + style + "; locale = " + locale
    end

    res = $conn.get do |req|
      req.url ids
      req.headers['Accept'] = type
    end
  end

  res.body
end
70
+
71
+ # parser <- cn_types[[self.format]]
72
+ # if (raw) {
73
+ # content(response, "text")
74
+ # } else {
75
+ # out <- content(response, "parsed", parser, "UTF-8")
76
+ # if (format == "text") {
77
+ # out <- gsub("\n", "", out)
78
+ # }
79
+ # if (format == "bibentry") {
80
+ # out <- parse_bibtex(out)
81
+ # }
82
+ # out
83
+ # }
@@ -0,0 +1,36 @@
1
+ require 'net/http'
2
+
3
# Exception classes grouped under the name NETWORKABLE_EXCEPTIONS.
NETWORKABLE_EXCEPTIONS = [
  Faraday::Error::ClientError,
  URI::InvalidURIError,
  Encoding::UndefinedConversionError,
  ArgumentError,
  NoMethodError,
  TypeError
]
9
+
10
# Format names accepted for content negotiation.
$cn_formats = ["rdf-xml", "turtle", "citeproc-json",
               "citeproc-json-ish", "text", "ris", "bibtex",
               "crossref-xml", "datacite-xml", "bibentry",
               "crossref-tdm"]

# Media type requested for each format name. "citeproc-json-ish" was accepted
# by $cn_formats but had no entry here, so no usable Accept header was sent.
# It is listed after "citeproc-json" so that key stays first in the hash.
$cn_format_headers = {"rdf-xml" => "application/rdf+xml",
                      "turtle" => "text/turtle",
                      "citeproc-json" => "transform/application/vnd.citationstyles.csl+json",
                      "citeproc-json-ish" => "application/vnd.citationstyles.csl+json",
                      "text" => "text/x-bibliography",
                      "ris" => "application/x-research-info-systems",
                      "bibtex" => "application/x-bibtex",
                      "crossref-xml" => "application/vnd.crossref.unixref+xml",
                      "datacite-xml" => "application/vnd.datacite.datacite+xml",
                      "bibentry" => "application/x-bibtex",
                      "crossref-tdm" => "application/vnd.crossref.unixsd+xml"}

# Content type associated with each format name.
$cn_types = {"rdf-xml" => "text/xml",
             "turtle" => "text/plain",
             "citeproc-json" => "application/json",
             "citeproc-json-ish" => "application/json",
             "text" => "text/plain",
             "ris" => "text/plain",
             "bibtex" => "text/plain",
             "crossref-xml" => "text/xml",
             "datacite-xml" => "text/xml",
             "bibentry" => "text/plain",
             "crossref-tdm" => "text/xml"}
@@ -0,0 +1,67 @@
1
+ require 'net/http'
2
+
3
# Converts an exception raised during an HTTP request into a plain Hash.
#
# url     - URL that was being requested; appended to the message
# error   - the rescued exception
# options - optional Hash; options[:data][:rev] is appended to the message
#           when the status maps to Net::HTTPConflict
#
# Returns { error: <message>, status: <status code> }.
def rescue_faraday_error(url, error, options = {})
  details = nil

  if error.is_a?(Faraday::Error::TimeoutError)
    status = 408
  elsif error.respond_to?(:status)
    # Exceptions are not indexable; read the attribute (was error[:status]).
    status = error.status
  elsif error.respond_to?(:response) && response_present?(error.response)
    status = error.response[:status]
    details = error.response[:body]
  else
    status = 400
  end

  # Some sources use a different status for rate-limiting errors.
  # details is nil unless a response body was available, hence to_s.
  status = 429 if status == 403 && details.to_s.include?("Excessive use detected")

  class_name = class_name_by_status(status) || error.class

  message = parse_error_response(error.message)
  message = "#{message} for #{url}"
  if class_name == Net::HTTPConflict
    # options defaults to {}, so :data may be missing entirely.
    rev = (options[:data] || {})[:rev]
    message = "#{message} with rev #{rev}"
  end

  { error: message, status: status }
end

# True when +response+ is neither nil nor empty. Plain-Ruby stand-in for
# ActiveSupport's present?, which this file does not load.
def response_present?(response)
  return false if response.nil?
  !(response.respond_to?(:empty?) && response.empty?)
end
36
+
37
# Extracts a readable error message from +string+.
#
# When +string+ is JSON it is decoded; if the decoded value is a Hash with an
# 'error' entry, that entry is returned, otherwise the decoded value is.
# Anything that is not JSON is returned unchanged.
def parse_error_response(string)
  parsed = begin
    MultiJson.load(string)
  rescue MultiJson::ParseError
    # Plain-text messages are the common case; they used to surface as a
    # ParseError here instead of being passed through.
    string
  end
  # JSON null/false carry no message, so fall back to the raw text.
  parsed ||= string

  parsed = parsed['error'] if parsed.is_a?(Hash) && parsed['error']
  parsed
end
44
+
45
# Reports whether +string+ can be decoded by MultiJson.
#
# Returns true when decoding succeeds and false when MultiJson raises a
# ParseError. The rescue branch used to return the exception's cause, which
# made invalid input look like valid JSON to callers using this as a predicate.
def is_json?(string)
  MultiJson.load(string)
  true
rescue MultiJson::ParseError
  false
end
51
+
52
# Looks up the Net::HTTP response class for a numeric status code.
# Returns nil for any status that is not in the table.
def class_name_by_status(status)
  response_classes = {
    400 => Net::HTTPBadRequest,
    401 => Net::HTTPUnauthorized,
    403 => Net::HTTPForbidden,
    404 => Net::HTTPNotFound,
    406 => Net::HTTPNotAcceptable,
    408 => Net::HTTPRequestTimeOut,
    409 => Net::HTTPConflict,
    417 => Net::HTTPExpectationFailed,
    429 => Net::HTTPTooManyRequests,
    500 => Net::HTTPInternalServerError,
    502 => Net::HTTPBadGateway,
    503 => Net::HTTPServiceUnavailable,
    504 => Net::HTTPGatewayTimeOut
  }
  response_classes[status]
end
67
+
@@ -0,0 +1,58 @@
1
+ # helper functions
2
module Serrano
  class Request #:nodoc:

    private

    # Filter names that map onto dotted API names instead of plain hyphenated ones.
    $others = ['license_url', 'license_version', 'license_delay', 'full_text_version',
               'full_text_type', 'award_number', 'award_funder']

    # Turns a Hash of filters into the "name:value,name:value" string form.
    # Names listed in $others are rewritten to their dotted form first; every
    # remaining underscore then becomes a hyphen. Returns nil when x is nil.
    def filter_handler(x = nil)
      return nil if x.nil?

      dotted = {
        'license_url' => 'license.url',
        'license_version' => 'license.version',
        'license_delay' => 'license.delay',
        'full_text_version' => 'full-text.version',
        'full_text_type' => 'full-text.type',
        'award_number' => 'award.number',
        'award_funder' => 'award.funder'
      }

      filters = stringify(x)
      api_names = filters.keys.map do |key|
        name = key.to_s
        name = dotted[name] if $others.include?(name)
        name.gsub("_", "-")
      end

      renamed = rename_keys(filters, api_names)
      renamed.map { |pair| pair.join(":") }.join(',')
    end

    # Copy of x whose keys have been converted with to_s.
    def stringify(x)
      x.keys.map(&:to_s).zip(x.values).to_h
    end

    # Hash pairing the names in y, in order, with the values of x.
    def rename_keys(x, y)
      y.zip(x.values).to_h
    end

  end

end
@@ -0,0 +1,84 @@
1
+ ##
2
+ # Serrano::Filters
3
+ #
4
+ # Information on Crossref API filters
5
+ #
6
+ # @example
7
+ # # List filter names
8
+ # Serrano::Filters.names
9
+ # # List filter values and description
10
+ # Serrano::Filters.filters
11
+ # Serrano::Filters.filters['has_funder']
12
+ # Serrano::Filters.filters['has_funder']['description']
13
module Serrano
  # Read-only access to the filter tables held in $filter_list and
  # $filter_details.
  module Filters
    class << self
      # The list of filter names ($filter_list).
      def names
        $filter_list
      end

      # The Hash of per-filter details ($filter_details).
      def filters
        $filter_details
      end
    end
  end
end
24
+
25
# Names of the supported filters. 'award_number' and 'award_funder' were
# missing here although both are described in $filter_details.
$filter_list = [
  'has_funder','funder','prefix','member','from_index_date','until_index_date',
  'from_deposit_date','until_deposit_date','from_update_date','until_update_date',
  'from_first_deposit_date','until_first_deposit_date','from_pub_date','until_pub_date',
  'has_license','license_url','license_version','license_delay','has_full_text',
  'full_text_version','full_text_type','public_references','has_references','has_archive',
  'archive','has_orcid','orcid','issn','type','directory','doi','updates','is_update',
  'has_update_policy','container_title','publisher_name','category_name','type_name',
  'from_created_date', 'until_created_date', 'affiliation', 'has_affiliation',
  'assertion_group', 'assertion', 'article_number', 'alternative_id',
  'award_number', 'award_funder'
]
36
+
37
# Per-filter details: the value placeholder each filter accepts (nil when
# none is listed) and a human-readable description.
$filter_details = {
  "has_funder" => { "possible_values" => nil, "description" => "metadata which includes one or more funder entry" },
  "funder" => { "possible_values" => "{funder_id}", "description" => "metadata which include the {funder_id} in FundRef data" },
  "prefix" => { "possible_values" => "{owner_prefix}", "description" => "metadata belonging to a DOI owner prefix {owner_prefix} (e.g. '10.1016' )" },
  "member" => { "possible_values" => "{member_id}", "description" => "metadata belonging to a CrossRef member" },
  "from_index_date" => { "possible_values" => '{date}', "description" => "metadata indexed since (inclusive) {date}" },
  "until_index_date" => { "possible_values" => '{date}', "description" => "metadata indexed before (inclusive) {date}" },
  "from_deposit_date" => { "possible_values" => '{date}', "description" => "metadata last (re)deposited since (inclusive) {date}" },
  "until_deposit_date" => { "possible_values" => '{date}', "description" => "metadata last (re)deposited before (inclusive) {date}" },
  "from_update_date" => { "possible_values" => '{date}', "description" => "Metadata updated since (inclusive) {date} Currently the same as 'from_deposit_date'" },
  "until_update_date" => { "possible_values" => '{date}', "description" => "Metadata updated before (inclusive) {date} Currently the same as 'until_deposit_date'" },
  "from_created_date" => { "possible_values" => '{date}', "description" => "metadata first deposited since (inclusive) {date}" },
  "until_created_date" => { "possible_values" => '{date}', "description" => "metadata first deposited before (inclusive) {date}" },
  "from_pub_date" => { "possible_values" => '{date}', "description" => "metadata where published date is since (inclusive) {date}" },
  "until_pub_date" => { "possible_values" => '{date}', "description" => "metadata where published date is before (inclusive) {date}" },
  "has_license" => { "possible_values" => nil, "description" => "metadata that includes any '<license_ref>' elements" },
  "license_url" => { "possible_values" => '{url}', "description" => "metadata where '<license_ref>' value equals {url}" },
  "license_version" => { "possible_values" => '{string}', "description" => "metadata where the '<license_ref>''s 'applies_to' attribute is '{string}'" },
  "license_delay" => { "possible_values" => "{integer}", "description" => "metadata where difference between publication date and the '<license_ref>''s 'start_date' attribute is <= '{integer}' (in days)" },
  "has_full_text" => { "possible_values" => nil, "description" => "metadata that includes any full text '<resource>' elements" },
  "full_text_version" => { "possible_values" => '{string}', "description" => "metadata where '<resource>' element's 'content_version' attribute is '{string}'" },
  "full_text_type" => { "possible_values" => '{mime_type}', "description" => "metadata where '<resource>' element's 'content_type' attribute is '{mime_type}' (e.g. 'application/pdf')" },
  "public_references" => { "possible_values" => nil, "description" => "metadata where publishers allow references to be distributed publicly" },
  "has_references" => { "possible_values" => nil, "description" => "metadata for works that have a list of references" },
  "has_archive" => { "possible_values" => nil, "description" => "metadata which include name of archive partner" },
  "archive" => { "possible_values" => '{string}', "description" => "metadata where value of archive partner is '{string}'" },
  "has_orcid" => { "possible_values" => nil, "description" => "metadata which includes one or more ORCIDs" },
  "orcid" => { "possible_values" => '{orcid}', "description" => "metadata where '<orcid>' element's value = '{orcid}'" },
  "issn" => { "possible_values" => '{issn}', "description" => "metadata where record has an ISSN = '{issn}' Format is 'xxxx-xxxx'." },
  "type" => { "possible_values" => '{type}', "description" => "metadata records whose type = '{type}' Type must be an ID value from the list of types returned by the '/types' resource" },
  "directory" => { "possible_values" => "{directory}", "description" => "metadata records whose article or serial are mentioned in the given '{directory}'. Currently the only supported value is 'doaj'" },
  "doi" => { "possible_values" => '{doi}', "description" => "metadata describing the DOI '{doi}'" },
  "updates" => { "possible_values" => '{doi}', "description" => "metadata for records that represent editorial updates to the DOI '{doi}'" },
  "is_update" => { "possible_values" => nil, "description" => "metadata for records that represent editorial updates" },
  "has_update_policy" => { "possible_values" => nil, "description" => "metadata for records that include a link to an editorial update policy" },
  "container_title" => { "possible_values" => nil, "description" => "metadata for records with an exact matching publication title" },
  "publisher_name" => { "possible_values" => nil, "description" => "metadata for records with an exact matching publisher name" },
  "category_name" => { "possible_values" => nil, "description" => "metadata for records with an exact matching category label" },
  "type_name" => { "possible_values" => nil, "description" => "metadata for records with an exactly matching type label" },
  "award_number" => { "possible_values" => "{award_number}", "description" => "metadata for records with a matching award number. Optionally combine with 'award_funder'" },
  "award_funder" => { "possible_values" => '{funder doi or id}', "description" => "metadata for records with an award with matching funder. Optionally combine with 'award_number'" },
  "assertion_group" => { "possible_values" => nil, "description" => "metadata for records with an assertion in a particular group" },
  "assertion" => { "possible_values" => nil, "description" => "metadata for records with a particular named assertion" },
  "affiliation" => { "possible_values" => nil, "description" => "metadata for records with at least one contributor with the given affiliation" },
  "has_affiliation" => { "possible_values" => nil, "description" => "metadata for records that have any affiliation information" },
  "alternative_id" => { "possible_values" => nil, "description" => "metadata for records with the given alternative ID, which may be a publisher-specific ID, or any other identifier a publisher may have provided" },
  "article_number" => { "possible_values" => nil, "description" => "metadata for records with a given article number" }
}
@@ -0,0 +1,26 @@
1
+ # taken from: https://viget.com/extend/easy-gem-configuration-variables-with-defaults
2
# Mixin for defining settings with defaults on the object that extends it.
module Configuration

  # Yields the extending object to the block so settings can be assigned.
  def configuration
    yield self
  end

  # Defines a setting called +name+: stores +default+ in the class variable
  # @@name and creates a reader (name) and a writer (name=) on the object.
  def define_setting(name, default = nil)
    storage = "@@#{name}"
    class_variable_set(storage, default)

    define_class_method("#{name}=") { |value| class_variable_set(storage, value) }
    define_class_method(name) { class_variable_get(storage) }
  end

  private

  # Defines +block+ as a singleton method called +name+ on the receiver.
  def define_class_method(name, &block)
    define_singleton_method(name, &block)
  end

end
@@ -0,0 +1,65 @@
1
+ require 'nokogiri'
2
+ require 'uuidtools'
3
+
4
# Maps a response's content type onto a short type name.
#
# x - object whose #headers returns a Hash-like with a 'content-type' entry
#
# Returns 'xml', 'plain' or 'pdf', or nil for any other (or missing) type.
def detect_type(x)
  # Compare the media type only: drop parameters such as "; charset=utf-8"
  # and ignore case, otherwise such headers never matched.
  ctype = x.headers['content-type'].to_s.split(';').first.to_s.strip.downcase
  case ctype
  when 'text/xml'
    'xml'
  when 'text/plain'
    'plain'
  when 'application/pdf'
    'pdf'
  end
end
15
+
16
# Translates a short type name ('xml', 'plain', 'pdf') into a file
# extension; returns nil for anything else.
def make_ext(x)
  extensions = { 'xml' => 'xml', 'plain' => 'txt', 'pdf' => 'pdf' }
  extensions[x]
end
26
+
27
# Builds a file name of the form "<random uuid>.<extension>", where the
# extension is whatever make_ext returns for +type+.
def make_path(type)
  extension = make_ext(type)
  basename = UUIDTools::UUID.random_create.to_s
  basename + '.' + extension
end
36
+
37
# Writes the body of +res+ to +path+ in binary mode.
#
# res  - object responding to #body
# path - destination file path
#
# Returns nil.
def write_disk(res, path)
  # Block form closes the handle even when the write raises; the previous
  # explicit close was skipped in that case.
  File.open(path, "wb") { |f| f.write(res.body) }
  nil
end
42
+
43
# Reads the file at +path+ and returns its contents as a String.
def read_disk(path)
  contents = File.read(path)
  contents
end
46
+
47
# Reads the file at path +x+ via read_disk and returns the result of
# passing its text to Nokogiri.parse.
def parse_xml(x)
  Nokogiri.parse(read_disk(x))
end
52
+
53
# Returns the text of the file at path +x+, as read by read_disk.
def parse_plain(x)
  read_disk(x)
end
57
+
58
# PDF parsing placeholder: always raises RuntimeError, whatever +x+ is.
def parse_pdf(x)
  raise RuntimeError, "not ready yet"
end
61
+
62
# True when +x+ contains the text "elsevier" (case-sensitive), false otherwise.
def is_elsevier(x)
  x.match(/elsevier/) ? true : false
end