cites 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/crref +9 -0
- data/bin/crsearch +4 -0
- data/lib/cites.rb +346 -0
- metadata +133 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7981c7021e09b78536ea2da88b1babc9e6d93f4e
|
4
|
+
data.tar.gz: 57d1535b97db170b66f20581fa20ce5f0fade62b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 52d393ca56353d91ee08a4967063813e4982dd4a5ccdd18ee400014be19f09ca3df574ac2e14044465bb601ada03aae7fc2972f1042b430afc6320925cc2f0a3
|
7
|
+
data.tar.gz: 74a13bd74953774843af9be7d4a30baeacf5d55c2391201ba02ad3c6800978615f2a1213c2f3cabeefd76d67e10da922f45e542d645f45be9e947030cc12953b
|
data/bin/crref
ADDED
data/bin/crsearch
ADDED
data/lib/cites.rb
ADDED
@@ -0,0 +1,346 @@
|
|
1
|
+
require 'api_cache'
|
2
|
+
require 'bibtex'
|
3
|
+
require 'digest/sha1'
|
4
|
+
require 'httparty'
|
5
|
+
require 'json'
|
6
|
+
require 'moneta'
|
7
|
+
|
8
|
+
##
# Check the HTTP status code of a content-negotiation response.
#
# See CrossCite documentation http://crosscite.org/cn/
#
# Args:
# * code: Integer HTTP status code of the response
#
# Returns true when the code is 200.
#
# Raises RuntimeError with a descriptive message for every other code, so a
# failed request can never be mistaken for an empty but successful one.
def response_ok(code)
  case code
  when 200
    true
  when 204
    raise "The request was OK but there was no metadata available (response code: #{code})"
  when 404
    raise "The DOI requested doesn't exist (response code: #{code})"
  when 406
    raise "Can't serve any requested content type (response code: #{code})"
  when 500...600
    raise "ZOMG ERROR #{code}"
  else
    # Previously any code not listed above fell through and returned nil,
    # which let callers carry on with nil content.
    raise "Unexpected response code: #{code}"
  end
end
|
23
|
+
|
24
|
+
# Cites: The single class (for now) in cites
|
25
|
+
|
26
|
+
class Cites

  class << self; attr_accessor :cache_location end

  # Dir.home also works when the HOME environment variable is unset, where
  # ENV['HOME'] + '...' would fail with NoMethodError at load time.
  @cache_location = File.join(Dir.home, '.cites', 'cache')

  # Supported format names mapped to the media type sent in the Accept header.
  FORMATS = {
    "rdf-xml"       => "application/rdf+xml",
    "turtle"        => "text/turtle",
    "citeproc-json" => "application/vnd.citationstyles.csl+json",
    "text"          => "text/x-bibliography",
    "ris"           => "application/x-research-info-systems",
    "bibtex"        => "application/x-bibtex",
    "crossref-xml"  => "application/vnd.crossref.unixref+xml",
    "datacite-xml"  => "application/vnd.datacite.datacite+xml"
  }.freeze

  ##
  # Get a single citation in various formats from a DOI
  #
  # Args:
  # * doi: A DOI, either bare ('10.1371/journal.pone.0000308') or already
  #   prefixed with the resolver URL ('http://dx.doi.org/10.1371/...')
  # * format: one of rdf-xml, turtle, citeproc-json, text, ris, bibtex,
  #   crossref-xml, datacite-xml
  # * style: Only used if format='text', e.g., apa, harvard3
  # * locale: Only used if format='text'. A locale, e.g., en-US
  # * cache: Should cache be used
  #   * true: Try fetching from cache and store to cache (default)
  #   * false: Do not use the cache at all
  #   * 'flush': Get a fresh response and cache it
  #
  # Returns the response body. For format='bibtex' the body is run through
  # BibTeX.parse and converted back to a String.
  #
  # Raises ArgumentError for an unknown format and RuntimeError for an
  # invalid cache value or (when cache is false) a failed request.
  # NOTE(review): when the cache is in use, a failed request prints the error
  # and calls +exit+ (status 0), ending the process instead of raising.
  def self.getcite(doi, format='text', style='apa', locale='en-US',
    cache=true)
    formatuse = FORMATS[format]
    if formatuse.nil?
      # An unknown format used to be sent as a nil Accept header.
      raise ArgumentError,
            "Unknown format #{format.inspect}, must be one of: #{FORMATS.keys.join(', ')}"
    end

    type = format == 'text' ? "#{formatuse}; style=#{style}; locale=#{locale}" : formatuse

    # Accept DOIs that already carry the resolver prefix so it is not
    # prepended twice. Bare DOIs yield the same URL (and cache key) as before.
    url = 'http://dx.doi.org/' + doi.sub(%r{\Ahttps?://(?:dx\.)?doi\.org/}, '')

    case cache
    when true
      cache_time = 6000
      msg = "Requested DOI not in cache or is stale, requesting..."
    when 'flush'
      # A one second validity makes any previously stored entry stale.
      cache_time = 1
      msg = "Flushing cache, requesting..."
    when false
      cache_time = nil
    else
      fail "Invalid cache value #{cache}"
    end

    # Performs the request. Gives back the body when the response code is ok
    # (200); response_ok raises for the error codes it knows about.
    fetch = lambda do
      response = HTTParty.get(url, :headers => {"Accept" => type})
      response.body if response_ok(response.code)
    end

    if cache_time
      # Keep cache data valid forever
      # [todo] - should using cache be reported?

      # Create a cache key based on the DOI requested + the type of content
      cache_key = Digest::SHA1.hexdigest("#{url}-#{type}")

      content = APICache.get(cache_key, :cache => cache_time,
                                        :valid => :forever, :period => 0,
                                        :timeout => 30) do
        puts msg
        begin
          fetch.call
        rescue StandardError => e
          # StandardError rather than Exception, so Interrupt and other
          # non-standard exceptions are no longer swallowed here.
          puts e.message
          puts "Format requested: #{formatuse}"
          exit
        end
      end
    else
      puts "Not using cache, requesting..."
      content = fetch.call
    end

    format == 'bibtex' ? BibTeX.parse(content).to_s : content
  end

  ##
  # Get a citation in various formats from a DOI
  #
  # Args:
  # * doi: A DOI (String) or an Array of DOIs
  # * format: one of rdf-xml, turtle, citeproc-json, text, ris, bibtex,
  #   crossref-xml, datacite-xml
  # * style: Only used if format='text', e.g., apa, harvard3
  # * locale: A locale, e.g., en-US
  # * cache: Should cache be used
  #   * true: Try fetching from cache and store to cache (default)
  #   * false: Do not use the cache at all
  #   * 'flush': Get a fresh response and cache it
  #
  # Examples:
  #     require 'cites'
  #     Cites.doi2cit('10.1371/journal.pone.0000308')
  #     Cites.doi2cit('10.1371/journal.pbio.0030427')
  #     Cites.doi2cit('10.1371/journal.pbio.0030427', 'crossref-xml')
  #     Cites.doi2cit('10.1371/journal.pbio.0030427', 'bibtex')
  #     Cites.doi2cit('10.1371/journal.pbio.0030427', 'ris')
  #
  #     out = Cites.doi2cit(['10.1371/journal.pone.0000308','10.1371/journal.pbio.0030427','10.1371/journal.pone.0084549'], 'bibtex')
  #     Cites.show(out)
  #
  # Returns an array of citation content, one element per DOI. The structure
  # of the content will depend on the format requested; 'citeproc-json'
  # content is parsed with JSON.parse.
  #
  # Raises RuntimeError when doi is neither a String nor an Array.
  def self.doi2cit(doi, format='text', style='apa', locale='en-US',
    cache=true)
    dois =
      case doi
      when String then [doi]
      when Array  then doi
      else fail 'doi must be one of String or Array class'
      end

    dois.map do |item|
      content = Cites.getcite(item, format, style, locale, cache)
      format == 'citeproc-json' ? JSON.parse(content) : content
    end
  end

  ##
  # Print every element of input to standard output, each followed by blank
  # lines.
  #
  # Args:
  # * input: An enumerable of citations, e.g. the result of Cites.doi2cit
  #
  # Returns input.
  def self.show(input)
    input.each do |citation|
      # display writes the element itself and returns nil; puts then adds the
      # line breaks.
      puts citation.display, "\n"
    end
  end

  ##
  # match: Look for matches to free-form citations to DOIs for an object (article, book, etc). in CrossRef
  #
  # Args:
  # * query: A free form string of terms, or an Array of such strings.
  #
  # Examples:
  #     require 'cites'
  #     Cites.match('Piwowar sharing data increases citation PLOS')
  #     Cites.match('boettiger Modeling stabilizing selection')
  #     Cites.match(['Piwowar sharing data increases citation PLOS', 'boettiger Modeling stabilizing selection'])
  #     out = Cites.match(['piwowar sharing data increases citation PLOS',
  #                        'boettiger Modeling stabilizing selection',
  #                        'priem Using social media to explore scholarly impact',
  #                        'fenner Peroxisome ligands for the treatment of breast cancer'])
  #     out.map {|i| i['doi']}
  #
  #     # Feed into the doi2cit method
  #     Cites.doi2cit(out.map {|i| i['doi']})
  #
  # Returns an Array of Hashes with the keys 'match', 'doi' (with the
  # 'http://dx.doi.org/' prefix removed) and 'text'. The Array is empty when
  # the response carries no results.
  #
  # Raises RuntimeError when query is neither a String nor an Array.
  def self.match(query)
    queries =
      case query
      when String then [query]
      when Array  then query
      else fail 'query must be one of String or Array class'
      end

    url = "http://search.labs.crossref.org/links"
    out = HTTParty.post(url,
                        :body => queries.to_json,
                        :headers => { "Content-Type" => "application/json"})
    puts "ERROR #{out.code}" unless out.code == 200

    # A failed request may not carry a parsed Hash body; treat that as "no
    # results" instead of failing with NoMethodError on nil.
    parsed = out.parsed_response
    results = parsed.is_a?(Hash) ? (parsed['results'] || []) : []

    results.map do |item|
      found = item['doi']
      found = found.sub('http://dx.doi.org/', '') if found
      {
        'match' => item['match'],
        'doi'   => found,
        'text'  => item['text']
      }
    end
  end

  ##
  # search: Search for scholarly objects in CrossRef
  #
  # Args:
  # * query: A single or many terms (in an array). This function performs
  #   a single search if multiple terms are supplied.
  # * options: A Hash with any of the following keys
  #   * :doi: A DOI to search for.
  #     [todo] - DOI lookup is not implemented: if :doi is supplied no
  #     request is made and nil is returned.
  #   * :page: Page number to return.
  #   * :rows: Number of records to return (default 10)
  #   * :sort: Sort (logical)
  #   * :year: Year to restrict search to.
  #   * :header: Wrap the result in a Hash with 'meta' and 'items' keys
  #     (default true)
  #   * :fields: Keys to keep for every item
  #     (default doi, normalizedScore, title, year)
  #
  # Examples:
  #     require 'cites'
  #     Cites.search(query='renear')
  #     Cites.search('palmer')
  #     Cites.search(['ecology', 'microbiology'])
  #     out = Cites.search(['renear', 'science', 'smith birds'])
  #     out['items'].map {|i| i['doi']}
  #
  #     Cites.search('science', :rows => 5)
  #
  #     # Feed into the doi2cit method
  #     out = Cites.search('palmer', :header => false)
  #     g = Cites.doi2cit(out[1]['doi'], format='bibtex')
  #     Cites.show(g)
  #
  # Returns a Hash {'meta' => ..., 'items' => [...]} when :header is true,
  # otherwise just the Array of items.
  #
  # Raises RuntimeError when query is neither a String nor an Array.
  def self.search(query, options = {})
    defaults = {:doi => nil, :page => nil, :rows => 10,
                :sort => nil, :year => nil, :header => true,
                :fields => ["doi","normalizedScore","title","year"]}
    options = defaults.merge(options)
    fields = options.delete(:fields)

    query =
      case query
      when String then query
      when Array  then query.join('+')
      else fail 'query must be one of String or Array class'
      end

    # [todo] - searching by DOI is not implemented yet
    return nil unless options[:doi].nil?

    url = "http://search.labs.crossref.org/dois"

    # [fixme] - looks like "rows" option isn't working like it's supposed to
    args = {"q" => query, "page" => options[:page], "rows" => options[:rows],
            "sort" => options[:sort], "year" => options[:year],
            "header" => options[:header]}.reject { |_, value| value.nil? }
    out = HTTParty.get(url, :query => args)
    puts "ERROR #{out.code}" unless out.code == 200

    # A failed request may not carry a parsed Hash body; treat that as "no
    # items" instead of failing with NoMethodError on nil.
    parsed = out.parsed_response
    parsed = {} unless parsed.is_a?(Hash)

    coll = (parsed['items'] || []).map do |item|
      item.select { |key, _| fields.include?(key) }
    end
    return coll unless options[:header] == true

    # Everything except the items themselves is reported as metadata. Built
    # with reject so it does not rely on any Hash#except implementation.
    meta = parsed.reject { |key, _| key == 'items' }
    {'meta' => meta, 'items' => coll}
  end

  ##
  # setcache: [todo] - not implemented. The cache directory is exposed
  # through the Cites.cache_location accessor.
end
|
334
|
+
|
335
|
+
class Hash
  # Ruby >= 3.0 (and ActiveSupport) already provide a non-mutating, variadic
  # Hash#except. Only define a fallback when it is missing, so the built-in
  # method is never replaced by an incompatible one.
  unless method_defined?(:except)
    ##
    # Copy of the hash without the given keys.
    #
    # Args:
    # * keys: One or more keys to leave out
    #
    # Returns a new Hash; the receiver is not modified.
    def except(*keys)
      reject { |key, _| keys.include?(key) }
    end
  end
end
|
340
|
+
|
341
|
+
# [fixme] - Setting the cache_location should really be handled by a method
|
342
|
+
# but since all the methods in class Cites are static setting the cache
|
343
|
+
# has to be done manually in each static method (because we don't know which
|
344
|
+
# is called first), or else we would need a proper initializer.
|
345
|
+
|
346
|
+
# Load-time side effect: install a file-backed Moneta store, rooted at the
# value Cites.cache_location has at this point, as the APICache store.
APICache.store = Moneta.new(:File, dir: Cites::cache_location)
|
metadata
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cites
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Scott Chamberlain
|
8
|
+
- Joona Lehtomäki
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2014-02-22 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bibtex-ruby
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '3.0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '3.0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: httparty
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0.12'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0.12'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: thor
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0.18'
|
49
|
+
type: :runtime
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0.18'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: json
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - "~>"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '1.8'
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '1.8'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: api_cache
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - "~>"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0.2'
|
77
|
+
type: :runtime
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - "~>"
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0.2'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: moneta
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - "~>"
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0.7'
|
91
|
+
type: :runtime
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - "~>"
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0.7'
|
98
|
+
description: Search for articles, and get citations from DOIs
|
99
|
+
email: myrmecocystus@gmail.com
|
100
|
+
executables:
|
101
|
+
- crsearch
|
102
|
+
- crref
|
103
|
+
extensions: []
|
104
|
+
extra_rdoc_files: []
|
105
|
+
files:
|
106
|
+
- bin/crref
|
107
|
+
- bin/crsearch
|
108
|
+
- lib/cites.rb
|
109
|
+
homepage: http://github.com/sckott/cites
|
110
|
+
licenses:
|
111
|
+
- MIT
|
112
|
+
metadata: {}
|
113
|
+
post_install_message:
|
114
|
+
rdoc_options: []
|
115
|
+
require_paths:
|
116
|
+
- lib
|
117
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '0'
|
122
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
|
+
requirements:
|
124
|
+
- - ">="
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: '0'
|
127
|
+
requirements: []
|
128
|
+
rubyforge_project:
|
129
|
+
rubygems_version: 2.2.0
|
130
|
+
signing_key:
|
131
|
+
specification_version: 4
|
132
|
+
summary: Gets citations from DOIs
|
133
|
+
test_files: []
|