cites 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/crref +9 -0
- data/bin/crsearch +4 -0
- data/lib/cites.rb +346 -0
- metadata +133 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7981c7021e09b78536ea2da88b1babc9e6d93f4e
|
4
|
+
data.tar.gz: 57d1535b97db170b66f20581fa20ce5f0fade62b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 52d393ca56353d91ee08a4967063813e4982dd4a5ccdd18ee400014be19f09ca3df574ac2e14044465bb601ada03aae7fc2972f1042b430afc6320925cc2f0a3
|
7
|
+
data.tar.gz: 74a13bd74953774843af9be7d4a30baeacf5d55c2391201ba02ad3c6800978615f2a1213c2f3cabeefd76d67e10da922f45e542d645f45be9e947030cc12953b
|
data/bin/crref
ADDED
data/bin/crsearch
ADDED
data/lib/cites.rb
ADDED
@@ -0,0 +1,346 @@
|
|
1
|
+
require 'api_cache'
|
2
|
+
require 'bibtex'
|
3
|
+
require 'digest/sha1'
|
4
|
+
require 'httparty'
|
5
|
+
require 'json'
|
6
|
+
require 'moneta'
|
7
|
+
|
8
|
+
# Check the HTTP status code of a content-negotiation response.
# See CrossCite documentation http://crosscite.org/cn/
#
# Args:
# * code: HTTP status code (Integer) of the response
#
# Returns true for 200 and false for any status code that is not handled
# explicitly below (e.g. redirects or other 4xx codes).
#
# Raises RuntimeError for 204, 404, 406 and any 5xx code.
def response_ok(code)
  case code
  when 200
    true
  when 204
    raise "The request was OK but there was no metadata available (response code: #{code})"
  when 404
    raise "The DOI requested doesn't exist (response code: #{code})"
  when 406
    raise "Can't serve any requested content type (response code: #{code})"
  when 500...600
    # Exclusive range: covers 500 through 599.
    raise "ZOMG ERROR #{code}"
  else
    # Previously fell through the case and returned an implicit nil; an
    # explicit false keeps callers' truthiness checks working unchanged.
    false
  end
end
|
23
|
+
|
24
|
+
# Cites: The single class (for now) in cites
|
25
|
+
|
26
|
+
class Cites

  class << self; attr_accessor :cache_location end

  # Directory used for the on-disk response cache. Falls back to Dir.home
  # when the HOME environment variable is unset (ENV['HOME'] is nil then).
  @cache_location = File.join(ENV['HOME'] || Dir.home, '.cites', 'cache')

  # Resolver prefix that turns a bare DOI into a URL.
  DOI_RESOLVER = 'http://dx.doi.org/'

  # Format names accepted by getcite/doi2cit, mapped to the MIME type that is
  # sent in the Accept header.
  FORMATS = {
    "rdf-xml" => "application/rdf+xml",
    "turtle" => "text/turtle",
    "citeproc-json" => "application/vnd.citationstyles.csl+json",
    "text" => "text/x-bibliography",
    "ris" => "application/x-research-info-systems",
    "bibtex" => "application/x-bibtex",
    "crossref-xml" => "application/vnd.crossref.unixref+xml",
    "datacite-xml" => "application/vnd.datacite.datacite+xml"
  }.freeze

  ##
  # Get a single citation in various formats from a DOI
  #
  # Args:
  # * doi: A DOI, either bare (10.1371/journal.pone.0000308) or already
  #   prefixed with http://dx.doi.org/
  # * format: one of rdf-xml, turtle, citeproc-json, text, ris, bibtex,
  #   crossref-xml, datacite-xml
  # * style: Only used if format='text', e.g., apa, harvard3
  # * locale: Only used if format='text'. A locale, e.g., en-US
  # * cache: Should cache be used
  #   * true: Try fetching from cache and store to cache (default)
  #   * false: Do not use cache at all
  #   * 'flush': Get a fresh response and cache it
  #
  # Returns the response body; for format='bibtex' the body is parsed with
  # BibTeX.parse and converted back to a string.
  #
  # Raises RuntimeError if format or cache is not one of the values above.
  # With cache=false, errors raised by response_ok propagate to the caller.
  #
  def self.getcite(doi, format='text', style='apa', locale='en-US',
                   cache=true)
    formatuse = FORMATS[format]
    # Fail before any request is made instead of sending a nil Accept header.
    fail "Unknown format #{format}" if formatuse.nil?

    type = format == 'text' ? "#{formatuse}; style=#{style}; locale=#{locale}" : formatuse
    url = doi_url(doi)

    content =
      case cache
      when true
        fetch_cached(url, type, formatuse, 6000,
                     "Requested DOI not in cache or is stale, requesting...")
      when 'flush'
        # A one second cache time makes practically every call refetch.
        fetch_cached(url, type, formatuse, 1, "Flushing cache, requesting...")
      when false
        puts "Not using cache, requesting..."
        response = HTTParty.get(url, :headers => {"Accept" => type})
        response.body if response_ok(response.code)
      else
        fail "Invalid cache value #{cache}"
      end

    format == 'bibtex' ? BibTeX.parse(content).to_s : content
  end

  ##
  # Get a citation in various formats from a DOI
  #
  # Args:
  # * doi: A DOI (String) or an Array of DOIs
  # * format: one of rdf-xml, turtle, citeproc-json, text, ris, bibtex,
  #   crossref-xml, datacite-xml
  # * style: Only used if format='text', e.g., apa, harvard3
  # * locale: A locale, e.g., en-US
  # * cache: Should cache be used
  #   * true: Try fetching from cache and store to cache (default)
  #   * false: Do not use cache at all
  #   * 'flush': Get a fresh response and cache it
  #
  # Examples:
  #     require 'cites'
  #     Cites.doi2cit('10.1371/journal.pone.0000308')
  #     Cites.doi2cit('10.1371/journal.pbio.0030427')
  #     Cites.doi2cit('10.1371/journal.pbio.0030427', 'crossref-xml')
  #     Cites.doi2cit('10.1371/journal.pbio.0030427', 'bibtex')
  #     Cites.doi2cit('10.1371/journal.pbio.0030427', 'ris')
  #
  #     out = Cites.doi2cit(['10.1371/journal.pone.0000308','10.1371/journal.pbio.0030427','10.1371/journal.pone.0084549'], 'bibtex')
  #     Cites.show(out)
  #
  # Returns an array of citation content, one element per DOI. The structure
  # of the content will depend on the format requested; for 'citeproc-json'
  # each element is the parsed JSON.
  #
  # Raises RuntimeError if doi is neither a String nor an Array.
  #
  def self.doi2cit(doi, format='text', style='apa', locale='en-US',
                   cache=true)
    dois =
      case doi
      when String then [doi]
      when Array then doi
      else fail 'doi must be one of String or Array class'
      end

    dois.map do |item|
      content = Cites.getcite(item, format, style, locale, cache)
      format == 'citeproc-json' ? JSON.parse(content) : content
    end
  end

  ##
  # show: Print each element of a collection followed by a blank line
  #
  # Args:
  # * input: A collection responding to each, e.g. the output of doi2cit
  #
  # Returns input.
  #
  def self.show(input)
    input.each do |item|
      # Object#display writes the item without a trailing newline.
      item.display
      puts "\n\n"
    end
  end

  ##
  # match: Look for matches to free-form citations to DOIs for an object (article, book, etc). in CrossRef
  #
  # Args:
  # * query: A free form string of terms, or an Array of such strings.
  #
  # Examples:
  #     require 'cites'
  #     Cites.match('Piwowar sharing data increases citation PLOS')
  #     Cites.match('boettiger Modeling stabilizing selection')
  #     Cites.match(['Piwowar sharing data increases citation PLOS', 'boettiger Modeling stabilizing selection'])
  #     out = Cites.match(['piwowar sharing data increases citation PLOS',
  #                        'boettiger Modeling stabilizing selection',
  #                        'priem Using social media to explore scholarly impact',
  #                        'fenner Peroxisome ligands for the treatment of breast cancer'])
  #     out.map {|i| i['doi']}
  #
  #     # Feed into the doi2cit method
  #     Cites.doi2cit(out.map {|i| i['doi']})
  #
  # Returns an array of hashes with the keys 'match', 'doi' and 'text'. The
  # http://dx.doi.org/ prefix is removed from each 'doi' value.
  #
  # Raises RuntimeError if query is neither a String nor an Array.
  #
  def self.match(query)
    queries =
      case query
      when String then [query]
      when Array then query
      else fail 'query must be one of String or Array class'
      end

    url = "http://search.labs.crossref.org/links"
    out = HTTParty.post(url,
                        :body => queries.to_json,
                        :headers => { "Content-Type" => "application/json"})
    # NOTE(review): a non-200 response is only reported; processing continues
    # and will fail below if the body carries no 'results'.
    puts "ERROR #{out.code}" unless out.code == 200

    out['results'].map do |item|
      doi = item['doi']
      doi = doi.sub('http://dx.doi.org/', '') unless doi.nil?
      {
        'match' => item['match'],
        'doi' => doi,
        'text' => item['text']
      }
    end
  end

  ##
  # search: Search for scholarly objects in CrossRef
  #
  # Args:
  # * query: A single or many terms (in an array). This function performs
  #   a single search if multiple terms are supplied.
  # * options: A hash with any of the following keys
  #   * :doi: A DOI to search for. DOI search is not implemented: if this
  #     is supplied no request is made and nil is returned.
  #   * :page: Page number to return.
  #   * :rows: Number of records to return (default 10)
  #   * :sort: Sort (logical)
  #   * :year: Year to restrict search to.
  #   * :header: Sent to the API as the header parameter; when true
  #     (default) the result is wrapped together with the response metadata.
  #   * :fields: Keys to keep for each item (default doi, normalizedScore,
  #     title, year)
  #
  # Examples:
  #     require 'cites'
  #     Cites.search('renear')
  #     Cites.search('palmer')
  #     Cites.search(['ecology', 'microbiology'])
  #     out = Cites.search(['renear', 'science', 'smith birds'])
  #     out['items'].map {|i| i['doi']}
  #
  #     Cites.search('science', :rows => 5)
  #
  #     # Feed into the doi2cit method
  #     out = Cites.search('palmer')
  #     g = Cites.doi2cit(out['items'][1]['doi'], 'bibtex')
  #     Cites.show(g)
  #
  # Returns {'meta' => ..., 'items' => [...]} when :header is true, otherwise
  # just the array of items.
  #
  # Raises RuntimeError if query is neither a String nor an Array.
  #
  def self.search(query, options = {})
    defaults = {:doi => nil, :page => nil, :rows => 10,
                :sort => nil, :year => nil, :header => true,
                :fields => ["doi","normalizedScore","title","year"]}
    options = defaults.merge(options)
    fields = options.delete(:fields)

    query =
      case query
      when String then query
      when Array then query.join('+')
      else fail 'query must be one of String or Array class'
      end

    url = "http://search.labs.crossref.org/dois"

    # DOI lookup is not implemented yet; nothing is requested in that case.
    return nil unless options[:doi].nil?

    # [fixme] - looks like "rows" option isn't working like it's supposed to
    args = {"q" => query, "page" => options[:page], "rows" => options[:rows],
            "sort" => options[:sort], "year" => options[:year],
            "header" => options[:header]}.reject { |_, value| value.nil? }
    out = HTTParty.get(url, :query => args)
    # NOTE(review): a non-200 response is only reported; processing continues
    # and will fail below if the body carries no 'items'.
    puts "ERROR #{out.code}" unless out.code == 200

    items = out['items'].map do |item|
      item.select { |key, _| fields.include?(key) }
    end
    return items unless options[:header] == true

    # Build the metadata without mutating the parsed response and without
    # relying on a Hash#except monkey-patch.
    meta = out.to_hash.reject { |key, _| key == 'items' }
    {'meta' => meta, 'items' => items}
  end

  # Fetch url through APICache, requesting a fresh copy when the cached one
  # is older than cache_time seconds. msg is printed whenever a request is
  # actually made. If response_ok raises, the error message and the requested
  # format are printed and the process exits.
  def self.fetch_cached(url, type, formatuse, cache_time, msg)
    # Create a cache key based on the DOI requested + the type of content
    cache_key = Digest::SHA1.hexdigest("#{url}-#{type}")

    # Keep cache data valid forever
    # [todo] - should using cache be reported?
    APICache.get(cache_key, :cache => cache_time,
                 :valid => :forever, :period => 0,
                 :timeout => 30) do
      puts msg
      response = HTTParty.get(url, :headers => {"Accept" => type})

      # If response code is ok (200) return the response body from this
      # block. Otherwise response_ok raises and the error is reported here.
      begin
        response.body if response_ok(response.code)
      rescue StandardError => e
        # StandardError rather than Exception, so that SystemExit and signals
        # are never trapped here.
        puts e.message
        puts "Format requested: #{formatuse}"
        # NOTE(review): exiting from library code is kept for backward
        # compatibility with the command line tools.
        exit
      end
    end
  end
  private_class_method :fetch_cached

  # Build the resolver URL for a DOI, leaving DOIs that already start with
  # the resolver prefix untouched so they are not prefixed twice.
  def self.doi_url(doi)
    doi.start_with?(DOI_RESOLVER) ? doi : DOI_RESOLVER + doi
  end
  private_class_method :doi_url

  # NOTE: a setcache method was sketched here but is not implemented. The
  # cache directory is exposed through Cites.cache_location.
end
|
334
|
+
|
335
|
+
class Hash
  # Backport of Hash#except for Rubies that do not provide it. It is only
  # defined when missing, so a built-in (Ruby 3.0+) or library-provided
  # implementation is never overridden.
  unless method_defined?(:except)
    # Return a copy of the hash without the given keys.
    #
    # Args:
    # * keys: one or more keys to leave out
    #
    # The receiver is not modified.
    def except(*keys)
      reject { |key, _| keys.include?(key) }
    end
  end
end
|
340
|
+
|
341
|
+
# [fixme] - Setting the cache_location should really be handled by a method
|
342
|
+
# but since all the methods in class Cites are static setting the cache
|
343
|
+
# has to be done manually in each static method (because we don't know which
|
344
|
+
# is called first), or else we would need a proper initializer.
|
345
|
+
|
346
|
+
# Runs once when this file is loaded, using whatever Cites.cache_location
# holds at that moment; assigning a new cache_location afterwards does not by
# itself replace this store.
APICache.store = Moneta.new(:File, :dir => Cites.cache_location)
|
metadata
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cites
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Scott Chamberlain
|
8
|
+
- Joona Lehtomäki
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2014-02-22 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bibtex-ruby
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '3.0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '3.0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: httparty
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0.12'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0.12'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: thor
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0.18'
|
49
|
+
type: :runtime
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0.18'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: json
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - "~>"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '1.8'
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '1.8'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: api_cache
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - "~>"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0.2'
|
77
|
+
type: :runtime
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - "~>"
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0.2'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: moneta
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - "~>"
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0.7'
|
91
|
+
type: :runtime
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - "~>"
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0.7'
|
98
|
+
description: Search for articles, and get citations from DOIs
|
99
|
+
email: myrmecocystus@gmail.com
|
100
|
+
executables:
|
101
|
+
- crsearch
|
102
|
+
- crref
|
103
|
+
extensions: []
|
104
|
+
extra_rdoc_files: []
|
105
|
+
files:
|
106
|
+
- bin/crref
|
107
|
+
- bin/crsearch
|
108
|
+
- lib/cites.rb
|
109
|
+
homepage: http://github.com/sckott/cites
|
110
|
+
licenses:
|
111
|
+
- MIT
|
112
|
+
metadata: {}
|
113
|
+
post_install_message:
|
114
|
+
rdoc_options: []
|
115
|
+
require_paths:
|
116
|
+
- lib
|
117
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '0'
|
122
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
|
+
requirements:
|
124
|
+
- - ">="
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: '0'
|
127
|
+
requirements: []
|
128
|
+
rubyforge_project:
|
129
|
+
rubygems_version: 2.2.0
|
130
|
+
signing_key:
|
131
|
+
specification_version: 4
|
132
|
+
summary: Gets citations from DOIs
|
133
|
+
test_files: []
|