cites 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. checksums.yaml +7 -0
  2. data/bin/crref +9 -0
  3. data/bin/crsearch +4 -0
  4. data/lib/cites.rb +346 -0
  5. metadata +133 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7981c7021e09b78536ea2da88b1babc9e6d93f4e
4
+ data.tar.gz: 57d1535b97db170b66f20581fa20ce5f0fade62b
5
+ SHA512:
6
+ metadata.gz: 52d393ca56353d91ee08a4967063813e4982dd4a5ccdd18ee400014be19f09ca3df574ac2e14044465bb601ada03aae7fc2972f1042b430afc6320925cc2f0a3
7
+ data.tar.gz: 74a13bd74953774843af9be7d4a30baeacf5d55c2391201ba02ad3c6800978615f2a1213c2f3cabeefd76d67e10da922f45e542d645f45be9e947030cc12953b
@@ -0,0 +1,9 @@
1
#!/usr/bin/env ruby

# crref: print the citation for a DOI.
#
# Usage: crref DOI [format] [style] [locale]
#
# The library shipped with this gem is lib/cites.rb, which defines Cites;
# there is no 'doiref' file or DOIref constant in the package.
require 'cites'

abort 'usage: crref DOI [format] [style] [locale]' if ARGV.empty?

# Forward only the arguments that were actually given so that the defaults of
# Cites.doi2cit (format='text', style='apa', locale='en-US') apply to the rest;
# passing "" for a missing argument would override those defaults.
puts Cites.doi2cit(*ARGV.first(4))
@@ -0,0 +1,4 @@
1
#!/usr/bin/env ruby

# crsearch: search CrossRef for a term and print the result.
#
# Usage: crsearch QUERY
#
# The library shipped with this gem is lib/cites.rb, which defines Cites;
# there is no 'doiref' file or DOIref constant in the package.
require 'cites'

abort 'usage: crsearch QUERY' if ARGV.empty?

puts Cites.search(ARGV[0])
@@ -0,0 +1,346 @@
1
+ require 'api_cache'
2
+ require 'bibtex'
3
+ require 'digest/sha1'
4
+ require 'httparty'
5
+ require 'json'
6
+ require 'moneta'
7
+
8
# Check the HTTP status code of a content negotiation response.
# See CrossCite documentation http://crosscite.org/cn/
#
# Args:
# * code: HTTP status code of the response (Integer)
#
# Returns true when the code is 200.
#
# Raises RuntimeError with a descriptive message for every other code.
def response_ok(code)
  case code
  when 200
    true
  when 204
    raise "The request was OK but there was no metadata available (response code: #{code})"
  when 404
    raise "The DOI requested doesn't exist (response code: #{code})"
  when 406
    raise "Can't serve any requested content type (response code: #{code})"
  when 500...600
    raise "ZOMG ERROR #{code}"
  else
    # Codes not listed above (e.g. 400, 403) used to fall through and return
    # nil, so a failed request was indistinguishable from one that simply had
    # no body. Fail loudly instead.
    raise "Unexpected response code: #{code}"
  end
end
23
+
24
+ # Cites: The single class (for now) in cites
25
+
26
+ class Cites
27
+
28
# Directory of the on-disk response cache, exposed through a class-level
# reader/writer pair named cache_location.
class << self; attr_accessor :cache_location end
# Dir.home resolves the home directory without concatenating onto a possibly
# nil value (ENV['HOME'] + '...' raises NoMethodError when HOME is unset).
@cache_location = File.join(Dir.home, '.cites', 'cache')
30
+
31
##
# Get a single citation in various formats from a DOI
#
# Args:
# * doi: A DOI
# * format: one of rdf-xml, turtle, citeproc-json, text, ris, bibtex,
#   crossref-xml, datacite-xml
# * style: Only used if format='text', e.g., apa, harvard3
# * locale: Only used if format='text'. A locale, e.g., en-US
# * cache: Should cache be used
#   * true: Try fetching from cache and store to cache (default)
#   * false: Do not use cache at all
#   * 'flush': Get a fresh response and cache it
#
# Returns the response body; for format='bibtex' the body is passed through
# BibTeX.parse and converted back to a string.
#
# Raises:
# * ArgumentError if format is not one of the supported names
# * RuntimeError if cache is not true, false or 'flush'
# * whatever response_ok raises for a bad status code when cache is false
#
# When the cache is in use, an error raised by response_ok is printed and the
# process is terminated with exit.
#
def self.getcite(doi, format='text', style='apa', locale='en-US',
                 cache=true)
  formats = {"rdf-xml" => "application/rdf+xml",
             "turtle" => "text/turtle",
             "citeproc-json" => "application/vnd.citationstyles.csl+json",
             "text" => "text/x-bibliography",
             "ris" => "application/x-research-info-systems",
             "bibtex" => "application/x-bibtex",
             "crossref-xml" => "application/vnd.crossref.unixref+xml",
             "datacite-xml" => "application/vnd.datacite.datacite+xml"
            }
  formatuse = formats[format]
  # An unknown format used to produce a nil Accept header; reject it up front.
  if formatuse.nil?
    fail ArgumentError,
         "Unknown format '#{format}', must be one of: #{formats.keys.join(', ')}"
  end

  # Style and locale are only meaningful for formatted text bibliographies.
  type = if format == 'text'
           "#{formatuse}; style=#{style}; locale=#{locale}"
         else
           formatuse
         end
  url = 'http://dx.doi.org/' + doi

  case cache
  when true, 'flush'
    if cache == true
      cache_time = 6000
      msg = "Requested DOI not in cache or is stale, requesting..."
    else
      cache_time = 1
      msg = "Flushing cache, requesting..."
    end
    # Keep cache data valid forever
    # [todo] - should using cache be reported?

    # Create a cache key based on the DOI requested + the type of content
    cache_key = Digest::SHA1.hexdigest("#{url}-#{type}")

    content = APICache.get(cache_key, :cache => cache_time,
                           :valid => :forever, :period => 0,
                           :timeout => 30) do
      puts msg
      response = HTTParty.get(url, :headers => {"Accept" => type})

      # If response code is ok (200) return the response body from this
      # block. Otherwise report the error raised by response_ok and quit.
      # Only StandardError is rescued so that signals and SystemExit are not
      # intercepted.
      begin
        response.body if response_ok(response.code)
      rescue StandardError => e
        puts e.message
        puts "Format requested: #{formatuse}"
        exit
      end
    end
  when false
    puts "Not using cache, requesting..."
    response = HTTParty.get(url, :headers => {"Accept" => type})
    content = response.body if response_ok(response.code)
  else
    fail "Invalid cache value #{cache}"
  end

  if format == 'bibtex'
    BibTeX.parse(content).to_s
  else
    content
  end
end
116
+
117
##
# Get a citation in various formats from a DOI
#
# Args:
# * doi: A DOI (String) or several DOIs (Array of String)
# * format: one of rdf-xml, turtle, citeproc-json, text, ris, bibtex,
#   crossref-xml, datacite-xml
# * style: Only used if format='text', e.g., apa, harvard3
# * locale: A locale, e.g., en-US
# * cache: Should cache be used
#   * true: Try fetching from cache and store to cache (default)
#   * false: Do not use cache at all
#   * 'flush': Get a fresh response and cache it
#
# Examples:
#     require 'cites'
#     Cites.doi2cit('10.1371/journal.pone.0000308')
#     Cites.doi2cit('10.1371/journal.pbio.0030427')
#     Cites.doi2cit('10.1371/journal.pbio.0030427', 'crossref-xml')
#     Cites.doi2cit('10.1371/journal.pbio.0030427', 'bibtex')
#     Cites.doi2cit('10.1371/journal.pbio.0030427', 'ris')
#
#     out = Cites.doi2cit(['10.1371/journal.pone.0000308','10.1371/journal.pbio.0030427','10.1371/journal.pone.0084549'], 'bibtex')
#     Cites.show(out)
#
# Returns an array of citation content, one element per DOI. The structure of
# the content will depend on the format requested; for 'citeproc-json' each
# element is the parsed JSON rather than the raw string.
#
# Raises RuntimeError if doi is neither a String nor an Array.
#
def self.doi2cit(doi, format='text', style='apa', locale='en-US',
                 cache=true)
  # case/when matches with ===, so subclasses of String and Array are
  # accepted as well as the exact classes.
  dois = case doi
         when String then [doi]
         when Array then doi
         else fail 'doi must be one of String or Array class'
         end

  dois.map do |item|
    content = Cites.getcite(item, format, style, locale, cache)
    format == 'citeproc-json' ? JSON.parse(content) : content
  end
end
171
+
172
# Print every element of input to standard output, each one followed by
# two newlines.
#
# Args:
# * input: a collection responding to each, e.g. the array returned by doi2cit
#
# Returns input (the result of each).
def self.show(input)
  input.each do |citation|
    # display writes the element without a trailing newline and returns nil;
    # puts then emits one empty line for that nil and one for "\n".
    shown = citation.display
    puts shown, "\n"
  end
end
177
+
178
##
# match: Look for matches of free-form citations to DOIs for an object
# (article, book, etc.) in CrossRef
#
# Args:
# * query: A free form string of terms, or an array of such strings.
#
# Examples:
#     require 'cites'
#     Cites.match('Piwowar sharing data increases citation PLOS')
#     Cites.match('boettiger Modeling stabilizing selection')
#     Cites.match(['Piwowar sharing data increases citation PLOS', 'boettiger Modeling stabilizing selection'])
#     out = Cites.match(['piwowar sharing data increases citation PLOS',
#                        'boettiger Modeling stabilizing selection',
#                        'priem Using social media to explore scholarly impact',
#                        'fenner Peroxisome ligands for the treatment of breast cancer'])
#     out.map {|i| i['doi']}
#
#     # Feed into the doi2cit method
#     Cites.doi2cit(out.map {|i| i['doi']})
#
# Returns an array of hashes with the keys 'match', 'doi' and 'text', one per
# entry of the 'results' list in the response. The 'http://dx.doi.org/' prefix
# is removed from each DOI. The array is empty when the response has no
# 'results' list.
#
# Raises RuntimeError if query is neither a String nor an Array.
def self.match(query)
  # case/when matches with ===, so subclasses of String and Array are
  # accepted as well as the exact classes.
  queries = case query
            when String then [query]
            when Array then query
            else fail 'query must be one of String or Array class'
            end

  url = "http://search.labs.crossref.org/links"
  out = HTTParty.post(url,
                      :body => queries.to_json,
                      :headers => { "Content-Type" => "application/json"})
  puts "ERROR #{out.code}" unless out.code == 200

  # A failed request may not carry a JSON object with a 'results' list;
  # treat that as "no matches" instead of crashing on nil.each.
  parsed = out.parsed_response
  results = parsed.is_a?(Hash) ? parsed['results'] : nil
  results = [] unless results.is_a?(Array)

  results.map do |item|
    doi = item['doi']
    doi = doi.sub('http://dx.doi.org/', '') if doi
    {
      'match' => item['match'],
      'doi' => doi,
      'text' => item['text']
    }
  end
end
233
+
234
##
# search: Search for scholarly objects in CrossRef
#
# Args:
# * query: A single term or many terms (in an array). Multiple terms are
#   joined with '+' and sent as a single search.
# * options: A hash with any of the following keys
#   * :doi: A DOI to search for. DOI lookups are not implemented yet: when
#     this is supplied no request is made and nil is returned.
#   * :page: Page number to return.
#   * :rows: Number of records to return (default 10)
#   * :sort: Sort (logical)
#   * :year: Year to restrict search to.
#   * :header: Sent to the service as the "header" parameter; when true
#     (default) the result also carries the response metadata, see Returns.
#   * :fields: Keys to keep for every item
#     (default doi, normalizedScore, title, year)
#
# Examples:
#     require 'cites'
#     Cites.search('renear')
#     Cites.search('palmer')
#     Cites.search(['ecology', 'microbiology'])
#     out = Cites.search(['renear', 'science', 'smith birds'], :header => false)
#     out.map {|i| i['doi']}
#
#     Cites.search('science', :rows => 5)
#
#     # Feed into the doi2cit method
#     out = Cites.search('palmer', :header => false)
#     g = Cites.doi2cit(out[1]['doi'], 'bibtex')
#     Cites.show(g)
#
# Returns:
# * with :header => true, a hash {'meta' => ..., 'items' => [...]} where
#   'meta' is the parsed response without its 'items' entry
# * otherwise the array of items, each reduced to the requested fields
# * nil when :doi is supplied
#
# Raises RuntimeError if query is neither a String nor an Array.
def self.search(query, options = {})
  defaults = {:doi => nil, :page => nil, :rows => 10,
              :sort => nil, :year => nil, :header => true,
              :fields => ["doi","normalizedScore","title","year"]}
  options = defaults.merge(options)
  fields = options.delete(:fields)

  # case/when matches with ===, so subclasses of String and Array are
  # accepted as well as the exact classes.
  terms = case query
          when String then query
          when Array then query.join('+')
          else fail 'query must be one of String or Array class'
          end

  # DOI lookups are not implemented; keep returning nil as before.
  return nil unless options[:doi].nil?

  url = "http://search.labs.crossref.org/dois"

  # [fixme] - looks like "rows" option isn't working like it's supposed to
  args = {"q" => terms, "page" => options[:page], "rows" => options[:rows],
          "sort" => options[:sort], "year" => options[:year],
          "header" => options[:header]}
  args = args.reject { |_, value| value.nil? }
  out = HTTParty.get(url, :query => args)
  puts "ERROR #{out.code}" unless out.code == 200

  # A failed request may not carry a JSON object with an 'items' list;
  # treat that as an empty result instead of crashing on nil.each.
  parsed = out.parsed_response
  parsed = {} unless parsed.is_a?(Hash)
  items = parsed['items']
  items = [] unless items.is_a?(Array)

  coll = items.map do |item|
    item.select { |key, _| fields.include?(key) }
  end
  return coll unless options[:header] == true

  # Build the metadata without mutating the parsed response and without
  # depending on a Hash#except patch.
  meta = parsed.reject { |key, _| key == 'items' }
  {'meta' => meta, 'items' => coll}
end
314
+
315
+ ##
316
+ # setcache: [fixme] not implemented yet; the text below was copied from the docs of search (Search for scholarly objects in CrossRef)
317
+ #
318
+ # Args:
319
+ # * query: A free form string of terms.
320
+ #
321
+ # Examples:
322
+ # require 'cites'
323
+ # Cites.search(query='renear')
324
+ # Cites.search('palmer')
325
+ # Cites.search(['ecology', 'microbiology'])
326
+ # out = Cites.search(['renear', 'science', 'smith birds'])
327
+ # out.map {|i| i['doi']}
328
+ #
329
+ # # Feed into the doi2cit method
330
+ # out = Cites.search('palmer')
331
+ # g = Cites.doi2cit(out[1]['doi'], format='bibtex')
332
+ # Cites.show(g)
333
+ end
334
+
335
class Hash
  # Return a copy of the hash without the given keys; the receiver is left
  # untouched.
  #
  # Defined only when Hash does not already respond to except: Ruby 3.0+
  # ships a non-destructive Hash#except accepting any number of keys, and
  # replacing it with a mutating, single-key version would change behaviour
  # for every other user of that method in the process.
  unless method_defined?(:except)
    def except(*keys)
      dup.tap { |copy| keys.each { |key| copy.delete(key) } }
    end
  end
end
340
+
341
+ # [fixme] - Setting the cache_location should really be handled by a method,
342
+ # but since all the methods in class Cites are static, setting the cache
343
+ # has to be done manually in each static method (because we don't know which
344
+ # is called first), or else we would need a proper initializer.
345
+
346
+ APICache.store = Moneta.new(:File, dir: Cites::cache_location)
metadata ADDED
@@ -0,0 +1,133 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cites
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Scott Chamberlain
8
+ - Joona Lehtomäki
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-02-22 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bibtex-ruby
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '3.0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '3.0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: httparty
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '0.12'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '0.12'
42
+ - !ruby/object:Gem::Dependency
43
+ name: thor
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '0.18'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '0.18'
56
+ - !ruby/object:Gem::Dependency
57
+ name: json
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: '1.8'
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '1.8'
70
+ - !ruby/object:Gem::Dependency
71
+ name: api_cache
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '0.2'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - "~>"
82
+ - !ruby/object:Gem::Version
83
+ version: '0.2'
84
+ - !ruby/object:Gem::Dependency
85
+ name: moneta
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - "~>"
89
+ - !ruby/object:Gem::Version
90
+ version: '0.7'
91
+ type: :runtime
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - "~>"
96
+ - !ruby/object:Gem::Version
97
+ version: '0.7'
98
+ description: Search for articles, and get citations from DOIs
99
+ email: myrmecocystus@gmail.com
100
+ executables:
101
+ - crsearch
102
+ - crref
103
+ extensions: []
104
+ extra_rdoc_files: []
105
+ files:
106
+ - bin/crref
107
+ - bin/crsearch
108
+ - lib/cites.rb
109
+ homepage: http://github.com/sckott/cites
110
+ licenses:
111
+ - MIT
112
+ metadata: {}
113
+ post_install_message:
114
+ rdoc_options: []
115
+ require_paths:
116
+ - lib
117
+ required_ruby_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ required_rubygems_version: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ requirements: []
128
+ rubyforge_project:
129
+ rubygems_version: 2.2.0
130
+ signing_key:
131
+ specification_version: 4
132
+ summary: Gets citations from DOIs
133
+ test_files: []