cites 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5) hide show
  1. checksums.yaml +7 -0
  2. data/bin/crref +9 -0
  3. data/bin/crsearch +4 -0
  4. data/lib/cites.rb +346 -0
  5. metadata +133 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7981c7021e09b78536ea2da88b1babc9e6d93f4e
4
+ data.tar.gz: 57d1535b97db170b66f20581fa20ce5f0fade62b
5
+ SHA512:
6
+ metadata.gz: 52d393ca56353d91ee08a4967063813e4982dd4a5ccdd18ee400014be19f09ca3df574ac2e14044465bb601ada03aae7fc2972f1042b430afc6320925cc2f0a3
7
+ data.tar.gz: 74a13bd74953774843af9be7d4a30baeacf5d55c2391201ba02ad3c6800978615f2a1213c2f3cabeefd76d67e10da922f45e542d645f45be9e947030cc12953b
@@ -0,0 +1,9 @@
1
#!/usr/bin/env ruby

# crref: print the citation for a DOI.
#
# Usage: crref DOI [FORMAT] [STYLE] [LOCALE]
#
# FORMAT, STYLE and LOCALE are optional; when an argument is missing or empty
# the same defaults as Cites.doi2cit are used ('text', 'apa', 'en-US').

require 'cites'

doi, format, style, locale = ARGV

if doi.nil? || doi.empty?
  abort 'Usage: crref DOI [FORMAT] [STYLE] [LOCALE]'
end

# Treat a missing or empty argument as "not given" so that an empty string is
# never passed on as a format, style or locale.
pick = lambda { |value, default| value.nil? || value.empty? ? default : value }

puts Cites.doi2cit(doi,
                   pick.call(format, 'text'),
                   pick.call(style, 'apa'),
                   pick.call(locale, 'en-US'))
@@ -0,0 +1,4 @@
1
#!/usr/bin/env ruby

# crsearch: search CrossRef for a free-form query and print the result.
#
# Usage: crsearch QUERY

require 'cites'

query = ARGV[0]
abort 'Usage: crsearch QUERY' if query.nil? || query.empty?

puts Cites.search(query)
@@ -0,0 +1,346 @@
1
+ require 'api_cache'
2
+ require 'bibtex'
3
+ require 'digest/sha1'
4
+ require 'httparty'
5
+ require 'json'
6
+ require 'moneta'
7
+
8
# Check the HTTP status code of a content negotiation response.
# See CrossCite documentation http://crosscite.org/cn/
#
# Args:
# * code: The numeric HTTP status code of the response
#
# Returns true when the code is 200.
#
# Raises a RuntimeError (whose message contains the code) for every other
# status, so a caller never continues with an empty response body.
def response_ok(code)
  case code
  when 200
    true
  when 204
    raise "The request was OK but there was no metadata available (response code: #{code})"
  when 404
    raise "The DOI requested doesn't exist (response code: #{code})"
  when 406
    raise "Can't serve any requested content type (response code: #{code})"
  when 500...600
    raise "ZOMG ERROR #{code}"
  else
    # Any status not listed above (e.g. 400, 403) used to fall through and
    # return nil, which callers silently treated as "no content".
    raise "Unexpected response (response code: #{code})"
  end
end
23
+
24
+ # Cites: The single class (for now) in cites
25
+
26
class Cites

  # Directory used for the on-disk response cache.
  class << self; attr_accessor :cache_location end
  # Fall back to Dir.home when the HOME environment variable is not set
  # instead of failing on nil + String.
  @cache_location = File.join(ENV.fetch('HOME') { Dir.home }, '.cites', 'cache')

  # Citation formats understood by getcite, mapped to the content type that
  # is sent in the Accept header.
  FORMATS = {
    "rdf-xml" => "application/rdf+xml",
    "turtle" => "text/turtle",
    "citeproc-json" => "application/vnd.citationstyles.csl+json",
    "text" => "text/x-bibliography",
    "ris" => "application/x-research-info-systems",
    "bibtex" => "application/x-bibtex",
    "crossref-xml" => "application/vnd.crossref.unixref+xml",
    "datacite-xml" => "application/vnd.datacite.datacite+xml"
  }.freeze

  ##
  # Get a single citation in various formats from a DOI
  #
  # Args:
  # * doi: A DOI, with or without a leading http://dx.doi.org/
  # * format: one of rdf-xml, turtle, citeproc-json, text, ris, bibtex,
  #   crossref-xml, datacite-xml
  # * style: Only used if format='text', e.g., apa, harvard3
  # * locale: Only used if format='text'. A locale, e.g., en-US
  # * cache: Should cache be used
  #   * true: Try fetching from cache and store to cache (default)
  #   * false: Do not use cache at all
  #   * 'flush': Get a fresh response and cache it
  #
  # Returns the response body; for format='bibtex' the body is parsed and
  # re-serialised with BibTeX.
  #
  # Raises a RuntimeError for an unknown format or an invalid cache value
  # (both checked before any request is made), and, when cache is false, for
  # a response that response_ok rejects.
  #
  def self.getcite(doi, format='text', style='apa', locale='en-US',
                   cache=true)
    formatuse = FORMATS[format]
    if formatuse.nil?
      fail "Unknown format #{format.inspect}, must be one of: #{FORMATS.keys.join(', ')}"
    end
    unless cache == true || cache == false || cache == 'flush'
      fail "Invalid cache value #{cache}"
    end

    if format == 'text'
      type = "#{formatuse}; style=#{style}; locale=#{locale}"
    else
      type = formatuse
    end
    url = doi_url(doi)

    if cache == false
      puts "Not using cache, requesting..."
      response = HTTParty.get(url, :headers => {"Accept" => type})
      content = response.body if response_ok(response.code)
    else
      if cache == 'flush'
        # A one second lifetime makes any cached copy stale
        cache_time = 1
        msg = "Flushing cache, requesting..."
      else
        cache_time = 6000
        msg = "Requested DOI not in cache or is stale, requesting..."
      end
      # [todo] - should using cache be reported?

      # Create a cache key based on the DOI requested + the type of content
      cache_key = Digest::SHA1.hexdigest("#{url}-#{type}")

      # Keep cache data valid forever
      content = APICache.get(cache_key, :cache => cache_time,
                             :valid => :forever, :period => 0,
                             :timeout => 30) do
        puts msg
        response = HTTParty.get(url, :headers => {"Accept" => type})

        # If response code is ok (200) the response body is returned from
        # this block, otherwise the error is reported and the process exits.
        # NOTE(review): exiting from library code is kept for backward
        # compatibility (the non-cached branch raises instead) - confirm
        # whether raising here would be acceptable.
        begin
          response.body if response_ok(response.code)
        rescue StandardError => e
          puts e.message
          puts "Format requested: #{formatuse}"
          exit
        end
      end
    end

    # Nothing to parse when no content came back
    return content if content.nil? || format != 'bibtex'
    BibTeX.parse(content).to_s
  end

  ##
  # Get a citation in various formats from a DOI
  #
  # Args:
  # * doi: A DOI, or an array of DOIs
  # * format: one of rdf-xml, turtle, citeproc-json, text, ris, bibtex,
  #   crossref-xml, datacite-xml
  # * style: Only used if format='text', e.g., apa, harvard3
  # * locale: A locale, e.g., en-US
  # * cache: Should cache be used
  #   * true: Try fetching from cache and store to cache (default)
  #   * false: Do not use cache at all
  #   * 'flush': Get a fresh response and cache it
  #
  # Examples:
  #     require 'cites'
  #     Cites.doi2cit('10.1371/journal.pone.0000308')
  #     Cites.doi2cit('10.1371/journal.pbio.0030427')
  #     Cites.doi2cit('10.1371/journal.pbio.0030427', 'crossref-xml')
  #     Cites.doi2cit('10.1371/journal.pbio.0030427', 'bibtex')
  #     Cites.doi2cit('10.1371/journal.pbio.0030427', 'ris')
  #
  #     out = Cites.doi2cit(['10.1371/journal.pone.0000308','10.1371/journal.pbio.0030427','10.1371/journal.pone.0084549'], 'bibtex')
  #     Cites.show(out)
  #
  # Returns an array of citation content, one element per DOI. The structure
  # of the content will depend on the format requested; for 'citeproc-json'
  # each element is the parsed JSON.
  #
  # Raises a RuntimeError when doi is neither a String nor an Array.
  #
  def self.doi2cit(doi, format='text', style='apa', locale='en-US',
                   cache=true)
    listify(doi, 'doi').map do |item|
      content = getcite(item, format, style, locale, cache)
      format == 'citeproc-json' ? JSON.parse(content) : content
    end
  end

  ##
  # show: Print each citation followed by an empty line
  #
  # Args:
  # * input: An array of citations, e.g. the output of doi2cit. A single
  #   object is printed as if it was a one element array.
  #
  def self.show(input)
    input = [input] unless input.respond_to?(:each)
    input.each do |item|
      item.display
      print "\n\n"
    end
  end

  ##
  # match: Look for matches to free-form citations to DOIs for an object (article, book, etc). in CrossRef
  #
  # Args:
  # * query: A free form string of terms, or an array of such strings.
  #
  # Examples:
  #     require 'cites'
  #     Cites.match('Piwowar sharing data increases citation PLOS')
  #     Cites.match('boettiger Modeling stabilizing selection')
  #     Cites.match(['Piwowar sharing data increases citation PLOS', 'boettiger Modeling stabilizing selection'])
  #     out = Cites.match(['piwowar sharing data increases citation PLOS',
  #                        'boettiger Modeling stabilizing selection',
  #                        'priem Using social media to explore scholarly impact',
  #                        'fenner Peroxisome ligands for the treatment of breast cancer'])
  #     out.map {|i| i['doi']}
  #
  #     # Feed into the doi2cit method
  #     Cites.doi2cit(out.map {|i| i['doi']})
  #
  # Returns an array of hashes with the keys 'match', 'doi' (without the
  # http://dx.doi.org/ prefix) and 'text'. The array is empty when the
  # response carries no results.
  #
  # Raises a RuntimeError when query is neither a String nor an Array.
  #
  def self.match(query)
    queries = listify(query, 'query')
    url = "http://search.labs.crossref.org/links"
    out = HTTParty.post(url,
                        :body => queries.to_json,
                        :headers => { "Content-Type" => "application/json"})
    puts "ERROR #{out.code}" unless out.code == 200

    # A failed request may not carry a parsed hash at all, so do not assume
    # that 'results' is present.
    parsed = out.parsed_response
    results = parsed.is_a?(Hash) ? Array(parsed['results']) : []

    results.map do |item|
      doi = item['doi']
      doi = doi.sub('http://dx.doi.org/', '') unless doi.nil?
      {
        'match' => item['match'],
        'doi' => doi,
        'text' => item['text']
      }
    end
  end

  ##
  # search: Search for scholarly objects in CrossRef
  #
  # Args:
  # * query: A single or many terms (in an array). This function performs
  #   a single search if multiple terms are supplied.
  # * options: A hash with any of the following keys
  #   * :doi: A DOI to search for. Searching by DOI is not implemented yet,
  #     nil is returned when this is supplied.
  #   * :page: Page number to return.
  #   * :rows: Number of records to return (default 10)
  #   * :sort: Sort (logical)
  #   * :year: Year to restrict search to.
  #   * :header: Ask for, and return, the search meta data (default true)
  #   * :fields: Keys to keep for each item (default doi, normalizedScore,
  #     title and year)
  #
  # Examples:
  #     require 'cites'
  #     Cites.search('renear')
  #     Cites.search('palmer')
  #     Cites.search(['ecology', 'microbiology'])
  #     out = Cites.search(['renear', 'science', 'smith birds'])
  #     out['items'].map {|i| i['doi']}
  #
  #     Cites.search('science', :rows => 5)
  #
  #     # Feed into the doi2cit method
  #     out = Cites.search('palmer')
  #     g = Cites.doi2cit(out['items'][1]['doi'], 'bibtex')
  #     Cites.show(g)
  #
  # Returns a hash {'meta' => ..., 'items' => [...]} when :header is true,
  # otherwise only the array of items.
  #
  # Raises a RuntimeError when query is neither a String nor an Array.
  #
  def self.search(query, options = {})
    defaults = {:doi => nil, :page => nil, :rows => 10,
                :sort => nil, :year => nil, :header => true,
                :fields => ["doi","normalizedScore","title","year"]}
    # merge returns a new hash, the hash given by the caller is left untouched
    options = defaults.merge(options)
    fields = options.delete(:fields)

    query = listify(query, 'query').join('+')

    # [todo] - searching by DOI is not implemented, nothing is requested
    return nil unless options[:doi].nil?

    url = "http://search.labs.crossref.org/dois"

    # [fixme] - looks like "rows" option isn't working like it's supposed to
    args = {"q" => query, "page" => options[:page], "rows" => options[:rows],
            "sort" => options[:sort], "year" => options[:year], "header" => options[:header]}
    args = args.reject { |_, value| value.nil? }
    out = HTTParty.get(url, :query => args)
    puts "ERROR #{out.code}" unless out.code == 200

    parsed = out.parsed_response
    if parsed.is_a?(Hash)
      items = Array(parsed['items'])
      meta = parsed.reject { |key, _| key == 'items' }
    else
      # NOTE(review): presumably the service answers with a bare array of
      # items when no header is requested - confirm against the API.
      items = parsed.is_a?(Array) ? parsed : []
      meta = {}
    end
    coll = items.map { |item| item.select { |key, _| fields.include?(key) } }

    options[:header] == true ? {'meta' => meta, 'items' => coll} : coll
  end

  # [todo] - setcache: a method for setting the cache location was planned
  # here but is not implemented, see the note on APICache.store at the end of
  # this file.

  # Turn a single String into a one element array and pass an Array through.
  def self.listify(value, name)
    case value
    when String then [value]
    when Array then value
    else fail "#{name} must be one of String or Array class"
    end
  end
  private_class_method :listify

  # Build the resolver URL for a DOI, given with or without a resolver prefix.
  def self.doi_url(doi)
    bare = doi.to_s.strip.sub(%r{\Ahttps?://(dx\.)?doi\.org/}i, '')
    'http://dx.doi.org/' + bare
  end
  private_class_method :doi_url
end
334
+
335
# Hash#except for Rubies that do not ship it (it is built in since Ruby 3.0).
#
# Returns a new hash without the given keys; the receiver is left untouched.
# The method is only defined when missing, so an existing implementation is
# never overridden.
class Hash
  unless method_defined?(:except)
    def except(*which)
      reject { |key, _| which.include?(key) }
    end
  end
end
340
+
341
# [fixme] - Setting the cache_location should really be handled by a method,
# but since all the methods in class Cites are static, setting the cache
# would have to be done manually in each static method (because we don't know
# which one is called first), or we would need a proper initializer. Until
# then the store is configured once, here, when this file is loaded.

APICache.store = Moneta.new(:File, :dir => Cites.cache_location)
metadata ADDED
@@ -0,0 +1,133 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cites
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Scott Chamberlain
8
+ - Joona Lehtomäki
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-02-22 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bibtex-ruby
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '3.0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '3.0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: httparty
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '0.12'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '0.12'
42
+ - !ruby/object:Gem::Dependency
43
+ name: thor
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '0.18'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '0.18'
56
+ - !ruby/object:Gem::Dependency
57
+ name: json
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: '1.8'
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '1.8'
70
+ - !ruby/object:Gem::Dependency
71
+ name: api_cache
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '0.2'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - "~>"
82
+ - !ruby/object:Gem::Version
83
+ version: '0.2'
84
+ - !ruby/object:Gem::Dependency
85
+ name: moneta
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - "~>"
89
+ - !ruby/object:Gem::Version
90
+ version: '0.7'
91
+ type: :runtime
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - "~>"
96
+ - !ruby/object:Gem::Version
97
+ version: '0.7'
98
+ description: Search for articles, and get citations from DOIs
99
+ email: myrmecocystus@gmail.com
100
+ executables:
101
+ - crsearch
102
+ - crref
103
+ extensions: []
104
+ extra_rdoc_files: []
105
+ files:
106
+ - bin/crref
107
+ - bin/crsearch
108
+ - lib/cites.rb
109
+ homepage: http://github.com/sckott/cites
110
+ licenses:
111
+ - MIT
112
+ metadata: {}
113
+ post_install_message:
114
+ rdoc_options: []
115
+ require_paths:
116
+ - lib
117
+ required_ruby_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ required_rubygems_version: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ requirements: []
128
+ rubyforge_project:
129
+ rubygems_version: 2.2.0
130
+ signing_key:
131
+ specification_version: 4
132
+ summary: Gets citations from DOIs
133
+ test_files: []