worldcat 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +4 -0
- data/README.rdoc +74 -0
- data/lib/worldcat.rb +331 -0
- metadata +110 -0
data/CHANGELOG.rdoc
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
= WorldCat Search API
|
2
|
+
|
3
|
+
A WorldCat API for Ruby to interact with WorldCat search webservices.
|
4
|
+
http://www.worldcat.org
|
5
|
+
|
6
|
+
== Usage
|
7
|
+
|
8
|
+
require 'worldcat'
|
9
|
+
|
10
|
+
client = WorldCat.new '[api_key]'
|
11
|
+
|
12
|
+
Get an Atom or RSS response from an OpenSearch query
|
13
|
+
|
14
|
+
atom = client.open_search :query => "Civil War"
|
15
|
+
puts atom.feed.title
|
16
|
+
puts atom.entries.first.author
|
17
|
+
|
18
|
+
Get MARC XML or Dublin Core from an SRU CQL query
|
19
|
+
|
20
|
+
cql = 'srw.kw="civil war" and (srw.su="antietam" or srw.su="sharpsburg")'
|
21
|
+
|
22
|
+
records = client.sru_search :query => cql, :format => "marcxml"
|
23
|
+
for record in records
|
24
|
+
# print out field 245 subfield a
|
25
|
+
puts record['245']['a']
|
26
|
+
end
|
27
|
+
|
28
|
+
If you'd like to use another implementation, the raw response is available:
|
29
|
+
|
30
|
+
client.raw_response
|
31
|
+
|
32
|
+
A faster way?
|
33
|
+
|
34
|
+
rss = WorldCat.new.open_search :q => "Globalization", :format => "rss", :wskey => '[api_key]'
|
35
|
+
|
36
|
+
For more information, please have a look at the documentation or the test cases.
|
37
|
+
|
38
|
+
== Installation
|
39
|
+
|
40
|
+
gem install worldcat
|
41
|
+
|
42
|
+
== Why?
|
43
|
+
|
44
|
+
The 'wcapi' gem falls short on several points, which justifies writing another implementation:
|
45
|
+
|
46
|
+
* It is better to use an existing Ruby RSS implementation (currently SimpleRSS) to get Atom or RSS responses.
|
47
|
+
* It is better to use the Ruby MARC implementation to get MARC XML or Dublin Core responses from an SRU CQL search or any other search.
|
48
|
+
* Unit testing is great.
|
49
|
+
|
50
|
+
== What this API can do
|
51
|
+
|
52
|
+
* Send searches in OpenSearch or SRU CQL syntax.
|
53
|
+
* Receive OpenSearch responses in RSS or Atom format (both are a SimpleRSS object).
|
54
|
+
* Receive SRU responses in an array of MARC::Record or Dublin Core (REXML::Document).
|
55
|
+
* Receive a MARC::Record for a single OCLC record.
|
56
|
+
* Receive a REXML::Document for geographically-sorted library holdings information.
|
57
|
+
* Receive an HTML-formatted String for standard bibliographic citation formats (APA, Chicago, Harvard, MLA, and Turabian).
|
58
|
+
|
59
|
+
== To do
|
60
|
+
|
61
|
+
* Use SRU gem to get response from sru_search.
|
62
|
+
|
63
|
+
== Contribution
|
64
|
+
|
65
|
+
Feel free to fork and send me a pull request for changes, fixes or simply a message for any suggestion.
|
66
|
+
|
67
|
+
== See
|
68
|
+
|
69
|
+
* {WorldCat webservices}[http://www.worldcat.org/affiliate/tools?atype=wcapi]
|
70
|
+
* {Ruby MARC documentation}[http://marc.rubyforge.org/]
|
71
|
+
* {Ruby Simple RSS documentation}[http://simple-rss.rubyforge.org/]
|
72
|
+
|
73
|
+
Vivien Didelot <vivien.didelot@gmail.com>
|
74
|
+
http://github.com/v0n/worldcat
|
data/lib/worldcat.rb
ADDED
@@ -0,0 +1,331 @@
|
|
1
|
+
# Simple WorldCat Search Ruby API
|
2
|
+
# http://oclc.org/developer/services/WCAPI
|
3
|
+
#
|
4
|
+
# Author:: Vivien Didelot 'v0n' <vivien.didelot@gmail.com>
|
5
|
+
|
6
|
+
require 'rubygems' # needed by simple-rss
|
7
|
+
require 'open-uri' # used to fetch responses
|
8
|
+
require 'simple-rss' # used for Atom and RSS format
|
9
|
+
require 'marc' # used for MARC records
|
10
|
+
require 'rexml/document' # used for many XML purposes
|
11
|
+
require 'json' # used for JSON format
|
12
|
+
|
13
|
+
# The WorldCat class methods use WorldCat webservices.
|
14
|
+
# Options are given as a hash and Symbol keys may be:
|
15
|
+
# * the same names as the GET parameters,
|
16
|
+
# * Ruby naming convention (i.e. underscore),
|
17
|
+
# * or aliases if available.
|
18
|
+
#
|
19
|
+
# Note: aliases have priority.
|
20
|
+
#
|
21
|
+
# For a complete list of parameters, see documentation here:
|
22
|
+
# http://oclc.org/developer/documentation/worldcat-search-api/parameters
|
23
|
+
|
24
|
+
# The WorldCat class, used to interact with the WorldCat search webservices.
|
25
|
+
class WorldCat

  # Error raised when a WorldCat webservice answers with a diagnostic,
  # or when the HTTP layer reports an authentication failure.
  class WorldCatError < StandardError
    # Extra information given by the code that raised the error, or nil.
    attr_reader :details

    def initialize(details = nil)
      super()
      @details = details
    end
  end

  # Root URL of the WorldCat catalog webservices.
  BASE_URL = "http://www.worldcat.org/webservices/catalog/".freeze

  # Alias tables (alias => real option name), one per public method.
  OPEN_SEARCH_ALIASES = {
    :query => :q,
    :max => :count,
    :citation_format => :cformat
  }.freeze

  SRU_ALIASES = {
    :q => :query,
    :count => :maximum_records,
    :max => :maximum_records,
    :start => :start_record,
    :citation_format => :cformat
  }.freeze

  LIBRARY_LOCATION_ALIASES = {
    :count => :maximum_libraries,
    :max => :maximum_libraries,
    :start => :start_library,
    :latitude => :lat,
    :longitude => :lon
  }.freeze

  CITATION_ALIASES = { :citation_format => :cformat }.freeze

  # Text values accepted for :libtype and the numeric code sent instead.
  LIBTYPE_CODES = {
    "academic" => 1,
    "public" => 2,
    "government" => 3,
    "other" => 4
  }.freeze

  # Record identifier options and the URL path prefix each one uses.
  IDENTIFIER_PREFIXES = {
    :oclc => "",
    :isbn => "isbn/",
    :issn => "issn/",
    :sn => "sn/"
  }.freeze

  # Extra tags SimpleRSS must know about to expose OpenSearch metadata.
  OPEN_SEARCH_FEED_TAGS = [
    :"opensearch:totalResults",
    :"opensearch:startIndex",
    :"opensearch:itemsPerPage"
  ].freeze

  OPEN_SEARCH_ITEM_TAGS = [
    :"dc:identifier",
    :"oclcterms:recordIdentifier"
  ].freeze

  # The WorldCat webservices API key.
  attr_writer :api_key

  # The raw response from WorldCat.
  attr_reader :raw_response

  # The raw url used to fetch the response.
  attr_reader :raw_url

  # The constructor.
  # The API key can be given here or later.
  def initialize(api_key = nil)
    @api_key = api_key
    @raw_url = nil
    @raw_response = nil
  end

  # OpenSearch method.
  #
  # Aliases:
  # * :query is an alias for :q
  # * :max is an alias for :count
  # * :citation_format is an alias for :cformat
  #
  # The given options hash is not modified.
  #
  # This method returns a SimpleRSS object. You can see the usage on:
  # http://simple-rss.rubyforge.org/
  def open_search(options)
    options = resolve_aliases(options, OPEN_SEARCH_ALIASES)

    fetch("search/opensearch", options)
    #TODO diagnostic

    register_rss_tags
    SimpleRSS.parse @raw_response
    #TODO rescue SimpleRSS Error? (i.e. response too small)
  end

  # SRU search method.
  #
  # Aliases:
  # * :q is an alias for :query
  # * :format is an alias for :record_schema
  #   and its value can match "marc" or "dublin", or can be the exact value. e.g.
  #     :format => :marcxml
  # * :citation_format is an alias for :cformat
  # * :start is an alias for :start_record
  # * :count and :max are aliases for :maximum_records
  #
  # The given options hash is not modified.
  #
  # This method returns an array of MARC::Record objects for marc format
  # (you can see the usage on http://marc.rubyforge.org),
  # or a REXML::Document for any other record schema.
  # Raises WorldCatError if the response carries a diagnostic.
  def sru_search(options)
    #TODO add other control_tags?
    options = resolve_aliases(options, SRU_ALIASES) do |opts, key|
      opts[:record_schema] = sru_schema(opts.delete(key)) if key == :format
    end

    fetch("search/sru", options)
    document = xml_diagnostic

    # The schema may also have been given under its literal GET name.
    schema = options[:record_schema] || options[:recordSchema]
    if schema.nil? || schema.to_s == "info:srw/schema/1/marcxml"
      marc_to_array
    else
      document
    end
  end

  # Library locations method.
  #
  # Aliases:
  # * :start is an alias for :start_library
  # * :count and :max are aliases for :maximum_libraries
  # * :latitude is an alias for :lat
  # * :longitude is an alias for :lon
  # * libtype can be given as text value as well. e.g.:
  #     :libtype => :academic
  # * record identifier should be given as type => id. e.g.:
  #     :isbn => "014330223X"
  #
  # The given options hash is not modified.
  #
  # This method returns a REXML::Document for XML format (the default,
  # used when :format is absent or "xml"), or the parsed JSON otherwise.
  # Raises WorldCatError if the response carries a diagnostic.
  def library_locations(options)
    url_comp = "content/libraries/"

    options = resolve_aliases(options, LIBRARY_LOCATION_ALIASES) do |opts, key|
      case key
      when :format
        # XML is requested by sending no format parameter at all.
        opts.delete(key) if opts[key].to_s == "xml"
      when :libtype
        code = LIBTYPE_CODES[opts[key].to_s]
        opts[key] = code if code
      else
        url_comp += identifier_path(opts, key, [:oclc, :isbn, :issn, :sn])
      end
    end

    fetch(url_comp, options)
    options.key?(:format) ? json_diagnostic : xml_diagnostic
  end

  # Single Bibliographic Record.
  #
  # Aliases:
  # * record identifier should be given as type => id. e.g.:
  #     :isbn => "014330223X"
  #
  # The given options hash is not modified.
  #
  # This method returns a MARC::Record (nil if the response holds no record).
  # Raises WorldCatError if the response carries a diagnostic.
  def single_record(options)
    url_comp = "content/"

    options = resolve_aliases(options, {}) do |opts, key|
      url_comp += identifier_path(opts, key, [:oclc, :isbn, :issn])
    end

    fetch(url_comp, options)
    xml_diagnostic
    marc_to_array.first
  end

  # Library Catalog URL for a Record.
  #
  # Aliases:
  # * record identifier should be given as type => id. e.g.:
  #     :isbn => "014330223X"
  #
  # The given options hash is not modified.
  #
  # This method returns a REXML::Document.
  # Raises WorldCatError if the response carries a diagnostic.
  def library_catalog_url(options)
    url_comp = "content/libraries/"

    options = resolve_aliases(options, {}) do |opts, key|
      url_comp += identifier_path(opts, key, [:oclc, :isbn])
    end

    #TODO get diagnostic for "no holdings found" instead of raising it.
    fetch(url_comp, options)
    xml_diagnostic
  end

  # Formatted Citations.
  #
  # Aliases:
  # * :citation_format is an alias for :cformat
  # * record identifier should be given as:
  #     :oclc => [oclc_number]
  #
  # The given options hash is not modified.
  #
  # This method returns the raw response, an HTML formatted String.
  # Raises WorldCatError if the response carries a diagnostic.
  def formatted_citations(options)
    url_comp = "content/citations/"

    options = resolve_aliases(options, CITATION_ALIASES) do |opts, key|
      url_comp += identifier_path(opts, key, [:oclc])
    end

    fetch(url_comp, options)
    if options.key?(:cformat)
      xml_diagnostic
    else
      str_diagnostic
    end

    @raw_response
  end

  private

  # Returns a copy of +options+ in which every key found in +aliases+ is
  # renamed to its target. Keys are visited in their original order so the
  # resulting parameter order is the same as if they were renamed in place.
  # Keys that are not aliases are yielded (with the working copy) so the
  # caller can apply method-specific handling.
  def resolve_aliases(options, aliases)
    resolved = options.dup
    resolved.keys.each do |key|
      if aliases.key?(key)
        resolved[aliases[key]] = resolved.delete(key)
      elsif block_given?
        yield resolved, key
      end
    end
    resolved
  end

  # Removes the identifier +key+ from +opts+ and returns its URL path
  # fragment, or "" (leaving +opts+ untouched) if +key+ is not one of the
  # +accepted+ identifier types.
  def identifier_path(opts, key, accepted)
    return "" unless accepted.include?(key)
    IDENTIFIER_PREFIXES[key] + opts.delete(key).to_s
  end

  # Expands the :format shortcut of sru_search to a record schema URI.
  # Values matching neither "marc" nor "dublin" are passed through as text.
  def sru_schema(format)
    schema = format.to_s
    schema = "info:srw/schema/1/marcxml" if schema =~ /marc/
    schema = "info:srw/schema/1/dc" if schema =~ /dublin/
    schema
  end

  # Adds the OpenSearch tags to SimpleRSS once; its tag lists are global,
  # so appending unconditionally would make them grow on every search.
  def register_rss_tags
    OPEN_SEARCH_FEED_TAGS.each do |tag|
      SimpleRSS.feed_tags << tag unless SimpleRSS.feed_tags.include?(tag)
    end
    OPEN_SEARCH_ITEM_TAGS.each do |tag|
      SimpleRSS.item_tags << tag unless SimpleRSS.item_tags.include?(tag)
    end
  end

  # Helper method to convert a MARC::XMLReader in an array of records.
  # That's easier to use and better because of the bug
  # that makes the REXML reader empty after the first #each call.
  def marc_to_array
    reader = MARC::XMLReader.new(StringIO.new(@raw_response))
    records = []
    reader.each { |record| records << record }
    records
  end

  # Method to fetch the raw response from WorldCat webservices.
  # Stores the requested URL in @raw_url and the body in @raw_response.
  # Raises WorldCatError on an authentication failure; any other
  # OpenURI::HTTPError is re-raised untouched.
  def fetch(url_comp, options)
    @raw_url = build_url(url_comp, options)

    # URI#open is provided by open-uri; unlike Kernel#open on a String it
    # is still available on Ruby 3.
    URI.parse(@raw_url).open do |raw|
      @raw_response = raw.read
    end
  rescue OpenURI::HTTPError => e
    if e.message =~ /status=UNAUTHENTICATED/
      raise WorldCatError.new(e.message), "Authentication failure"
    end
    raise
  end

  # Builds the request URL. The API key attribute is used unless a :wskey
  # option is provided. Path segments and each parameter name and value are
  # percent-encoded separately, so characters such as "&" or "#" inside a
  # value cannot break the query string.
  def build_url(url_comp, options)
    params = { :wskey => @api_key }.merge(options)

    # -1 keeps a trailing empty segment, i.e. a trailing "/".
    path = url_comp.split('/', -1).map { |segment| escape(segment) }.join('/')
    query = params.map { |k, v| "#{escape(camelize(k))}=#{escape(parse_value(v))}" }.join("&")

    "#{BASE_URL}#{path}?#{query}"
  end

  # Percent-encodes one URL component; spaces become %20.
  def escape(text)
    # A literal "+" is encoded as %2B, so every remaining "+" is a space.
    URI.encode_www_form_component(text.to_s).gsub('+', '%20')
  end

  # Check for diagnostics of plain text responses from WorldCat.
  # Raises WorldCatError with the diagnostic text as message.
  def str_diagnostic
    # May be something like: "info:srw/diagnostic/1/65Record does not exist"
    found = %r{(info:srw/diagnostic/\d+/\d+)(.*)}.match(@raw_response.to_s)
    raise WorldCatError.new, found[2] if found
  end

  # Check for diagnostics of XML responses from WorldCat.
  # Raises WorldCatError if one is found, otherwise returns the parsed
  # REXML::Document.
  def xml_diagnostic
    xml = REXML::Document.new @raw_response
    root = xml.root
    return xml if root.nil? # empty or non-XML response: nothing to inspect

    diagnostics = xml.elements['diagnostics'] || root.elements['diagnostics']
    return xml if diagnostics.nil?

    diagnostic = diagnostics.elements.first
    return xml if diagnostic.nil?

    details = diagnostic.elements["details"]
    details = details.text unless details.nil?
    message = diagnostic.elements["message"]
    message = message.text unless message.nil?

    raise WorldCatError.new(details), (message || details)
  end

  # Check for diagnostics of JSON responses from WorldCat.
  # Raises WorldCatError if one is found, otherwise returns the parsed JSON.
  def json_diagnostic
    json = JSON.parse(@raw_response)
    return json unless json.is_a?(Hash) && json.key?("diagnostic")

    diagnostic = json["diagnostic"]
    diagnostic = diagnostic.first if diagnostic.is_a?(Array)
    return json if diagnostic.nil?

    raise WorldCatError.new(diagnostic["details"]), diagnostic["message"]
  end

  # Helper function to camelize a string or symbol
  # to match WorldCat services parameters.
  def camelize(key)
    key.to_s.gsub(/_(\w)/) { Regexp.last_match(1).upcase }
  end

  # Helper function to parse a array, number or string
  # to match WorldCat services parameters.
  def parse_value(value)
    value.is_a?(Array) ? value.join(',') : value.to_s
  end
end
|
metadata
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: worldcat
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Vivien Didelot
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-09-17 00:00:00 +10:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: simple-rss
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 1
|
30
|
+
- 2
|
31
|
+
- 3
|
32
|
+
version: 1.2.3
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: marc
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
segments:
|
44
|
+
- 0
|
45
|
+
- 3
|
46
|
+
- 3
|
47
|
+
version: 0.3.3
|
48
|
+
type: :runtime
|
49
|
+
version_requirements: *id002
|
50
|
+
- !ruby/object:Gem::Dependency
|
51
|
+
name: json
|
52
|
+
prerelease: false
|
53
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
segments:
|
59
|
+
- 1
|
60
|
+
- 4
|
61
|
+
- 6
|
62
|
+
version: 1.4.6
|
63
|
+
type: :runtime
|
64
|
+
version_requirements: *id003
|
65
|
+
description:
|
66
|
+
email: vivien.didelot@gmail.com
|
67
|
+
executables: []
|
68
|
+
|
69
|
+
extensions: []
|
70
|
+
|
71
|
+
extra_rdoc_files: []
|
72
|
+
|
73
|
+
files:
|
74
|
+
- lib/worldcat.rb
|
75
|
+
- README.rdoc
|
76
|
+
- CHANGELOG.rdoc
|
77
|
+
has_rdoc: true
|
78
|
+
homepage:
|
79
|
+
licenses: []
|
80
|
+
|
81
|
+
post_install_message:
|
82
|
+
rdoc_options: []
|
83
|
+
|
84
|
+
require_paths:
|
85
|
+
- lib
|
86
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
88
|
+
requirements:
|
89
|
+
- - ">="
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
segments:
|
92
|
+
- 0
|
93
|
+
version: "0"
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
segments:
|
100
|
+
- 0
|
101
|
+
version: "0"
|
102
|
+
requirements: []
|
103
|
+
|
104
|
+
rubyforge_project:
|
105
|
+
rubygems_version: 1.3.7
|
106
|
+
signing_key:
|
107
|
+
specification_version: 3
|
108
|
+
summary: A Ruby API for the WorldCat Search webservices
|
109
|
+
test_files: []
|
110
|
+
|