worldcat 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +4 -0
- data/README.rdoc +74 -0
- data/lib/worldcat.rb +331 -0
- metadata +110 -0
data/CHANGELOG.rdoc
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
= WorldCat Search API
|
2
|
+
|
3
|
+
A WorldCat API for Ruby to interact with WorldCat search webservices.
|
4
|
+
http://www.worldcat.org
|
5
|
+
|
6
|
+
== Usage
|
7
|
+
|
8
|
+
require 'worldcat'
|
9
|
+
|
10
|
+
client = WorldCat.new '[api_key]'
|
11
|
+
|
12
|
+
Get Atom or RSS response from an OpenSearch
|
13
|
+
|
14
|
+
atom = client.open_search :query => "Civil War"
|
15
|
+
puts atom.feed.title
|
16
|
+
puts atom.entries.first.author
|
17
|
+
|
18
|
+
Get MARC XML or Dublin Core from a SRU CQL query
|
19
|
+
|
20
|
+
cql = 'srw.kw="civil war" and (srw.su="antietam" or srw.su="sharpsburg")'
|
21
|
+
|
22
|
+
records = client.sru_search :query => cql, :format => "marcxml"
|
23
|
+
for record in records
|
24
|
+
# print out field 245 subfield a
|
25
|
+
puts record['245']['a']
|
26
|
+
end
|
27
|
+
|
28
|
+
If you'd like to use another implementation, the raw response is available:
|
29
|
+
|
30
|
+
client.raw_response
|
31
|
+
|
32
|
+
A faster way?
|
33
|
+
|
34
|
+
rss = WorldCat.new.open_search :q => "Globalization", :format => "rss", :wskey => '[api_key]'
|
35
|
+
|
36
|
+
For more information, please have a look at the documentation or the test cases.
|
37
|
+
|
38
|
+
== Installation
|
39
|
+
|
40
|
+
gem install worldcat
|
41
|
+
|
42
|
+
== Why?
|
43
|
+
|
44
|
+
The 'wcapi' gem falls short on several points, which justifies another implementation:
|
45
|
+
|
46
|
+
* It is better to use an existing Ruby RSS implementation (SimpleRSS) to handle Atom or RSS responses.
|
47
|
+
* It is better to use the Ruby MARC implementation to handle MARC XML or Dublin Core responses from an SRU CQL search or any other search.
|
48
|
+
* Unit testing is great.
|
49
|
+
|
50
|
+
== What this API can do
|
51
|
+
|
52
|
+
* Send searches in OpenSearch or SRU CQL syntax.
|
53
|
+
* Receive OpenSearch responses in RSS or Atom format (both are a SimpleRSS object).
|
54
|
+
* Receive SRU responses in an array of MARC::Record or Dublin Core (REXML::Document).
|
55
|
+
* Receive a MARC::Record for a single OCLC record.
|
56
|
+
* Receive a REXML::Document for geographically-sorted library holdings information.
|
57
|
+
* Receive an HTML-formatted String for standard bibliographic citation formats (APA, Chicago, Harvard, MLA, and Turabian).
|
58
|
+
|
59
|
+
== To do
|
60
|
+
|
61
|
+
* Use SRU gem to get response from sru_search.
|
62
|
+
|
63
|
+
== Contribution
|
64
|
+
|
65
|
+
Feel free to fork and send me a pull request for changes, fixes or simply a message for any suggestion.
|
66
|
+
|
67
|
+
== See
|
68
|
+
|
69
|
+
* {WorldCat webservices}[http://www.worldcat.org/affiliate/tools?atype=wcapi]
|
70
|
+
* {Ruby MARC documentation}[http://marc.rubyforge.org/]
|
71
|
+
* {Ruby Simple RSS documentation}[http://simple-rss.rubyforge.org/]
|
72
|
+
|
73
|
+
Vivien Didelot <vivien.didelot@gmail.com>
|
74
|
+
http://github.com/v0n/worldcat
|
data/lib/worldcat.rb
ADDED
@@ -0,0 +1,331 @@
|
|
1
|
+
# Simple WorldCat Search Ruby API
|
2
|
+
# http://oclc.org/developer/services/WCAPI
|
3
|
+
#
|
4
|
+
# Author:: Vivien Didelot 'v0n' <vivien.didelot@gmail.com>
|
5
|
+
|
6
|
+
require 'rubygems' # needed by simple-rss
|
7
|
+
require 'open-uri' # used to fetch responses
|
8
|
+
require 'simple-rss' # used for Atom and RSS format
|
9
|
+
require 'marc' # used for MARC records
|
10
|
+
require 'rexml/document' # used for many XML purposes
|
11
|
+
require 'json' # used for JSON format
|
12
|
+
|
13
|
+
# The WorldCat class methods use WorldCat webservices.
|
14
|
+
# Options are given as a hash and Symbol keys may be:
|
15
|
+
# * the same name as the GET parameters,
|
16
|
+
# * Ruby naming convention (i.e. underscore),
|
17
|
+
# * or aliases if available.
|
18
|
+
#
|
19
|
+
# Note: aliases have priority.
|
20
|
+
#
|
21
|
+
# For a complete list of parameters, see documentation here:
|
22
|
+
# http://oclc.org/developer/documentation/worldcat-search-api/parameters
|
23
|
+
|
24
|
+
# The WorldCat class, used to interact with the WorldCat search webservices.
|
25
|
+
class WorldCat
|
26
|
+
|
27
|
+
# A specific WorldCat error class.
#
# The human-readable reason is the exception message (the second argument
# given to +raise+); +details+ holds whatever extra information was
# supplied when the error object was built.
class WorldCatError < StandardError
  # Extra information given to the constructor, or nil when none was given.
  attr_reader :details

  # details:: optional extra information about the failure.
  def initialize(details = nil)
    # No message is set here: callers provide it through +raise+.
    super()
    @details = details
  end
end
|
33
|
+
|
34
|
+
# The WorldCat webservices API key (write-only). It is sent as the wskey
# parameter of a request unless the options given to a call provide :wskey.
|
35
|
+
attr_writer :api_key
|
36
|
+
|
37
|
+
# The raw response body read from WorldCat by the last request
# (nil until a request has been made).
|
38
|
+
attr_reader :raw_response
|
39
|
+
|
40
|
+
# The raw (escaped) url used to fetch the last response
# (nil until a request has been made).
|
41
|
+
attr_reader :raw_url
|
42
|
+
|
43
|
+
# Builds a new client.
# The API key is optional here: it can also be set later through the
# api_key writer. No request has been made yet, so the raw url and the
# raw response both start out as nil.
def initialize(api_key = nil)
  @raw_url = @raw_response = nil
  @api_key = api_key
end
|
50
|
+
|
51
|
+
# OpenSearch method.
#
# Aliases:
# * :query is an alias for :q
# * :max is an alias for :count
# * :citation_format is an alias for :cformat
#
# The given options hash is left untouched (aliases are resolved on a copy).
#
# This method returns the result of SimpleRSS.parse on the raw response.
# You can see the usage on:
# http://simple-rss.rubyforge.org/
def open_search(options)
  aliases = { :query => :q, :max => :count, :citation_format => :cformat }

  # Resolve aliases on a copy so the caller's hash is not modified.
  options = options.dup
  options.keys.each do |key|
    name = aliases[key]
    options[name] = options.delete(key) if name
  end

  fetch("search/opensearch", options)
  #TODO diagnostic

  # Add tags. The lists are class-level on SimpleRSS, so each tag is only
  # registered once instead of being appended again on every call.
  [:"opensearch:totalResults", :"opensearch:startIndex", :"opensearch:itemsPerPage"].each do |tag|
    SimpleRSS.feed_tags << tag unless SimpleRSS.feed_tags.include?(tag)
  end
  [:"dc:identifier", :"oclcterms:recordIdentifier"].each do |tag|
    SimpleRSS.item_tags << tag unless SimpleRSS.item_tags.include?(tag)
  end

  SimpleRSS.parse @raw_response
  #TODO rescue SimpleRSS Error? (i.e. response too small)
end
|
83
|
+
|
84
|
+
# SRU search method.
#
# aliases:
# * :q is an alias for :query
# * :format is an alias for :record_schema
#   and its value can match "marc" or "dublin", or can be the exact value. e.g.
#     :format => :marcxml
# * :citation_format is an alias for :cformat
# * :start is an alias for :start_record
# * :count and :max are aliases for :maximum_records
#
# The given options hash is left untouched (aliases are resolved on a copy).
#
# this method returns an array of MARC::Record objects when no record schema
# is requested or when the marcxml schema is requested
# (you can see the usage on http://marc.rubyforge.org),
# or a REXML::Document for any other record schema (e.g. Dublin Core).
def sru_search(options)
  #TODO add other control_tags?

  # Resolve aliases on a copy so the caller's hash is not modified.
  options = options.dup
  options.keys.each do |key|
    case key
    when :q               then options[:query] = options.delete(key)
    when :count, :max     then options[:maximum_records] = options.delete(key)
    when :start           then options[:start_record] = options.delete(key)
    when :citation_format then options[:cformat] = options.delete(key)
    when :format
      schema = options.delete(key).to_s
      # "marc" is tested first, so it wins if a value matches both patterns.
      options[:record_schema] =
        if schema =~ /marc/ then "info:srw/schema/1/marcxml"
        elsif schema =~ /dublin/ then "info:srw/schema/1/dc"
        else schema
        end
    end
  end

  fetch("search/sru", options)
  xml_diagnostic

  schema = options[:record_schema]
  if schema.nil? || schema == "info:srw/schema/1/marcxml"
    marc_to_array
  else
    REXML::Document.new @raw_response
  end
end
|
126
|
+
|
127
|
+
# Library locations method.
#
# aliases:
# * :start is an alias for :start_library
# * :count and :max are aliases for :maximum_libraries
# * :latitude is an alias for :lat
# * :longitude is an alias for :lon
# * libtype can be given as text value as well. e.g.:
#     :libtype => :academic
# * record identifier should be given as type => id. e.g.:
#     :isbn => "014330223X"
#
# The given options hash is left untouched (aliases are resolved on a copy).
#
# this method returns a REXML::Document when no :format is given or when
# :format is "xml"; with any other :format the response is parsed as JSON
# and the parsed value is returned.
def library_locations(options)
  libtypes = { "academic" => 1, "public" => 2, "government" => 3, "other" => 4 }
  url_comp = "content/libraries/"

  # Resolve aliases on a copy so the caller's hash is not modified.
  options = options.dup
  options.keys.each do |key|
    case key
    when :count, :max then options[:maximum_libraries] = options.delete(key)
    when :start       then options[:start_library] = options.delete(key)
    when :latitude    then options[:lat] = options.delete(key)
    when :longitude   then options[:lon] = options.delete(key)
    # XML is requested by sending no format parameter at all.
    when :format      then options.delete(key) if options[key].to_s == "xml"
    when :libtype
      # Known text values become their numeric code; anything else is kept.
      code = libtypes[options[key].to_s]
      options[key] = code if code
    when :oclc             then url_comp += options.delete(key).to_s
    when :isbn, :issn, :sn then url_comp += "#{key}/#{options.delete(key)}"
    end
  end

  fetch(url_comp, options)
  if options.has_key? :format
    json_diagnostic
    JSON.parse(@raw_response)
  else
    xml_diagnostic
    REXML::Document.new(@raw_response)
  end
end
|
177
|
+
|
178
|
+
# Single Bibliographic Record.
#
# aliases:
# * record identifier should be given as type => id
#   (:oclc, :isbn or :issn). e.g.:
#     :isbn => "014330223X"
#
# The given options hash is left untouched (identifiers are extracted from
# a copy).
#
# this method returns the first record read from the response
# (a MARC::Record), or nil when the response holds no record.
def single_record(options)
  url_comp = "content/"

  # Move the record identifier from the options into the url, working on a
  # copy so the caller's hash is not modified.
  options = options.dup
  options.keys.each do |key|
    case key
    when :oclc        then url_comp += options.delete(key).to_s
    when :isbn, :issn then url_comp += "#{key}/#{options.delete(key)}"
    end
  end

  fetch(url_comp, options)
  xml_diagnostic
  marc_to_array.first
end
|
201
|
+
|
202
|
+
# Library Catalog URL for a Record.
#
# aliases:
# * record identifier should be given as type => id
#   (:oclc or :isbn). e.g.:
#     :isbn => "014330223X"
#
# The given options hash is left untouched (identifiers are extracted from
# a copy).
#
# this method returns a REXML::Document built from the raw response.
def library_catalog_url(options)
  url_comp = "content/libraries/"

  # Move the record identifier from the options into the url, working on a
  # copy so the caller's hash is not modified.
  options = options.dup
  options.keys.each do |key|
    case key
    when :oclc then url_comp += options.delete(key).to_s
    when :isbn then url_comp += "isbn/#{options.delete(key)}"
    end
  end

  #TODO get diagnostic for "no holdings found" instead of raising it.
  fetch(url_comp, options)
  xml_diagnostic
  REXML::Document.new(@raw_response)
end
|
225
|
+
|
226
|
+
# Formatted Citations.
#
# aliases:
# * :citation_format is an alias for :cformat
# * record identifier should be given as:
#     :oclc => [oclc_number]
#
# The given options hash is left untouched (aliases are resolved on a copy).
#
# this method returns the raw response (a HTML formatted String).
def formatted_citations(options)
  url_comp = "content/citations/"

  # Resolve aliases on a copy so the caller's hash is not modified.
  options = options.dup
  options.keys.each do |key|
    case key
    when :citation_format then options[:cformat] = options.delete(key)
    when :oclc            then url_comp += options.delete(key).to_s
    end
  end

  fetch(url_comp, options)
  # The response is checked as XML when a citation format was requested,
  # and as plain text otherwise.
  if options.has_key? :cformat
    xml_diagnostic
  else
    str_diagnostic
  end

  @raw_response
end
|
254
|
+
|
255
|
+
private
|
256
|
+
|
257
|
+
# Reads every record of the raw response through a MARC::XMLReader and
# collects them into a plain array.
# An array is easier to use, and it avoids the bug that leaves the REXML
# reader empty after the first #each call.
def marc_to_array
  collected = []
  MARC::XMLReader.new(StringIO.new(@raw_response)).each do |record|
    collected.push(record)
  end
  collected
end
|
267
|
+
|
268
|
+
# Method to fetch the raw response from WorldCat webservices.
#
# url_comp:: path of the service, appended to the catalog base url.
# options::  request parameters; keys are camelized and values serialized
#            before being joined into the query string.
#
# The escaped url is stored in @raw_url and the body read from it in
# @raw_response. An OpenURI::HTTPError whose message contains
# "status=UNAUTHENTICATED" is raised again as a WorldCatError
# ("Authentication failure"); any other HTTP error is re-raised unchanged.
def fetch(url_comp, options)
  # Use the API key attribute or the one provided.
  options = { :wskey => @api_key }.merge(options)

  query = options.map { |k, v| "#{camelize(k)}=#{parse_value(v)}" }.join("&")
  url = "http://www.worldcat.org/webservices/catalog/#{url_comp}?#{query}"

  # URI.escape was removed in Ruby 3.0; the RFC 2396 parser applies the same
  # escaping. Fall back to URI.escape where that parser class is missing.
  # NOTE(review): the url is escaped as a whole, so a literal "&" inside a
  # value is not escaped and will be read as a parameter separator.
  escaper = defined?(URI::RFC2396_Parser) ? URI::RFC2396_Parser.new : URI
  @raw_url = escaper.escape(url)

  begin
    # Kernel#open no longer opens urls on Ruby 3.0+; open-uri's URI#open does.
    URI.parse(@raw_url).open { |raw| @raw_response = raw.read }
  rescue OpenURI::HTTPError => e
    raise WorldCatError.new(e.message), "Authentication failure" if e.message =~ /status=UNAUTHENTICATED/
    raise
  end
end
|
288
|
+
|
289
|
+
# Check for diagnostics of plain-text responses from WorldCat.
# Such a response may be something like:
#   "info:srw/diagnostic/1/65Record does not exist"
# When the raw response contains a diagnostic identifier, a WorldCatError
# is raised with the text that follows it as message.
def str_diagnostic
  found = /(info:srw\/diagnostic\/\d+\/\d+)(.*)/.match(@raw_response)
  return if found.nil?

  raise WorldCatError.new, found[2]
end
|
295
|
+
|
296
|
+
# Check for diagnostics of XML responses from WorldCat.
#
# Looks for a "diagnostics" element, either as the root of the raw response
# or directly under it. When its first child element is found, a
# WorldCatError is raised with the text of that child's "message" element
# as message and the text of its "details" element (if any) as details.
# Nothing happens when the response has no root element, no diagnostics
# element, or an empty one.
def xml_diagnostic
  xml = REXML::Document.new @raw_response
  root = xml.root
  # An empty response has no root element: there is nothing to inspect.
  return if root.nil?

  diagnostics = xml.elements['diagnostics'] || root.elements['diagnostics']
  return if diagnostics.nil?

  diagnostic = diagnostics.elements.first
  return if diagnostic.nil?

  details = diagnostic.elements["details"]
  details = details.text unless details.nil?
  # A diagnostic without a message element still raises, with no message.
  message = diagnostic.elements["message"]
  message = message.text unless message.nil?

  raise WorldCatError.new(details), message
end
|
309
|
+
|
310
|
+
# Check for diagnostics of JSON responses from WorldCat.
# When the parsed raw response has a "diagnostic" key, a WorldCatError is
# raised from its first entry: the entry's "message" becomes the error
# message and its "details" the error details.
def json_diagnostic
  parsed = JSON.parse(@raw_response)
  return unless parsed.has_key? "diagnostic"

  error_details = parsed["diagnostic"].first["details"]
  error_message = parsed["diagnostic"].first["message"]
  raise WorldCatError.new(error_details), error_message
end
|
319
|
+
|
320
|
+
# Converts an underscored string or symbol into the lower camel case form
# used by the WorldCat services parameters: every underscore is dropped
# and the character following it is capitalized.
# e.g. :start_record becomes "startRecord".
def camelize(key)
  underscored = key.to_s
  underscored.gsub(/_(\w)/) { Regexp.last_match(1).capitalize }
end
|
325
|
+
|
326
|
+
# Serializes a parameter value for the WorldCat services:
# an array becomes its elements joined by commas,
# anything else becomes its string representation.
def parse_value(value)
  case value
  when Array then value.join(',')
  else value.to_s
  end
end
|
331
|
+
end
|
metadata
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: worldcat
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Vivien Didelot
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-09-17 00:00:00 +10:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: simple-rss
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 1
|
30
|
+
- 2
|
31
|
+
- 3
|
32
|
+
version: 1.2.3
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: marc
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
segments:
|
44
|
+
- 0
|
45
|
+
- 3
|
46
|
+
- 3
|
47
|
+
version: 0.3.3
|
48
|
+
type: :runtime
|
49
|
+
version_requirements: *id002
|
50
|
+
- !ruby/object:Gem::Dependency
|
51
|
+
name: json
|
52
|
+
prerelease: false
|
53
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
segments:
|
59
|
+
- 1
|
60
|
+
- 4
|
61
|
+
- 6
|
62
|
+
version: 1.4.6
|
63
|
+
type: :runtime
|
64
|
+
version_requirements: *id003
|
65
|
+
description:
|
66
|
+
email: vivien.didelot@gmail.com
|
67
|
+
executables: []
|
68
|
+
|
69
|
+
extensions: []
|
70
|
+
|
71
|
+
extra_rdoc_files: []
|
72
|
+
|
73
|
+
files:
|
74
|
+
- lib/worldcat.rb
|
75
|
+
- README.rdoc
|
76
|
+
- CHANGELOG.rdoc
|
77
|
+
has_rdoc: true
|
78
|
+
homepage:
|
79
|
+
licenses: []
|
80
|
+
|
81
|
+
post_install_message:
|
82
|
+
rdoc_options: []
|
83
|
+
|
84
|
+
require_paths:
|
85
|
+
- lib
|
86
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
88
|
+
requirements:
|
89
|
+
- - ">="
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
segments:
|
92
|
+
- 0
|
93
|
+
version: "0"
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
segments:
|
100
|
+
- 0
|
101
|
+
version: "0"
|
102
|
+
requirements: []
|
103
|
+
|
104
|
+
rubyforge_project:
|
105
|
+
rubygems_version: 1.3.7
|
106
|
+
signing_key:
|
107
|
+
specification_version: 3
|
108
|
+
summary: A Ruby API for the WorldCat Search webservices
|
109
|
+
test_files: []
|
110
|
+
|