contentdm 0.1.20

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,36 @@
1
+ = Introduction
2
+ The ContentDm module for Ruby provides access to structured metadata in CONTENTdm collections
3
+ via CONTENTdm's built-in OAI-PMH provider interface. The module turns Qualified Dublin Core
4
+ metadata into a convenient hash structure. With proper authentication, it can also scrape
5
+ collection-level field information from the CONTENTdm administrative interface and create
6
+ formatted HTML output from retrieved records.
7
+
8
+ = Examples
9
+
10
+ # Create a Harvester using the location of a CONTENTdm repository
11
+ harvester = ContentDm::Harvester.new('http://mycontentdm.example.com/')
12
+
13
+ # Retrieve the list of collections
14
+ collections = harvester.collections
15
+ => {"collection1" => "My First Collection", "collection2" => "My Second Collection"}
16
+
17
+ # Retrieve a single record from collection1
18
+ record = harvester.get_record("collection1",16)
19
+
20
+ # Retrieve all records from collection2
21
+ records = harvester.get_records("collection2")
22
+
23
+ Calling <tt>record#to_xml()</tt> or <tt>record#to_html()</tt> at this point will return
24
+ generic and arbitrarily-ordered markup, because the ContentDm::Mapper for
25
+ <tt>collection1</tt> hasn't been initialized.
26
+
27
+ # Initialize the Mapper for a single collection
28
+ ContentDm::Mapper.init_map("http://mycontentdm.example.com/", "collection1", :user => "my_contentdm_admin", :pass => "p@$$w0rd")
29
+ => #<ContentDm::Mapper ... >
30
+
31
+ # Initialize Mappers for all collections on the server
32
+ ContentDm::Mapper.init_all("http://mycontentdm.example.com/", :user => "my_contentdm_admin", :pass => "p@$$w0rd")
33
+ => ["collection1", "collection2"]
34
+
35
+ Now <tt>record#to_xml()</tt> and <tt>record#to_html()</tt> will return markup consistent with
36
+ the settings defined for the collection within CONTENTdm.
@@ -0,0 +1,8 @@
1
+ require 'contentdm/uri'
2
+ require 'contentdm/harvester'
3
+ require 'contentdm/mapper'
4
+ require 'contentdm/record'
5
+
6
# Top-level namespace for the contentdm gem.
module ContentDm
  # Version string of the gem. Frozen so that code holding a reference
  # to the constant cannot modify it in place.
  VERSION = '0.1.20'.freeze
end
@@ -0,0 +1,118 @@
1
+ require 'rubygems'
2
+ require 'nokogiri'
3
+ require 'open-uri'
4
+ require 'uri'
5
+
6
+ module ContentDm #:nodoc:#
7
+
8
# Retrieves Qualified Dublin Core records from the <tt>cgi-bin/oai.exe</tt>
# endpoint found relative to a CONTENTdm base URI.
class Harvester

  extend URI

  # Default value assigned to #page_size by the constructor.
  OAI_PAGE_SIZE = 1000

  attr_reader :base_uri
  attr_accessor :page_size

  # The constructor must be passed the URL of a CONTENTdm installation. This will usually
  # be the root of the server on which CONTENTdm is installed.
  def initialize(base_uri)
    @base_uri = self.class.normalize(base_uri)
    @page_size = OAI_PAGE_SIZE
  end

  # Convenience method which returns a single Record when passed a URL in
  # one of two forms:
  # * A CONTENTdm URL containing CISOROOT/CISOPTR values for the desired item
  # * A CONTENTdm canonical URL in the form
  #     http://path/to/contentdm/u?[collection],[ptr]
  #   where <tt>[collection]</tt> is the CONTENTdm collection name, and <tt>[ptr]</tt> is the sequential
  #   item ID within the collection.
  #
  # Raises ArgumentError when the URL matches neither form.
  def self.get_record(url)
    params = record_params(url)
    harvester = Harvester.new(params[:base_url])
    harvester.get_record(params[:collection], params[:id])
  end

  # Splits an item URL into a hash with the keys <tt>:base_url</tt>,
  # <tt>:collection</tt> and <tt>:id</tt>.
  def self.record_params(url)
    url_string = url.to_s
    if (canonical = url_string.match(/^(.+\/)u\/?\?\/(.+),(\d+)$/))
      return { :base_url => canonical[1], :collection => canonical[2], :id => canonical[3] }
    end

    item_uri = normalize(url_string)
    # The query may be absent altogether, hence the to_s before splitting.
    args = item_uri.query.to_s.split(/&/).inject({}) { |hash, arg|
      (k, v) = arg.split(/\=/, 2)
      # URI.decode no longer exists in Ruby 3; this is the supported decoder.
      hash[k] = ::URI.decode_www_form_component(v.to_s)
      hash
    }
    unless args['CISOROOT'] && args['CISOPTR']
      raise ArgumentError, "Cannot find CISOROOT/CISOPTR or a u?/collection,ptr reference in #{url_string}"
    end

    {
      :base_url   => item_uri.merge('..'),
      # CISOROOT is written with a leading slash ("/collection"); strip only that.
      :collection => args['CISOROOT'].sub(/\A\//, ''),
      :id         => args['CISOPTR']
    }
  end
  private_class_method :record_params

  # Return a hash of collection IDs and collection names
  def collections
    response = get_response(:verb => 'ListSets')
    sets = response.search('//xmlns:set', response.namespaces)
    sets.inject({}) { |hash, set|
      set_id = (set / 'setSpec').text()
      set_desc = (set / 'setName').text()
      hash[set_id] = set_desc
      hash
    }
  end

  # Return a single Record given its collection ID and ordinal position
  # within the collection. Returns nil when the response contains no record.
  def get_record(collection, id)
    oai_id = "oai:%s:%s/%d" % [@base_uri.host, collection, id]
    response = get_response(:verb => 'GetRecord', :identifier => oai_id, :metadataPrefix => 'qdc')
    record = parse_records(response).first
    return nil if record.nil?
    Record.new(record, { :base_uri => @base_uri, :collection => collection })
  end

  # Return an array of all the Records in a given collection.
  #
  # Recognised +opts+ keys: <tt>:max</tt> (stop after this many records;
  # 0 or absent means no limit), <tt>:from</tt>, <tt>:until</tt> and
  # <tt>:first</tt>, which are interpolated into the initial resumption token.
  def get_records(collection, opts = {})
    max = opts[:max].to_i
    token = "#{collection}:#{opts[:from]}:#{opts[:until]}:qdc:#{opts[:first].to_i}"
    result = []
    until token.nil? || (max > 0 && result.length >= max)
      response = get_response(:verb => 'ListRecords', :resumptionToken => token.to_s)
      # A response without a resumption token text node ends the loop.
      token = response.search('/xmlns:OAI-PMH/xmlns:ListRecords/xmlns:resumptionToken/text()', response.namespaces).first
      result.concat(parse_records(response))
    end
    # The last page may overshoot the requested maximum.
    result = result.first(max) if max > 0
    result.collect { |record|
      Record.new(record, { :base_uri => @base_uri, :collection => collection })
    }
  end

  private

  # Turns every qdc:qualifieddc element of the response into a hash that
  # maps "prefix.name" to an array of the element texts.
  def parse_records(response)
    qdcs = response.search('//qdc:qualifieddc', { 'qdc' => 'http://epubs.cclrc.ac.uk/xmlns/qdc/' })
    qdcs.collect { |qdc|
      metadata = Hash.new { |h, k| h[k] = [] }
      qdc.children.each { |child|
        next unless child.element?
        metadata[[child.namespace.prefix, child.name].join('.')] << child.text
      }
      metadata
    }
  end

  # Builds the oai.exe request URI for the given arguments.
  def request_uri(args)
    # encode_www_form escapes reserved characters such as ':' and '/',
    # and replaces URI.encode, which no longer exists in Ruby 3.
    query = ::URI.encode_www_form(args.collect { |k, v| [k.to_s, v.to_s] })
    @base_uri.merge("cgi-bin/oai.exe?#{query}")
  end

  # Performs the request and returns the parsed XML document. The block
  # form of open closes the underlying IO once parsing is done.
  def get_response(args)
    request_uri(args).open { |io| Nokogiri::XML(io) }
  end

end
117
+
118
+ end
@@ -0,0 +1,258 @@
1
+ require 'rubygems'
2
+ require 'erb'
3
+ require 'net/http'
4
+ require 'nokogiri'
5
+ require 'open-uri'
6
+ require 'uri'
7
+
8
+ module ContentDm
9
+
10
# ERB source used by Mapper#to_html when no :template is supplied. It is
# evaluated with '%' trim mode, so the lines starting with '%' are Ruby and
# must begin in the first column. The template reads the locals +field_order+
# and +data+. A value may be a String or an Array, so it is wrapped with
# Array() before joining (String#to_a does not exist on Ruby 1.9 and later).
DEFAULT_TEMPLATE = %{<span>
% field_order.each do |fieldname|
%   unless data[fieldname].nil? or data[fieldname].empty?
  <p>
    <b><%= fieldname %>: </b>
    <%= Array(data[fieldname]).join("; ") %>
  </p>
%   end
% end
</span>}
20
+
21
# GenericMapper acts as a fallback formatter for instances when no other Mapper is defined
class GenericMapper

  SaveOptions = Nokogiri::XML::Node::SaveOptions

  # Serialize the given Record to a Qualified Dublin Core XML string.
  # One element is written per value of every "prefix.name" metadata key.
  # +opts+ is accepted for signature parity with #to_html and is not used.
  def to_xml(record, opts = {})
    builder = Nokogiri::XML::Builder.new do |doc|
      doc.qualifieddc('xmlns:qdc' => "http://epubs.cclrc.ac.uk/xmlns/qdc/",
                      'xmlns:dc' => "http://purl.org/dc/elements/1.1/",
                      'xmlns:dcterms' => "http://purl.org/dc/terms/") {
        record.metadata.each_pair { |k, v|
          (prefix, tag) = k.split(/\./)
          values = v.is_a?(Array) ? v : [v]
          values.each { |value|
            # The trailing underscore keeps names such as "format" or "type"
            # from dispatching to real Ruby methods; the Builder strips it
            # from the element name.
            doc[prefix].send("#{tag}_".to_sym) {
              doc.text(value)
            }
          }
        }
      }
    end
    builder.to_xml
  end

  # Serialize the given Record to an HTML string. +opts+ is merged over the
  # default serialization options (UTF-8, no XML declaration, indent of 2).
  def to_html(record, opts = {})
    save_options = { :encoding => 'UTF-8', :save_with => (SaveOptions::AS_XML | SaveOptions::NO_DECLARATION), :indent => 2 }.merge(opts)
    builder = Nokogiri::XML::Builder.new do |doc|
      doc.span {
        record.metadata.each_pair { |k, v|
          # Skip fields with nothing to show. Checking the values themselves
          # is needed because Array#to_s is never empty on Ruby 1.9+.
          next if v.nil? || Array(v).all? { |value| value.to_s.empty? }
          (prefix, tag) = k.split(/\./)
          # Convert from camelCase to Human Readable Label
          tag = tag.gsub(/(\S)([A-Z])/,'\1 \2').gsub(/\b('?[a-z])/) { $1.capitalize }
          doc.p {
            doc.b {
              doc.text "#{tag}:"
            }
            doc.text " "
            if v.is_a?(Array)
              doc.br
              v.each { |value|
                doc.text value unless value.empty?
                doc.br
              }
            else
              doc.text v
            end
          }
        }
      }
    end
    builder.to_xml(save_options)
  end

end
84
+
85
# A Mapper provides information about field label, visibility, and output order for a
# specific CONTENTdm collection. This information can be screen-scraped from a
# CONTENTdm installation, or defined programatically.
class Mapper < GenericMapper

  extend URI
  @@maps = {}
  @@auto_init = true

  attr_accessor :fields, :order

  class << self

    # Whether Mapper.from initializes a missing map on demand. These
    # accessors use the same class variable that Mapper.from reads, so
    # assigning <tt>Mapper.auto_init = false</tt> takes effect.
    def auto_init
      @@auto_init
    end

    def auto_init=(value)
      @@auto_init = value
    end

    # Returns the signatures of every stored map.
    def maps
      @@maps.keys
    end

    # Returns true if a Mapper has been initialized for the given collection at the specified base URI.
    def mapped?(uri, collection)
      # Normalize first so that "http://host/" and "http://host" share a signature.
      @@maps.include?(signature(normalize(uri), collection))
    end

    # Initializes Mappers for all collections at the specified base URI.
    # Returns the array of collection IDs.
    def init_all(base_uri)
      uri = normalize(base_uri)
      response = uri.merge('cgi-bin/oai.exe?verb=ListSets').open { |io| Nokogiri::XML(io) }
      sets = response.search('//xmlns:set/xmlns:setSpec/text()', response.namespaces).collect { |set| set.text }
      sets.each { |set|
        init_map(uri, set)
      }
    end

    # Initializes the Mapper for the given collection at the specified base URI.
    def init_map(base_uri, collection)
      uri = normalize(base_uri)
      dc_map = dc_mapping(uri)

      fields = Hash.new { |h, k| h[k] = [] }
      order = []
      fetch_text(uri.merge("#{collection}/index/etc/config.txt")).each_line { |line|
        field_properties = line.chomp.split(/:/)
        next if field_properties.empty? # blank line
        field_label = field_properties.first
        field_code = field_properties.last
        fields[dc_map[field_code]] << field_label
        order << field_label unless field_properties[-3] == 'HIDE'
      }
      fields['dc.identifier'] << 'Permalink'
      @@maps[signature(uri, collection)] = new(uri, collection, fields, order)
    end

    # Assigns a map (either an initialized Map or a Hash/Array combination indicating the
    # field mapping and field order) to a given collection.
    def assign_map(base_uri, collection, *args)
      uri = normalize(base_uri)
      mapper = args[0].is_a?(self) ? args[0] : new(uri, collection, *args)
      @@maps[signature(uri, collection)] = mapper
    end

    # Returns the appropriate Mapper for the given collection at the specified base URI.
    # With auto_init enabled a missing map is fetched first (errors raised by
    # that fetch propagate); otherwise nil is returned when nothing is stored.
    def from(uri, collection)
      uri = normalize(uri)
      if @@auto_init && collection != 'DC_MAPPING' && !mapped?(uri, collection)
        init_map(uri, collection)
      end
      @@maps[signature(uri, collection)]
    end

    private

    # Returns the hash built from dc.txt (second ':'-separated column =>
    # normalized name from the first), loading and caching it on first use.
    def dc_mapping(uri)
      key = signature(uri, 'DC_MAPPING')
      return @@maps[key] unless @@maps[key].nil?

      dc_map = {}
      fetch_text(uri.merge("dc.txt")).each_line { |line|
        field_properties = line.chomp.split(/:/)
        next if field_properties.empty? # blank line
        dc_map[field_properties[1]] = normalize_field_name(field_properties[0])
      }
      @@maps[key] = dc_map
    end

    # Reads the body at +uri+. Uses the open method that open-uri adds to
    # URI objects; passing a URI to Kernel#open stopped working in Ruby 3.
    def fetch_text(uri)
      uri.open { |res| res.read }
    end
  end

  # Creates a map based on the hash of fields
  def initialize(base_uri, collection, fields, order = nil)
    @base_uri = base_uri
    @collection = collection
    @fields = fields
    @order = order
  end

  # Replaces the label +old_field+ with +new_field+ in both the field
  # mapping and the output order.
  def rename(old_field, new_field)
    swap = lambda { |name| name == old_field ? new_field : name }
    @fields.each_pair { |k, v| v.collect!(&swap) }
    @order.collect!(&swap)
  end

  # Returns a hash of field labels and data. A value containing ';'
  # separators becomes an array; a single value stays a string.
  def map(record)
    data = record.metadata
    result = {}
    @fields.each_pair { |k, v|
      # fetch avoids triggering a default proc on the metadata hash, which
      # would add empty entries to the record as a side effect.
      values = data.fetch(k, nil)
      next unless values
      v.each_with_index { |key, index|
        value = values[index]
        next if value.nil?
        parts = value.split(/;\s*/)
        result[key] = (parts.length == 1) ? parts.first : parts
      }
    }
    result
  end

  # Serialize the given Record to a Qualified Dublin Core XML string.
  # Fields are written in the collection's order; a multi-valued field yields
  # one element per value and a field without data is omitted.
  # +opts+ is accepted for signature parity with #to_html and is not used.
  def to_xml(record, opts = {})
    data = self.map(record)
    field_order = @order || []
    builder = Nokogiri::XML::Builder.new do |doc|
      doc.qualifieddc('xmlns:qdc' => "http://epubs.cclrc.ac.uk/xmlns/qdc/",
                      'xmlns:dc' => "http://purl.org/dc/elements/1.1/",
                      'xmlns:dcterms' => "http://purl.org/dc/terms/") {
        field_order.each { |fieldname|
          field_info = @fields.find { |k, v| v.include?(fieldname) }
          next if field_info.nil? || field_info[0].nil?
          (prefix, tag) = field_info[0].split(/\./)
          Array(data[fieldname]).each { |value|
            # Trailing underscore: see Nokogiri's note on tag names that
            # collide with existing Ruby methods.
            doc[prefix].send("#{tag}_".to_sym) {
              doc.text(value)
            }
          }
        }
      }
    end
    builder.to_xml
  end

  # Serialize the given Record to an HTML string. <tt>vars[:template]</tt>
  # may supply ERB source to use instead of DEFAULT_TEMPLATE. The template is
  # evaluated against this method's binding, so the local names below
  # (+record+, +vars+, +data+, +field_order+) are part of its contract.
  def to_html(record, vars = {})
    vars = vars.dup # do not remove :template from the caller's hash
    erb = vars.delete(:template) || DEFAULT_TEMPLATE
    data = self.map(record)
    field_order = @order || []
    template = build_template(erb)
    template.result(binding)
  end

  # The two class-level helpers below are public: a bare +private+ does not
  # apply to methods defined with <tt>def self.</tt>.

  # Key under which the map for +collection+ at +uri+ is stored.
  def self.signature(uri, collection)
    "#{uri.to_s} :: #{collection}"
  end

  # Converts a dc.txt field name into "dc.name" or, for "Element-Qualifier"
  # names, "dcterms.qualifier". Later words are camelCased.
  def self.normalize_field_name(fieldname)
    parts = fieldname.downcase.gsub(/(\s+[a-z])/) { |ch| ch.upcase.strip }.split(/-/)
    if parts.length == 1
      "dc.#{parts[0]}"
    else
      "dcterms.#{parts[1]}"
    end
  end

  private

  # Builds an ERB template with '%' trim mode, using the keyword argument
  # where ERB offers it and the older positional form otherwise.
  def build_template(source)
    init = ERB.instance_method(:initialize)
    if init.respond_to?(:parameters) && init.parameters.include?([:key, :trim_mode])
      ERB.new(source, :trim_mode => '%')
    else
      ERB.new(source, nil, '%')
    end
  end

end
257
+
258
+ end
@@ -0,0 +1,85 @@
1
+ module ContentDm
2
+
3
# A single item's metadata together with the location it came from.
class Record

  # Maps the friendly #img_href option names onto getimage.exe parameters.
  IMAGE_OPTION_KEYS = { :width => 'DMWIDTH', :height => 'DMHEIGHT', :scale => 'DMSCALE' }.freeze

  attr_reader :metadata, :source

  # +data+ is a hash of "prefix.name" => array of values; +source+ must
  # contain <tt>:base_uri</tt> (a URI) and may contain <tt>:collection</tt>.
  #
  # When the last dc.identifier ends in "?/collection,ptr", the collection
  # and item id are taken from it and the identifier is rewritten as a
  # permalink on the base URI. Otherwise +source+ is kept as given.
  def initialize(data, source)
    @metadata = data.dup
    # Work on copies so the caller's hash and identifier array stay untouched.
    @source = source.dup
    identifiers = Array(@metadata['dc.identifier']).dup
    @metadata['dc.identifier'] = identifiers

    (collection, record_id) = identifiers.last.to_s.scan(/\?\/(.+),([0-9]+)$/).flatten
    return if collection.nil?

    @source[:collection] = collection
    @source[:id] = record_id.to_i
    self.permalink = @source[:base_uri].merge("/u?/#{collection},#{record_id}").to_s
  end

  # URI of the item's image on getimage.exe. +opts+ may use <tt>:width</tt>,
  # <tt>:height</tt> and <tt>:scale</tt>; any other key is passed through as
  # a query parameter of that name.
  def img_href(opts = {})
    params = {
      'CISOROOT' => "/#{@source[:collection]}",
      'CISOPTR' => @source[:id],
      'DMSCALE' => 100,
      'DMWIDTH' => 0,
      'DMHEIGHT' => 0,
      'DMX' => 0,
      'DMY' => 0,
      'DMTEXT' => '',
      'DMTHUMB' => '',
      'DMROTATE' => 0
    }
    opts.each_pair { |k, v| params[IMAGE_OPTION_KEYS.fetch(k, k)] = v }
    @source[:base_uri].merge("cgi-bin/getimage.exe?#{query_string(params)}")
  end

  # URI of the item's thumbnail on thumbnail.exe.
  def thumbnail_href
    params = {
      'CISOROOT' => "/#{@source[:collection]}",
      'CISOPTR' => @source[:id]
    }
    @source[:base_uri].merge("cgi-bin/thumbnail.exe?#{query_string(params)}")
  end

  # The last dc.identifier value.
  def permalink
    @metadata['dc.identifier'][-1]
  end

  # Replaces the last dc.identifier value.
  def permalink=(value)
    @metadata['dc.identifier'][-1] = value
  end

  # The Mapper for this record's collection, or a GenericMapper when
  # Mapper.from returns nothing.
  def mapper
    Mapper.from(@source[:base_uri], @source[:collection]) || GenericMapper.new
  end

  # Serialize the Record to a Qualified Dublin Core XML string. If
  # a Mapper has been initialized for the Record's owning collection,
  # it will be used. Otherwise, the GenericMapper will be used.
  def to_xml(opts = {})
    mapper.to_xml(self, opts)
  end

  # Serialize the Record to an HTML string. If a Mapper has been
  # initialized for the Record's owning collection, it will be
  # used. Otherwise, the GenericMapper will be used.
  def to_html(opts = {})
    mapper.to_html(self, opts)
  end

  private

  # Form-encodes +params+ into a query string. Replaces URI.encode, which
  # no longer exists in Ruby 3 and left '&' and '=' inside values unescaped.
  def query_string(params)
    ::URI.encode_www_form(params.collect { |k, v| [k.to_s, v.to_s] })
  end

end
84
+
85
+ end
@@ -0,0 +1,11 @@
1
module ContentDm

  # Helper mixed into Harvester and Mapper (via +extend+) for bringing base
  # URIs into one canonical form.
  module URI
    # Returns a URI whose path has no trailing slashes. +uri+ may be a String
    # or a URI object; a URI argument is copied, never modified in place.
    def normalize(uri)
      local_uri = uri.is_a?(::URI) ? uri.dup : ::URI.parse(uri)
      path = local_uri.path
      # Assign a new string instead of sub!-ing the one shared with the argument.
      local_uri.path = path.sub(/\/+$/, '') if path && path =~ /\/+$/
      local_uri
    end
  end

end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: contentdm
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.20
5
+ platform: ruby
6
+ authors:
7
+ - Michael B. Klein
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-02-03 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ description: Module providing access to structured metadata in CONTENTdm collections
26
+ email: Michael.Klein@oregonstate.edu
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README.rdoc
33
+ files:
34
+ - README.rdoc
35
+ - lib/contentdm/harvester.rb
36
+ - lib/contentdm/mapper.rb
37
+ - lib/contentdm/record.rb
38
+ - lib/contentdm/uri.rb
39
+ - lib/contentdm.rb
40
+ has_rdoc: true
41
+ homepage:
42
+ licenses: []
43
+
44
+ post_install_message:
45
+ rdoc_options: []
46
+
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: "0"
54
+ version:
55
+ required_rubygems_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ version:
61
+ requirements: []
62
+
63
+ rubyforge_project:
64
+ rubygems_version: 1.3.5
65
+ signing_key:
66
+ specification_version: 3
67
+ summary: Access to structured metadata in CONTENTdm collections
68
+ test_files: []
69
+