contentdm 0.1.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,36 @@
1
+ = Introduction
2
+ The ContentDm module for Ruby provides access to structured metadata in CONTENTdm collections
3
+ via CONTENTdm's built-in OAI-PMH provider interface. The module turns Qualified Dublin Core
4
+ metadata into a convenient hash structure. With proper authentication, it can also scrape
5
+ collection-level field information from the CONTENTdm administrative interface and create
6
+ formatted HTML output from retrieved records.
7
+
8
+ = Examples
9
+
10
+ # Create a Harvester using the location of a CONTENTdm repository
11
+ harvester = ContentDm::Harvester.new('http://mycontentdm.example.com/')
12
+
13
+ # Retrieve the list of collections
14
+ collections = harvester.collections
15
+ => {"collection1" => "My First Collection", "collection2" => "My Second Collection"}
16
+
17
+ # Retrieve a single record from collection1
18
+ record = harvester.get_record("collection1",16)
19
+
20
+ # Retrieve all records from collection2
21
+ records = harvester.get_records("collection2")
22
+
23
+ Calling <tt>record#to_xml()</tt> or <tt>record#to_html()</tt> at this point will return
24
+ generic and arbitrarily-ordered markup, because the ContentDm::Mapper for
25
+ <tt>collection1</tt> hasn't been initialized.
26
+
27
+ # Initialize the Mapper for a single collection
28
+ ContentDm::Mapper.init_map("http://mycontentdm.example.com/", "collection1", :user => "my_contentdm_admin", :pass => "p@$$w0rd")
29
+ => #<ContentDm::Mapper ... >
30
+
31
+ # Initialize Mappers for all collections on the server
32
+ ContentDm::Mapper.init_all("http://mycontentdm.example.com/", :user => "my_contentdm_admin", :pass => "p@$$w0rd")
33
+ => ["collection1", "collection2"]
34
+
35
+ Now <tt>record#to_xml()</tt> and <tt>record#to_html()</tt> will return markup consistent with
36
+ the settings defined for the collection within CONTENTdm.
@@ -0,0 +1,8 @@
1
+ require 'contentdm/uri'
2
+ require 'contentdm/harvester'
3
+ require 'contentdm/mapper'
4
+ require 'contentdm/record'
5
+
6
# Top-level namespace for the CONTENTdm client library.
module ContentDm
  # Release version of the gem. Frozen so the shared string cannot be
  # modified in place by client code.
  VERSION = '0.1.20'.freeze
end
@@ -0,0 +1,118 @@
1
+ require 'rubygems'
2
+ require 'nokogiri'
3
+ require 'open-uri'
4
+ require 'uri'
5
+
6
+ module ContentDm #:nodoc:#
7
+
8
# Harvests Qualified Dublin Core records from a CONTENTdm installation
# through its OAI-PMH endpoint (<tt>cgi-bin/oai.exe</tt>).
class Harvester

  extend URI

  OAI_PAGE_SIZE = 1000

  attr_reader :base_uri
  attr_accessor :page_size

  # The constructor must be passed the URL of a CONTENTdm installation. This will usually
  # be the root of the server on which CONTENTdm is installed.
  def initialize(base_uri)
    @base_uri = self.class.normalize(base_uri)
    @page_size = OAI_PAGE_SIZE
  end

  # Convenience method which returns a single Record when passed a URL in
  # one of two forms:
  # * A CONTENTdm URL containing CISOROOT/CISOPTR values for the desired item
  # * A CONTENTdm canonical URL in the form
  #     http://path/to/contentdm/u?[collection],[ptr]
  #   where <tt>[collection]</tt> is the CONTENTdm collection name, and <tt>[ptr]</tt> is the sequential
  #   item ID within the collection.
  #
  # Raises ArgumentError when the URL matches neither form.
  def self.get_record(url)
    url = url.to_s
    base_uri = self.normalize(url)
    params = {}
    if args = url.match(/^(.+\/)u\/?\?\/(.+),(\d+)$/)
      params[:base_url] = args[1]
      params[:collection] = args[2]
      params[:id] = args[3]
    else
      # A URL without a query string yields an empty hash here instead of
      # failing on nil.
      args = base_uri.query.to_s.split(/&/).inject({}) { |hash, arg|
        (k, v) = arg.split(/\=/, 2)
        hash[k] = unescape_component(v.to_s)
        hash
      }
      if args['CISOROOT'].nil? or args['CISOPTR'].nil?
        raise ArgumentError, "Not a recognized CONTENTdm item URL: #{url}"
      end
      params[:base_url] = base_uri.merge('..')
      # CISOROOT carries a leading slash ("/collection"); drop it.
      params[:collection] = args['CISOROOT'][1..-1]
      params[:id] = args['CISOPTR']
    end
    harvester = Harvester.new(params[:base_url])
    harvester.get_record(params[:collection], params[:id])
  end

  # Percent-decodes a query value. URI.decode was removed in Ruby 3.0, so the
  # RFC 2396 parser (which implemented it) is used directly when available.
  def self.unescape_component(text)
    return ::URI::RFC2396_Parser.new.unescape(text) if defined?(::URI::RFC2396_Parser)
    ::URI.decode(text)
  end
  private_class_method :unescape_component

  # Return a hash of collection IDs and collection names
  def collections
    response = fetch_xml(@base_uri.merge('cgi-bin/oai.exe?verb=ListSets'))
    sets = response.search('//xmlns:set', response.namespaces)
    sets.inject({}) { |hash, set|
      set_id = (set / 'setSpec').text()
      set_desc = (set / 'setName').text()
      hash[set_id] = set_desc
      hash
    }
  end

  # Return a single Record given its collection ID and ordinal position
  # within the collection. Returns nil when the response contains no
  # Qualified Dublin Core record.
  def get_record(collection, id)
    oai_id = "oai:%s:%s/%d" % [@base_uri.host, collection, id]
    response = get_response({ :verb => 'GetRecord', :identifier => oai_id, :metadataPrefix => 'qdc' })
    record = parse_records(response).first
    return nil if record.nil?
    Record.new(record, { :base_uri => @base_uri, :collection => collection })
  end

  # Return an array of all the Records in a given collection.
  #
  # Recognized options: <tt>:from</tt>, <tt>:until</tt> and <tt>:first</tt>
  # are interpolated into the initial resumption token; <tt>:max</tt> caps
  # the number of records returned (0 or absent means no cap).
  def get_records(collection, opts = {})
    max = opts[:max].to_i
    token = "#{collection}:#{opts[:from].to_s}:#{opts[:until].to_s}:qdc:#{opts[:first].to_i}"
    result = []
    until token.nil? or ((max > 0) and (result.length >= max))
      args = { :verb => 'ListRecords', :resumptionToken => token.to_s }
      response = get_response(args)
      # No text node in the resumptionToken element ends the loop.
      token = response.search('/xmlns:OAI-PMH/xmlns:ListRecords/xmlns:resumptionToken/text()', response.namespaces).first
      result += parse_records(response)
    end
    # The last page may overshoot the requested maximum.
    result = result.first(max) if max > 0
    result.collect { |record|
      Record.new(record, { :base_uri => @base_uri, :collection => collection })
    }
  end

  private

  # Turns every qdc:qualifieddc element of the response into a hash that maps
  # "prefix.name" keys to arrays of element text.
  def parse_records(response)
    result = []
    qdcs = response.search('//qdc:qualifieddc', { 'qdc' => 'http://epubs.cclrc.ac.uk/xmlns/qdc/' })
    qdcs.each { |qdc|
      metadata = Hash.new { |h, k| h[k] = [] }
      qdc.children.each { |child|
        if child.element?
          metadata[[child.namespace.prefix, child.name].join('.')] << child.text # unless child.text.empty?
        end
      }
      result << metadata
    }
    result
  end

  # Issues an OAI-PMH request built from +args+ and returns the parsed XML.
  def get_response(args)
    path = 'cgi-bin/oai.exe'
    query = args.collect { |k, v| [k.to_s, escape_component(v.to_s)].join('=') }.join('&')
    fetch_xml(@base_uri.merge("#{path}?#{query}"))
  end

  # Fetches +uri+ and parses it as XML. Calling #open on the URI object is
  # what open-uri's Kernel#open patch did for URI arguments, and it still
  # works on Ruby 3.0+. The block form closes the IO after parsing.
  def fetch_xml(uri)
    uri.open { |io| Nokogiri::XML(io) }
  end

  # Percent-encodes a query value. URI.encode was removed in Ruby 3.0, so the
  # RFC 2396 parser (which implemented it) is used directly when available.
  def escape_component(text)
    return ::URI::RFC2396_Parser.new.escape(text) if defined?(::URI::RFC2396_Parser)
    ::URI.encode(text)
  end

end
117
+
118
+ end
@@ -0,0 +1,258 @@
1
+ require 'rubygems'
2
+ require 'erb'
3
+ require 'net/http'
4
+ require 'nokogiri'
5
+ require 'open-uri'
6
+ require 'uri'
7
+
8
+ module ContentDm
9
+
10
# Default ERB template used by Mapper#to_html. It is evaluated with
# trim mode '%', so lines starting with '%' are Ruby code. The template
# expects +field_order+ (array of labels) and +data+ (label => value) in
# the binding. Values may be a String or an Array, so Kernel#Array is used
# to coerce them; String#to_a does not exist on Ruby 1.9 and later.
DEFAULT_TEMPLATE = %{<span>
% field_order.each do |fieldname|
%   unless data[fieldname].nil? or data[fieldname].empty?
  <p>
    <b><%= fieldname %>: </b>
    <%= Array(data[fieldname]).join("; ") %>
  </p>
%   end
% end
</span>}
20
+
21
# GenericMapper acts as a fallback formatter for instances when no other Mapper is defined
class GenericMapper

  SaveOptions = Nokogiri::XML::Node::SaveOptions

  # Serialize the given Record to a Qualified Dublin Core XML string.
  #
  # record:: object whose +metadata+ maps "prefix.tag" keys to a value or an
  #          array of values
  # opts::   accepted for interface compatibility; not used here
  def to_xml(record, opts = {})
    builder = Nokogiri::XML::Builder.new do |doc|
      doc.qualifieddc('xmlns:qdc' => "http://epubs.cclrc.ac.uk/xmlns/qdc/",
                      'xmlns:dc' => "http://purl.org/dc/elements/1.1/",
                      'xmlns:dcterms' => "http://purl.org/dc/terms/") {
        record.metadata.each_pair { |k, v|
          (prefix, tag) = k.split(/\./)
          values = v.is_a?(Array) ? v : [v]
          values.each { |value|
            # The trailing underscore keeps tags such as "format" from
            # dispatching to an existing Ruby method through #send; the
            # builder strips it from the element name. Mapper#to_xml uses
            # the same form.
            doc[prefix].send("#{tag}_".to_sym) {
              doc.text(value)
            }
          }
        }
      }
    end
    builder.to_xml
  end

  # Serialize the given Record to an HTML string. +opts+ is merged over the
  # default save options (UTF-8, XML without declaration, indent 2).
  def to_html(record, opts = {})
    save_options = { :encoding => 'UTF-8', :save_with => (SaveOptions::AS_XML | SaveOptions::NO_DECLARATION), :indent => 2 }.merge(opts)
    builder = Nokogiri::XML::Builder.new do |doc|
      doc.span {
        record.metadata.each_pair { |k, v|
          next if v.nil? or v.to_s.empty?
          (prefix, tag) = k.split(/\./)
          label = humanize(tag)
          doc.p {
            doc.b {
              doc.text "#{label}:"
            }
            doc.text " "
            if v.is_a?(Array)
              # Multiple values: one per line, each followed by a break.
              doc.br
              v.each { |value|
                doc.text value unless value.empty?
                doc.br
              }
            else
              doc.text v
            end
          }
        }
      }
    end
    builder.to_xml(save_options)
  end

  private

  # Convert from camelCase to Human Readable Label
  def humanize(tag)
    tag.gsub(/(\S)([A-Z])/, '\1 \2').gsub(/\b('?[a-z])/) { $1.capitalize }
  end

end
84
+
85
# A Mapper provides information about field label, visibility, and output order for a
# specific CONTENTdm collection. This information can be read from the
# configuration files of a CONTENTdm installation, or defined programmatically.
class Mapper < GenericMapper

  extend URI
  @@maps = {}
  @@auto_init = true

  attr_accessor :fields, :order

  class << self

    # Whether ::from initializes a missing Mapper on demand. These accessors
    # read and write the same class variable that ::from consults.
    def auto_init
      @@auto_init
    end

    def auto_init=(value)
      @@auto_init = value
    end

    # Returns the signatures of all registered maps.
    def maps
      @@maps.keys
    end

    # Returns true if a Mapper has been initialized for the given collection at the specified base URI.
    def mapped?(uri, collection)
      # Maps are stored under the normalized URI, so look them up the same way.
      @@maps.include?(signature(normalize(uri), collection))
    end

    # Initializes Mappers for all collections at the specified base URI and
    # returns the list of collection IDs.
    #
    # opts may carry <tt>:user</tt> and <tt>:pass</tt>.
    # NOTE(review): they are sent as HTTP basic authentication; confirm
    # that this is the scheme the target server expects.
    def init_all(base_uri, opts = {})
      uri = normalize(base_uri)
      response = read_uri(uri.merge('cgi-bin/oai.exe?verb=ListSets'), opts) { |io| Nokogiri::XML(io) }
      sets = response.search('//xmlns:set/xmlns:setSpec/text()', response.namespaces).collect { |set| set.text }
      sets.each { |set|
        init_map(uri, set, opts)
      }
    end

    # Initializes the Mapper for the given collection at the specified base URI.
    # Reads <tt>dc.txt</tt> (once per base URI) for the field-code to Dublin
    # Core mapping and <tt>[collection]/index/etc/config.txt</tt> for the
    # collection's labels, order and visibility. See ::init_all for +opts+.
    def init_map(base_uri, collection, opts = {})
      uri = normalize(base_uri)

      dc_map = from(uri, 'DC_MAPPING')
      if dc_map.nil?
        dc_map = {}
        read_uri(uri.merge("dc.txt"), opts) { |res| res.read }.each_line { |field|
          field_properties = field.chomp.split(/:/)
          next if field_properties.empty? # blank line
          dc_map[field_properties[1]] = normalize_field_name(field_properties[0])
        }
        @@maps[signature(uri, 'DC_MAPPING')] = dc_map
      end

      config = read_uri(uri.merge("#{collection}/index/etc/config.txt"), opts) { |res| res.read }
      map = { :fields => Hash.new { |h, k| h[k] = [] }, :order => [] }
      config.each_line { |field|
        field_properties = field.chomp.split(/:/)
        next if field_properties.empty? # blank line
        field_label = field_properties.first
        field_code = field_properties.last
        map[:fields][dc_map[field_code]] << field_label
        map[:order] << field_label unless field_properties[-3] == 'HIDE'
      }
      map[:fields]['dc.identifier'] << 'Permalink'
      @@maps[signature(uri, collection)] = new(uri, collection, map[:fields], map[:order])
    end

    # Assigns a map (either an initialized Map or a Hash/Array combination indicating the
    # field mapping and field order) to a given collection.
    def assign_map(base_uri, collection, *args)
      uri = normalize(base_uri)
      if args[0].is_a?(self)
        @@maps[signature(uri, collection)] = args[0]
      else
        @@maps[signature(uri, collection)] = new(uri, collection, *args)
      end
    end

    # Returns the Mapper registered for the given collection at the specified
    # base URI, or nil if there is none. When auto_init is enabled, a missing
    # Mapper is initialized first (which performs network requests).
    def from(uri, collection)
      uri = normalize(uri)
      if @@auto_init and (collection != 'DC_MAPPING')
        unless mapped?(uri, collection)
          init_map(uri, collection)
        end
      end
      @@maps[signature(uri, collection)]
    end

    # Key under which a map is stored.
    def signature(uri, collection)
      "#{uri.to_s} :: #{collection}"
    end

    # Converts a Dublin Core field label into a "dc.name" key, or into
    # "dcterms.qualifier" when the label has a "-qualifier" part. Words after
    # whitespace are camelCased.
    def normalize_field_name(fieldname)
      parts = fieldname.downcase.gsub(/(\s+[a-z])/) { |ch| ch.upcase.strip }.split(/-/)
      if parts.length == 1
        "dc.#{parts[0]}"
      else
        "dcterms.#{parts[1]}"
      end
    end

    private

    # Opens +uri+ through open-uri and yields the IO, returning the block's
    # value. URI#open keeps working on Ruby 3.0+, where Kernel#open no longer
    # accepts URIs.
    def read_uri(uri, opts = {})
      options = open_options(opts)
      args = options.empty? ? [] : [options]
      uri.open(*args) { |io| yield io }
    end

    # Translates :user/:pass into open-uri options; empty when no user given.
    def open_options(opts)
      user = opts[:user]
      return {} if user.nil?
      { :http_basic_authentication => [user.to_s, opts[:pass].to_s] }
    end
  end

  # Creates a map based on the hash of fields
  def initialize(base_uri, collection, fields, order = nil)
    @base_uri = base_uri
    @collection = collection
    @fields = fields
    @order = order
  end

  # Replaces the label +old_field+ with +new_field+ in both the field map and
  # the output order. Returns the order array (nil when no order is set).
  def rename(old_field, new_field)
    @fields.each_pair { |k, v| v.collect! { |name| name == old_field ? new_field : name } }
    @order.collect! { |name| name == old_field ? new_field : name } if @order
  end

  # Returns a hash of field labels and data. The n-th label mapped to a
  # metadata key receives that key's n-th value, split on ";". A single
  # resulting item is stored as a String rather than an Array.
  def map(record)
    data = record.metadata
    result = {}
    @fields.each_pair { |k, v|
      # key? avoids inserting empty entries into metadata hashes that
      # were built with a default block.
      next unless data.key?(k) and data[k]
      v.each_with_index { |key, index|
        value = data[k][index]
        unless value.nil?
          result[key] = value.split(/;\s*/)
          if result[key].length == 1
            result[key] = result[key].first
          end
        end
      }
    }
    result
  end

  # Serialize the given Record to a Qualified Dublin Core XML string, emitting
  # one element per visible field in the configured order. +opts+ is accepted
  # for interface compatibility and is not used.
  def to_xml(record, opts = {})
    data = self.map(record)
    field_order = @order || []
    builder = Nokogiri::XML::Builder.new do |doc|
      doc.qualifieddc('xmlns:qdc' => "http://epubs.cclrc.ac.uk/xmlns/qdc/",
                      'xmlns:dc' => "http://purl.org/dc/elements/1.1/",
                      'xmlns:dcterms' => "http://purl.org/dc/terms/") {
        field_order.each { |fieldname|
          field_info = @fields.find { |k, v| v.include?(fieldname) }
          next if field_info.nil? or field_info[0].nil?
          (prefix, tag) = field_info[0].split(/\./)
          index = field_info[1].index(fieldname)
          value = data[fieldname]
          if value.is_a?(Array)
            value = value[index]
          end
          # Trailing underscore: see the Nokogiri builder's rule for tag
          # names that collide with Ruby methods.
          doc[prefix].send("#{tag}_".to_sym) {
            doc.text(value)
          }
        }
      }
    end
    builder.to_xml
  end

  # Serialize the given Record to an HTML string. <tt>vars[:template]</tt>
  # may supply an ERB template; otherwise DEFAULT_TEMPLATE is used. The
  # template sees +record+, +vars+ (without :template), +data+ and
  # +field_order+ through the binding.
  def to_html(record, vars = {})
    vars = vars.dup # do not remove :template from the caller's hash
    erb = vars.delete(:template) || DEFAULT_TEMPLATE
    data = self.map(record)
    field_order = @order || []
    template = build_template(erb)
    template.result(binding)
  end

  private

  # Builds an ERB object with trim mode '%'. The keyword form is used when
  # ERB.new accepts it; the positional form is deprecated there.
  def build_template(source)
    keywords = ERB.instance_method(:initialize).parameters.any? { |_type, name| name == :trim_mode }
    keywords ? ERB.new(source, :trim_mode => '%') : ERB.new(source, nil, '%')
  end

end
257
+
258
+ end
@@ -0,0 +1,85 @@
1
+ module ContentDm
2
+
3
# A single CONTENTdm item: its metadata hash plus information about where it
# came from (base URI, collection, item id).
class Record

  attr_reader :metadata, :source

  # data::   hash of "prefix.name" keys to arrays of values; copied shallowly
  # source:: hash with at least <tt>:base_uri</tt> (a URI); it is updated
  #          in place with <tt>:collection</tt> and <tt>:id</tt> when the last
  #          dc.identifier ends in "?/[collection],[id]"
  #
  # When the identifier matches, the permalink is rewritten to the canonical
  # <tt>/u?/[collection],[id]</tt> form under the base URI. When it is absent
  # or does not match, +source+ and the metadata are left untouched.
  def initialize(data, source)
    @metadata = data.dup
    @source = source

    match = permalink.to_s.match(/\?\/(.+),([0-9]+)$/)
    unless match.nil?
      collection, record_id = match[1], match[2]
      @source[:collection] = collection
      @source[:id] = record_id.to_i
      self.permalink = @source[:base_uri].merge("/u?/#{collection},#{record_id}").to_s
    end
  end

  # Returns the URI of the item's image via <tt>cgi-bin/getimage.exe</tt>.
  # <tt>:width</tt>, <tt>:height</tt> and <tt>:scale</tt> set DMWIDTH,
  # DMHEIGHT and DMSCALE; any other key is passed through as a raw parameter.
  def img_href(opts = {})
    params = {
      'CISOROOT' => "/#{@source[:collection]}",
      'CISOPTR' => @source[:id],
      'DMSCALE' => 100,
      'DMWIDTH' => 0,
      'DMHEIGHT' => 0,
      'DMX' => 0,
      'DMY' => 0,
      'DMTEXT' => '',
      'DMTHUMB' => '',
      'DMROTATE' => 0
    }
    opts.each_pair { |k, v|
      case k
      when :width then params['DMWIDTH'] = v
      when :height then params['DMHEIGHT'] = v
      when :scale then params['DMSCALE'] = v
      else params[k] = v
      end
    }
    @source[:base_uri].merge("cgi-bin/getimage.exe?#{build_query(params)}")
  end

  # Returns the URI of the item's thumbnail via <tt>cgi-bin/thumbnail.exe</tt>.
  def thumbnail_href
    params = {
      'CISOROOT' => "/#{@source[:collection]}",
      'CISOPTR' => @source[:id],
    }
    @source[:base_uri].merge("cgi-bin/thumbnail.exe?#{build_query(params)}")
  end

  # The last dc.identifier value, or nil when the record has none.
  def permalink
    Array(@metadata['dc.identifier']).last
  end

  # Replaces the last dc.identifier value, adding one if there are none.
  def permalink=(value)
    identifiers = (@metadata['dc.identifier'] ||= [])
    if identifiers.empty?
      identifiers << value
    else
      identifiers[-1] = value
    end
  end

  # The Mapper registered for this record's collection, or a GenericMapper
  # when Mapper.from returns nil.
  def mapper
    Mapper.from(@source[:base_uri], @source[:collection]) || GenericMapper.new
  end

  # Serialize the Record to a Qualified Dublin Core XML string. If
  # a Mapper has been initialized for the Record's owning collection,
  # it will be used. Otherwise, the GenericMapper will be used.
  def to_xml(opts = {})
    mapper.to_xml(self, opts)
  end

  # Serialize the Record to an HTML string. If a Mapper has been
  # initialized for the Record's owning collection, it will be
  # used. Otherwise, the GenericMapper will be used.
  def to_html(opts = {})
    mapper.to_html(self, opts)
  end

  private

  # Joins +params+ into a query string, percent-encoding each value.
  def build_query(params)
    params.collect { |k, v| "#{k}=#{escape_component(v.to_s)}" }.join('&')
  end

  # URI.encode was removed in Ruby 3.0; the RFC 2396 parser that implemented
  # it is used directly when available.
  def escape_component(text)
    return ::URI.encode(text) unless defined?(::URI::RFC2396_Parser)
    @uri_escaper ||= ::URI::RFC2396_Parser.new
    @uri_escaper.escape(text)
  end

end
84
+
85
+ end
@@ -0,0 +1,11 @@
1
module ContentDm

  # URI helpers, mixed into classes with +extend+.
  module URI
    # Returns a URI whose path has no trailing slashes.
    #
    # uri:: a URI object or a string accepted by ::URI.parse
    #
    # A URI object passed in is copied first, so the caller's object is
    # never modified.
    def normalize(uri)
      local_uri = uri.is_a?(::URI) ? uri.dup : ::URI.parse(uri)
      current = local_uri.path.to_s
      trimmed = current.sub(/\/+$/, '')
      # Assign a new path instead of editing the string in place; the copy
      # shares its path string with the original.
      local_uri.path = trimmed unless trimmed == current
      local_uri
    end
  end

end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: contentdm
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.20
5
+ platform: ruby
6
+ authors:
7
+ - Michael B. Klein
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-02-03 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ description: Module providing access to structured metadata in CONTENTdm collections
26
+ email: Michael.Klein@oregonstate.edu
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README.rdoc
33
+ files:
34
+ - README.rdoc
35
+ - lib/contentdm/harvester.rb
36
+ - lib/contentdm/mapper.rb
37
+ - lib/contentdm/record.rb
38
+ - lib/contentdm/uri.rb
39
+ - lib/contentdm.rb
40
+ has_rdoc: true
41
+ homepage:
42
+ licenses: []
43
+
44
+ post_install_message:
45
+ rdoc_options: []
46
+
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: "0"
54
+ version:
55
+ required_rubygems_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ version:
61
+ requirements: []
62
+
63
+ rubyforge_project:
64
+ rubygems_version: 1.3.5
65
+ signing_key:
66
+ specification_version: 3
67
+ summary: Access to structured metadata in CONTENTdm collections
68
+ test_files: []
69
+