cul-fedora-arm 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,300 @@
1
+ require 'cul/fedora/image/image'
2
+ require 'pathname'
3
+ require 'rexml/document'
4
+ require 'uri'
5
+ module Cul
6
+ module Fedora
7
+ module Arm
8
# FOXML 1.1 ingest template for aggregator objects.
# Placeholders have the form {0[key]}; FoxmlBuilder#sub_values replaces each
# one with the value stored under :key in the substitution hash.
# Keys referenced by this template: pid, title_attr, timestamp, title,
# dc_type, id, content_model, license, rels.
# The document carries two inline XML datastreams: DC and RELS-EXT.
+ AGGREGATOR_FOXML = <<INGEST
9
+ <?xml version="1.0" encoding="UTF-8"?>
10
+ <foxml:digitalObject PID="{0[pid]}" VERSION="1.1"
11
+ xmlns:foxml="info:fedora/fedora-system:def/foxml#"
12
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="info:fedora/fedora-system:def/foxml# http://www.fedora.info/definitions/1/0/foxml1-1.xsd">
13
+ <foxml:objectProperties>
14
+ <foxml:property NAME="info:fedora/fedora-system:def/model#state" VALUE="Active"/>
15
+ <foxml:property NAME="info:fedora/fedora-system:def/model#label" VALUE="{0[title_attr]}"/>
16
+ <foxml:property NAME="info:fedora/fedora-system:def/model#ownerId" VALUE="fedoraAdmin"/>
17
+ <foxml:property NAME="info:fedora/fedora-system:def/model#createdDate" VALUE="{0[timestamp]}"/>
18
+ <foxml:property NAME="info:fedora/fedora-system:def/view#lastModifiedDate" VALUE="{0[timestamp]}"/>
19
+ </foxml:objectProperties>
20
+ <foxml:datastream CONTROL_GROUP="X" ID="DC" STATE="A" VERSIONABLE="true">
21
+ <foxml:datastreamVersion CREATED="{0[timestamp]}"
22
+ FORMAT_URI="http://www.openarchives.org/OAI/2.0/oai_dc/" ID="DC1.0"
23
+ LABEL="Dublin Core Record for this object" MIMETYPE="text/xml">
24
+ <foxml:xmlContent>
25
+ <oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/">
26
+ <dc:title>{0[title]}</dc:title>
27
+ <dc:creator>BATCH</dc:creator>
28
+ <dc:type>{0[dc_type]}</dc:type>
29
+ <dc:identifier>{0[id]}</dc:identifier>
30
+ </oai_dc:dc>
31
+ </foxml:xmlContent>
32
+ </foxml:datastreamVersion>
33
+ </foxml:datastream>
34
+ <foxml:datastream CONTROL_GROUP="X" ID="RELS-EXT" STATE="A" VERSIONABLE="true">
35
+ <foxml:datastreamVersion CREATED="{0[timestamp]}"
36
+ FORMAT_URI="info:fedora/fedora-system:FedoraRELSExt-1.0" ID="RELS-EXT1.0"
37
+ LABEL="RDF Statements about this object" MIMETYPE="application/rdf+xml">
38
+ <foxml:xmlContent>
39
+ <rdf:RDF xmlns:fedora-model="info:fedora/fedora-system:def/model#"
40
+ xmlns:ore="http://www.openarchives.org/ore/terms/"
41
+ xmlns:cul="http://purl.oclc.org/NET/CUL/"
42
+ xmlns:cc="http://creativecommons.org/ns#"
43
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
44
+ <rdf:Description rdf:about="info:fedora/{0[pid]}">
45
+ <fedora-model:hasModel rdf:resource="info:fedora/{0[content_model]}"/>
46
+ <rdf:type rdf:resource="http://purl.oclc.org/NET/CUL/Aggregator" />
47
+ <cc:license rdf:resource="info:fedora/{0[license]}" />
48
+ {0[rels]}
49
+ </rdf:Description>
50
+ </rdf:RDF>
51
+ </foxml:xmlContent>
52
+ </foxml:datastreamVersion>
53
+ </foxml:datastream>
54
+ </foxml:digitalObject>
55
+ INGEST
56
+ # >>
57
# FOXML 1.1 ingest template for MODS metadata objects.
# Placeholders have the form {0[key]}; FoxmlBuilder#sub_values replaces each
# one with the value stored under :key in the substitution hash.
# Keys referenced by this template: pid, title_attr, title, identifier,
# source, timestamp, rels, metadata.
# The document carries three inline XML datastreams: DC, RELS-EXT and
# CONTENT; the CONTENT datastream wraps the {0[metadata]} value.
# NOTE(review): unlike the other templates, the DC datastreamVersion below
# has a literal SIZE="488" and CREATED="2004-12-10T00:21:58.000Z" instead of
# {0[timestamp]} -- looks like a leftover from a sample document; confirm.
+ METADATA_FOXML = <<INGEST
58
+ <?xml version="1.0" encoding="UTF-8"?>
59
+ <foxml:digitalObject VERSION="1.1" PID="{0[pid]}" xmlns:foxml="info:fedora/fedora-system:def/foxml#" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="info:fedora/fedora-system:def/foxml# http://www.fedora.info/definitions/1/0/foxml1-1.xsd">
60
+ <foxml:objectProperties>
61
+ <foxml:property NAME="info:fedora/fedora-system:def/model#state" VALUE="A"/>
62
+ <foxml:property NAME="info:fedora/fedora-system:def/model#label" VALUE="{0[title_attr]}"/>
63
+ </foxml:objectProperties>
64
+ <foxml:datastream ID="DC" STATE="A" CONTROL_GROUP="X" VERSIONABLE="true">
65
+ <foxml:datastreamVersion FORMAT_URI="http://www.openarchives.org/OAI/2.0/oai_dc/" ID="DC.0" MIMETYPE="text/xml" LABEL="Dublin Core Record for this object" SIZE="488" CREATED="2004-12-10T00:21:58.000Z">
66
+ <foxml:xmlContent>
67
+ <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/">
68
+ <dc:title>{0[title]}</dc:title>
69
+ <dc:creator>BATCH</dc:creator>
70
+ <dc:type>Text</dc:type>
71
+ <dc:format>text/xml</dc:format>
72
+ <dc:publisher>Columbia University Libraries</dc:publisher>
73
+ <dc:identifier>{0[identifier]}</dc:identifier>
74
+ <dc:source>{0[source]}</dc:source>
75
+ </oai_dc:dc>
76
+ </foxml:xmlContent>
77
+ </foxml:datastreamVersion>
78
+ </foxml:datastream>
79
+ <foxml:datastream ID="RELS-EXT" CONTROL_GROUP="X">
80
+ <foxml:datastreamVersion FORMAT_URI="info:fedora/fedora-system:FedoraRELSExt-1.0"
81
+ ID="RELS-EXT.0" MIMETYPE="application/rdf+xml"
82
+ LABEL="RDF Statements about this object" CREATED="{0[timestamp]}">
83
+ <foxml:xmlContent>
84
+ <rdf:RDF xmlns:fedora-model="info:fedora/fedora-system:def/model#"
85
+ xmlns:rel="info:fedora/fedora-system:def/relations-external#"
86
+ xmlns:cul="http://purl.oclc.org/NET/CUL/"
87
+ xmlns:cc="http://creativecommons.org/ns#"
88
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
89
+ <rdf:Description rdf:about="info:fedora/{0[pid]}">
90
+ <fedora-model:hasModel rdf:resource="info:fedora/ldpd:MODSMetadata"/>
91
+ <rdf:type rdf:resource="http://purl.oclc.org/NET/CUL/Metadata" />
92
+ {0[rels]}
93
+ </rdf:Description>
94
+ </rdf:RDF>
95
+ </foxml:xmlContent>
96
+ </foxml:datastreamVersion>
97
+ </foxml:datastream>
98
+ <foxml:datastream CONTROL_GROUP="X" ID="CONTENT" STATE="A" VERSIONABLE="true">
99
+ <foxml:datastreamVersion CREATED="{0[timestamp]}"
100
+ ID="CONTENT.0" LABEL="{0[title_attr]}" MIMETYPE="text/xml">
101
+ <foxml:xmlContent>{0[metadata]}</foxml:xmlContent>
102
+ </foxml:datastreamVersion>
103
+ </foxml:datastream>
104
+ </foxml:digitalObject>
105
+ INGEST
106
+ # >>
107
# FOXML 1.1 ingest template for resource (content-bearing) objects.
# Placeholders have the form {0[key]}; FoxmlBuilder#sub_values replaces each
# one with the value stored under :key in the substitution hash.
# Keys referenced by this template: pid, title_attr, timestamp, dc_type,
# mime, src, rels, license, datastream_type, source.
# The document carries inline DC and RELS-EXT datastreams plus a CONTENT
# datastream whose control group is {0[datastream_type]} and whose
# contentLocation REF is {0[source]}.
# NOTE(review): dc:source reads {0[src]} while contentLocation reads
# {0[source]}; the builder code in this file only assigns :source, so :src
# must come from the caller -- confirm which key is intended.
+ RESOURCE_FOXML = <<RESOURCE
108
+ <?xml version="1.0" encoding="UTF-8"?>
109
+ <foxml:digitalObject PID="{0[pid]}" VERSION="1.1"
110
+ xmlns:foxml="info:fedora/fedora-system:def/foxml#"
111
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
112
+ xsi:schemaLocation="info:fedora/fedora-system:def/foxml# http://www.fedora.info/definitions/1/0/foxml1-1.xsd">
113
+ <foxml:objectProperties>
114
+ <foxml:property NAME="info:fedora/fedora-system:def/model#state" VALUE="Active"/>
115
+ <foxml:property NAME="info:fedora/fedora-system:def/model#label" VALUE="{0[title_attr]}"/>
116
+ <foxml:property NAME="info:fedora/fedora-system:def/model#ownerId" VALUE="fedoraAdmin"/>
117
+ <foxml:property NAME="info:fedora/fedora-system:def/model#createdDate" VALUE="{0[timestamp]}"/>
118
+ <foxml:property NAME="info:fedora/fedora-system:def/view#lastModifiedDate" VALUE="{0[timestamp]}"/>
119
+ </foxml:objectProperties>
120
+ <foxml:datastream CONTROL_GROUP="X" ID="DC" STATE="A" VERSIONABLE="true">
121
+ <foxml:datastreamVersion CREATED="{0[timestamp]}"
122
+ FORMAT_URI="http://www.openarchives.org/OAI/2.0/oai_dc/" ID="DC1.0"
123
+ LABEL="Dublin Core Record for this object" MIMETYPE="text/xml">
124
+ <foxml:xmlContent>
125
+ <oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" >
126
+ <dc:creator>BATCH</dc:creator>
127
+ <dc:publisher>Columbia University Libraries</dc:publisher>
128
+ <dc:type>{0[dc_type]}</dc:type>
129
+ <dc:format>{0[mime]}</dc:format>
130
+ <dc:identifier>{0[pid]}</dc:identifier>
131
+ <dc:source>{0[src]}</dc:source>
132
+ </oai_dc:dc>
133
+ </foxml:xmlContent>
134
+ </foxml:datastreamVersion>
135
+ </foxml:datastream>
136
+ <foxml:datastream CONTROL_GROUP="X" ID="RELS-EXT" STATE="A" VERSIONABLE="true">
137
+ <foxml:datastreamVersion CREATED="{0[timestamp]}"
138
+ FORMAT_URI="info:fedora/fedora-system:FedoraRELSExt-1.0" ID="RELS-EXT1.0"
139
+ LABEL="RDF Statements about this object" MIMETYPE="application/rdf+xml">
140
+ <foxml:xmlContent>
141
+ <rdf:RDF xmlns:fedora-model="info:fedora/fedora-system:def/model#"
142
+ xmlns:dcmi="http://purl.org/dc/terms/"
143
+ xmlns:si-basic="http://purl.oclc.org/NET/CUL/RESOURCE/STILLIMAGE/BASIC/"
144
+ xmlns:si-assess="http://purl.oclc.org/NET/CUL/RESOURCE/STILLIMAGE/ASSESSMENT/"
145
+ xmlns:cul="http://purl.oclc.org/NET/CUL/"
146
+ xmlns:rel="info:fedora/fedora-system:def/relations-external#"
147
+ xmlns:cc="http://creativecommons.org/ns#"
148
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
149
+ <rdf:Description rdf:about="info:fedora/{0[pid]}">
150
+ <rdf:type rdf:resource="http://purl.oclc.org/NET/CUL/Resource" />
151
+ <fedora-model:hasModel rdf:resource="info:fedora/ldpd:Resource"/>
152
+ {0[rels]}
153
+ <cc:license rdf:resource="info:fedora/{0[license]}" />
154
+ </rdf:Description>
155
+ </rdf:RDF>
156
+ </foxml:xmlContent>
157
+ </foxml:datastreamVersion>
158
+ </foxml:datastream>
159
+ <foxml:datastream CONTROL_GROUP="{0[datastream_type]}" ID="CONTENT" STATE="A" VERSIONABLE="true">
160
+ <foxml:datastreamVersion CREATED="{0[timestamp]}"
161
+ ID="CONTENT.0" LABEL="{0[title_attr]}" MIMETYPE="{0[mime]}">
162
+ <foxml:contentLocation
163
+ REF="{0[source]}" TYPE="INTERNAL_ID"/>
164
+ </foxml:datastreamVersion>
165
+ </foxml:datastream>
166
+ </foxml:digitalObject>
167
+ RESOURCE
168
+ # >>
169
+
170
+
171
+ class FoxmlBuilder
172
+ include Cul::Fedora::Image
173
# Substitution values merged into the caller's hash by #build, one hash per
# "<template_type>_<model_type>" combination (see DEFAULTS below).
STATIC_IMAGE_DEFAULTS = {
  :content_model => 'ldpd:StaticImageAggregator',
  :title         => 'Image Aggregator',
  :title_attr    => 'Image Aggregator',
  :dc_type       => 'Image'
}
CONTENT_DEFAULTS = {
  :content_model => 'ldpd:ContentAggregator',
  :title         => 'Generic Content Aggregator',
  :title_attr    => 'Generic Content Aggregator',
  :dc_type       => 'InteractiveResource'
}
METADATA_DEFAULTS = {
  :content_model => 'ldpd:MODSMetadata',
  :dc_type       => 'Text',
  :dc_format     => 'text/xml'
}
RESOURCE_DEFAULTS = { :content_model => 'ldpd:Resource' }

# Lookup table from default key to the hash of default values.
DEFAULTS = {
  :staticimage_aggregator => STATIC_IMAGE_DEFAULTS,
  :content_aggregator     => CONTENT_DEFAULTS,
  :metadata               => METADATA_DEFAULTS,
  :image_resource         => RESOURCE_DEFAULTS,
  :resource               => RESOURCE_DEFAULTS
}

# Lookup table from lower-cased model type to its FOXML template.
TEMPLATES = {
  :aggregator => Cul::Fedora::Arm::AGGREGATOR_FOXML,
  :metadata   => Cul::Fedora::Arm::METADATA_FOXML,
  :resource   => Cul::Fedora::Arm::RESOURCE_FOXML
}

# sprintf patterns for the RELS-EXT statements emitted by #build_rels;
# %s receives the target object's PID.
METADATA_FOR = '<cul:metadataFor rdf:resource="info:fedora/%s" />'
MEMBER_OF = '<cul:memberOf rdf:resource="info:fedora/%s" />'

# The three bytes of a UTF-8 byte order mark, removed by #sub_values when
# they lead the rendered document.
UTF8_MARKER = "\xEF\xBB\xBF"
208
+
209
# Renders the FOXML ingest document for one object description.
#
# Keys of value_hash read by this method:
#   :model_type    - selects the entry of TEMPLATES (matched
#                    case-insensitively).
#   :template_type - optional qualifier; joined to :model_type with "_" to
#                    select an entry of DEFAULTS (for example "content" and
#                    "Aggregator" select :content_aggregator). When it is
#                    nil or empty the model type alone is the key.
#   :source        - optional location of the content: a local file path
#                    or an http(s) URL.
# Every key, together with the generated :timestamp, :rels and (when
# :source is given) :datastream_type, is available to the template.
#
# The caller's hash and its strings are left unmodified.
#
# Returns the populated template as a String.
# Raises RuntimeError ("Unknown model type ...") when :model_type has no
# template; this is checked before any file is touched.
def build(value_hash)
  model_name = value_hash[:model_type].to_s.downcase
  template = model_name.empty? ? nil : TEMPLATES[model_name.intern]
  raise "Unknown model type #{value_hash[:model_type]}" if template.nil?

  key_parts = [value_hash[:template_type].to_s.downcase, model_name]
  key_parts.delete('')
  default_key = key_parts.join('_').intern

  # NOTE(review): Hash#merge lets the DEFAULTS entries overwrite values the
  # caller supplied under the same key. Preserved as-is; confirm intended.
  subs = value_hash.merge(DEFAULTS[default_key] || {})

  # The trailing "Z" denotes UTC, so the clock must be read in UTC.
  subs[:timestamp] = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z")

  # Trim a copy of the source so the caller's (possibly frozen) string is
  # not modified, and do it before build_rels so it sees the clean path.
  source = nil
  if subs.has_key?(:source)
    source = subs[:source].to_s.strip
    subs[:source] = source
  end

  subs[:rels] = build_rels(subs)

  unless source.nil? || source.empty?
    if source =~ /\Ahttps?:/
      subs[:datastream_type] = 'M'
    else
      if model_name == 'metadata'
        # Inline the MODS file and lift title/identifier values out of it.
        subs[:metadata] = File.read(source)
        subs.merge!(parse_mods(subs[:metadata]))
      end
      # Pathname#realpath returns a Pathname; convert before concatenating.
      subs[:source] = 'file:/' + Pathname.new(source).realpath.to_s
      subs[:datastream_type] = 'E'
    end
  end

  sub_values(subs, template)
end
246
+ protected
247
+
248
# Extracts title and identifier values from a MODS record.
#
# data - the MODS document as an XML String.
#
# Returns a Hash that may contain:
#   :title      - text of /mods/titleInfo/title
#   :title_attr - the same text percent-escaped
#   :identifier - text of /mods/recordInfo/recordIdentifier, or of
#                 /mods/identifier when the former is absent
# A key is omitted when its element is not present in the document.
def parse_mods(data)
  result = {}
  xml = REXML::Document.new(data)

  title = xml.elements["/mods/titleInfo/title"]
  if title
    text = title.text
    # URI.escape no longer exists on Ruby >= 3.0; the parser objects still
    # provide the same escaping. Fall back to URI.escape on very old Rubies.
    escaper = if defined?(URI::RFC2396_PARSER)
                URI::RFC2396_PARSER
              elsif defined?(URI::DEFAULT_PARSER)
                URI::DEFAULT_PARSER
              else
                URI
              end
    result[:title] = text
    # An empty <title/> has nil text; escape it as the empty string.
    result[:title_attr] = escaper.escape(text.to_s)
  end

  identifier = xml.elements["/mods/recordInfo/recordIdentifier"] ||
               xml.elements["/mods/identifier"]
  result[:identifier] = identifier.text if identifier

  result
end
265
+
266
# Builds the RELS-EXT statements inserted at the template's {0[rels]} slot.
#
# Keys of value_hash read by this method:
#   :target     - ';'-separated PIDs this object relates to. When nil the
#                 method returns '' immediately.
#   :model_type - "metadata" (any case) yields cul:metadataFor statements,
#                 anything else cul:memberOf statements.
#   :dc_format  - when the model type is "resource" (any case) and this
#                 value contains "Image", the file at :source is analysed
#                 and its properties are appended as further statements.
#   :source     - path handed to analyze_image in that case.
#
# Side effect: for image resources, value_hash[:mime] is set to the MIME
# type reported by analyze_image.
#
# Returns the concatenated statements as a String.
def build_rels(value_hash)
  return '' if value_hash[:target].nil?

  model_name = value_hash[:model_type].to_s.downcase
  pattern = model_name == 'metadata' ? METADATA_FOR : MEMBER_OF

  # Ignore padding and empty entries such as "a; b" or "a;;b" so that no
  # statement with a blank or malformed PID is emitted.
  targets = value_hash[:target].split(';').map { |pid| pid.strip }
  targets = targets.reject { |pid| pid.empty? }
  rels = targets.map { |pid| sprintf(pattern, pid) }.join

  # :dc_format may be absent; treat that as "not an image".
  if model_name == 'resource' && value_hash[:dc_format].to_s.include?('Image')
    image_props = analyze_image(value_hash[:source])
    value_hash[:mime] = image_props[:mime]
    map_image_properties(image_props).each { |rel| rels << rel }
  end

  rels
end
286
+
287
# Fills a template by replacing every {0[key]} placeholder with the value
# stored under :key in value_hash (a missing key renders as nothing).
# Surrounding whitespace is trimmed, and a UTF-8 byte order mark at the very
# start of the result is dropped.
#
# Returns the rendered String.
def sub_values(value_hash, template)
  placeholder = /\{0\[(\w+)\]\}/
  rendered = template.gsub(placeholder) do
    value_hash[Regexp.last_match(1).to_sym]
  end
  rendered.strip!
  return rendered unless rendered.start_with?(UTF8_MARKER)
  rendered[UTF8_MARKER.length..-1]
end
297
+ end
298
+ end
299
+ end
300
+ end
@@ -0,0 +1,106 @@
1
+ require 'base64'
2
+ require 'net/http'
3
+ require 'net/https'
4
+ require 'soap/wsdlDriver'
5
+ module Cul
6
+ module Fedora
7
+ module Arm
8
module Tasks
  # Path of the Fedora management (API-M) SOAP service.
  APIM = "/fedora/services/management"

  # Base class of all tasks. A task names an API-M operation (@apim) and
  # the arguments to send with it (@args); #post hands both to a connector.
  class Task
    # Result of the most recent #post, or nil before the first one.
    attr_reader :response

    def initialize()
      super()
      @apim = nil
      @args = {}
    end

    # Sends the task through connector.apim_call and remembers the result.
    # Raises RuntimeError when no operation or no arguments have been set.
    # Returns the connector's response.
    def post(connector)
      raise "Missing APIM SOAPAction name" if @apim.nil?
      raise "No soap arguments" if @args.empty?

      @response = connector.apim_call(@apim, @args)
    end
  end # Task

  # Purges the object with the given pid (purgeObject).
  class PurgeTask < Task
    def initialize(pid)
      super()
      @apim = :purgeObject
      @args = {
        :pid        => pid,
        :force      => 'false',
        :logMessage => 'purging test objects'
      }
    end
  end

  # Requests numPids new PIDs in the given namespace (getNextPID).
  class ReservePidsTask < Task
    def initialize(numPids, namespace="demo")
      super()
      @apim = :getNextPID
      @numPids = numPids
      @namespace = namespace
      @args = { :numPIDs => numPids, :pidNamespace => namespace }
    end
  end

  # Base class of ingest tasks; subclasses supply the arguments.
  class InsertTask < Task
    def initialize()
      super()
      @apim = :ingest
    end

    # Posts the task and stores the :pid entry of the response in @pid.
    # Returns the response.
    def post(driver)
      result = super(driver)
      @pid = @response[:pid]
      result
    end
  end

  # Ingests a FOXML 1.1 document given as a string.
  class InsertFoxmlTask < InsertTask
    def initialize(data)
      super()
      @args.update(
        :logMessage => 'Batch update',
        :format     => 'info:fedora/fedora-system:FOXML-1.1',
        :objectXML  => data
      )
    end
  end

  # Base class of tasks that modify an existing object. It sets no
  # operation itself, so only subclasses can be posted.
  class UpdateTask < Task
    def initialize(pid)
      super()
      @pid = pid
    end
  end

  # Replaces the content of an inline XML datastream
  # (modifyDatastreamByValue) with checksums disabled.
  class UpdateXmlDatastreamTask < UpdateTask
    def initialize(pid,dsId,dsLabel,dsMIME,formatURI,data)
      super(pid)
      @apim = :modifyDatastreamByValue
      @dsId = dsId
      @inlineData = data
      @args.update(
        :pid          => pid,
        :dsID         => dsId,
        :dsLabel      => dsLabel,
        :MIMEType     => dsMIME,
        :formatURI    => formatURI,
        :dsContent    => data,
        :altIDs       => [],
        :checksumType => 'DISABLED',
        :checksum     => 'none',
        :logMessage   => 'Batch update',
        :force        => 'false'
      )
    end
  end

  # Replaces the CONTENT datastream with new MODS XML.
  class UpdateMODSTask < UpdateXmlDatastreamTask
    def initialize(pid,data)
      # NOTE(review): the label is misspelled ("Desciptive"); it is sent to
      # the repository as-is, so it is left unchanged here.
      super(pid, "CONTENT", "MODS Desciptive Metadata", "text/xml",
            "http://www.loc.gov/mods/v3", data)
    end
  end

  # Replaces the DC datastream with new Dublin Core XML.
  class UpdateDCTask < UpdateXmlDatastreamTask
    def initialize(pid,data)
      super(pid, "DC", "Dublin Core Metadata", "text/xml",
            "http://www.openarchives.org/OAI/2.0/oai_dc/", data)
    end
  end
end
101
+ end
102
+ end
103
+ end
104
# Runs only when this file is executed directly rather than required;
# the body is an empty generated placeholder.
+ if __FILE__ == $0
105
+ # TODO Generated stub
106
+ end
@@ -0,0 +1,64 @@
1
+ module Cul
2
+ module Fedora
3
# Holds the connection settings of one Fedora repository and opens its REST
# and API-M (SOAP) interfaces.
#
# The configuration is a Hash with String keys. A setting is looked up
# first in the sub-hash named after the interface ("rest", "admin") and
# then at the top level, so shared values need to be given only once.
class Connector
  attr_reader :config

  # Builds one Connector per environment.
  #
  # environments - Hash mapping an environment name to its configuration.
  #
  # Returns a Hash mapping each environment name to a Connector.
  def self.parse(environments)
    connectors = {}
    environments.each_pair do |environment, config|
      connectors[environment] = Connector.new(config)
    end
    connectors
  end

  def initialize(config)
    @config = config
  end

  # Opens an HTTP connection to the REST host/port and yields it.
  # The connection is closed when the block returns or raises.
  # Returns the value of the block.
  def rest_interface()
    Net::HTTP.start(config_for(:rest, :host), config_for(:rest, :port)) do |http|
      yield http
    end
  end

  # Returns the base URL of the REST interface.
  def rest_location()
    url_builder(:rest, "")
  end

  # Returns "https" when the interface's "ssl" setting is exactly true,
  # otherwise "http".
  def protocol_for(interface)
    config_for(interface.to_s, "ssl") == true ? "https" : "http"
  end

  # Looks up a setting for an interface, falling back to the top-level
  # value when the interface section does not define it. Only nil counts
  # as "not defined", so an explicit false in the section (for example
  # ssl: false) overrides a top-level true.
  def config_for(interface, value)
    key = value.to_s
    section = @config[interface.to_s]
    specific = section && section[key]
    specific.nil? ? @config[key] : specific
  end

  # Returns "<protocol>://<host>:<port>/<url>" for the interface.
  def url_builder(interface, url)
    "#{protocol_for(interface)}://#{config_for(interface,:host)}:#{config_for(interface,:port)}/#{url}"
  end

  # Creates a SOAP RPC driver for the API-M service from its WSDL and
  # registers the admin credentials for the management endpoint.
  # A new driver is built on every call.
  #
  # Raises RuntimeError when the configuration asks for ssl_verify.
  def apim_interface()
    wsdl = url_builder(:admin, "fedora/wsdl?api=API-M")
    driver = SOAP::WSDLDriverFactory.new(wsdl).create_rpc_driver

    if config_for(:admin, :ssl_verify)
      raise "SSL verification not currently supported. Please specify ssl_verify: false"
    else
      # SECURITY NOTE(review): certificate verification is switched off
      # here, so the server's identity is not checked on https connections.
      driver.options['protocol.http.ssl_config.verify_mode'] = OpenSSL::SSL::VERIFY_NONE
    end

    driver.options["protocol.http.basic_auth"] << [url_builder(:admin,"fedora/services/management"), config_for(:admin,:user), config_for(:admin,:password)]

    driver
  end

  # Invokes the named API-M operation on a fresh driver.
  #
  # method - Symbol naming the operation (for example :ingest).
  # args   - when the last argument is a Hash it is passed to the
  #          operation; otherwise an empty Hash is passed.
  #
  # Returns whatever the operation returns.
  def apim_call(method, *args)
    # Plain-Ruby equivalent of ActiveSupport's Array#extract_options!,
    # which this file never loads.
    options = args.last.is_a?(Hash) ? args.pop : {}
    apim_interface.method(method).call(options)
  end

end
63
+ end
64
+ end