cul-fedora-arm 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,300 @@
1
+ require 'cul/fedora/image/image'
2
+ require 'pathname'
3
+ require 'rexml/document'
4
+ require 'uri'
5
+ module Cul
6
+ module Fedora
7
+ module Arm
8
# FOXML 1.1 ingest template for aggregator objects (registered under the
# :aggregator key of FoxmlBuilder::TEMPLATES).
#
# Placeholders have the form {0[key]}; FoxmlBuilder#sub_values replaces each
# one with value_hash[:key]. Keys used by this template:
#   pid, title_attr, timestamp, title, dc_type, id, content_model, license, rels
#
# The object carries two inline-XML datastreams: DC (Dublin Core) and
# RELS-EXT (RDF statements, including the caller-built {0[rels]} fragment).
# NOTE(review): dc:identifier is filled from {0[id]}, whereas the other
# templates use {0[identifier]} / {0[pid]} -- confirm callers supply :id.
+ AGGREGATOR_FOXML = <<INGEST
9
+ <?xml version="1.0" encoding="UTF-8"?>
10
+ <foxml:digitalObject PID="{0[pid]}" VERSION="1.1"
11
+ xmlns:foxml="info:fedora/fedora-system:def/foxml#"
12
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="info:fedora/fedora-system:def/foxml# http://www.fedora.info/definitions/1/0/foxml1-1.xsd">
13
+ <foxml:objectProperties>
14
+ <foxml:property NAME="info:fedora/fedora-system:def/model#state" VALUE="Active"/>
15
+ <foxml:property NAME="info:fedora/fedora-system:def/model#label" VALUE="{0[title_attr]}"/>
16
+ <foxml:property NAME="info:fedora/fedora-system:def/model#ownerId" VALUE="fedoraAdmin"/>
17
+ <foxml:property NAME="info:fedora/fedora-system:def/model#createdDate" VALUE="{0[timestamp]}"/>
18
+ <foxml:property NAME="info:fedora/fedora-system:def/view#lastModifiedDate" VALUE="{0[timestamp]}"/>
19
+ </foxml:objectProperties>
20
+ <foxml:datastream CONTROL_GROUP="X" ID="DC" STATE="A" VERSIONABLE="true">
21
+ <foxml:datastreamVersion CREATED="{0[timestamp]}"
22
+ FORMAT_URI="http://www.openarchives.org/OAI/2.0/oai_dc/" ID="DC1.0"
23
+ LABEL="Dublin Core Record for this object" MIMETYPE="text/xml">
24
+ <foxml:xmlContent>
25
+ <oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/">
26
+ <dc:title>{0[title]}</dc:title>
27
+ <dc:creator>BATCH</dc:creator>
28
+ <dc:type>{0[dc_type]}</dc:type>
29
+ <dc:identifier>{0[id]}</dc:identifier>
30
+ </oai_dc:dc>
31
+ </foxml:xmlContent>
32
+ </foxml:datastreamVersion>
33
+ </foxml:datastream>
34
+ <foxml:datastream CONTROL_GROUP="X" ID="RELS-EXT" STATE="A" VERSIONABLE="true">
35
+ <foxml:datastreamVersion CREATED="{0[timestamp]}"
36
+ FORMAT_URI="info:fedora/fedora-system:FedoraRELSExt-1.0" ID="RELS-EXT1.0"
37
+ LABEL="RDF Statements about this object" MIMETYPE="application/rdf+xml">
38
+ <foxml:xmlContent>
39
+ <rdf:RDF xmlns:fedora-model="info:fedora/fedora-system:def/model#"
40
+ xmlns:ore="http://www.openarchives.org/ore/terms/"
41
+ xmlns:cul="http://purl.oclc.org/NET/CUL/"
42
+ xmlns:cc="http://creativecommons.org/ns#"
43
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
44
+ <rdf:Description rdf:about="info:fedora/{0[pid]}">
45
+ <fedora-model:hasModel rdf:resource="info:fedora/{0[content_model]}"/>
46
+ <rdf:type rdf:resource="http://purl.oclc.org/NET/CUL/Aggregator" />
47
+ <cc:license rdf:resource="info:fedora/{0[license]}" />
48
+ {0[rels]}
49
+ </rdf:Description>
50
+ </rdf:RDF>
51
+ </foxml:xmlContent>
52
+ </foxml:datastreamVersion>
53
+ </foxml:datastream>
54
+ </foxml:digitalObject>
55
+ INGEST
56
+ # >>
57
# FOXML 1.1 ingest template for MODS metadata objects (registered under the
# :metadata key of FoxmlBuilder::TEMPLATES).
#
# Placeholders have the form {0[key]}; FoxmlBuilder#sub_values replaces each
# one with value_hash[:key]. Keys used by this template:
#   pid, title_attr, title, identifier, source, timestamp, rels, metadata
#
# Datastreams: DC, RELS-EXT (content model fixed to ldpd:MODSMetadata) and an
# inline-XML CONTENT datastream whose body is the raw {0[metadata]} value.
# NOTE(review): the DC datastream version carries a literal CREATED timestamp
# and SIZE="488" instead of placeholders, and the object state is "A" here but
# "Active" in the other two templates -- confirm both are intentional.
+ METADATA_FOXML = <<INGEST
58
+ <?xml version="1.0" encoding="UTF-8"?>
59
+ <foxml:digitalObject VERSION="1.1" PID="{0[pid]}" xmlns:foxml="info:fedora/fedora-system:def/foxml#" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="info:fedora/fedora-system:def/foxml# http://www.fedora.info/definitions/1/0/foxml1-1.xsd">
60
+ <foxml:objectProperties>
61
+ <foxml:property NAME="info:fedora/fedora-system:def/model#state" VALUE="A"/>
62
+ <foxml:property NAME="info:fedora/fedora-system:def/model#label" VALUE="{0[title_attr]}"/>
63
+ </foxml:objectProperties>
64
+ <foxml:datastream ID="DC" STATE="A" CONTROL_GROUP="X" VERSIONABLE="true">
65
+ <foxml:datastreamVersion FORMAT_URI="http://www.openarchives.org/OAI/2.0/oai_dc/" ID="DC.0" MIMETYPE="text/xml" LABEL="Dublin Core Record for this object" SIZE="488" CREATED="2004-12-10T00:21:58.000Z">
66
+ <foxml:xmlContent>
67
+ <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/">
68
+ <dc:title>{0[title]}</dc:title>
69
+ <dc:creator>BATCH</dc:creator>
70
+ <dc:type>Text</dc:type>
71
+ <dc:format>text/xml</dc:format>
72
+ <dc:publisher>Columbia University Libraries</dc:publisher>
73
+ <dc:identifier>{0[identifier]}</dc:identifier>
74
+ <dc:source>{0[source]}</dc:source>
75
+ </oai_dc:dc>
76
+ </foxml:xmlContent>
77
+ </foxml:datastreamVersion>
78
+ </foxml:datastream>
79
+ <foxml:datastream ID="RELS-EXT" CONTROL_GROUP="X">
80
+ <foxml:datastreamVersion FORMAT_URI="info:fedora/fedora-system:FedoraRELSExt-1.0"
81
+ ID="RELS-EXT.0" MIMETYPE="application/rdf+xml"
82
+ LABEL="RDF Statements about this object" CREATED="{0[timestamp]}">
83
+ <foxml:xmlContent>
84
+ <rdf:RDF xmlns:fedora-model="info:fedora/fedora-system:def/model#"
85
+ xmlns:rel="info:fedora/fedora-system:def/relations-external#"
86
+ xmlns:cul="http://purl.oclc.org/NET/CUL/"
87
+ xmlns:cc="http://creativecommons.org/ns#"
88
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
89
+ <rdf:Description rdf:about="info:fedora/{0[pid]}">
90
+ <fedora-model:hasModel rdf:resource="info:fedora/ldpd:MODSMetadata"/>
91
+ <rdf:type rdf:resource="http://purl.oclc.org/NET/CUL/Metadata" />
92
+ {0[rels]}
93
+ </rdf:Description>
94
+ </rdf:RDF>
95
+ </foxml:xmlContent>
96
+ </foxml:datastreamVersion>
97
+ </foxml:datastream>
98
+ <foxml:datastream CONTROL_GROUP="X" ID="CONTENT" STATE="A" VERSIONABLE="true">
99
+ <foxml:datastreamVersion CREATED="{0[timestamp]}"
100
+ ID="CONTENT.0" LABEL="{0[title_attr]}" MIMETYPE="text/xml">
101
+ <foxml:xmlContent>{0[metadata]}</foxml:xmlContent>
102
+ </foxml:datastreamVersion>
103
+ </foxml:datastream>
104
+ </foxml:digitalObject>
105
+ INGEST
106
+ # >>
107
# FOXML 1.1 ingest template for resource objects (registered under the
# :resource key of FoxmlBuilder::TEMPLATES).
#
# Placeholders have the form {0[key]}; FoxmlBuilder#sub_values replaces each
# one with value_hash[:key]. Keys used by this template:
#   pid, title_attr, timestamp, dc_type, mime, src, rels, license,
#   datastream_type, source
#
# Datastreams: DC, RELS-EXT (content model fixed to ldpd:Resource) and a
# CONTENT datastream whose CONTROL_GROUP is {0[datastream_type]} and whose
# contentLocation REF is {0[source]}.
# NOTE(review): dc:source is filled from {0[src]} while the content location
# uses {0[source]}; FoxmlBuilder#build sets :source but nothing visible in this
# file sets :src, so dc:source renders empty unless the caller supplies it.
+ RESOURCE_FOXML = <<RESOURCE
108
+ <?xml version="1.0" encoding="UTF-8"?>
109
+ <foxml:digitalObject PID="{0[pid]}" VERSION="1.1"
110
+ xmlns:foxml="info:fedora/fedora-system:def/foxml#"
111
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
112
+ xsi:schemaLocation="info:fedora/fedora-system:def/foxml# http://www.fedora.info/definitions/1/0/foxml1-1.xsd">
113
+ <foxml:objectProperties>
114
+ <foxml:property NAME="info:fedora/fedora-system:def/model#state" VALUE="Active"/>
115
+ <foxml:property NAME="info:fedora/fedora-system:def/model#label" VALUE="{0[title_attr]}"/>
116
+ <foxml:property NAME="info:fedora/fedora-system:def/model#ownerId" VALUE="fedoraAdmin"/>
117
+ <foxml:property NAME="info:fedora/fedora-system:def/model#createdDate" VALUE="{0[timestamp]}"/>
118
+ <foxml:property NAME="info:fedora/fedora-system:def/view#lastModifiedDate" VALUE="{0[timestamp]}"/>
119
+ </foxml:objectProperties>
120
+ <foxml:datastream CONTROL_GROUP="X" ID="DC" STATE="A" VERSIONABLE="true">
121
+ <foxml:datastreamVersion CREATED="{0[timestamp]}"
122
+ FORMAT_URI="http://www.openarchives.org/OAI/2.0/oai_dc/" ID="DC1.0"
123
+ LABEL="Dublin Core Record for this object" MIMETYPE="text/xml">
124
+ <foxml:xmlContent>
125
+ <oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" >
126
+ <dc:creator>BATCH</dc:creator>
127
+ <dc:publisher>Columbia University Libraries</dc:publisher>
128
+ <dc:type>{0[dc_type]}</dc:type>
129
+ <dc:format>{0[mime]}</dc:format>
130
+ <dc:identifier>{0[pid]}</dc:identifier>
131
+ <dc:source>{0[src]}</dc:source>
132
+ </oai_dc:dc>
133
+ </foxml:xmlContent>
134
+ </foxml:datastreamVersion>
135
+ </foxml:datastream>
136
+ <foxml:datastream CONTROL_GROUP="X" ID="RELS-EXT" STATE="A" VERSIONABLE="true">
137
+ <foxml:datastreamVersion CREATED="{0[timestamp]}"
138
+ FORMAT_URI="info:fedora/fedora-system:FedoraRELSExt-1.0" ID="RELS-EXT1.0"
139
+ LABEL="RDF Statements about this object" MIMETYPE="application/rdf+xml">
140
+ <foxml:xmlContent>
141
+ <rdf:RDF xmlns:fedora-model="info:fedora/fedora-system:def/model#"
142
+ xmlns:dcmi="http://purl.org/dc/terms/"
143
+ xmlns:si-basic="http://purl.oclc.org/NET/CUL/RESOURCE/STILLIMAGE/BASIC/"
144
+ xmlns:si-assess="http://purl.oclc.org/NET/CUL/RESOURCE/STILLIMAGE/ASSESSMENT/"
145
+ xmlns:cul="http://purl.oclc.org/NET/CUL/"
146
+ xmlns:rel="info:fedora/fedora-system:def/relations-external#"
147
+ xmlns:cc="http://creativecommons.org/ns#"
148
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
149
+ <rdf:Description rdf:about="info:fedora/{0[pid]}">
150
+ <rdf:type rdf:resource="http://purl.oclc.org/NET/CUL/Resource" />
151
+ <fedora-model:hasModel rdf:resource="info:fedora/ldpd:Resource"/>
152
+ {0[rels]}
153
+ <cc:license rdf:resource="info:fedora/{0[license]}" />
154
+ </rdf:Description>
155
+ </rdf:RDF>
156
+ </foxml:xmlContent>
157
+ </foxml:datastreamVersion>
158
+ </foxml:datastream>
159
+ <foxml:datastream CONTROL_GROUP="{0[datastream_type]}" ID="CONTENT" STATE="A" VERSIONABLE="true">
160
+ <foxml:datastreamVersion CREATED="{0[timestamp]}"
161
+ ID="CONTENT.0" LABEL="{0[title_attr]}" MIMETYPE="{0[mime]}">
162
+ <foxml:contentLocation
163
+ REF="{0[source]}" TYPE="INTERNAL_ID"/>
164
+ </foxml:datastreamVersion>
165
+ </foxml:datastream>
166
+ </foxml:digitalObject>
167
+ RESOURCE
168
+ # >>
169
+
170
+
171
+ class FoxmlBuilder
172
+ include Cul::Fedora::Image
173
# Substitution values that #build merges into the caller's value hash for a
# given object type. All tables are frozen: they are shared by every build
# call and must never be mutated in place (#build only reads them via merge).

# Values for static image aggregators.
STATIC_IMAGE_DEFAULTS = {
  :content_model => 'ldpd:StaticImageAggregator',
  :title => 'Image Aggregator',
  :title_attr => 'Image Aggregator',
  :dc_type => 'Image'
}.freeze

# Values for generic content aggregators.
CONTENT_DEFAULTS = {
  :content_model => 'ldpd:ContentAggregator',
  :title => 'Generic Content Aggregator',
  :title_attr => 'Generic Content Aggregator',
  :dc_type => 'InteractiveResource'
}.freeze

# Values for MODS metadata objects.
METADATA_DEFAULTS = {
  :content_model => 'ldpd:MODSMetadata',
  :dc_type => 'Text',
  :dc_format => 'text/xml'
}.freeze

# Values for resource objects.
RESOURCE_DEFAULTS = {
  :content_model => 'ldpd:Resource'
}.freeze

# Lookup table used by #build; the key is derived from the lower-cased
# :template_type and :model_type of the value hash.
DEFAULTS = {
  :staticimage_aggregator => STATIC_IMAGE_DEFAULTS,
  :content_aggregator => CONTENT_DEFAULTS,
  :metadata => METADATA_DEFAULTS,
  :image_resource => RESOURCE_DEFAULTS,
  :resource => RESOURCE_DEFAULTS
}.freeze
200
# Maps the lower-cased :model_type of a value hash to the FOXML template that
# #build renders for it. Frozen because it is shared by every build call.
TEMPLATES = {
  :aggregator => Cul::Fedora::Arm::AGGREGATOR_FOXML,
  :metadata => Cul::Fedora::Arm::METADATA_FOXML,
  :resource => Cul::Fedora::Arm::RESOURCE_FOXML
}.freeze
205
# sprintf patterns for the RELS-EXT statements emitted by #build_rels; %s is
# replaced with the target object's PID. Frozen so the shared strings cannot
# be modified in place.
METADATA_FOR = '<cul:metadataFor rdf:resource="info:fedora/%s" />'.freeze
MEMBER_OF = '<cul:memberOf rdf:resource="info:fedora/%s" />'.freeze
# The three bytes EF BB BF (UTF-8 byte order mark) that #sub_values strips
# from the start of rendered output.
UTF8_MARKER = "\xEF\xBB\xBF".freeze
208
+
209
# Renders the FOXML ingest document for one object description.
#
# value_hash - Hash of substitution values keyed by Symbol. Keys read here:
#              :model_type    - selects the template (lower-cased lookup in
#                               TEMPLATES).
#              :template_type - optional; combined with :model_type to pick
#                               an entry of DEFAULTS.
#              :source        - optional; a URL or a local file path.
#              Remaining keys are passed through to the template.
#
# A source starting with "http:" or "https:" is left as is and marks the
# content datastream as type 'M'. Any other non-empty source is treated as a
# local path: it is resolved with Pathname#realpath, prefixed with 'file:/'
# and marked as type 'E'; when :model_type is exactly 'Metadata' the file is
# also read into :metadata and the values found by #parse_mods are merged in.
#
# Returns the rendered template (the result of #sub_values).
# Raises RuntimeError ("Unknown model type ...") when no template matches.
def build(value_hash)
  model_type = value_hash[:model_type].to_s
  template = TEMPLATES[model_type.downcase.intern]
  # Fail before any file access when the request cannot be rendered anyway.
  raise "Unknown model type #{value_hash[:model_type]}" if template.nil?

  # Join only the parts that are present: a missing :template_type used to
  # raise NoMethodError, and the single-word DEFAULTS keys (:metadata,
  # :resource) could never be produced by "type_model" concatenation.
  default_key = [value_hash[:template_type], model_type].
    map { |part| part.to_s.downcase }.
    reject { |part| part.empty? }.
    join('_').intern

  # NOTE(review): with this merge order the DEFAULTS entries win over
  # caller-supplied values of the same key; kept as in the original.
  subs = value_hash.merge(DEFAULTS.fetch(default_key, {}))
  # The pattern ends in a literal "Z" (UTC designator), so format UTC time.
  subs[:timestamp] = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z")
  subs[:rels] = build_rels(subs)

  source = subs[:source]
  unless source.nil?
    # Non-destructive strip: the string is shared with the caller's hash and
    # may be frozen.
    source = source.strip
    subs[:source] = source
    unless source.empty?
      if source =~ /\Ahttps?:/
        subs[:datastream_type] = 'M'
      else
        if subs[:model_type].eql?('Metadata')
          subs[:metadata] = File.open(source) { |file| file.read }
          subs.merge!(parse_mods(subs[:metadata]))
        end
        # Pathname has no implicit String conversion, hence the to_s.
        subs[:source] = 'file:/' + Pathname.new(source).realpath.to_s
        subs[:datastream_type] = 'E'
      end
    end
  end

  sub_values(subs, template)
end
246
+ protected
247
+
248
# Pulls the title and identifier out of a MODS record.
#
# data - MODS XML as a String (anything REXML::Document.new accepts).
#
# Returns a Hash that may contain:
#   :title      - text of /mods/titleInfo/title
#   :title_attr - the same text percent-escaped
#   :identifier - text of /mods/recordInfo/recordIdentifier, or of
#                 /mods/identifier when the former element is absent
# Keys whose element is missing (or, for the title, empty) are left out.
def parse_mods(data)
  result = {}
  xml = REXML::Document.new(data)

  title = xml.elements["/mods/titleInfo/title"]
  # An empty <title/> has nil text; skip it instead of escaping nil.
  if title && title.text
    # URI.escape was removed in Ruby 3.0; the RFC 2396 parser provides the
    # same escaping. Older Rubies only have it as URI::DEFAULT_PARSER.
    escaper = defined?(URI::RFC2396_Parser) ? URI::RFC2396_Parser.new : URI::DEFAULT_PARSER
    result[:title] = title.text
    result[:title_attr] = escaper.escape(title.text)
  end

  identifier = xml.elements["/mods/recordInfo/recordIdentifier"] ||
               xml.elements["/mods/identifier"]
  result[:identifier] = identifier.text if identifier

  result
end
265
+
266
# Builds the RELS-EXT statements for one object.
#
# value_hash - Hash of substitution values. Keys read here:
#              :target     - ';'-separated list of target PIDs; when nil no
#                            statements are produced at all.
#              :model_type - 'metadata' (any case) selects METADATA_FOR,
#                            anything else MEMBER_OF.
#              :dc_format  - optional; for model type 'Resource' a value
#                            containing 'Image' triggers image analysis.
#              :source     - passed to analyze_image in that case.
#
# Side effect: for image resources, value_hash[:mime] is set from the
# analysis result.
#
# Returns the concatenated statements as a String ('' when there are none).
def build_rels(value_hash)
  return '' if value_hash[:target].nil?

  pattern = value_hash[:model_type].downcase.eql?('metadata') ? METADATA_FOR : MEMBER_OF
  # Tolerate "a; b" and "a;;b": padded or empty entries would otherwise
  # produce relations to malformed PIDs.
  targets = value_hash[:target].split(';').map { |pid| pid.strip }.reject { |pid| pid.empty? }
  rels = targets.map { |pid| sprintf(pattern, pid) }

  # :dc_format is optional (RESOURCE_DEFAULTS does not set it), so coerce nil
  # to '' rather than calling a String method on it.
  if value_hash[:model_type].eql?('Resource') && value_hash[:dc_format].to_s.include?('Image')
    image_props = analyze_image(value_hash[:source])
    value_hash[:mime] = image_props[:mime]
    map_image_properties(image_props).each { |rel| rels << rel }
  end

  rels.join
end
286
+
287
# Renders a template by replacing every {0[key]} placeholder with
# value_hash[:key] (a missing key renders as an empty string).
#
# value_hash - Hash of substitution values keyed by Symbol.
# template   - String containing {0[key]} placeholders.
#
# Returns a new String with surrounding whitespace removed and, if present,
# a leading UTF8_MARKER dropped.
def sub_values(value_hash, template)
  rendered = template.gsub(/\{0\[(\w+)\]\}/) do
    value_hash[Regexp.last_match(1).intern]
  end
  rendered.strip!
  rendered.slice!(0, UTF8_MARKER.length) if rendered.start_with?(UTF8_MARKER)
  rendered
end
297
+ end
298
+ end
299
+ end
300
+ end
@@ -0,0 +1,106 @@
1
+ require 'base64'
2
+ require 'net/http'
3
+ require 'net/https'
4
+ require 'soap/wsdlDriver'
5
+ module Cul
6
+ module Fedora
7
+ module Arm
8
# Task objects describing single Fedora API-M calls. Each task stores the
# operation name in @apim and its arguments in @args; #post hands both to a
# connector object responding to apim_call(name, args).
module Tasks
  APIM = "/fedora/services/management"

  # Base class: holds the operation name, its arguments and the last response.
  class Task
    # Result of the most recent #post, or nil before the first call.
    attr_reader :response

    def initialize()
      super()
      @apim = nil
      @args = {}
    end

    # Sends the task through the given connector.
    #
    # connector - object responding to apim_call(name, args).
    #
    # Returns the connector's response (also kept in #response).
    # Raises RuntimeError when no operation name or no arguments are set.
    def post(connector)
      raise "Missing APIM SOAPAction name" if @apim.nil?
      raise "No soap arguments" if @args.empty?
      @response = connector.apim_call(@apim, @args)
    end
  end # Task

  # Calls purgeObject for one PID.
  class PurgeTask < Task
    def initialize(pid)
      super()
      @apim = :purgeObject
      @args = {:pid=>pid,:force=>'false',:logMessage=>'purging test objects'}
    end
  end

  # Calls getNextPID for numPids PIDs in the given namespace.
  class ReservePidsTask < Task
    def initialize(numPids, namespace="demo")
      super()
      @apim = :getNextPID
      @numPids = numPids
      @namespace = namespace
      @args = {:numPIDs=>numPids,:pidNamespace=>namespace}
    end
  end

  # Base class for ingest calls; remembers the :pid entry of the response.
  class InsertTask < Task
    # The :pid entry of the last ingest response (nil before #post).
    attr_reader :pid

    def initialize()
      super()
      @apim = :ingest
    end

    def post(driver)
      response = super(driver)
      @pid = @response[:pid]
      response
    end
  end

  # Ingests a FOXML 1.1 document.
  class InsertFoxmlTask < InsertTask
    def initialize(data)
      super()
      @args[:logMessage] = 'Batch update'
      @args[:format] = 'info:fedora/fedora-system:FOXML-1.1'
      @args[:objectXML] = data
    end
  end

  # Base class for calls that modify an existing object.
  class UpdateTask < Task
    # PID of the object being modified.
    attr_reader :pid

    def initialize(pid)
      super()
      @pid = pid
    end
  end

  # Calls modifyDatastreamByValue with the given datastream content.
  class UpdateXmlDatastreamTask < UpdateTask
    def initialize(pid,dsId,dsLabel,dsMIME,formatURI,data)
      super(pid)
      @apim = :modifyDatastreamByValue
      @dsId = dsId
      @inlineData = data
      @args[:pid] = pid
      @args[:dsID] = dsId
      @args[:dsLabel] = dsLabel
      @args[:MIMEType] = dsMIME
      @args[:formatURI] = formatURI
      @args[:dsContent] = data
      @args[:altIDs] = []
      @args[:checksumType] = 'DISABLED'
      @args[:checksum] = 'none'
      @args[:logMessage] = 'Batch update'
      @args[:force] = 'false'
    end
  end

  # Replaces the CONTENT datastream with MODS XML.
  class UpdateMODSTask < UpdateXmlDatastreamTask
    def initialize(pid,data)
      # Label spelling corrected (was "Desciptive").
      super(pid,"CONTENT","MODS Descriptive Metadata","text/xml","http://www.loc.gov/mods/v3",data)
    end
  end

  # Replaces the DC datastream with Dublin Core XML.
  class UpdateDCTask < UpdateXmlDatastreamTask
    def initialize(pid,data)
      super(pid,"DC","Dublin Core Metadata","text/xml","http://www.openarchives.org/OAI/2.0/oai_dc/",data)
    end
  end
end
101
+ end
102
+ end
103
+ end
104
# Script entry point: runs only when this file is executed directly rather
# than required. Currently a placeholder with no behavior.
if $0 == __FILE__
  # TODO Generated stub
end
@@ -0,0 +1,64 @@
1
+ module Cul
2
+ module Fedora
3
# Connection settings for one Fedora environment, plus helpers that build
# URLs and open the REST and API-M (SOAP) interfaces from those settings.
#
# The configuration is a Hash with String keys. A setting is looked up under
# the interface's own section first (e.g. config["admin"]["port"]) and falls
# back to the top-level key (config["port"]).
class Connector
  attr_reader :config

  # Builds one Connector per environment.
  #
  # environments - Hash mapping environment name to configuration Hash.
  #
  # Returns a Hash mapping each environment name to a Connector.
  def self.parse(environments)
    connectors = {}
    environments.each_pair do |environment, config|
      connectors[environment] = Connector.new(config)
    end
    connectors
  end

  def initialize(config)
    @config = config
  end

  # Opens an HTTP session to the :rest host/port, yields it, and closes it.
  # The session is closed even when the block raises.
  #
  # Returns the value of the block.
  def rest_interface()
    http = Net::HTTP.start(config_for(:rest, :host), config_for(:rest, :port))
    begin
      yield http
    ensure
      http.finish if http.started?
    end
  end

  # Base URL of the REST interface.
  def rest_location()
    url_builder(:rest, "")
  end

  # "https" when the interface's ssl setting is exactly true, else "http".
  def protocol_for(interface)
    config_for(interface.to_s, "ssl") == true ? "https" : "http"
  end

  # Looks up a setting for an interface.
  #
  # interface - interface name (String or Symbol), e.g. :admin or :rest.
  # value     - setting name (String or Symbol), e.g. :host.
  #
  # Returns the interface-specific value when one is set (including an
  # explicit false), otherwise the top-level value.
  def config_for(interface, value)
    key = value.to_s
    section = @config[interface.to_s]
    specific = section ? section[key] : nil
    # Only a missing (nil) override falls back; `||` here would let a
    # top-level true swallow an explicit interface-level false.
    specific.nil? ? @config[key] : specific
  end

  # Joins protocol, host and port of an interface with the given path.
  def url_builder(interface, url)
    "#{protocol_for(interface)}://#{config_for(interface, :host)}:#{config_for(interface, :port)}/#{url}"
  end

  # Creates a SOAP RPC driver for the API-M WSDL of the :admin interface,
  # with certificate verification disabled and basic auth configured.
  #
  # Raises RuntimeError when the ssl_verify setting is truthy.
  def apim_interface()
    wsdl = url_builder(:admin, "fedora/wsdl?api=API-M")
    driver = SOAP::WSDLDriverFactory.new(wsdl).create_rpc_driver

    if config_for(:admin, :ssl_verify)
      raise "SSL verification not currently supported. Please specify ssl_verify: false"
    else
      driver.options['protocol.http.ssl_config.verify_mode'] = OpenSSL::SSL::VERIFY_NONE
    end

    driver.options["protocol.http.basic_auth"] << [url_builder(:admin, "fedora/services/management"), config_for(:admin, :user), config_for(:admin, :password)]

    driver
  end

  # Invokes an API-M operation on a freshly created driver.
  #
  # method - operation name, e.g. :ingest.
  # args   - a trailing Hash is passed to the operation as its options;
  #          without one an empty Hash is passed.
  #
  # Returns whatever the driver method returns.
  def apim_call(method, *args)
    # Plain-Ruby equivalent of ActiveSupport's extract_options!, which this
    # file never requires.
    options = args.last.is_a?(Hash) ? args.pop : {}
    apim_interface.method(method).call(options)
  end

end
63
+ end
64
+ end