rof 1.0.7 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +9 -7
  3. data/LICENSE +201 -16
  4. data/Rakefile +46 -0
  5. data/bin/csv_to_rof +1 -2
  6. data/bin/fedora_to_rof +7 -1
  7. data/bin/jsonld_to_rof +26 -0
  8. data/bin/osf_to_rof +6 -2
  9. data/bin/rof +5 -19
  10. data/lib/rof.rb +2 -6
  11. data/lib/rof/access.rb +1 -1
  12. data/lib/rof/cli.rb +104 -67
  13. data/lib/rof/compare_rof.rb +68 -39
  14. data/lib/rof/filter.rb +21 -0
  15. data/lib/rof/filters.rb +38 -0
  16. data/lib/rof/filters/bendo.rb +15 -17
  17. data/lib/rof/filters/date_stamp.rb +5 -4
  18. data/lib/rof/filters/file_to_url.rb +5 -3
  19. data/lib/rof/filters/label.rb +9 -7
  20. data/lib/rof/filters/work.rb +7 -5
  21. data/lib/rof/ingest.rb +5 -0
  22. data/lib/rof/osf_context.rb +2 -2
  23. data/lib/rof/rdf_context.rb +2 -0
  24. data/lib/rof/translator.rb +18 -0
  25. data/lib/rof/translators.rb +23 -0
  26. data/lib/rof/{translate_csv.rb → translators/csv_to_rof.rb} +4 -3
  27. data/lib/rof/translators/fedora_to_rof.rb +244 -0
  28. data/lib/rof/translators/jsonld_to_rof.rb +112 -0
  29. data/lib/rof/translators/jsonld_to_rof/accumulator.rb +175 -0
  30. data/lib/rof/translators/jsonld_to_rof/predicate_handler.rb +223 -0
  31. data/lib/rof/translators/jsonld_to_rof/predicate_object_handler.rb +125 -0
  32. data/lib/rof/translators/jsonld_to_rof/statement_handler.rb +91 -0
  33. data/lib/rof/translators/osf_to_rof.rb +191 -0
  34. data/lib/rof/utility.rb +44 -1
  35. data/lib/rof/version.rb +1 -1
  36. data/rof.gemspec +10 -2
  37. data/spec/coverage_helper.rb +17 -0
  38. data/spec/fixtures/for_utility_load_items_from_json_file/multiple_items.json +8 -0
  39. data/spec/fixtures/for_utility_load_items_from_json_file/parse_error.json +3 -0
  40. data/spec/fixtures/for_utility_load_items_from_json_file/single_item.json +3 -0
  41. data/spec/fixtures/jsonld_to_rof/0g354f18610.jsonld +113 -0
  42. data/spec/fixtures/jsonld_to_rof/0g354f18610.rof +96 -0
  43. data/spec/fixtures/jsonld_to_rof/2j62s467216.jsonld +113 -0
  44. data/spec/fixtures/jsonld_to_rof/2j62s467216.rof +93 -0
  45. data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.jsonld +70 -0
  46. data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.rof +87 -0
  47. data/spec/fixtures/jsonld_to_rof/cr56n01253w.jsonld +84 -0
  48. data/spec/fixtures/jsonld_to_rof/cr56n01253w.rof +95 -0
  49. data/spec/fixtures/jsonld_to_rof/h989r21069m.jsonld +84 -0
  50. data/spec/fixtures/jsonld_to_rof/h989r21069m.rof +98 -0
  51. data/spec/fixtures/jsonld_to_rof/js956d59913.jsonld +79 -0
  52. data/spec/fixtures/jsonld_to_rof/js956d59913.rof +89 -0
  53. data/spec/fixtures/jsonld_to_rof/m039k358q5c.jsonld +80 -0
  54. data/spec/fixtures/jsonld_to_rof/m039k358q5c.rof +64 -0
  55. data/spec/fixtures/jsonld_to_rof/nk322b9161g.jsonld +89 -0
  56. data/spec/fixtures/jsonld_to_rof/nk322b9161g.rof +69 -0
  57. data/spec/fixtures/jsonld_to_rof/p8418k7430d.jsonld +84 -0
  58. data/spec/fixtures/jsonld_to_rof/p8418k7430d.rof +67 -0
  59. data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.jsonld +98 -0
  60. data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.rof +110 -0
  61. data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.jsonld +94 -0
  62. data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.rof +121 -0
  63. data/spec/fixtures/osf/phz6b.tar.gz +0 -0
  64. data/spec/lib/rof/access_spec.rb +30 -23
  65. data/spec/lib/rof/cli_spec.rb +83 -60
  66. data/spec/lib/rof/compare_rof_spec.rb +35 -24
  67. data/spec/lib/rof/filter_spec.rb +10 -0
  68. data/spec/lib/rof/filters/bendo_spec.rb +42 -0
  69. data/spec/lib/rof/filters/date_stamp_spec.rb +9 -5
  70. data/spec/lib/rof/filters/file_to_url_spec.rb +7 -3
  71. data/spec/lib/rof/filters/label_spec.rb +121 -77
  72. data/spec/lib/rof/filters/work_spec.rb +7 -4
  73. data/spec/lib/rof/filters_spec.rb +14 -0
  74. data/spec/lib/rof/translator_spec.rb +15 -0
  75. data/spec/lib/rof/{translate_csv_spec.rb → translators/csv_to_rof_spec.rb} +14 -14
  76. data/spec/lib/rof/translators/fedora_to_rof_spec.rb +64 -0
  77. data/spec/lib/rof/translators/jsonld_to_rof/accumulator_spec.rb +121 -0
  78. data/spec/lib/rof/translators/jsonld_to_rof/predicate_handler_spec.rb +73 -0
  79. data/spec/lib/rof/translators/jsonld_to_rof/predicate_object_handler_spec.rb +48 -0
  80. data/spec/lib/rof/translators/jsonld_to_rof/statement_handler_spec.rb +40 -0
  81. data/spec/lib/rof/translators/jsonld_to_rof_spec.rb +120 -0
  82. data/spec/lib/rof/{osf_to_rof_spec.rb → translators/osf_to_rof_spec.rb} +55 -25
  83. data/spec/lib/rof/translators_spec.rb +14 -0
  84. data/spec/lib/rof/utility_spec.rb +47 -1
  85. data/spec/spec_helper.rb +1 -1
  86. data/spec/support/an_rof_filter.rb +10 -0
  87. metadata +186 -15
  88. data/lib/rof/get_from_fedora.rb +0 -211
  89. data/lib/rof/osf_to_rof.rb +0 -123
  90. data/spec/lib/rof/get_from_fedora_spec.rb +0 -22
@@ -0,0 +1,23 @@
1
# Load every translator that lives alongside this file. The list is sorted so
# the load order does not depend on the order in which the filesystem (or the
# running Ruby version's Dir.glob) happens to return the entries.
Dir.glob(File.expand_path('../translators/*.rb', __FILE__)).sort.each do |filename|
  require filename
end

module ROF
  # A namespace for organizing translating classes. A translating class is responsible for
  # converting from one format to another format (e.g. CSV to ROF).
  #
  # @see ROF::Translator
  # @see ROF::Translators::CsvToRof
  # @see ROF::Translators::FedoraToRof
  # @see ROF::Translators::JsonldToRof
  # @see ROF::Translators::OsfToRof
  module Translators
    # Convenience entry point that delegates to ROF::Translators::CsvToRof.call.
    #
    # @api public
    # @param [String] csv_contents - in the form of a CSV
    # @param [Hash] config - Hash with symbols for keys; passed through unchanged
    # @return [Hash] in ROF format
    # @see ROF::Translators::CsvToRof for full details
    def self.csv_to_rof(csv_contents, config = {})
      CsvToRof.call(csv_contents, config)
    end
  end
end
@@ -1,7 +1,8 @@
1
+ require 'rof/translator'
1
2
  require('csv')
2
3
  require('json')
3
4
 
4
- module ROF
5
+ module ROF::Translators
5
6
  # Turn a CSV file into a ROF file.
6
7
  #
7
8
  # pass in the contents of the CSV file.
@@ -31,7 +32,7 @@ module ROF
31
32
  # with the previous work translated into ROF. This will allow a work to have
32
33
  # attached files with different access permissions, owners, etc...
33
34
  # Any extra files are appended to the file list for the work.
34
- class TranslateCSV
35
+ class CsvToRof < ROF::Translator
35
36
  class MissingOwnerOrType < RuntimeError
36
37
  end
37
38
 
@@ -45,7 +46,7 @@ module ROF
45
46
  class NoPriorWork < RuntimeError
46
47
  end
47
48
 
48
- def self.run(csv_contents)
49
+ def self.call(csv_contents, config = {})
49
50
  first_line = nil
50
51
  rof_contents = []
51
52
  previous_work = nil
@@ -0,0 +1,244 @@
1
require 'fileutils'
require 'json'
require 'rexml/document'
require 'rdf/ntriples'
require 'rdf/rdfxml'
require 'rubydora'
require 'rof/translator'
7
+
8
module ROF
  module Translators
    # Responsible for translating Fedora PIDs to ROF objects
    class FedoraToRof < ROF::Translator
      # @param [Array] pids - Fedora PIDs
      # @param [Hash] config - translator options. Note the mixed key types read by this class:
      #   the connection information is fetched with a Symbol key, while the
      #   'inline', 'download', 'download_path' and 'bendo' options are read with String keys.
      # @option config [Hash] :fedora_connection_information - The Hash that contains the connection information for Fedora
      # @return [Array<Hash>] The ROF representation of the Fedora objects, one Hash per PID
      # @raise [KeyError] when :fedora_connection_information is missing from config
      # @see Rubydora.connect
      def self.call(pids, config = {})
        new(pids, config).to_rof
      end

      # Stores the inputs and immediately opens the Fedora connection.
      #
      # @param [Array] pids - Fedora PIDs
      # @param [Hash] config - see .call
      def initialize(pids, config = {})
        @pids = pids
        @fedora_connection_information = config.fetch(:fedora_connection_information)
        @config = config
        connect_to_fedora!
      end
      attr_reader :pids, :fedora_connection_information, :config, :connection

      # @return [Array<Hash>] one ROF element per PID, in the order the PIDs were given
      def to_rof
        pids.map do |pid|
          PidToRofElement.new(pid, connection, config).convert
        end
      end

      private

      def connect_to_fedora!
        @connection = Rubydora.connect(fedora_connection_information)
      end

      # Responsible for converting a single PID to an ROF Element
      class PidToRofElement
        # Datastream name => private method that converts it. Datastreams that
        # are not listed fall back to :default_datastream_conversion (see #convert).
        DATASTREAM_NAME_TO_METHOD_MAP = {
          'DC' => :skip_datastream,
          'RELS-EXT' => :convert_rels_ext,
          'rightsMetadata' => :convert_rights_metadata,
          'properties' => :default_datastream_conversion,
          'content' => :default_datastream_conversion,
          'descMetadata' => :convert_desc_metadata,
          'bendo-item' => :default_datastream_conversion,
          'characterization' => :default_datastream_conversion,
          'thumbnail' => :default_datastream_conversion
        }.freeze

        # @param [String] pid - the Fedora PID to convert
        # @param [#find] connection - the object returned by Rubydora.connect
        # @param [Hash] config - options ('inline', 'download', 'download_path', 'bendo'), String keys
        def initialize(pid, connection, config)
          @pid = pid
          @connection = connection
          @config = config
          @fedora_info = { 'pid' => pid, 'type' => 'fobject' }
          @fedora_object = connection.find(pid)
        end
        attr_reader :pid, :config, :fedora_object

        # Given a rubydora object, extract what we need
        # to create our ROF object in an associative array
        #
        # @return [Hash] the accumulated ROF element
        def convert
          @fedora_info['af-model'] = setModel
          # iterate through the data streams that are present and dispatch
          # each one to the conversion method registered for its name
          fedora_object.datastreams.each do |dsname, ds|
            method_name = DATASTREAM_NAME_TO_METHOD_MAP.fetch(dsname, :default_datastream_conversion)
            send(method_name, dsname, ds)
          end
          @fedora_info
        end

        private

        # Dump a generic datastream.
        #
        # Content that is at most 1024 long (or any content when config['inline'] is set)
        # and is valid UTF-8 is stored inline as a string under the datastream name.
        # Anything else is written to a file, but only when config['download'] is set.
        def default_datastream_conversion(dsname, ds)
          meta = create_meta(ds)
          @fedora_info["#{dsname}-meta"] = meta unless meta.empty?

          # this downloads the contents of the datastream into memory
          content = ds.datastream_content
          if content.length <= 1024 || config['inline']
            content_string = content.to_s.force_encoding('UTF-8')
            if content_string.valid_encoding?
              @fedora_info[dsname] = content_string
              return # we're done! move on to next datastream
            end
            # not utf-8, so keep going and see if download option was given
          end
          return unless config['download']

          # download option was given, so save this datastream as a file
          fname = "#{@fedora_info['pid']}-#{dsname}"
          abspath = File.join(config['download_path'], fname)
          @fedora_info["#{dsname}-file"] = fname
          if File.file?(config['download_path'])
            $stderr.puts "Error: --download directory #{config['download_path']} specified is an existing file."
            exit 1
          end
          FileUtils.mkdir_p(config['download_path'])
          # Binary mode: this branch is reached for content that is not valid
          # UTF-8 (or is large), so the bytes must be written back untouched.
          File.open(abspath, 'wb') do |f|
            f.write(content)
          end
        end

        # Build the "<dsname>-meta" hash from the datastream profile.
        #
        # @return [Hash] may contain 'label', 'mime-type' and 'URL'; empty when none apply
        def create_meta(ds)
          result = {}

          label = ds.profile['dsLabel']
          result['label'] = label unless label.nil? || label == ''
          result['mime-type'] = ds.profile['dsMIME'] if ds.profile['dsMIME'] != 'text/plain'
          # TODO(dbrower): make sure this is working as intended
          if %w(R E).include?(ds.profile['dsControlGroup'])
            url = ds.profile['dsLocation']
            # replace the first occurrence of the configured bendo prefix with 'bendo:'
            url = url.sub(config['bendo'], 'bendo:') if config['bendo']
            result['URL'] = url
          end
          result
        end

        # Compute the value for fedora_info['af-model'].
        #
        # @return [String, nil] the XXXXX part of the first info:fedora/afmodel:XXXXX
        #   model listed in the object profile, or nil when there is none
        def setModel
          models = fedora_object.profile['objModels'].map do |model|
            Regexp.last_match(1) if model =~ /^info:fedora\/afmodel:(.*)/
          end.compact
          models[0]
        end

        # The methods below are called if the like-named datastream exists in fedora

        def skip_datastream(*)
        end

        # set metadata
        #
        def convert_desc_metadata(_dsname, ds)
          # descMetadata is encoded in ntriples, convert to JSON-LD using our special context
          graph = RDF::Graph.new
          data = ds.datastream_content
          # force utf-8 encoding. fedora does not store the encoding, so it defaults to ASCII-8BIT
          # see https://github.com/ruby-rdf/rdf/issues/142
          data.force_encoding('utf-8')
          graph.from_ntriples(data, format: :ntriples)
          JSON::LD::API.fromRdf(graph) do |expanded|
            result = JSON::LD::API.compact(expanded, RdfContext)
            @fedora_info['metadata'] = result
          end
        end

        # set rights
        #
        def convert_rights_metadata(_dsname, ds)
          # rights is an XML document
          # the access array may have read or edit elements
          # each of these elements may contain group or person elements
          xml_doc = REXML::Document.new(ds.datastream_content)

          rights_array = {}

          root = xml_doc.root

          # check for optional embargo date - set if present
          this_embargo = root.elements['embargo']
          rights_array['embargo-date'] = this_embargo.elements['machine'].elements['date'][0] if Utility.has_embargo_date?(this_embargo)

          %w(read edit).each do |access|
            this_access = root.elements["//access[@type='#{access}']"]

            next if this_access.nil?

            # NOTE(review): elements['group'] / elements['person'] look up a single
            # element and the loops below collect that element's children --
            # confirm this is the intended handling when several sibling
            # group/person elements are present.
            unless this_access.elements['machine'].elements['group'].nil?
              group_array = []
              this_access.elements['machine'].elements['group'].each do |this_group|
                group_array << this_group
              end
              rights_array["#{access}-groups"] = group_array
            end

            next if this_access.elements['machine'].elements['person'].nil?
            person_array = []

            this_access.elements['machine'].elements['person'].each do |this_person|
              person_array << this_person
            end
            rights_array[access.to_s] = person_array
          end

          @fedora_info['rights'] = rights_array
        end

        def convert_rels_ext(_dsname, ds)
          # RELS-EXT is RDF-XML - parse it
          ctx = ROF::RelsExtRefContext.dup
          ctx.delete('@base') # @base causes problems when converting TO json-ld (it is = "info:/fedora") but info is not a namespace
          graph = RDF::Graph.new
          graph.from_rdfxml(ds.datastream_content)
          result = nil
          JSON::LD::API.fromRdf(graph) do |expanded|
            result = JSON::LD::API.compact(expanded, ctx)
          end
          # now strip the info:fedora/ prefix from the URIs
          strip_info_fedora(result)
          # remove extra items
          result.delete('hasModel')
          @fedora_info['rels-ext'] = result
        end

        # Recursively remove the 'info:fedora/' prefix from every target in the
        # given rels-ext hash, modifying the hash in place. The '@context' entry
        # is left alone.
        #
        # @param [Hash] rels_ext
        # @return [Hash] the same (mutated) hash
        def strip_info_fedora(rels_ext)
          rels_ext.each do |relation, targets|
            next if relation == '@context'
            if targets.is_a?(Hash)
              strip_info_fedora(targets)
              next
            end
            targets = [targets] if targets.is_a?(String)
            targets.map! do |target|
              if target.is_a?(Hash)
                strip_info_fedora(target)
              else
                target.sub('info:fedora/', '')
              end
            end
            # some single strings cannot be arrays in json-ld, so convert back
            # this shouldn't cause any problems with items that began as arrays
            targets = targets[0] if targets.length == 1
            rels_ext[relation] = targets
          end
        end
      end
      private_constant :PidToRofElement
    end
  end
end
@@ -0,0 +1,112 @@
1
+ require 'rof/rdf_context'
2
+ require 'active_support/core_ext/array/wrap'
3
+ require 'rof/translators/jsonld_to_rof/statement_handler'
4
+ require 'rof/translators/jsonld_to_rof/predicate_handler'
5
+ require 'rof/translators/jsonld_to_rof/accumulator'
6
+
7
module ROF
  module Translators
    # @api public
    #
    # Responsible for converting JSON LD into an ROF Hash via registered URI maps.
    #
    # @note Some predicates require explicit mapping whereas others have an assumed mapping. At present all URLs for @context of JSON-LD documents must be registered.
    #
    # @see ROF::Translators::JsonldToRof.call for details on how the JSON-LD is converted
    # @see ROF::Translators::JsonldToRof::PredicateHandler.register for details on how Predicate URI's are mapped to nodes in the ROF document.
    # @see ROF::Translators::JsonldToRof::PredicateHandler
    # @see ROF::Translators::JsonldToRof::StatementHandler
    module JsonldToRof
      PredicateHandler.register('http://purl.org/ontology/bibo/') do |handler|
        handler.namespace_prefix('bibo:')
        handler.within(['metadata'])
      end
      PredicateHandler.register('info:fedora/fedora-system:def/relations-external') do |handler|
        handler.map('#isMemberOfCollection', to: ['rels-ext', 'isMemberOfCollection'])
      end
      PredicateHandler.register('http://id.loc.gov/vocabulary/relators/') do |handler|
        handler.namespace_prefix('mrel:')
        handler.within(['metadata'])
      end
      PredicateHandler.register('http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#') do |handler|
        handler.namespace_prefix('ebucore:')
        handler.within(['metadata'])
      end

      PredicateHandler.register('https://library.nd.edu/ns/terms/') do |handler|
        handler.map('accessEdit', to: ['rights', 'edit'])
        handler.map('accessRead', to: ['rights', 'read'])
        handler.map('accessEditGroup', to: ['rights', 'edit-groups'])
        handler.map('accessReadGroup', to: ['rights', 'read-groups'])
        handler.map('accessEmbargoDate', to: ['rights', 'embargo-date'])
        handler.map('afmodel', to: ["af-model"])
        handler.map('bendoitem', to: ["bendo-item"])
        # These three end up in the ROF 'properties' XML rather than at a hash location
        handler.map('depositor') do |object, accumulator|
          accumulator.register_properties('depositor', object)
        end
        handler.map('owner') do |object, accumulator|
          accumulator.register_properties('owner', object)
        end
        handler.map('representativeFile') do |object, accumulator|
          accumulator.register_properties('representative', object)
        end
      end

      PredicateHandler.register('http://purl.org/dc/terms/') do |handler|
        handler.namespace_prefix('dc:')
        handler.within(['metadata'])
        handler.map('contributor', to: ['metadata', 'dc:contributor', 'dc:contributor'], force: true)
      end

      PredicateHandler.register('http://projecthydra.org/ns/relations#') do |handler|
        handler.map('hasEditor', to: ['rels-ext', 'hydramata-rel:hasEditor'])
        # We need to map the hasEditorGroup predicate to two different locations in the ROF
        handler.map('hasEditorGroup', to: ['rels-ext', 'hydramata-rel:hasEditorGroup'], force: true)
        handler.map('hasEditorGroup', to: ['rights', 'edit-groups'], force: true)
      end

      PredicateHandler.register('http://www.ndltd.org/standards/metadata/etdms/1.1/') do |handler|
        handler.within(['metadata', 'ms:degree'])
        handler.namespace_prefix('ms:')
        handler.map('role', to: ['metadata', 'dc:contributor', 'ms:role'], force: true)
      end

      # @api public
      #
      # Converts the given JSON-LD into an ROF document that can be used to batch ingest into Fedora.
      #
      # A new Array is always returned; an Array passed in as +jsonld+ is not modified.
      #
      # @param [Array<Hash>, Hash] jsonld - a Hash of JSON-LD data or an Array of JSON-LD Hashes
      # @param [Hash] config - not read by this translator (included to conform to the loose interface of translators)
      # @return [Array<Hash>] An ROF document
      def self.call(jsonld, config = {})
        # Array.wrap hands back the very same object when given an Array, so a
        # non-destructive map is used to avoid overwriting the caller's elements.
        Array.wrap(jsonld).map do |element|
          Element.new(element).to_rof
        end
      end

      # A single top-level element of a JSON-LD document
      class Element
        # @param [Hash] element - one JSON-LD document
        def initialize(element)
          @element = element
        end

        # Feed every RDF statement of the element through the StatementHandler
        # into a fresh Accumulator, then return what the accumulator built.
        #
        # @return [Hash] the ROF representation of this element
        def to_rof
          @accumulator = Accumulator.new(base_rof)
          JSON::LD::API.toRdf(element) do |statement|
            StatementHandler.call(statement, accumulator)
          end
          @accumulator.to_rof
        end

        private

        attr_reader :element, :accumulator

        # The skeleton every generated ROF object starts from
        def base_rof
          {
            "type" => "fobject",
            "metadata" => { "@context" => ROF::RdfContext },
            "rels-ext" => { "@context" => ROF::RelsExtRefContext }
          }
        end
      end
      private_constant :Element
    end
  end
end
@@ -0,0 +1,175 @@
1
+ require 'active_support/core_ext/array/wrap'
2
+ require 'active_support/core_ext/module/delegation'
3
+ require 'active_support/core_ext/object/deep_dup'
4
+
5
module ROF
  module Translators
    module JsonldToRof
      # @api public
      # The accumulator is a "passive" object. Things happen to it. All in the name of building the
      # hash that is ROF.
      #
      # @note The accumulator is only for one PID. See [ROF::Translators::JsonldToRof::Accumulator#add_pid]
      class Accumulator
        # Raised by #add_pid when a PID is already set and a different one is given
        class PidAlreadySetError < RuntimeError
        end

        # Captures the identifier segment of a curate.nd.edu "show" URL. The capture
        # stops at the first slash (or backslash) so a trailing "/" is not part of the PID.
        CURATE_SHOW_URL = %r{https?://curate\.nd\.edu/show/([^/\\]+)}

        # @param [Hash] initial_rof - The base ROF document to which we will be adding elements.
        #   The hash is kept by reference and written to by #add_pid and
        #   #add_predicate_location_and_value.
        def initialize(initial_rof = {})
          @rof = initial_rof
          @blank_nodes = {}
          @blank_node_locations = {}
          @properties = []
        end

        # Builds the final document from a deep copy of the accumulated hash,
        # with blank node data expanded and the properties XML appended.
        #
        # @api public
        # @return [Hash]
        def to_rof
          rof = @rof.deep_dup
          expand_blank_node_locations(rof)
          append_properties_to(rof)
          rof
        end

        # @api public
        # @param [String] node_name - the XML node's name (e.g. <node_name>node_value</node_name>)
        # @param [String] node_value - the XML element's value
        # @return [Array] of given node_name and node_value
        def register_properties(node_name, node_value)
          @properties << [node_name, coerce_object_to_string(node_value)]
          [node_name, node_value]
        end

        # @api public
        # @param [RDF::Statement] statement
        # @return [RDF::Statement]
        def add_blank_node(statement)
          @blank_nodes[statement.subject] ||= {}
          @blank_nodes[statement.subject][statement.predicate] ||= []
          @blank_nodes[statement.subject][statement.predicate] << statement.object
          statement
        end

        # @api public
        # @param [RDF::Subject] subject - Fetch the corresponding blank node that was added
        # @return [Hash] the predicate => objects map recorded for the subject
        # @raise [KeyError] when the subject has not previously been added
        # @see #add_blank_node
        def fetch_blank_node(subject)
          @blank_nodes.fetch(subject)
        end

        # @api public
        # @param [String] pid - an identifier
        # @return [String] pid, after coercion (see #coerce_object_to_string)
        # @raise PidAlreadySetError - if you attempted to set a different PID
        def add_pid(pid)
          pid = coerce_object_to_string(pid)
          if @rof.key?('pid')
            if @rof['pid'] != pid
              raise PidAlreadySetError, "Attempted to set pid=#{pid}, but it is already set to #{@rof['pid']}"
            end
          else
            @rof['pid'] = pid
          end
          pid
        end

        # @api public
        # @param [Array<String>, String] location - a list of nested hash keys (or a single string)
        # @param [String] value - a translated value for the original RDF Statement
        # @param [false, RDF::Node] blank_node
        # @return [Array] location, value -- except for the 'pid' location, where the
        #   result of #add_pid is returned instead
        def add_predicate_location_and_value(location, value, blank_node = false)
          location = Array.wrap(location)
          return add_pid(value) if location == ['pid']

          if blank_node
            add_predicate_location_and_value_direct_for_blank_node(location, value, blank_node)
          else
            add_predicate_location_and_value_direct_for_non_blank_node(location, value)
          end
          [location, value]
        end

        # Records the value against the blank node; the data is only merged into
        # the document when #to_rof runs.
        #
        # @raise [KeyError] when the blank node was never registered via #add_blank_node
        def add_predicate_location_and_value_direct_for_blank_node(location, value, blank_node)
          fetch_blank_node(blank_node) # Ensure the node exists
          parent_location = location[0..-2]
          @blank_node_locations[blank_node] ||= {}
          @blank_node_locations[blank_node][parent_location] ||= []
          @blank_node_locations[blank_node][parent_location] << { location[-1] => Array.wrap(coerce_object_to_string(value)) }
        end

        # Walks (creating as needed) the nested hashes named by all but the last
        # key of location, then appends the value to the array under the last key.
        def add_predicate_location_and_value_direct_for_non_blank_node(location, value)
          data = @rof
          location[0..-2].each do |slug|
            data[slug] ||= {}
            data = data[slug]
          end
          slug = location[-1]
          data[slug] ||= []
          data[slug] << coerce_object_to_string(value)
        end

        private

        # The antics of the blank node! See the specs for blank nodes to see the expected behavior.
        def expand_blank_node_locations(rof)
          @blank_node_locations.each_pair do |_node, locations|
            locations.each_pair do |location, key_value_pairs|
              data = rof
              location[0..-2].each do |slug|
                data[slug] ||= {}
                data = data[slug]
              end

              # We may encounter a shallow map, if so we need for it to behave differently:
              # with a final slug a new hash is appended to the array at that slug,
              # without one the key/value pairs are merged straight into data.
              slug = location[-1]
              if slug
                data[slug] ||= []
                hash = {}
              else
                hash = data
              end
              Array.wrap(key_value_pairs).each do |key_value|
                key_value.each_pair do |key, value|
                  hash[key] ||= []
                  hash[key] += Array.wrap(value)
                end
              end
              data[slug] << hash if slug
            end
          end
        end

        # Adds the 'properties' XML (and its 'properties-meta') when at least one
        # property was registered; otherwise leaves rof untouched.
        #
        # NOTE(review): node values are interpolated into the XML without
        # escaping -- confirm values can never contain '<' or '&'.
        def append_properties_to(rof)
          return rof if @properties.empty?
          rof['properties-meta'] = { "mime-type" => "text/xml" }
          fields = @properties.map do |node_name, object|
            "<#{node_name}>#{object}</#{node_name}>"
          end
          rof['properties'] = "<fields>#{fields.join}</fields>"
          rof
        end

        # Despite the name, only some inputs come back as a String:
        # * nil is returned as nil
        # * anything whose to_s contains a curate.nd.edu "show" URL becomes "und:<id>"
        # * objects responding to #value return that value
        # * everything else is returned unchanged
        def coerce_object_to_string(object)
          return object if object.nil?
          if object.to_s =~ CURATE_SHOW_URL
            "und:#{Regexp.last_match(1)}"
          elsif object.respond_to?(:value)
            object.value
          else
            object
          end
        end
      end
    end
  end
end