rof 1.0.7 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +9 -7
  3. data/LICENSE +201 -16
  4. data/Rakefile +46 -0
  5. data/bin/csv_to_rof +1 -2
  6. data/bin/fedora_to_rof +7 -1
  7. data/bin/jsonld_to_rof +26 -0
  8. data/bin/osf_to_rof +6 -2
  9. data/bin/rof +5 -19
  10. data/lib/rof.rb +2 -6
  11. data/lib/rof/access.rb +1 -1
  12. data/lib/rof/cli.rb +104 -67
  13. data/lib/rof/compare_rof.rb +68 -39
  14. data/lib/rof/filter.rb +21 -0
  15. data/lib/rof/filters.rb +38 -0
  16. data/lib/rof/filters/bendo.rb +15 -17
  17. data/lib/rof/filters/date_stamp.rb +5 -4
  18. data/lib/rof/filters/file_to_url.rb +5 -3
  19. data/lib/rof/filters/label.rb +9 -7
  20. data/lib/rof/filters/work.rb +7 -5
  21. data/lib/rof/ingest.rb +5 -0
  22. data/lib/rof/osf_context.rb +2 -2
  23. data/lib/rof/rdf_context.rb +2 -0
  24. data/lib/rof/translator.rb +18 -0
  25. data/lib/rof/translators.rb +23 -0
  26. data/lib/rof/{translate_csv.rb → translators/csv_to_rof.rb} +4 -3
  27. data/lib/rof/translators/fedora_to_rof.rb +244 -0
  28. data/lib/rof/translators/jsonld_to_rof.rb +112 -0
  29. data/lib/rof/translators/jsonld_to_rof/accumulator.rb +175 -0
  30. data/lib/rof/translators/jsonld_to_rof/predicate_handler.rb +223 -0
  31. data/lib/rof/translators/jsonld_to_rof/predicate_object_handler.rb +125 -0
  32. data/lib/rof/translators/jsonld_to_rof/statement_handler.rb +91 -0
  33. data/lib/rof/translators/osf_to_rof.rb +191 -0
  34. data/lib/rof/utility.rb +44 -1
  35. data/lib/rof/version.rb +1 -1
  36. data/rof.gemspec +10 -2
  37. data/spec/coverage_helper.rb +17 -0
  38. data/spec/fixtures/for_utility_load_items_from_json_file/multiple_items.json +8 -0
  39. data/spec/fixtures/for_utility_load_items_from_json_file/parse_error.json +3 -0
  40. data/spec/fixtures/for_utility_load_items_from_json_file/single_item.json +3 -0
  41. data/spec/fixtures/jsonld_to_rof/0g354f18610.jsonld +113 -0
  42. data/spec/fixtures/jsonld_to_rof/0g354f18610.rof +96 -0
  43. data/spec/fixtures/jsonld_to_rof/2j62s467216.jsonld +113 -0
  44. data/spec/fixtures/jsonld_to_rof/2j62s467216.rof +93 -0
  45. data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.jsonld +70 -0
  46. data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.rof +87 -0
  47. data/spec/fixtures/jsonld_to_rof/cr56n01253w.jsonld +84 -0
  48. data/spec/fixtures/jsonld_to_rof/cr56n01253w.rof +95 -0
  49. data/spec/fixtures/jsonld_to_rof/h989r21069m.jsonld +84 -0
  50. data/spec/fixtures/jsonld_to_rof/h989r21069m.rof +98 -0
  51. data/spec/fixtures/jsonld_to_rof/js956d59913.jsonld +79 -0
  52. data/spec/fixtures/jsonld_to_rof/js956d59913.rof +89 -0
  53. data/spec/fixtures/jsonld_to_rof/m039k358q5c.jsonld +80 -0
  54. data/spec/fixtures/jsonld_to_rof/m039k358q5c.rof +64 -0
  55. data/spec/fixtures/jsonld_to_rof/nk322b9161g.jsonld +89 -0
  56. data/spec/fixtures/jsonld_to_rof/nk322b9161g.rof +69 -0
  57. data/spec/fixtures/jsonld_to_rof/p8418k7430d.jsonld +84 -0
  58. data/spec/fixtures/jsonld_to_rof/p8418k7430d.rof +67 -0
  59. data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.jsonld +98 -0
  60. data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.rof +110 -0
  61. data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.jsonld +94 -0
  62. data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.rof +121 -0
  63. data/spec/fixtures/osf/phz6b.tar.gz +0 -0
  64. data/spec/lib/rof/access_spec.rb +30 -23
  65. data/spec/lib/rof/cli_spec.rb +83 -60
  66. data/spec/lib/rof/compare_rof_spec.rb +35 -24
  67. data/spec/lib/rof/filter_spec.rb +10 -0
  68. data/spec/lib/rof/filters/bendo_spec.rb +42 -0
  69. data/spec/lib/rof/filters/date_stamp_spec.rb +9 -5
  70. data/spec/lib/rof/filters/file_to_url_spec.rb +7 -3
  71. data/spec/lib/rof/filters/label_spec.rb +121 -77
  72. data/spec/lib/rof/filters/work_spec.rb +7 -4
  73. data/spec/lib/rof/filters_spec.rb +14 -0
  74. data/spec/lib/rof/translator_spec.rb +15 -0
  75. data/spec/lib/rof/{translate_csv_spec.rb → translators/csv_to_rof_spec.rb} +14 -14
  76. data/spec/lib/rof/translators/fedora_to_rof_spec.rb +64 -0
  77. data/spec/lib/rof/translators/jsonld_to_rof/accumulator_spec.rb +121 -0
  78. data/spec/lib/rof/translators/jsonld_to_rof/predicate_handler_spec.rb +73 -0
  79. data/spec/lib/rof/translators/jsonld_to_rof/predicate_object_handler_spec.rb +48 -0
  80. data/spec/lib/rof/translators/jsonld_to_rof/statement_handler_spec.rb +40 -0
  81. data/spec/lib/rof/translators/jsonld_to_rof_spec.rb +120 -0
  82. data/spec/lib/rof/{osf_to_rof_spec.rb → translators/osf_to_rof_spec.rb} +55 -25
  83. data/spec/lib/rof/translators_spec.rb +14 -0
  84. data/spec/lib/rof/utility_spec.rb +47 -1
  85. data/spec/spec_helper.rb +1 -1
  86. data/spec/support/an_rof_filter.rb +10 -0
  87. metadata +186 -15
  88. data/lib/rof/get_from_fedora.rb +0 -211
  89. data/lib/rof/osf_to_rof.rb +0 -123
  90. data/spec/lib/rof/get_from_fedora_spec.rb +0 -22
@@ -0,0 +1,23 @@
1
+ Dir.glob(File.expand_path('../translators/*.rb', __FILE__)).each do |filename|
2
+ require filename
3
+ end
4
+
5
module ROF
  # Namespace that groups the translating classes. Each translator converts
  # one format into another format (for example CSV into ROF).
  #
  # @see ROF::Translator
  # @see ROF::Translators::CsvToRof
  # @see ROF::Translators::FedoraToRof
  # @see ROF::Translators::OsfToRof
  module Translators
    class << self
      # @api public
      # Convenience entry point that hands its arguments to CsvToRof.call.
      #
      # @param [String] csv_contents - in the form of a CSV
      # @param [Hash] config - Hash with symbols for keys
      # @return whatever ROF::Translators::CsvToRof.call returns
      # @see ROF::Translators::CsvToRof for full details
      def csv_to_rof(csv_contents, config = {})
        CsvToRof.call(csv_contents, config)
      end
    end
  end
end
@@ -1,7 +1,8 @@
1
+ require 'rof/translator'
1
2
  require('csv')
2
3
  require('json')
3
4
 
4
- module ROF
5
+ module ROF::Translators
5
6
  # Turn a CSV file into a ROF file.
6
7
  #
7
8
  # pass in the contents of the CSV file.
@@ -31,7 +32,7 @@ module ROF
31
32
  # with the previous work translated into ROF. This will allow a work to have
32
33
  # attached files with different access permissions, owners, etc...
33
34
  # Any extra files are appended to the file list for the work.
34
- class TranslateCSV
35
+ class CsvToRof < ROF::Translator
35
36
  class MissingOwnerOrType < RuntimeError
36
37
  end
37
38
 
@@ -45,7 +46,7 @@ module ROF
45
46
  class NoPriorWork < RuntimeError
46
47
  end
47
48
 
48
- def self.run(csv_contents)
49
+ def self.call(csv_contents, config = {})
49
50
  first_line = nil
50
51
  rof_contents = []
51
52
  previous_work = nil
@@ -0,0 +1,244 @@
1
require 'fileutils'
require 'json'
require 'rexml/document'
require 'rdf/ntriples'
require 'rdf/rdfxml'
require 'rubydora'
require 'rof/translator'
7
+
8
+ module ROF
9
+ module Translators
10
+ # Responsible for translating Fedora PIDs to ROF objects
11
+ class FedoraToRof < ROF::Translator
12
+ # @param [Array] pids - Fedora PIDs
13
+ # @param [Hash] config - Hash with symbol keys
14
+ # @option config [Hash] :fedora_connection_information - The Hash that contains the connection information for Fedora
15
+ # @return [Array<Hash>] The ROF representation of the Fedora objects (one Hash per PID)
16
+ # @see Rubydora.connect
17
# Build a translator for the given PIDs and return its ROF representation.
def self.call(pids, config = {})
  translator = new(pids, config)
  translator.to_rof
end
20
+
21
+ def initialize(pids, config = {})
22
+ @pids = pids
23
+ @fedora_connection_information = config.fetch(:fedora_connection_information)
24
+ @config = config
25
+ connect_to_fedora!
26
+ end
27
+ attr_reader :pids, :fedora_connection_information, :config, :connection
28
+
29
# Open the Fedora connection described by `fedora_connection_information`
# and remember it in @connection.
def connect_to_fedora!
  @connection = Rubydora.connect(fedora_connection_information)
end
# Only this helper is private; definitions that follow keep the default
# (public) visibility.
private :connect_to_fedora!
34
+
35
# Convert every PID into its ROF element; the resulting Array follows the
# order of `pids`.
def to_rof
  pids.map { |pid| PidToRofElement.new(pid, connection, config).convert }
end
41
+
42
+ # Responsible for converting a single PID to an ROF Element
43
+ class PidToRofElement
44
+ def initialize(pid, connection, config)
45
+ @pid = pid
46
+ @connection = connection
47
+ @config = config
48
+ @fedora_info = { 'pid' => pid, 'type' => 'fobject' }
49
+ @fedora_object = connection.find(pid)
50
+ end
51
+ attr_reader :pid, :config, :fedora_object
52
+
53
+ # Given a rubydora object, extract what we need
54
+ # to create our ROF object in an associative array
55
+ #
56
# Build the ROF hash for this object.
#
# Records the af-model, then dispatches every datastream to the handler named
# in DATASTREAM_NAME_TO_METHOD_MAP; datastreams without an entry go to
# #default_datastream_conversion.
#
# @return [Hash] the accumulated @fedora_info
def convert
  @fedora_info['af-model'] = setModel
  fedora_object.datastreams.each do |dsname, ds|
    handler = DATASTREAM_NAME_TO_METHOD_MAP.fetch(dsname, :default_datastream_conversion)
    # `send` is needed because the handlers are private methods
    send(handler, dsname, ds)
  end
  @fedora_info
end
66
+
67
# Maps a Fedora datastream name to the private method that converts it.
# Names missing from this table are handled by #default_datastream_conversion.
# Frozen so the dispatch table cannot be altered at runtime.
DATASTREAM_NAME_TO_METHOD_MAP = {
  'DC' => :skip_datastream,
  'RELS-EXT' => :convert_rels_ext,
  'rightsMetadata' => :convert_rights_metadata,
  'properties' => :default_datastream_conversion,
  'content' => :default_datastream_conversion,
  'descMetadata' => :convert_desc_metadata,
  'bendo-item' => :default_datastream_conversion,
  'characterization' => :default_datastream_conversion,
  'thumbnail' => :default_datastream_conversion
}.freeze
78
+
79
+ private
80
+
81
# Record a datastream that has no dedicated converter.
#
# Stores the datastream's metadata under "<dsname>-meta" (when non-empty).
# The content is then inlined under "<dsname>" when it is at most 1024
# characters long (or config['inline'] is set) and is valid UTF-8. Otherwise,
# when config['download'] is set, the content is written to
# config['download_path'] and the file name is stored under "<dsname>-file".
#
# @param [String] dsname - the datastream's name
# @param [#profile, #datastream_content] ds - the datastream
# @return [void]
def default_datastream_conversion(dsname, ds)
  # dump generic datastream
  meta = create_meta(ds)
  @fedora_info["#{dsname}-meta"] = meta unless meta.empty?

  # this downloads the contents of the datastream into memory
  content = ds.datastream_content
  if content.length <= 1024 || config['inline']
    # Re-tag a copy: the raw bytes keep their original encoding in case they
    # still have to be written to disk below.
    content_string = content.to_s.dup.force_encoding('UTF-8')
    if content_string.valid_encoding?
      @fedora_info[dsname] = content_string
      return # we're done! move on to next datastream
    end
    # not utf-8, so keep going and see if download option was given
  end
  return unless config['download']

  # download option was given, so save this datastream as a file
  download_path = config['download_path']
  fname = "#{@fedora_info['pid']}-#{dsname}"
  abspath = File.join(download_path, fname)
  @fedora_info["#{dsname}-file"] = fname
  if File.file?(download_path)
    $stderr.puts "Error: --download directory #{download_path} specified is an existing file."
    exit 1
  end
  FileUtils.mkdir_p(download_path)
  # Binary mode: the content reaching this point failed the UTF-8 check or
  # was too large to inline, so it must be written byte-for-byte.
  File.open(abspath, 'wb') { |file| file.write(content) }
end
112
+
113
# Collect the descriptive attributes of a datastream from its profile.
#
# @param [#profile] ds - the datastream
# @return [Hash] may contain 'label', 'mime-type' and 'URL'
def create_meta(ds)
  profile = ds.profile
  meta = {}

  label = profile['dsLabel']
  meta['label'] = label unless label.nil? || label == ''

  mime_type = profile['dsMIME']
  meta['mime-type'] = mime_type if mime_type != 'text/plain'

  # TODO(dbrower): make sure this is working as intended
  if %w(R E).include?(profile['dsControlGroup'])
    location = profile['dsLocation']
    # when a bendo base URL is configured, swap it for the "bendo:" prefix
    location = location.sub(config['bendo'], 'bendo:') if config['bendo']
    meta['URL'] = location
  end
  meta
end
127
+
128
+ # Determine the value for fedora_info['af-model']
129
+ #
130
# Find the first object model of the form info:fedora/afmodel:XXXXX and
# return the XXXXX part.
#
# @return [String, nil] nil when no model matches
def setModel
  fedora_object.profile['objModels'].each do |model|
    return Regexp.last_match(1) if model =~ /^info:fedora\/afmodel:(.*)/
  end
  nil
end
137
+
138
+ # The methods below are called if the like-named datastream exists in fedora
139
+
140
+ def skip_datastream(*)
141
+ end
142
+
143
+ # set metadata
144
+ #
145
# Store the descMetadata datastream under 'metadata'.
#
# descMetadata is encoded in ntriples; it is converted to JSON-LD and
# compacted with RdfContext.
def convert_desc_metadata(_dsname, ds)
  ntriples = ds.datastream_content
  # force utf-8 encoding. fedora does not store the encoding, so it defaults to ASCII-8BIT
  # see https://github.com/ruby-rdf/rdf/issues/142
  ntriples.force_encoding('utf-8')

  graph = RDF::Graph.new
  graph.from_ntriples(ntriples, format: :ntriples)
  JSON::LD::API.fromRdf(graph) do |expanded|
    @fedora_info['metadata'] = JSON::LD::API.compact(expanded, RdfContext)
  end
end
158
+
159
+ # set rights
160
+ #
161
# Store the rightsMetadata datastream under 'rights'.
#
# The datastream is an XML document. It may carry an embargo date and
# `access` elements of type "read" or "edit", each of which may list groups
# and persons inside its `machine` element.
def convert_rights_metadata(_dsname, ds)
  root = REXML::Document.new(ds.datastream_content).root
  rights = {}

  # check for optional embargo date - set if present
  embargo = root.elements['embargo']
  if Utility.has_embargo_date?(embargo)
    rights['embargo-date'] = embargo.elements['machine'].elements['date'][0]
  end

  %w(read edit).each do |access|
    access_node = root.elements["//access[@type=\'#{access}\']"]
    next if access_node.nil?

    machine = access_node.elements['machine']

    # NOTE(review): elements['group'] looks like it yields a single element,
    # so this collects that element's child nodes rather than every <group>
    # sibling - confirm against the REXML documentation.
    group = machine.elements['group']
    unless group.nil?
      rights["#{access}-groups"] = group.each_with_object([]) { |child, found| found << child }
    end

    person = machine.elements['person']
    next if person.nil?
    rights[access] = person.each_with_object([]) { |child, found| found << child }
  end

  @fedora_info['rights'] = rights
end
199
+
200
# Store the RELS-EXT datastream (RDF-XML) under 'rels-ext' as compacted
# JSON-LD, with the info:fedora/ prefix stripped and 'hasModel' removed.
def convert_rels_ext(_dsname, ds)
  graph = RDF::Graph.new
  graph.from_rdfxml(ds.datastream_content)

  context = ROF::RelsExtRefContext.dup
  # @base causes problems when converting TO json-ld (it is = "info:/fedora") but info is not a namespace
  context.delete('@base')

  compacted = nil
  JSON::LD::API.fromRdf(graph) do |expanded|
    compacted = JSON::LD::API.compact(expanded, context)
  end

  # now strip the info:fedora/ prefix from the URIs
  strip_info_fedora(compacted)
  # remove extra items
  compacted.delete('hasModel')
  @fedora_info['rels-ext'] = compacted
end
216
+
217
+ private
218
+
219
# Remove the 'info:fedora/' prefix from every target in a rels-ext hash,
# recursing into nested hashes. The hash is modified in place and the
# '@context' entry is left untouched.
#
# @param [Hash] rels_ext
# @return [Hash] the same (mutated) hash
def strip_info_fedora(rels_ext)
  rels_ext.each do |relation, targets|
    next if relation == '@context'

    case targets
    when Hash
      strip_info_fedora(targets)
      next
    when String
      targets = [targets]
    end

    targets.map! do |target|
      target.is_a?(Hash) ? strip_info_fedora(target) : target.sub('info:fedora/', '')
    end
    # some single strings cannot be arrays in json-ld, so convert back
    # this shouldn't cause any problems with items that began as arrays
    rels_ext[relation] = targets.length == 1 ? targets[0] : targets
  end
end
240
+ end
241
+ private_constant :PidToRofElement
242
+ end
243
+ end
244
+ end
@@ -0,0 +1,112 @@
1
+ require 'rof/rdf_context'
2
+ require 'active_support/core_ext/array/wrap'
3
+ require 'rof/translators/jsonld_to_rof/statement_handler'
4
+ require 'rof/translators/jsonld_to_rof/predicate_handler'
5
+ require 'rof/translators/jsonld_to_rof/accumulator'
6
+
7
+ module ROF
8
+ module Translators
9
+ # @api public
10
+ #
11
+ # Responsible for converting JSON LD into an ROF Hash via registered URI maps.
12
+ #
13
+ # @note Some predicates require explicit mapping whereas others have an assumed mapping. At present all URLs for @context of JSON-LD documents must be registered.
14
+ #
15
+ # @see ROF::Translators::JsonldToRof.call for details on how the JSON-LD is converted
16
+ # @see ROF::Translators::JsonldToRof::PredicateHandler.register for details on how Predicate URIs are mapped to nodes in the ROF document.
17
+ # @see ROF::Translators::JsonldToRof::PredicateHandler
18
+ # @see ROF::Translators::JsonldToRof::StatementHandler
19
+ module JsonldToRof
20
# Predicate URI registrations. Each call hands a URI prefix to
# PredicateHandler.register together with a block that configures the handler
# for predicates under that prefix.

# Prefixes whose predicates are placed within 'metadata' under a namespace prefix.
PredicateHandler.register('http://purl.org/ontology/bibo/') do |handler|
  handler.namespace_prefix('bibo:')
  handler.within(%w[metadata])
end
PredicateHandler.register('info:fedora/fedora-system:def/relations-external') do |handler|
  handler.map('#isMemberOfCollection', to: %w[rels-ext isMemberOfCollection])
end
PredicateHandler.register('http://id.loc.gov/vocabulary/relators/') do |handler|
  handler.namespace_prefix('mrel:')
  handler.within(%w[metadata])
end
PredicateHandler.register('http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#') do |handler|
  handler.namespace_prefix('ebucore:')
  handler.within(%w[metadata])
end

# Local terms: access rights, model and bendo item map to ROF locations;
# depositor/owner/representativeFile are registered as properties.
PredicateHandler.register('https://library.nd.edu/ns/terms/') do |handler|
  handler.map('accessEdit', to: %w[rights edit])
  handler.map('accessRead', to: %w[rights read])
  handler.map('accessEditGroup', to: %w[rights edit-groups])
  handler.map('accessReadGroup', to: %w[rights read-groups])
  handler.map('accessEmbargoDate', to: %w[rights embargo-date])
  handler.map('afmodel', to: %w[af-model])
  handler.map('bendoitem', to: %w[bendo-item])
  handler.map('depositor') { |object, accumulator| accumulator.register_properties('depositor', object) }
  handler.map('owner') { |object, accumulator| accumulator.register_properties('owner', object) }
  handler.map('representativeFile') { |object, accumulator| accumulator.register_properties('representative', object) }
end

PredicateHandler.register('http://purl.org/dc/terms/') do |handler|
  handler.namespace_prefix('dc:')
  handler.within(%w[metadata])
  handler.map('contributor', to: %w[metadata dc:contributor dc:contributor], force: true)
end

PredicateHandler.register('http://projecthydra.org/ns/relations#') do |handler|
  handler.map('hasEditor', to: %w[rels-ext hydramata-rel:hasEditor])
  # We need to map the hasEditorGroup predicate to two different locations in the ROF
  handler.map('hasEditorGroup', to: %w[rels-ext hydramata-rel:hasEditorGroup], force: true)
  handler.map('hasEditorGroup', to: %w[rights edit-groups], force: true)
end

PredicateHandler.register('http://www.ndltd.org/standards/metadata/etdms/1.1/') do |handler|
  handler.within(%w[metadata ms:degree])
  handler.namespace_prefix('ms:')
  handler.map('role', to: %w[metadata dc:contributor ms:role], force: true)
end
73
+
74
+ # @api public
75
+ #
76
+ # Converts the given JSON-LD into an ROF document that can be used to batch ingest into Fedora.
77
+ #
78
+ # @param [Array<Hash>, Hash] jsonld - a Hash of JSON-LD data or an Array of JSON-LD Hashes
79
+ # @param [Hash] config (included to conform to the loose interface of translators)
80
+ # @return [Array<Hash>] An ROF document
81
# Translate one JSON-LD Hash, or an Array of them, into ROF hashes.
#
# A new Array is always returned. When the caller passes an Array,
# Array.wrap hands back that very object, so mapping in place would
# overwrite the caller's data.
#
# @param [Array<Hash>, Hash] jsonld - a Hash of JSON-LD data or an Array of JSON-LD Hashes
# @param [Hash] config - unused; accepted (and optional) to match the other translators
# @return [Array<Hash>] An ROF document
def self.call(jsonld, config = {})
  Array.wrap(jsonld).map { |element| Element.new(element).to_rof }
end
86
+
87
+ # A single top-level element of a JSON-LD document
88
# Wraps one top-level element of a JSON-LD document and converts it to ROF.
class Element
  # @param [Hash] element - a single JSON-LD element
  def initialize(element)
    @element = element
  end

  # Feed every RDF statement derived from the element through
  # StatementHandler into a fresh Accumulator, then return the
  # accumulator's ROF.
  def to_rof
    @accumulator = Accumulator.new(base_rof)
    JSON::LD::API.toRdf(element) { |statement| StatementHandler.call(statement, accumulator) }
    accumulator.to_rof
  end

  private

  attr_reader :element, :accumulator

  # The skeleton every ROF object starts from.
  def base_rof
    {
      'type' => 'fobject',
      'metadata' => { '@context' => ROF::RdfContext },
      'rels-ext' => { '@context' => ROF::RelsExtRefContext }
    }
  end
end
109
+ private_constant :Element
110
+ end
111
+ end
112
+ end
@@ -0,0 +1,175 @@
1
+ require 'active_support/core_ext/array/wrap'
2
+ require 'active_support/core_ext/module/delegation'
3
+ require 'active_support/core_ext/object/deep_dup'
4
+
5
+ module ROF
6
+ module Translators
7
+ module JsonldToRof
8
+ # @api public
9
+ # The accumulator is a "passive" object. Things happen to it. All in the name of building the
10
+ # hash that is ROF.
11
+ #
12
+ # @note The accumulator is only for one PID. See [ROF::Translators::JsonldToRof::Accumulator#add_pid]
13
+ class Accumulator
14
+ # @param [Hash] initial_rof - The base ROF document to which we will be adding elements.
15
+ def initialize(initial_rof = {})
16
+ @rof = initial_rof
17
+ @blank_nodes = {}
18
+ @blank_node_locations = {}
19
+ end
20
+
21
+ # @api public
22
+ # @return [Hash]
23
# Produce the finished ROF hash. Works on a deep copy of @rof, so the
# accumulator's own state is not altered by the expansion steps.
def to_rof
  @rof.deep_dup.tap do |copy|
    expand_blank_node_locations(copy)
    append_properties_to(copy)
  end
end
29
+
30
+ private
31
+
32
+ # The antics of the blank node! See the specs for blank nodes to see the expected behavior.
33
# The antics of the blank node! See the specs for blank nodes to see the expected behavior.
#
# For each recorded blank node location, walks (creating hashes as needed)
# down to the parent container, merges the node's key/value pairs into one
# hash and appends that hash to the list found at the final slug. An empty
# location has no slug; its pairs are merged directly into `rof`.
def expand_blank_node_locations(rof)
  @blank_node_locations.each_pair do |_node, locations|
    locations.each_pair do |location, key_value_pairs|
      *path, slug = location
      parent = path.inject(rof) { |container, key| container[key] ||= {} }

      # We may encounter a shallow map, if so we need for it to behave differently
      if slug
        parent[slug] ||= []
        target = {}
      else
        target = parent
      end

      Array.wrap(key_value_pairs).each do |pairs|
        pairs.each_pair do |key, value|
          target[key] = (target[key] || []) + Array.wrap(value)
        end
      end
      parent[slug] << target if slug
    end
  end
end
60
+
61
# Add the 'properties' XML document (and its 'properties-meta') to the given
# ROF hash when any properties were registered.
#
# Values are XML-escaped: a registered value containing `&`, `<` or `>`
# would otherwise produce a malformed document.
#
# @param [Hash] rof - the ROF hash to extend (modified in place)
# @return [Hash] the given rof
def append_properties_to(rof)
  return rof unless @properties

  fields = @properties.map do |node_name, object|
    "<#{node_name}>#{object.to_s.encode(xml: :text)}</#{node_name}>"
  end
  rof['properties-meta'] = { 'mime-type' => 'text/xml' }
  rof['properties'] = "<fields>#{fields.join}</fields>"
  rof
end
72
+
73
+ public
74
+
75
+ # @api public
76
+ # @param [String] node_name - the XML node's name (e.g. <node_name>node_value</node_name>)
77
+ # @param [String] node_value - the XML element's value
78
+ # @return [Array] of given node_name and node_value
79
# Remember a property for the 'properties' XML document. The value is stored
# after passing through #coerce_object_to_string; the arguments are returned
# as given.
def register_properties(node_name, node_value)
  (@properties ||= []).push([node_name, coerce_object_to_string(node_value)])
  [node_name, node_value]
end
84
+
85
+ class PidAlreadySetError < RuntimeError
86
+ end
87
+
88
+ # @api public
89
+ # @param [RDF::Statement] statement
90
+ # @return [RDF::Statement]
91
# Record a statement whose subject is a blank node: its object is appended
# to the list kept for that subject and predicate.
def add_blank_node(statement)
  by_predicate = (@blank_nodes[statement.subject] ||= {})
  (by_predicate[statement.predicate] ||= []) << statement.object
  statement
end
97
+
98
+ # @api public
99
+ # @param [RDF::Subject] subject - Fetch the corresponding blank node that was added
100
+ # @return [RDF::Statement]
101
+ # @raise [KeyError] when the subject has not previously been added
102
+ # @see #add_blank_node
103
+ def fetch_blank_node(subject)
104
+ @blank_nodes.fetch(subject)
105
+ end
106
+
107
+ # @api public
108
+ # @param [String] pid - an identifier
109
+ # @return [String] pid
110
+ # @raise PidAlreadySetError - if you attempted to set a different PID
111
# Set the PID of the ROF document. Setting the same PID again is allowed;
# a different PID raises PidAlreadySetError.
def add_pid(pid)
  pid = coerce_object_to_string(pid)
  unless @rof.key?('pid')
    @rof['pid'] = pid
    return pid
  end
  return pid if @rof['pid'] == pid

  raise PidAlreadySetError, "Attempted to set pid=#{pid}, but it is already set to #{@rof['pid']}"
end
122
+
123
+ # @api public
124
+ # @param [Array<String>, String] location - a list of nested hash keys (or a single string)
125
+ # @param [String] value - a translated value for the original RDF Statement
126
+ # @param [false, RDF::Node] blank_node
127
+ # @return [Array] location, value
128
# Route a translated value to its place in the ROF document.
#
# The location ['pid'] is special-cased and returns whatever #add_pid
# returns. Every other location is stored through the blank-node or
# non-blank-node helper and [location, value] is returned, with location
# normalized to an Array.
def add_predicate_location_and_value(location, value, blank_node = false)
  location = Array.wrap(location)
  return add_pid(value) if location == ['pid']

  if blank_node
    add_predicate_location_and_value_direct_for_blank_node(location, value, blank_node)
  else
    add_predicate_location_and_value_direct_for_non_blank_node(location, value)
  end
  [location, value]
end
141
+
142
# Remember a value that belongs to a blank node. Values are grouped by node
# and by the parent part of the location; the last location element becomes
# the key of the stored pair.
def add_predicate_location_and_value_direct_for_blank_node(location, value, blank_node)
  fetch_blank_node(blank_node) # Ensure the node exists
  *parent, key = location
  by_parent = (@blank_node_locations[blank_node] ||= {})
  (by_parent[parent] ||= []) << { key => Array.wrap(coerce_object_to_string(value)) }
end
148
+
149
# Append a value to the list found at `location` inside @rof, creating the
# intermediate hashes and the list as needed.
def add_predicate_location_and_value_direct_for_non_blank_node(location, value)
  *path, slug = location
  container = path.inject(@rof) { |node, key| node[key] ||= {} }
  (container[slug] ||= []) << coerce_object_to_string(value)
end
159
+
160
+ private
161
+
162
# Reduce an RDF object to the value stored in ROF.
#
# * nil is returned as is.
# * Anything whose string form contains a curate.nd.edu "show" URL becomes
#   "und:<id>", where <id> is the path segment after /show/. The segment
#   stops at the next slash, so a trailing "/" is not part of the id.
# * Objects responding to #value yield that value.
# * Everything else is returned unchanged.
#
# @param [Object] object
# @return [Object]
def coerce_object_to_string(object)
  return object if object.nil?

  if object.to_s =~ %r{https?://curate\.nd\.edu/show/([^/]+)/?}
    "und:#{Regexp.last_match(1)}"
  elsif object.respond_to?(:value)
    object.value
  else
    object
  end
end
172
+ end
173
+ end
174
+ end
175
+ end