rof 1.0.7 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +9 -7
  3. data/LICENSE +201 -16
  4. data/Rakefile +46 -0
  5. data/bin/csv_to_rof +1 -2
  6. data/bin/fedora_to_rof +7 -1
  7. data/bin/jsonld_to_rof +26 -0
  8. data/bin/osf_to_rof +6 -2
  9. data/bin/rof +5 -19
  10. data/lib/rof.rb +2 -6
  11. data/lib/rof/access.rb +1 -1
  12. data/lib/rof/cli.rb +104 -67
  13. data/lib/rof/compare_rof.rb +68 -39
  14. data/lib/rof/filter.rb +21 -0
  15. data/lib/rof/filters.rb +38 -0
  16. data/lib/rof/filters/bendo.rb +15 -17
  17. data/lib/rof/filters/date_stamp.rb +5 -4
  18. data/lib/rof/filters/file_to_url.rb +5 -3
  19. data/lib/rof/filters/label.rb +9 -7
  20. data/lib/rof/filters/work.rb +7 -5
  21. data/lib/rof/ingest.rb +5 -0
  22. data/lib/rof/osf_context.rb +2 -2
  23. data/lib/rof/rdf_context.rb +2 -0
  24. data/lib/rof/translator.rb +18 -0
  25. data/lib/rof/translators.rb +23 -0
  26. data/lib/rof/{translate_csv.rb → translators/csv_to_rof.rb} +4 -3
  27. data/lib/rof/translators/fedora_to_rof.rb +244 -0
  28. data/lib/rof/translators/jsonld_to_rof.rb +112 -0
  29. data/lib/rof/translators/jsonld_to_rof/accumulator.rb +175 -0
  30. data/lib/rof/translators/jsonld_to_rof/predicate_handler.rb +223 -0
  31. data/lib/rof/translators/jsonld_to_rof/predicate_object_handler.rb +125 -0
  32. data/lib/rof/translators/jsonld_to_rof/statement_handler.rb +91 -0
  33. data/lib/rof/translators/osf_to_rof.rb +191 -0
  34. data/lib/rof/utility.rb +44 -1
  35. data/lib/rof/version.rb +1 -1
  36. data/rof.gemspec +10 -2
  37. data/spec/coverage_helper.rb +17 -0
  38. data/spec/fixtures/for_utility_load_items_from_json_file/multiple_items.json +8 -0
  39. data/spec/fixtures/for_utility_load_items_from_json_file/parse_error.json +3 -0
  40. data/spec/fixtures/for_utility_load_items_from_json_file/single_item.json +3 -0
  41. data/spec/fixtures/jsonld_to_rof/0g354f18610.jsonld +113 -0
  42. data/spec/fixtures/jsonld_to_rof/0g354f18610.rof +96 -0
  43. data/spec/fixtures/jsonld_to_rof/2j62s467216.jsonld +113 -0
  44. data/spec/fixtures/jsonld_to_rof/2j62s467216.rof +93 -0
  45. data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.jsonld +70 -0
  46. data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.rof +87 -0
  47. data/spec/fixtures/jsonld_to_rof/cr56n01253w.jsonld +84 -0
  48. data/spec/fixtures/jsonld_to_rof/cr56n01253w.rof +95 -0
  49. data/spec/fixtures/jsonld_to_rof/h989r21069m.jsonld +84 -0
  50. data/spec/fixtures/jsonld_to_rof/h989r21069m.rof +98 -0
  51. data/spec/fixtures/jsonld_to_rof/js956d59913.jsonld +79 -0
  52. data/spec/fixtures/jsonld_to_rof/js956d59913.rof +89 -0
  53. data/spec/fixtures/jsonld_to_rof/m039k358q5c.jsonld +80 -0
  54. data/spec/fixtures/jsonld_to_rof/m039k358q5c.rof +64 -0
  55. data/spec/fixtures/jsonld_to_rof/nk322b9161g.jsonld +89 -0
  56. data/spec/fixtures/jsonld_to_rof/nk322b9161g.rof +69 -0
  57. data/spec/fixtures/jsonld_to_rof/p8418k7430d.jsonld +84 -0
  58. data/spec/fixtures/jsonld_to_rof/p8418k7430d.rof +67 -0
  59. data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.jsonld +98 -0
  60. data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.rof +110 -0
  61. data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.jsonld +94 -0
  62. data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.rof +121 -0
  63. data/spec/fixtures/osf/phz6b.tar.gz +0 -0
  64. data/spec/lib/rof/access_spec.rb +30 -23
  65. data/spec/lib/rof/cli_spec.rb +83 -60
  66. data/spec/lib/rof/compare_rof_spec.rb +35 -24
  67. data/spec/lib/rof/filter_spec.rb +10 -0
  68. data/spec/lib/rof/filters/bendo_spec.rb +42 -0
  69. data/spec/lib/rof/filters/date_stamp_spec.rb +9 -5
  70. data/spec/lib/rof/filters/file_to_url_spec.rb +7 -3
  71. data/spec/lib/rof/filters/label_spec.rb +121 -77
  72. data/spec/lib/rof/filters/work_spec.rb +7 -4
  73. data/spec/lib/rof/filters_spec.rb +14 -0
  74. data/spec/lib/rof/translator_spec.rb +15 -0
  75. data/spec/lib/rof/{translate_csv_spec.rb → translators/csv_to_rof_spec.rb} +14 -14
  76. data/spec/lib/rof/translators/fedora_to_rof_spec.rb +64 -0
  77. data/spec/lib/rof/translators/jsonld_to_rof/accumulator_spec.rb +121 -0
  78. data/spec/lib/rof/translators/jsonld_to_rof/predicate_handler_spec.rb +73 -0
  79. data/spec/lib/rof/translators/jsonld_to_rof/predicate_object_handler_spec.rb +48 -0
  80. data/spec/lib/rof/translators/jsonld_to_rof/statement_handler_spec.rb +40 -0
  81. data/spec/lib/rof/translators/jsonld_to_rof_spec.rb +120 -0
  82. data/spec/lib/rof/{osf_to_rof_spec.rb → translators/osf_to_rof_spec.rb} +55 -25
  83. data/spec/lib/rof/translators_spec.rb +14 -0
  84. data/spec/lib/rof/utility_spec.rb +47 -1
  85. data/spec/spec_helper.rb +1 -1
  86. data/spec/support/an_rof_filter.rb +10 -0
  87. metadata +186 -15
  88. data/lib/rof/get_from_fedora.rb +0 -211
  89. data/lib/rof/osf_to_rof.rb +0 -123
  90. data/spec/lib/rof/get_from_fedora_spec.rb +0 -22
@@ -0,0 +1,223 @@
1
+ require 'active_support/core_ext/array/wrap'
2
+
3
+ module ROF
4
+ module Translators
5
+ module JsonldToRof
6
+ # Responsible for dealing with registered predicates and how those are handled.
7
+ module PredicateHandler
8
# Raised when none of the registered URL handlers can handle a predicate.
class UnhandledPredicateError < RuntimeError
  # @param [#to_s] predicate - the predicate that could not be handled
  # @param [#inspect] urls - the URLs that were registered when the lookup failed
  def initialize(predicate, urls)
    message = %(Unable to handle predicate "#{predicate}". The following predicate URLs were registered #{urls.inspect})
    super(message)
  end
end
13
+
14
+ # @api public
15
+ #
16
+ # Parse the RDF predicate and RDF object and add their contents to the accumulator
17
+ #
18
+ # @example
19
+ # Given the following 4 RDF N-Triples (subject, predicate, object). The first two, with subject "_:b0" represent blank nodes.
20
+ # The last one with subject "<https://curate.nd.edu/show/zk51vd69n1r>" has an object that points to the "_:b0" blank node.
21
+ # _:b0 <http://purl.org/dc/terms/contributor> "David R.Hyde" .
22
+ # _:b0 <http://www.ndltd.org/standards/metadata/etdms/1.1/role> "Research Director" .
23
+ # <https://curate.nd.edu/show/zk51vd69n1r> <http://purl.org/dc/terms/contributor> _:b0 .
24
+ # <https://curate.nd.edu/show/zk51vd69n1r> <http://projecthydra.org/ns/relations#hasEditorGroup> <https://curate.nd.edu/show/q524jm23g92> .
25
+ # For the first two N-Triples you would get a BlankNodeHandler; For the last two, you would get a UriSubjectHandler
26
+ #
27
+ # @note It is assumed that all blank nodes (e.g. RDF::Node) will be processed before you process any RDF::URI nodes.
28
+ #
29
+ # @param [RDF::Predicate] predicate - the RDF predicate that we will parse and add to the appropriate spot in the accumulator
30
+ # @param [RDF::Object] object - the RDF object that we will parse and add to the appropriate spot in the accumulator
31
+ # @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
32
+ # @return [ROF::Translators::JsonldToRof::Accumulator] the given accumulator
33
+ # @raise [ROF::Translators::JsonldToRof::PredicateHandler::UnhandledPredicateError] when we are unable to handle the given predicate
34
# Looks up the handler registered for the predicate, lets it record the object
# in the accumulator, and hands the same accumulator back to the caller.
def self.call(predicate, object, accumulator, blank_node = false)
  registry.handler_for(predicate).handle(object, accumulator, blank_node)
  accumulator
end
39
+
40
+ # @api public
41
+ # @param [String] url - The URL that we want to match against
42
+ # @yield The block to configure how we handle RDF Predicates that match the given URL
43
+ # @yieldparam [ROF::Translators::JsonldToRof::PredicateHandler::UrlHandler]
44
+ # @see ./spec/lib/rof/translators/jsonld_to_rof/predicate_handler_spec.rb for details and usage
45
# Builds a UrlHandler for the URL (configured by the optional block) and
# appends it to the registry.
def self.register(url, &block)
  url_handler = UrlHandler.new(url, &block)
  registry << url_handler
end
48
+
49
+ # @api private
50
# Lazily builds, then reuses, the set that holds the registered URL handlers.
def self.registry
  return @registry if @registry
  @registry = RegistrySet.new
end
53
+ private_class_method :registry
54
+
55
# Replaces the current registry with the given set (a fresh, empty RegistrySet
# by default) and returns the replacement.
def self.clear_registry!(set_with = RegistrySet.new)
  @registry = set_with
  @registry
end
58
+ private_class_method :clear_registry!
59
+
60
# An ordered collection of URL handlers; the first handler that recognizes a
# predicate wins.
class RegistrySet
  def initialize
    @set = []
  end

  # Append a handler; handlers are consulted in insertion order.
  def <<(value)
    @set << value
  end

  # @param [#to_s] predicate - the predicate to look up
  # @return the first non-nil result of a handler's #location_extractor_for
  # @raise [UnhandledPredicateError] when no registered handler recognizes the predicate
  def handler_for(predicate)
    @set.each do |url_handler|
      extractor = url_handler.location_extractor_for(predicate)
      return extractor if extractor
    end
    raise UnhandledPredicateError.new(predicate, @set.map(&:url))
  end
end
79
+ private_constant :RegistrySet
80
+
81
+ # For a given URL map all of the predicates; Some predicates require explicit mapping, while others
82
+ # may use implicit mapping.
83
# Maps every RDF predicate that starts with a given URL to a location in the ROF document.
# The part of the predicate after the URL is the "slug". Slugs registered via #map are
# handled explicitly; every other slug falls back to an implicit location derived from
# #within, #namespace_prefix and the slug itself.
class UrlHandler
  # @param [String] url - the URL prefix this handler is responsible for
  # @yield [UrlHandler] the new instance, so the caller can configure it
  def initialize(url)
    @url = url
    @within = []
    @namespace_prefix = ''
    @slug_handlers = {}
    yield(self) if block_given?
  end
  attr_reader :url

  # The final key in the location array is prefixed with the namespace prefix; by default this is "".
  # Called without an argument this reads the prefix; with an argument it sets it.
  # @param [String, nil] prefix - the namespace prefix to apply to the last location in the array
  # @return [String]
  def namespace_prefix(prefix = nil)
    return @namespace_prefix if prefix.nil?
    @namespace_prefix = prefix
  end

  # The "within" array is prepended to the location array.
  # Called without an argument this reads the array; with an argument it sets it.
  # @param [Array<String>, String, nil] location - where in the ROF document we are putting the value
  # @return [Array<String>]
  def within(location = nil)
    return @within if location.nil?
    @within = Array.wrap(location)
  end

  # @param [#to_s] predicate
  # @return [nil, LocationExtractor] nil if the given predicate does not start with the url;
  #   otherwise a LocationExtractor for the slug that follows the url
  # @see LocationExtractor
  def location_extractor_for(predicate)
    # \A (not ^) so that only the very start of the predicate may match the URL;
    # ^ would also match at the start of any later line of a multi-line string.
    match = %r{\A#{Regexp.escape(@url)}(.*)}.match(predicate.to_s)
    return nil unless match
    LocationExtractor.new(predicate, handlers_for(match[1]))
  end

  # Register explicit handling for one slug. A slug may be mapped more than once; every
  # registered handler is invoked, in registration order.
  #
  # @param [String] slug - the part of the predicate that follows the url
  # @param [Hash] options (with symbol keys)
  # @option options [Boolean] :force - don't apply the within nor namespace prefix
  # @option options [Array] :to - an array that will be nested Hash keys
  # @yield If a block is given, call the block (and skip all other configuration)
  # @yieldparam [Object] object - the RDF object being handled
  # @yieldparam [Object] accumulator - the accumulator being filled
  # @return [Array<#call>] the handlers now registered for the slug
  # @see BlockSlugHandler for details concerning a mapping via a block
  # @see ExplicitLocationSlugHandler for details concerning a mapping via a to: option
  def map(slug, options = {}, &block)
    slug_handler =
      if block
        BlockSlugHandler.new(self, slug, options, block)
      else
        ExplicitLocationSlugHandler.new(self, slug, options)
      end
    (@slug_handlers[slug] ||= []) << slug_handler
  end

  # Responsible for coordinating the handlers that apply to a single predicate:
  # each handler is called, in order, with the same object and accumulator.
  class LocationExtractor
    def initialize(predicate, handlers)
      @predicate = predicate
      @handlers = Array.wrap(handlers)
    end

    # @return the given accumulator
    def handle(object, accumulator, blank_node)
      @handlers.each { |handler| handler.call(object, accumulator, blank_node) }
      accumulator
    end
  end
  private_constant :LocationExtractor

  # Fallback for slugs without an explicit mapping: the location is the url handler's
  # "within" array followed by the namespace-prefixed slug.
  class ImplicitLocationHandler
    def initialize(url_handler, slug)
      @url_handler = url_handler
      @slug = slug
    end
    attr_reader :slug

    def call(object, accumulator, blank_node)
      to = @url_handler.within + Array.wrap(slug)
      to[-1] = "#{@url_handler.namespace_prefix}#{to[-1]}"
      accumulator.add_predicate_location_and_value(to, object, blank_node)
    end
  end
  private_constant :ImplicitLocationHandler

  # Handles a slug by delegating to the block that was given to UrlHandler#map.
  class BlockSlugHandler
    def initialize(url_handler, slug, options, block)
      @url_handler = url_handler
      @slug = slug
      @options = options
      @block = block
    end
    attr_reader :slug

    # @todo Are there differences that need to be handled for the blank_node?
    def call(object, accumulator, _blank_node)
      @block.call(object, accumulator)
    end
  end
  private_constant :BlockSlugHandler

  # Handles a slug by writing to the location given via the :to option of UrlHandler#map.
  class ExplicitLocationSlugHandler
    def initialize(url_handler, slug, options)
      @url_handler = url_handler
      @slug = slug
      @options = options
    end
    attr_reader :slug

    def call(object, accumulator, blank_node)
      to = @options.fetch(:to)
      unless force?
        # Unless forced, the explicit location is nested and prefixed like an implicit one.
        to = @url_handler.within + Array.wrap(to)
        to[-1] = "#{@url_handler.namespace_prefix}#{to[-1]}"
      end
      accumulator.add_predicate_location_and_value(to, object, blank_node)
    end

    # @return [Boolean] true when the :to location must be used verbatim
    def force?
      @options.fetch(:force, false)
    end
  end
  private_constant :ExplicitLocationSlugHandler

  private

  # @param [String] slug - a slug that may or may not have been registered
  # @return [Array<#call>] an array of handlers that each respond to #call
  # @see ImplicitLocationHandler
  # @see ExplicitLocationSlugHandler
  # @see BlockSlugHandler
  def handlers_for(slug)
    Array.wrap(@slug_handlers.fetch(slug) { ImplicitLocationHandler.new(self, slug) })
  end
end
219
+ private_constant :UrlHandler
220
+ end
221
+ end
222
+ end
223
+ end
@@ -0,0 +1,125 @@
1
+ require 'rdf'
2
+ require 'rof/translators/jsonld_to_rof/predicate_handler'
3
+
4
+ module ROF
5
+ module Translators
6
+ module JsonldToRof
7
+ # We need to handle the Predicate / Object pair as one (thank you RDF blank nodes for this nuance)
8
+ module PredicateObjectHandler
9
+ # @api public
10
+ #
11
+ # Parse the RDF::Predicate and RDF::Object and add the relevant data to the accumulator
12
+ #
13
+ # @example
14
+ # Given the following 4 RDF N-Triples (subject, predicate, object). The first and second RDF objects are RDF::Literal. The 3rd triple's object is
15
+ # an RDF::Node. And the last is an RDF::URI. Each requires a different handler as they have nuanced differences.
16
+ # _:b0 <http://purl.org/dc/terms/contributor> "David R.Hyde" .
17
+ # _:b0 <http://www.ndltd.org/standards/metadata/etdms/1.1/role> "Research Director" .
18
+ # <https://curate.nd.edu/show/zk51vd69n1r> <http://purl.org/dc/terms/contributor> _:b0 .
19
+ # <https://curate.nd.edu/show/zk51vd69n1r> <http://projecthydra.org/ns/relations#hasEditorGroup> <https://curate.nd.edu/show/q524jm23g92> .
20
+ #
21
+ # @note It is assumed that all blank nodes (e.g. RDF::Node) will be processed before you process any RDF::URI nodes.
22
+ #
23
+ # @param [RDF::Predicate] predicate - the RDF predicate that we will parse and add to the appropriate spot in the accumulator
24
+ # @param [RDF::Object] object - the RDF object that we will parse and add to the appropriate spot in the accumulator
25
+ # @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
26
+ # @return [ROF::Translators::JsonldToRof::Accumulator] the given accumulator
27
+ # @raise [ROF::Translators::JsonldToRof::PredicateObjectHandler::UnknownRdfObjectTypeError] when the RDF object is not a recognized type
28
# Builds the handler that fits the object, runs it, and returns the accumulator.
def self.call(predicate, object, accumulator, options = {})
  handler = new(predicate, object, accumulator, options)
  handler.call
  accumulator
end
32
+
33
+ # @api private
34
+ #
35
+ # @param [RDF::Predicate] predicate - the RDF predicate that we will parse and add to the appropriate spot in the accumulator
36
+ # @param [RDF::Object] object - the RDF object that we will parse and add to the appropriate spot in the accumulator
37
+ # @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
38
+ # @return [#call]
39
# Picks the handler class for the object's type and instantiates it.
def self.new(predicate, object, accumulator, options)
  handler_class = klass_for(object)
  handler_class.new(predicate, object, accumulator, options)
end
42
+
43
# Raised by klass_for when an RDF object is neither an RDF::URI, an RDF::Node nor an RDF::Literal.
class UnknownRdfObjectTypeError < RuntimeError; end
45
+
46
+ # @api private
47
# Chooses the handler class by the RDF object's type; the checks run in the
# order URI, Node, Literal.
# @raise [UnknownRdfObjectTypeError] when the object is none of those types
def self.klass_for(object)
  return UriPredicateObjectHandler if object.is_a?(RDF::URI)
  return NodePredicateObjectHandler if object.is_a?(RDF::Node)
  return LiteralPredicateObjectHandler if object.is_a?(RDF::Literal)
  raise UnknownRdfObjectTypeError, "Unable to determine object handler for #{object.inspect}"
end
59
+
60
+ # @api private
61
# Handles a predicate whose object is an RDF::URI by passing the pair straight
# to the PredicateHandler.
class UriPredicateObjectHandler
  def initialize(predicate, object, accumulator, options)
    @predicate, @object, @accumulator, @options = predicate, object, accumulator, options
  end

  # @return the accumulator given at construction
  def call
    blank_node = options[:blank_node]
    PredicateHandler.call(predicate, object, accumulator, blank_node)
    accumulator
  end

  private

  attr_reader :predicate, :object, :accumulator, :options
end
77
+ private_constant :UriPredicateObjectHandler
78
+
79
+ # @api private
80
+ # Blank Nodes; Oh how we love thee. Let me count the ways
81
# Handles a predicate whose object is a blank node: every predicate/object pair
# stored for that node in the accumulator is replayed through the
# PredicateObjectHandler, tagged with the node it came from.
class NodePredicateObjectHandler
  def initialize(predicate, object, accumulator, options)
    @predicate, @object, @accumulator, @options = predicate, object, accumulator, options
  end

  # @return the accumulator given at construction
  def call
    node_contents = accumulator.fetch_blank_node(object)
    node_contents.each_pair do |node_predicate, node_objects|
      node_objects.each do |node_object|
        PredicateObjectHandler.call(node_predicate, node_object, accumulator, blank_node: object)
      end
    end
    accumulator
  end

  private

  attr_reader :predicate, :object, :accumulator, :options
end
102
+ private_constant :NodePredicateObjectHandler
103
+
104
+ # @api private
105
# Handles a predicate whose object is an RDF::Literal by passing the pair
# straight to the PredicateHandler.
class LiteralPredicateObjectHandler
  def initialize(predicate, object, accumulator, options)
    @predicate, @object, @accumulator, @options = predicate, object, accumulator, options
  end

  # @return the accumulator given at construction
  def call
    accumulator.tap do |target|
      PredicateHandler.call(predicate, object, target, options[:blank_node])
    end
  end

  private

  attr_reader :predicate, :object, :accumulator, :options
end
121
+ private_constant :LiteralPredicateObjectHandler
122
+ end
123
+ end
124
+ end
125
+ end
@@ -0,0 +1,91 @@
1
+ require 'rof/translators/jsonld_to_rof/predicate_object_handler'
2
+
3
+ module ROF
4
+ module Translators
5
+ module JsonldToRof
6
+ # Responsible for parsing an RDF statement and adding to the accumulator.
7
+ module StatementHandler
8
+ # @api public
9
+ #
10
+ # Parse the RDF statement and add its contents to the accumulator
11
+ #
12
+ # @example
13
+ # Given the following 4 RDF N-Triples (subject, predicate, object). The first two, with subject "_:b0" represent blank nodes.
14
+ # The last one with subject "<https://curate.nd.edu/show/zk51vd69n1r>" has an object that points to the "_:b0" blank node.
15
+ # _:b0 <http://purl.org/dc/terms/contributor> "David R.Hyde" .
16
+ # _:b0 <http://www.ndltd.org/standards/metadata/etdms/1.1/role> "Research Director" .
17
+ # <https://curate.nd.edu/show/zk51vd69n1r> <http://purl.org/dc/terms/contributor> _:b0 .
18
+ # <https://curate.nd.edu/show/zk51vd69n1r> <http://projecthydra.org/ns/relations#hasEditorGroup> <https://curate.nd.edu/show/q524jm23g92> .
19
+ # For the first two N-Triples you would get a BlankNodeHandler; For the last two, you would get a UriSubjectHandler
20
+ #
21
+ # @note It is assumed that all blank nodes (e.g. RDF::Node) will be processed before you process any RDF::URI nodes.
22
+ #
23
+ # @param [RDF::Statement] statement - the RDF statement that we will parse and add to the appropriate spot in the accumulator
24
+ # @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
25
+ # @return [ROF::Translators::JsonldToRof::Accumulator] the given accumulator
26
+ # @raise [ROF::Translators::JsonldToRof::StatementHandler::UnhandledRdfSubjectError] when the RDF::Statement's subject is not a valid type
27
# Builds the handler that fits the statement's subject, runs it, and returns
# the accumulator.
def self.call(statement, accumulator)
  subject_handler = new(statement, accumulator)
  subject_handler.call
  accumulator
end
31
+
32
# Raised by StatementHandler.new when a statement's subject is neither an RDF::URI nor an RDF::Node.
class UnhandledRdfSubjectError < RuntimeError; end
34
+
35
+ # @api private
36
# Instantiates the handler that matches the type of the statement's subject.
# @raise [UnhandledRdfSubjectError] when the subject is neither an RDF::URI nor an RDF::Node
def self.new(statement, accumulator)
  subject = statement.subject
  return UriSubjectHandler.new(statement, accumulator) if subject.is_a?(RDF::URI)
  return BlankNodeHandler.new(statement, accumulator) if subject.is_a?(RDF::Node)
  raise UnhandledRdfSubjectError, "Unable to determine subject handler for #{statement.inspect}"
end
46
+
47
+ # Responsible for accumulating the ROF data for a URI based resource
48
# Responsible for accumulating the ROF data for a URI based resource
class UriSubjectHandler
  # Matches a CurateND "show" URL and captures the identifier that follows /show/.
  # The capture stops at the next "/", "?" or "#" so that a trailing slash, query or
  # fragment never leaks into the pid, and the dots of the host name are escaped so
  # they only match literal dots.
  CURATE_SHOW_URL = %r{\Ahttps://curate\.nd\.edu/show/([^/?#]+)}
  private_constant :CURATE_SHOW_URL

  # @param statement - must respond to #subject, #predicate and #object
  # @param accumulator - must respond to #add_pid
  def initialize(statement, accumulator)
    @accumulator = accumulator
    @statement = statement
  end

  # Records the pid derived from the subject (if any), then the predicate/object pair.
  # @return the accumulator given at construction
  def call
    handle_subject
    handle_predicate_and_object
    @accumulator
  end

  private

  def handle_predicate_and_object
    PredicateObjectHandler.call(@statement.predicate, @statement.object, @accumulator)
  end

  # Adds "und:<id>" to the accumulator when the subject is a CurateND show URL;
  # any other subject is ignored here.
  def handle_subject
    match = CURATE_SHOW_URL.match(@statement.subject.to_s)
    return nil unless match
    @accumulator.add_pid("und:#{match[1]}")
  end
end
72
+ private_constant :UriSubjectHandler
73
+
74
+ # Responsible for handling blank nodes in the RDF graph; Examples include ETD degree information
75
+ # Blank node subjects behave different from UriSubjectHandler
76
# Responsible for statements whose subject is a blank node: the whole statement
# is stored in the accumulator via #add_blank_node.
class BlankNodeHandler
  def initialize(statement, accumulator)
    @statement = statement
    @accumulator = accumulator
  end

  # @return the accumulator given at construction
  def call
    @accumulator.tap { |target| target.add_blank_node(@statement) }
  end
end
87
+ private_constant :BlankNodeHandler
88
+ end
89
+ end
90
+ end
91
+ end
@@ -0,0 +1,191 @@
1
+ require 'json'
2
+ require 'zlib'
3
+ require 'rubygems/package'
4
+ require 'rdf/turtle'
5
+ require 'rof/osf_context'
6
+ require 'rof/rdf_context'
7
+ require 'rof/utility'
8
+ require 'rof/translator'
9
+
10
+ module ROF::Translators
11
+ # Class for managing OSF Archive data transformations
12
+ # It is called after the get-from-osf task, and before the work-xlat task
13
class OsfToRof < ROF::Translator
  # @todo Set this to be something more meaningful than an empty lambda
  # @return [#call] a two-argument lambda (archive type, OSF project identifier) that returns nil
  def self.default_previously_archived_pid_finder
    lambda { |_archive_type, _osf_project_identifier| }
  end

  # Convert Osf Archive tar.gz to ROF
  def self.call(project, config, previously_archived_pid_finder = default_previously_archived_pid_finder)
    translator = new(project, config, previously_archived_pid_finder)
    translator.call
  end

  def initialize(project, config, previously_archived_pid_finder = self.class.default_previously_archived_pid_finder)
    @project = project
    @config = config
    @previously_archived_pid_finder = previously_archived_pid_finder
    @osf_map = ROF::OsfToNDMap
  end

  # @return [Hash, Array<Hash>] an empty Hash when there is no project; otherwise a
  #   one-element Array holding the archive record
  def call
    return {} if project.nil?
    @ttl_data = ttl_from_targz(source_slug + '.ttl')
    [build_archive_record]
  end

  # @api private
  # @see https://github.com/ndlib/curate_nd/blob/677c05c836ff913c01dcbbfc5e5d21366b87d500/app/repository_models/osf_archive.rb#L62
  def archive_type
    project.fetch('package_type')
  end

  # @api private
  # This is a bit of a misnomer; as used it represents the path to the project or registration
  # that we have ingested (e.g. https://osf.io/:source_slug)
  #
  # It was previously named :project_identifier in this class, but that gets conflated with the
  # underlying object's osf_project_identifier (e.g. what OSF Project was this archive originally based on)
  #
  # @see https://github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/app/repository_models/osf_archive.rb#L96
  def source_slug
    project.fetch('project_identifier')
  end

  # @api private
  # @see https://github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/app/repository_models/osf_archive.rb#L106
  def osf_project_identifier
    if project['package_type'] == 'OSF Project'
      source_slug
    else
      registered_from = ttl_data[0][@osf_map['registeredFrom']][0]['@id']
      osf_url_from_filename(registered_from)
    end
  end

  private

  attr_reader :config, :project

  # A function responsible for finding the previously archived pid.
  # @return [#call]
  # @see #default_previously_archived_pid_finder for interface
  attr_reader :previously_archived_pid_finder

  # This is an array; the additional elements are the contributor(s)
  # @return [Array<Hash>]
  # @see #ttl_from_targz
  attr_reader :ttl_data

  # Reads a ttl file and turns it into JSON-LD that we can parse
  def fetch_from_ttl(ttl_file)
    reader = RDF::Turtle::Reader.open(ttl_file, prefixes: ROF::OsfPrefixList.dup)
    JSON::LD::API.fromRdf(reader)
  end

  # Extracts the given ttl file from the JHU tar.gz package
  # - assumed to live under data/obj/root
  # @return [Array<Hash>] the first element is the "work" and the additional elements, if any, are the contributor(s)
  def ttl_from_targz(ttl_filename)
    package_dir = config.fetch('package_dir')
    path_in_package = File.join(source_slug, 'data/obj/root', ttl_filename)
    tarball = File.join(package_dir, source_slug + '.tar.gz')
    ROF::Utility.file_from_targz(tarball, path_in_package)
    fetch_from_ttl(File.join(package_dir, path_in_package))
  end

  # Maps RELS-EXT
  def map_rels_ext
    apply_previous_archived_version_if_applicable('@context' => ROF::RelsExtRefContext.dup)
  end

  # Sets metadata
  def map_metadata
    work = ttl_data[0]
    {
      '@context' => ROF::RdfContext.dup,
      # metadata derived from the project ttl file
      'dc:created' => Time.iso8601(work[@osf_map['dc:created']][0]['@value']).to_date.iso8601 + 'Z',
      'dc:title' => work[@osf_map['dc:title']][0]['@value'],
      'dc:description' => work[@osf_map['dc:description']][0]['@value'],
      'dc:subject' => map_subject,
      # metadata derived from osf_projects data, passed from UI
      'dc:source' => 'https://osf.io/' + source_slug,
      'dc:creator#adminstrative_unit' => project['administrative_unit'],
      'dc:creator#affiliation' => project['affiliation'],
      'nd:osfProjectIdentifier' => osf_project_identifier,
      'dc:creator' => map_creator,
      'dc:type' => project['package_type']
    }
  end

  # For reference to the assumed RELS-EXT see the following spec in CurateND
  # @see https://github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/spec/repository_models/osf_archive_spec.rb#L97
  def apply_previous_archived_version_if_applicable(rels_ext)
    # Ask the injected finder first; only when it answers nil do we check SOLR.
    previous_pid = previously_archived_pid_finder.call(archive_type, osf_project_identifier)
    if previous_pid.nil?
      previous_pid = ROF::Utility.check_solr_for_previous(config, osf_project_identifier)
    end
    rels_ext['pav:previousVersion'] = previous_pid if previous_pid
    rels_ext
  end

  # Constructs the OsfArchive record from ttl_data, data from the UI form,
  # and task config data
  def build_archive_record
    {
      'owner' => project['owner'],
      'type' => 'OsfArchive',
      'rights' => map_rights,
      'rels-ext' => map_rels_ext,
      'metadata' => map_metadata,
      'files' => [source_slug + '.tar.gz']
    }
  end

  # Sets the subject; an empty string when the work has none
  def map_subject
    work = ttl_data[0]
    subject_key = @osf_map['dc:subject']
    return '' unless work.key?(subject_key)
    work[subject_key][0]['@value']
  end

  # Returns the file name (text after the last "/") with its last extension removed
  def osf_url_from_filename(ttl_file)
    file_name = ttl_file.rpartition('/').last
    file_name.rpartition('.').first
  end

  # Figures out the rights: public read access only when the work is flagged public
  def map_rights
    is_public = ttl_data[0][@osf_map['isPublic']][0]['@value'] == 'true'
    is_public ? { 'read-groups' => ['public'] } : {}
  end

  # Sets the creator - needs to read another ttl for the User data
  # only contributors with isBibliographic true are considered
  def map_creator
    ttl_data[0][@osf_map['hasContributor']].each_with_object([]) do |contributor, creators|
      # Looping through the primary document and the contributors
      ttl_data.each do |item|
        next unless item['@id'] == contributor['@id']
        next unless item[@osf_map['isBibliographic']][0]['@value'] == 'true'
        creators.push(map_user_from_ttl(item[@osf_map['hasUser']][0]['@id']))
      end
    end
  end

  # Read the user ttl file, extract the User's full name
  def map_user_from_ttl(file_subpath)
    user_document = ttl_from_targz(File.basename(file_subpath))
    user_document[0][@osf_map['hasFullName']][0]['@value']
  end
end
191
+ end