rof 1.0.7 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +9 -7
  3. data/LICENSE +201 -16
  4. data/Rakefile +46 -0
  5. data/bin/csv_to_rof +1 -2
  6. data/bin/fedora_to_rof +7 -1
  7. data/bin/jsonld_to_rof +26 -0
  8. data/bin/osf_to_rof +6 -2
  9. data/bin/rof +5 -19
  10. data/lib/rof.rb +2 -6
  11. data/lib/rof/access.rb +1 -1
  12. data/lib/rof/cli.rb +104 -67
  13. data/lib/rof/compare_rof.rb +68 -39
  14. data/lib/rof/filter.rb +21 -0
  15. data/lib/rof/filters.rb +38 -0
  16. data/lib/rof/filters/bendo.rb +15 -17
  17. data/lib/rof/filters/date_stamp.rb +5 -4
  18. data/lib/rof/filters/file_to_url.rb +5 -3
  19. data/lib/rof/filters/label.rb +9 -7
  20. data/lib/rof/filters/work.rb +7 -5
  21. data/lib/rof/ingest.rb +5 -0
  22. data/lib/rof/osf_context.rb +2 -2
  23. data/lib/rof/rdf_context.rb +2 -0
  24. data/lib/rof/translator.rb +18 -0
  25. data/lib/rof/translators.rb +23 -0
  26. data/lib/rof/{translate_csv.rb → translators/csv_to_rof.rb} +4 -3
  27. data/lib/rof/translators/fedora_to_rof.rb +244 -0
  28. data/lib/rof/translators/jsonld_to_rof.rb +112 -0
  29. data/lib/rof/translators/jsonld_to_rof/accumulator.rb +175 -0
  30. data/lib/rof/translators/jsonld_to_rof/predicate_handler.rb +223 -0
  31. data/lib/rof/translators/jsonld_to_rof/predicate_object_handler.rb +125 -0
  32. data/lib/rof/translators/jsonld_to_rof/statement_handler.rb +91 -0
  33. data/lib/rof/translators/osf_to_rof.rb +191 -0
  34. data/lib/rof/utility.rb +44 -1
  35. data/lib/rof/version.rb +1 -1
  36. data/rof.gemspec +10 -2
  37. data/spec/coverage_helper.rb +17 -0
  38. data/spec/fixtures/for_utility_load_items_from_json_file/multiple_items.json +8 -0
  39. data/spec/fixtures/for_utility_load_items_from_json_file/parse_error.json +3 -0
  40. data/spec/fixtures/for_utility_load_items_from_json_file/single_item.json +3 -0
  41. data/spec/fixtures/jsonld_to_rof/0g354f18610.jsonld +113 -0
  42. data/spec/fixtures/jsonld_to_rof/0g354f18610.rof +96 -0
  43. data/spec/fixtures/jsonld_to_rof/2j62s467216.jsonld +113 -0
  44. data/spec/fixtures/jsonld_to_rof/2j62s467216.rof +93 -0
  45. data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.jsonld +70 -0
  46. data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.rof +87 -0
  47. data/spec/fixtures/jsonld_to_rof/cr56n01253w.jsonld +84 -0
  48. data/spec/fixtures/jsonld_to_rof/cr56n01253w.rof +95 -0
  49. data/spec/fixtures/jsonld_to_rof/h989r21069m.jsonld +84 -0
  50. data/spec/fixtures/jsonld_to_rof/h989r21069m.rof +98 -0
  51. data/spec/fixtures/jsonld_to_rof/js956d59913.jsonld +79 -0
  52. data/spec/fixtures/jsonld_to_rof/js956d59913.rof +89 -0
  53. data/spec/fixtures/jsonld_to_rof/m039k358q5c.jsonld +80 -0
  54. data/spec/fixtures/jsonld_to_rof/m039k358q5c.rof +64 -0
  55. data/spec/fixtures/jsonld_to_rof/nk322b9161g.jsonld +89 -0
  56. data/spec/fixtures/jsonld_to_rof/nk322b9161g.rof +69 -0
  57. data/spec/fixtures/jsonld_to_rof/p8418k7430d.jsonld +84 -0
  58. data/spec/fixtures/jsonld_to_rof/p8418k7430d.rof +67 -0
  59. data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.jsonld +98 -0
  60. data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.rof +110 -0
  61. data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.jsonld +94 -0
  62. data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.rof +121 -0
  63. data/spec/fixtures/osf/phz6b.tar.gz +0 -0
  64. data/spec/lib/rof/access_spec.rb +30 -23
  65. data/spec/lib/rof/cli_spec.rb +83 -60
  66. data/spec/lib/rof/compare_rof_spec.rb +35 -24
  67. data/spec/lib/rof/filter_spec.rb +10 -0
  68. data/spec/lib/rof/filters/bendo_spec.rb +42 -0
  69. data/spec/lib/rof/filters/date_stamp_spec.rb +9 -5
  70. data/spec/lib/rof/filters/file_to_url_spec.rb +7 -3
  71. data/spec/lib/rof/filters/label_spec.rb +121 -77
  72. data/spec/lib/rof/filters/work_spec.rb +7 -4
  73. data/spec/lib/rof/filters_spec.rb +14 -0
  74. data/spec/lib/rof/translator_spec.rb +15 -0
  75. data/spec/lib/rof/{translate_csv_spec.rb → translators/csv_to_rof_spec.rb} +14 -14
  76. data/spec/lib/rof/translators/fedora_to_rof_spec.rb +64 -0
  77. data/spec/lib/rof/translators/jsonld_to_rof/accumulator_spec.rb +121 -0
  78. data/spec/lib/rof/translators/jsonld_to_rof/predicate_handler_spec.rb +73 -0
  79. data/spec/lib/rof/translators/jsonld_to_rof/predicate_object_handler_spec.rb +48 -0
  80. data/spec/lib/rof/translators/jsonld_to_rof/statement_handler_spec.rb +40 -0
  81. data/spec/lib/rof/translators/jsonld_to_rof_spec.rb +120 -0
  82. data/spec/lib/rof/{osf_to_rof_spec.rb → translators/osf_to_rof_spec.rb} +55 -25
  83. data/spec/lib/rof/translators_spec.rb +14 -0
  84. data/spec/lib/rof/utility_spec.rb +47 -1
  85. data/spec/spec_helper.rb +1 -1
  86. data/spec/support/an_rof_filter.rb +10 -0
  87. metadata +186 -15
  88. data/lib/rof/get_from_fedora.rb +0 -211
  89. data/lib/rof/osf_to_rof.rb +0 -123
  90. data/spec/lib/rof/get_from_fedora_spec.rb +0 -22
@@ -0,0 +1,223 @@
1
+ require 'active_support/core_ext/array/wrap'
2
+
3
+ module ROF
4
+ module Translators
5
+ module JsonldToRof
6
+ # Responsible for dealing with registered predicates and how those are handled.
7
+ module PredicateHandler
8
+ class UnhandledPredicateError < RuntimeError
9
+ def initialize(predicate, urls)
10
+ super(%(Unable to handle predicate "#{predicate}". The following predicate URLs were registered #{urls.inspect}))
11
+ end
12
+ end
13
+
14
+ # @api public
15
+ #
16
+ # Parse the RDF predicate and RDF object and add their contents to the accumulator
17
+ #
18
+ # @example
19
+ # Given the following 4 RDF N-Triples (subject, predicate, object). The first two, with subject "_:b0" represent blank nodes.
20
+ # The last one with subject "<https://curate.nd.edu/show/zk51vd69n1r>" has an object that points to the "_:b0" blank node.
21
+ # _:b0 <http://purl.org/dc/terms/contributor> "David R.Hyde" .
22
+ # _:b0 <http://www.ndltd.org/standards/metadata/etdms/1.1/role> "Research Director" .
23
+ # <https://curate.nd.edu/show/zk51vd69n1r> <http://purl.org/dc/terms/contributor> _:b0 .
24
+ # <https://curate.nd.edu/show/zk51vd69n1r> <http://projecthydra.org/ns/relations#hasEditorGroup> <https://curate.nd.edu/show/q524jm23g92> .
25
+ # For the first two N-Triples you would get a BlankNodeHandler; For the last two, you would get a UriSubjectHandler
26
+ #
27
+ # @note It is assumed that all blank nodes (e.g. RDF::Node) will be processed before you process any RDF::URI nodes.
28
+ #
29
+ # @param [RDF::Predicate] predicate - the RDF predicate that we will parse and add to the appropriate spot in the accumulator
30
+ # @param [RDF::Object] object - the RDF object that we will parse and add to the appropriate spot in the accumulator
31
+ # @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
32
+ # @return [ROF::Translators::JsonldToRof::Accumulator] the given accumulator
33
+ # @raise [ROF::Translators::JsonldToRof::PredicateHandler::UnhandledPredicateError] when we are unable to handle the given predicate
34
+ def self.call(predicate, object, accumulator, blank_node = false)
35
+ handler = registry.handler_for(predicate)
36
+ handler.handle(object, accumulator, blank_node)
37
+ accumulator
38
+ end
39
+
40
+ # @api public
41
+ # @param [String] url - The URL that we want to match against
42
+ # @yield The block to configure how we handle RDF Predicates that match the given URL
43
+ # @yieldparam [ROF::Translators::JsonldToRof::PredicateHandler::UrlHandler]
44
+ # @see ./spec/lib/rof/translators/jsonld_to_rof/predicate_handler_spec.rb for details and usage
45
+ def self.register(url, &block)
46
+ registry << UrlHandler.new(url, &block)
47
+ end
48
+
49
+ # @api private
50
+ def self.registry
51
+ @registry ||= RegistrySet.new
52
+ end
53
+ private_class_method :registry
54
+
55
+ def self.clear_registry!(set_with = RegistrySet.new)
56
+ @registry = set_with
57
+ end
58
+ private_class_method :clear_registry!
59
+
60
+ class RegistrySet
61
+ def initialize
62
+ @set = []
63
+ end
64
+
65
+ def <<(value)
66
+ @set << value
67
+ end
68
+
69
+ def handler_for(predicate)
70
+ location_extractor = nil
71
+ @set.each do |handler|
72
+ location_extractor = handler.location_extractor_for(predicate)
73
+ break if location_extractor
74
+ end
75
+ raise UnhandledPredicateError.new(predicate, @set.map(&:url)) if location_extractor.nil?
76
+ location_extractor
77
+ end
78
+ end
79
+ private_constant :RegistrySet
80
+
81
+ # For a given URL map all of the predicates; Some predicates require explicit mapping, while others
82
+ # may use implicit mapping.
83
+ class UrlHandler
84
+ def initialize(url)
85
+ @url = url
86
+ @within = []
87
+ @namespace_prefix = ''
88
+ @slug_handlers = {}
89
+ yield(self) if block_given?
90
+ end
91
+ attr_reader :url
92
+
93
+ # The final key in the location array should be prefixed with the namespace_prefix; By default this is ""
94
+ # @param [String, nil] prefix - what is the namespace prefix to apply to the last location in the array.
95
+ # @return [String]
96
+ def namespace_prefix(prefix = nil)
97
+ return @namespace_prefix if prefix.nil?
98
+ @namespace_prefix = prefix
99
+ end
100
+
101
+ # Prepend the within array to the location array
102
+ # @param [Array<String>, nil] location - where in the ROF document are we putting the value
103
+ # @return [Array<String>]
104
+ def within(location = nil)
105
+ return @within if location.nil?
106
+ @within = Array.wrap(location)
107
+ end
108
+
109
+ # @param [#to_s] predicate
110
+ # @return [nil, LocationExtractor] if the given predicate does not match the url, return nil; Otherwise return a LocationExtractor
111
+ # @see LocationExtractor
112
+ def location_extractor_for(predicate)
113
+ return nil unless predicate.to_s =~ %r{^#{Regexp.escape(@url)}(.*)}
114
+ slug = $1
115
+ handlers = handlers_for(slug)
116
+ LocationExtractor.new(predicate, handlers)
117
+ end
118
+
119
+ private
120
+
121
+ # @param [String] slug - a slug that may or may not have been registered
122
+ # @return [Array<#call>] an array of handlers that each respond to #call
123
+ # @see ImplicitLocationHandler
124
+ # @see ExplicitLocationSlugHandler
125
+ # @see BlockSlugHandler
126
+ def handlers_for(slug)
127
+ Array.wrap(@slug_handlers.fetch(slug) { ImplicitLocationHandler.new(self, slug) })
128
+ end
129
+
130
+ public
131
+
132
+ # @param [String] slug - the part of the predicate that follows the registered URL
133
+ # @param [Hash] options (with symbol keys)
134
+ # @option options [Boolean] :force - don't apply the within nor namespace prefix
135
+ # @option options [Array] :to - an array that will be nested Hash keys
136
+ # @yield If a block is given, call the block (and skip all other configuration)
137
+ # @yieldparam [String] object
138
+ # @see BlockSlugHandler for details concerning a mapping via a block
139
+ # @see ExplicitLocationSlugHandler for details concerning a mapping via a to: option
140
+ def map(slug, options = {}, &block)
141
+ @slug_handlers ||= {}
142
+ @slug_handlers[slug] ||= []
143
+ if block_given?
144
+ @slug_handlers[slug] << BlockSlugHandler.new(self, slug, options, block)
145
+ else
146
+ @slug_handlers[slug] << ExplicitLocationSlugHandler.new(self, slug, options)
147
+ end
148
+ end
149
+
150
+ # Responsible for coordinating the handlers for a predicate; each handler is called with the object, accumulator, and blank node
151
+ class LocationExtractor
152
+ def initialize(predicate, handlers)
153
+ @predicate = predicate
154
+ @handlers = Array.wrap(handlers)
155
+ end
156
+
157
+ def handle(object, accumulator, blank_node)
158
+ @handlers.each do |handler|
159
+ handler.call(object, accumulator, blank_node)
160
+ end
161
+ accumulator
162
+ end
163
+ end
164
+ private_constant :LocationExtractor
165
+
166
+ class ImplicitLocationHandler
167
+ def initialize(url_handler, slug)
168
+ @url_handler = url_handler
169
+ @slug = slug
170
+ end
171
+ attr_reader :slug
172
+ def call(object, accumulator, blank_node)
173
+ to = @url_handler.within + Array.wrap(slug)
174
+ to[-1] = "#{@url_handler.namespace_prefix}#{to[-1]}"
175
+ accumulator.add_predicate_location_and_value(to, object, blank_node)
176
+ end
177
+ end
178
+ private_constant :ImplicitLocationHandler
179
+
180
+ class BlockSlugHandler
181
+ def initialize(url_handler, slug, options, block)
182
+ @url_handler = url_handler
183
+ @slug = slug
184
+ @options = options
185
+ @block = block
186
+ end
187
+ attr_reader :slug
188
+
189
+ # @todo Are there differences that need to be handled for the blank_node?
190
+ def call(object, accumulator, _blank_node)
191
+ @block.call(object, accumulator)
192
+ end
193
+ end
194
+ private_constant :BlockSlugHandler
195
+
196
+ class ExplicitLocationSlugHandler
197
+ def initialize(url_handler, slug, options)
198
+ @url_handler = url_handler
199
+ @slug = slug
200
+ @options = options
201
+ end
202
+ attr_reader :slug
203
+
204
+ def call(object, accumulator, blank_node)
205
+ to = @options.fetch(:to)
206
+ unless force?
207
+ to = @url_handler.within + Array.wrap(to)
208
+ to[-1] = "#{@url_handler.namespace_prefix}#{to[-1]}"
209
+ end
210
+ accumulator.add_predicate_location_and_value(to, object, blank_node)
211
+ end
212
+
213
+ def force?
214
+ @options.fetch(:force, false)
215
+ end
216
+ end
217
+ private_constant :ExplicitLocationSlugHandler
218
+ end
219
+ private_constant :UrlHandler
220
+ end
221
+ end
222
+ end
223
+ end
@@ -0,0 +1,125 @@
1
+ require 'rdf'
2
+ require 'rof/translators/jsonld_to_rof/predicate_handler'
3
+
4
+ module ROF
5
+ module Translators
6
+ module JsonldToRof
7
+ # We need to handle the Predicate / Object pair as one (thank you RDF blank nodes for this nuance)
8
+ module PredicateObjectHandler
9
+ # @api public
10
+ #
11
+ # Parse the RDF::Predicate and RDF::Object and add the relevant data to the accumulator
12
+ #
13
+ # @example
14
+ # Given the following 4 RDF N-Triples (subject, predicate, object). The first and second RDF objects are RDF::Literal. The 3rd triple's object is
15
+ # an RDF::Node. And the last is an RDF::URI. Each requires a different handler as they have nuanced differences.
16
+ # _:b0 <http://purl.org/dc/terms/contributor> "David R.Hyde" .
17
+ # _:b0 <http://www.ndltd.org/standards/metadata/etdms/1.1/role> "Research Director" .
18
+ # <https://curate.nd.edu/show/zk51vd69n1r> <http://purl.org/dc/terms/contributor> _:b0 .
19
+ # <https://curate.nd.edu/show/zk51vd69n1r> <http://projecthydra.org/ns/relations#hasEditorGroup> <https://curate.nd.edu/show/q524jm23g92> .
20
+ #
21
+ # @note It is assumed that all blank nodes (e.g. RDF::Node) will be processed before you process any RDF::URI nodes.
22
+ #
23
+ # @param [RDF::Predicate] predicate - the RDF predicate that we will parse and add to the appropriate spot in the accumulator
24
+ # @param [RDF::Object] object - the RDF object that we will parse and add to the appropriate spot in the accumulator
25
+ # @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
26
+ # @return [ROF::Translators::JsonldToRof::Accumulator] the given accumulator
27
+ # @raise [ROF::Translators::JsonldToRof::PredicateObjectHandler::UnknownRdfObjectTypeError] when the RDF object is not an RDF::URI, RDF::Node, or RDF::Literal
28
+ def self.call(predicate, object, accumulator, options = {})
29
+ new(predicate, object, accumulator, options).call
30
+ accumulator
31
+ end
32
+
33
+ # @api private
34
+ #
35
+ # @param [RDF::Predicate] predicate - the RDF predicate that we will parse and add to the appropriate spot in the accumulator
36
+ # @param [RDF::Object] object - the RDF object that we will parse and add to the appropriate spot in the accumulator
37
+ # @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
38
+ # @return [#call]
39
+ def self.new(predicate, object, accumulator, options)
40
+ klass_for(object).new(predicate, object, accumulator, options)
41
+ end
42
+
43
+ class UnknownRdfObjectTypeError < RuntimeError
44
+ end
45
+
46
+ # @api private
47
+ def self.klass_for(object)
48
+ case object
49
+ when RDF::URI
50
+ UriPredicateObjectHandler
51
+ when RDF::Node
52
+ NodePredicateObjectHandler
53
+ when RDF::Literal
54
+ LiteralPredicateObjectHandler
55
+ else
56
+ raise UnknownRdfObjectTypeError, "Unable to determine object handler for #{object.inspect}"
57
+ end
58
+ end
59
+
60
+ # @api private
61
+ class UriPredicateObjectHandler
62
+ def initialize(predicate, object, accumulator, options)
63
+ @predicate = predicate
64
+ @object = object
65
+ @accumulator = accumulator
66
+ @options = options
67
+ end
68
+
69
+ def call
70
+ PredicateHandler.call(predicate, object, accumulator, options[:blank_node])
71
+ accumulator
72
+ end
73
+
74
+ private
75
+ attr_reader :predicate, :object, :accumulator, :options
76
+ end
77
+ private_constant :UriPredicateObjectHandler
78
+
79
+ # @api private
80
+ # Blank Nodes; Oh how we love thee. Let me count the ways
81
+ class NodePredicateObjectHandler
82
+ def initialize(predicate, object, accumulator, options)
83
+ @predicate = predicate
84
+ @object = object
85
+ @accumulator = accumulator
86
+ @options = options
87
+ end
88
+
89
+ def call
90
+ blank_node = accumulator.fetch_blank_node(object)
91
+ blank_node.each_pair do |blank_node_predicate, blank_node_objects|
92
+ blank_node_objects.each do |blank_node_object|
93
+ PredicateObjectHandler.call(blank_node_predicate, blank_node_object, accumulator, blank_node: object)
94
+ end
95
+ end
96
+ accumulator
97
+ end
98
+
99
+ private
100
+ attr_reader :predicate, :object, :accumulator, :options
101
+ end
102
+ private_constant :NodePredicateObjectHandler
103
+
104
+ # @api private
105
+ class LiteralPredicateObjectHandler
106
+ def initialize(predicate, object, accumulator, options)
107
+ @predicate = predicate
108
+ @object = object
109
+ @accumulator = accumulator
110
+ @options = options
111
+ end
112
+
113
+ def call
114
+ PredicateHandler.call(predicate, object, accumulator, options[:blank_node])
115
+ accumulator
116
+ end
117
+
118
+ private
119
+ attr_reader :predicate, :object, :accumulator, :options
120
+ end
121
+ private_constant :LiteralPredicateObjectHandler
122
+ end
123
+ end
124
+ end
125
+ end
@@ -0,0 +1,91 @@
1
+ require 'rof/translators/jsonld_to_rof/predicate_object_handler'
2
+
3
+ module ROF
4
+ module Translators
5
+ module JsonldToRof
6
+ # Responsible for parsing an RDF statement and adding to the accumulator.
7
+ module StatementHandler
8
+ # @api public
9
+ #
10
+ # Parse the RDF statement and add its contents to the accumulator
11
+ #
12
+ # @example
13
+ # Given the following 4 RDF N-Triples (subject, predicate, object). The first two, with subject "_:b0" represent blank nodes.
14
+ # The last one with subject "<https://curate.nd.edu/show/zk51vd69n1r>" has an object that points to the "_:b0" blank node.
15
+ # _:b0 <http://purl.org/dc/terms/contributor> "David R.Hyde" .
16
+ # _:b0 <http://www.ndltd.org/standards/metadata/etdms/1.1/role> "Research Director" .
17
+ # <https://curate.nd.edu/show/zk51vd69n1r> <http://purl.org/dc/terms/contributor> _:b0 .
18
+ # <https://curate.nd.edu/show/zk51vd69n1r> <http://projecthydra.org/ns/relations#hasEditorGroup> <https://curate.nd.edu/show/q524jm23g92> .
19
+ # For the first two N-Triples you would get a BlankNodeHandler; For the last two, you would get a UriSubjectHandler
20
+ #
21
+ # @note It is assumed that all blank nodes (e.g. RDF::Node) will be processed before you process any RDF::URI nodes.
22
+ #
23
+ # @param [RDF::Statement] statement - the RDF statement that we will parse and add to the appropriate spot in the accumulator
24
+ # @param [ROF::Translators::JsonldToRof::Accumulator] accumulator - a data accumulator that will be changed in place
25
+ # @return [ROF::Translators::JsonldToRof::Accumulator] the given accumulator
26
+ # @raise [ROF::Translators::JsonldToRof::StatementHandler::UnhandledRdfSubjectError] when the RDF::Statement's subject is not a valid type
27
+ def self.call(statement, accumulator)
28
+ new(statement, accumulator).call
29
+ accumulator
30
+ end
31
+
32
+ class UnhandledRdfSubjectError < RuntimeError
33
+ end
34
+
35
+ # @api private
36
+ def self.new(statement, accumulator)
37
+ case statement.subject
38
+ when RDF::URI
39
+ UriSubjectHandler.new(statement, accumulator)
40
+ when RDF::Node
41
+ BlankNodeHandler.new(statement, accumulator)
42
+ else
43
+ raise UnhandledRdfSubjectError, "Unable to determine subject handler for #{statement.inspect}"
44
+ end
45
+ end
46
+
47
+ # Responsible for accumulating the ROF data for a URI based resource
48
+ class UriSubjectHandler
49
+ def initialize(statement, accumulator)
50
+ @accumulator = accumulator
51
+ @statement = statement
52
+ end
53
+
54
+ def call
55
+ handle_subject
56
+ handle_predicate_and_object
57
+ @accumulator
58
+ end
59
+
60
+ private
61
+
62
+ def handle_predicate_and_object
63
+ PredicateObjectHandler.call(@statement.predicate, @statement.object, @accumulator)
64
+ end
65
+
66
+ def handle_subject
67
+ return nil unless @statement.subject.to_s =~ %r{https://curate.nd.edu/show/([^\\]+)/?}
68
+ pid = "und:#{$1}"
69
+ @accumulator.add_pid(pid)
70
+ end
71
+ end
72
+ private_constant :UriSubjectHandler
73
+
74
+ # Responsible for handling blank nodes in the RDF graph; Examples include ETD degree information
75
+ # Blank node subjects behave differently from those handled by UriSubjectHandler
76
+ class BlankNodeHandler
77
+ def initialize(statement, accumulator)
78
+ @accumulator = accumulator
79
+ @statement = statement
80
+ end
81
+
82
+ def call
83
+ @accumulator.add_blank_node(@statement)
84
+ @accumulator
85
+ end
86
+ end
87
+ private_constant :BlankNodeHandler
88
+ end
89
+ end
90
+ end
91
+ end
@@ -0,0 +1,191 @@
1
+ require 'json'
2
+ require 'zlib'
3
+ require 'rubygems/package'
4
+ require 'rdf/turtle'
5
+ require 'rof/osf_context'
6
+ require 'rof/rdf_context'
7
+ require 'rof/utility'
8
+ require 'rof/translator'
9
+
10
+ module ROF::Translators
11
+ # Class for managing OSF Archive data transformations
12
+ # It is called after the get-from-osf task, and before the work-xlat task
13
+ class OsfToRof < ROF::Translator
14
+ # @todo Set this to be something more meaningful than an empty lambda
15
+ # @return [#call]
16
+ def self.default_previously_archived_pid_finder
17
+ ->(archive_type, osf_project_identifier) {}
18
+ end
19
+
20
+ # Convert Osf Archive tar.gz to ROF
21
+ def self.call(project, config, previously_archived_pid_finder = default_previously_archived_pid_finder)
22
+ new(project, config, previously_archived_pid_finder).call
23
+ end
24
+
25
+ def initialize(project, config, previously_archived_pid_finder = self.class.default_previously_archived_pid_finder)
26
+ @config = config
27
+ @project = project
28
+ @previously_archived_pid_finder = previously_archived_pid_finder
29
+ @osf_map = ROF::OsfToNDMap
30
+ end
31
+
32
+ def call
33
+ rof_array = []
34
+ return {} if project.nil?
35
+ @ttl_data = ttl_from_targz(source_slug + '.ttl')
36
+ rof_array[0] = build_archive_record
37
+ rof_array
38
+ end
39
+
40
+ # @api private
41
+ # @see https://github.com/ndlib/curate_nd/blob/677c05c836ff913c01dcbbfc5e5d21366b87d500/app/repository_models/osf_archive.rb#L62
42
+ def archive_type
43
+ project.fetch('package_type')
44
+ end
45
+
46
+ # @api private
47
+ # This is a bit of a misnomer; As used it represents the path to the project or registration
48
+ # that we have ingested (e.g. https://osf.io/:source_slug)
49
+ #
50
+ # It was previously named :project_identifier in this class, but that gets conflated with the
51
+ # underlying object's osf_project_identifier (e.g. what OSF Project was this archive originally based on)
52
+ #
53
+ # @see https://github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/app/repository_models/osf_archive.rb#L96
54
+ def source_slug
55
+ project.fetch('project_identifier')
56
+ end
57
+
58
+ # @api private
59
+ # @see https://github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/app/repository_models/osf_archive.rb#L106
60
+ def osf_project_identifier
61
+ return source_slug if project['package_type'] == 'OSF Project'
62
+ osf_url_from_filename(ttl_data[0][@osf_map['registeredFrom']][0]['@id'])
63
+ end
64
+
65
+ private
66
+
67
+ attr_reader :config, :project
68
+
69
+ # A function responsible for finding the previously archived pid.
70
+ # @return [#call]
71
+ # @see #default_previously_archived_pid_finder for interface
72
+ attr_reader :previously_archived_pid_finder
73
+
74
+ # This is an array; the first element is the work and any additional elements are the contributor(s)
75
+ # @return [Array<Hash>]
76
+ # @see #ttl_from_targz
77
+ attr_reader :ttl_data
78
+
79
+ # reads a ttl file and makes it a JSON-LD file that we can parse
80
+ def fetch_from_ttl(ttl_file)
81
+ graph = RDF::Turtle::Reader.open(ttl_file, prefixes: ROF::OsfPrefixList.dup)
82
+ JSON::LD::API.fromRdf(graph)
83
+ end
84
+
85
+ # extracts given ttl file from JHU tar.gz package
86
+ # - assumed to live under data/obj/root
87
+ # @return [Array<Hash>] the first element is the "work" and the additional elements, if any, are the contributor(s)
88
+ def ttl_from_targz(ttl_filename)
89
+ package_dir = config.fetch('package_dir')
90
+ ttl_path = File.join(source_slug, 'data/obj/root', ttl_filename)
91
+ ROF::Utility.file_from_targz(File.join(package_dir, source_slug + '.tar.gz'), ttl_path)
92
+ fetch_from_ttl(File.join(package_dir, ttl_path))
93
+ end
94
+
95
+ # Maps RELS-EXT
96
+ def map_rels_ext
97
+ rels_ext = {}
98
+ rels_ext['@context'] = ROF::RelsExtRefContext.dup
99
+ apply_previous_archived_version_if_applicable(rels_ext)
100
+ rels_ext
101
+ end
102
+
103
+ # sets metadata
104
+ def map_metadata
105
+ metadata = {}
106
+ metadata['@context'] = ROF::RdfContext.dup
107
+ # metadata derived from project ttl file
108
+ metadata['dc:created'] = Time.iso8601(ttl_data[0][@osf_map['dc:created']][0]['@value']).to_date.iso8601 + 'Z'
109
+ metadata['dc:title'] = ttl_data[0][@osf_map['dc:title']][0]['@value']
110
+ metadata['dc:description'] = ttl_data[0][@osf_map['dc:description']][0]['@value']
111
+ metadata['dc:subject'] = map_subject
112
+ # metadata derived from osf_projects data, passed from UI
113
+ metadata['dc:source'] = 'https://osf.io/' + source_slug
114
+ metadata['dc:creator#adminstrative_unit'] = project['administrative_unit']
115
+ metadata['dc:creator#affiliation'] = project['affiliation']
116
+ metadata['nd:osfProjectIdentifier'] = osf_project_identifier
117
+ metadata['dc:creator'] = map_creator
118
+ metadata['dc:type'] = project['package_type']
119
+ metadata
120
+ end
121
+
122
+ # For reference to the assumed RELS-EXT see the following spec in CurateND
123
+ # @see https://github.com/ndlib/curate_nd/blob/115efec2e046257282a86fe2cd98c7d229d04cf9/spec/repository_models/osf_archive_spec.rb#L97
124
+ def apply_previous_archived_version_if_applicable(rels_ext)
125
+ # If a previously archived pid was passed in, use it to set pav:previousVersion
126
+ # If not, check SOLR for one.
127
+ pid = previously_archived_pid_finder.call(archive_type, osf_project_identifier)
128
+ pid = ROF::Utility.check_solr_for_previous(config, osf_project_identifier) if pid.nil?
129
+ rels_ext['pav:previousVersion'] = pid if pid
130
+ rels_ext
131
+ end
132
+
133
+ # Constructs OsfArchive Record from ttl_data, data from the UI form,
134
+ # and task config data
135
+ def build_archive_record
136
+ this_rof = {}
137
+ this_rof['owner'] = project['owner']
138
+ this_rof['type'] = 'OsfArchive'
139
+ this_rof['rights'] = map_rights
140
+ this_rof['rels-ext'] = map_rels_ext
141
+ this_rof['metadata'] = map_metadata
142
+ this_rof['files'] = [source_slug + '.tar.gz']
143
+ this_rof
144
+ end
145
+
146
+ # sets subject
147
+ def map_subject
148
+ if ttl_data[0].key?(@osf_map['dc:subject'])
149
+ return ttl_data[0][@osf_map['dc:subject']][0]['@value']
150
+ end
151
+ ''
152
+ end
153
+
154
+ # make osf url from bagfile name
155
+ def osf_url_from_filename(ttl_file)
156
+ project_id = ttl_file.rpartition('/')[2].rpartition('.')[0]
157
+ project_id
158
+ end
159
+
160
+ # figures out the rights
161
+ def map_rights
162
+ rights = {}
163
+ if ttl_data[0][@osf_map['isPublic']][0]['@value'] == 'true'
164
+ rights['read-groups'] = ['public']
165
+ end
166
+ rights
167
+ end
168
+
169
+ # sets the creator- needs to read another ttl for the User data
170
+ # only contributors with isBibliographic true are considered
171
+ def map_creator
172
+ creator = []
173
+ ttl_data[0][@osf_map['hasContributor']].each do |contributor|
174
+ # Looping through the primary document and the contributors
175
+ ttl_data.each do |item|
176
+ next unless item['@id'] == contributor['@id']
177
+ if item[@osf_map['isBibliographic']][0]['@value'] == 'true'
178
+ creator.push map_user_from_ttl(item[@osf_map['hasUser']][0]['@id'])
179
+ end
180
+ end
181
+ end
182
+ creator
183
+ end
184
+
185
+ # read user ttl file, extract User's full name
186
+ def map_user_from_ttl(file_subpath)
187
+ user_ttl_data = ttl_from_targz(File.basename(file_subpath))
188
+ user_ttl_data[0][@osf_map['hasFullName']][0]['@value']
189
+ end
190
+ end
191
+ end