hybag 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/hybag/ingester.rb +44 -38
- data/lib/hybag/version.rb +1 -1
- data/spec/lib/hybag/ingester_spec.rb +43 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MjEwZDk1ODkxNDQ4OThiMzhhM2U0ZDI3MDdkM2M3YTAwZjM0MzI1Yw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
OTlmYmU4Y2UzMjBjY2Y1ODg0NmJhN2VjMzFkZTkyYjEyYzM3OGYzYQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NjhhZWY3NDczZmE2MzM4MDE2NjkyZGJjOWYxYzA0ZjgwYjhlMDAxM2Y0NWM0
|
10
|
+
ZmVhNjYyYmVkNzM1NzMyZDAzYTNhNWZkYjY3ZjEzNzRhMTM0YzA2YzVlMjcx
|
11
|
+
MTgzYjk1MzlmYzQxMTZmYTY0NmJhZWQxMjE4YmMxMWEyNjZhMWU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZTNmMzdkNjJjNmE0MGQwYzA4NTU0NjM5Yzk1OTYwZGU2YmFmOWJkNTY0YzI5
|
14
|
+
YzQ5MzRmNzM0MTg3NzhlZjhiYzI1NDJmOGRkMmRkNjc1MjhhZmYyNjhhNjY5
|
15
|
+
OWNhMjIzMjIyMGU0MzE0ZjI4OGVjZTJkMjBlZmUyZjRkOThmYmM=
|
data/lib/hybag/ingester.rb
CHANGED
@@ -11,7 +11,7 @@ module Hybag
|
|
11
11
|
raise "Unable to determine model from bag" if model_name.blank?
|
12
12
|
new_object = ActiveFedora.class_from_string(model_name.to_s).new
|
13
13
|
# Assign a pid
|
14
|
-
new_object.inner_object.pid =
|
14
|
+
new_object.inner_object.pid = new_object.inner_object.assign_pid
|
15
15
|
set_metadata_streams(new_object)
|
16
16
|
set_file_streams(new_object)
|
17
17
|
return new_object
|
@@ -19,58 +19,64 @@ module Hybag
|
|
19
19
|
|
20
20
|
private
|
21
21
|
|
22
|
-
# TODO: What to do if the bag has files that don't have model definitions?
|
23
22
|
# TODO: Add some sort of configuration to map bag filenames -> dsids.
|
24
23
|
def set_metadata_streams(object)
|
25
|
-
|
26
|
-
|
27
|
-
ds.content = bag_metastream(ds.dsid).read.strip
|
28
|
-
# Assume the first subject in the metadata is about this object.
|
29
|
-
# TODO: Move this to configuration?
|
30
|
-
first_subject = ds.graph.first_subject
|
31
|
-
new_repository = RDF::Repository.new
|
32
|
-
ds.graph.each_statement do |statement|
|
33
|
-
subject = statement.subject
|
34
|
-
subject = ds.rdf_subject if subject == first_subject
|
35
|
-
new_repository << [subject, statement.predicate, statement.object]
|
36
|
-
end
|
37
|
-
ds.instance_variable_set(:@graph,new_repository)
|
38
|
-
end
|
24
|
+
bag_tag_files.each do |tag_file|
|
25
|
+
add_bag_file_to_object(object, tag_file, false)
|
39
26
|
end
|
40
27
|
end
|
41
28
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
end
|
48
|
-
end
|
29
|
+
|
30
|
+
# Returns all registered tag files except those generated for the bag
|
31
|
+
# These include the bag_info.txt, bagit.txt, and manifest files.
|
32
|
+
def bag_tag_files
|
33
|
+
bag.tag_files - [bag.bag_info_txt_file] - bag.manifest_files - [bag.bagit_txt_file]
|
49
34
|
end
|
50
35
|
|
51
|
-
|
52
|
-
|
53
|
-
|
36
|
+
def add_bag_file_to_object(object, bag_file, binary=true)
|
37
|
+
parsed_name = bag_filename_to_label(bag_file)
|
38
|
+
found_datastream = object.datastreams.values.find{|x| x.dsid.downcase == bag_filename_to_label(bag_file).downcase}
|
39
|
+
content = File.open(bag_file).read
|
40
|
+
content = transform_content(content) unless binary
|
41
|
+
if found_datastream
|
42
|
+
found_datastream = replace_subject(content, found_datastream)
|
43
|
+
else
|
44
|
+
object.add_file_datastream(content, :dsid => parsed_name)
|
45
|
+
end
|
54
46
|
end
|
55
47
|
|
56
|
-
def
|
57
|
-
|
48
|
+
def transform_content(content)
|
49
|
+
content = content.strip
|
58
50
|
end
|
59
51
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
52
|
+
# Replaces the subject in RDF files with the datastream's rdf_subject.
|
53
|
+
# TODO: Deal with what happens when there's no defined datastream.
|
54
|
+
def replace_subject(content, ds)
|
55
|
+
ds.content = content
|
56
|
+
if ds.respond_to?(:rdf_subject)
|
57
|
+
# Assume the first subject in the metadata is about this object.
|
58
|
+
# TODO: Move this to configuration?
|
59
|
+
first_subject = ds.graph.first_subject
|
60
|
+
new_repository = RDF::Repository.new
|
61
|
+
ds.graph.each_statement do |statement|
|
62
|
+
subject = statement.subject
|
63
|
+
subject = ds.rdf_subject if subject == first_subject
|
64
|
+
new_repository << [subject, statement.predicate, statement.object]
|
65
|
+
end
|
66
|
+
ds.instance_variable_set(:@graph,new_repository)
|
67
|
+
end
|
68
|
+
return ds
|
64
69
|
end
|
65
70
|
|
66
|
-
def
|
67
|
-
bag.
|
71
|
+
def set_file_streams(object)
|
72
|
+
bag.bag_files.each do |bag_file|
|
73
|
+
add_bag_file_to_object(object, bag_file)
|
74
|
+
end
|
68
75
|
end
|
69
76
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
return result
|
77
|
+
# TODO: Might consider decoration at some point.
|
78
|
+
def bag_filename_to_label(bag_filename)
|
79
|
+
Pathname.new(bag_filename).basename.sub_ext('').to_s
|
74
80
|
end
|
75
81
|
|
76
82
|
def model_name
|
data/lib/hybag/version.rb
CHANGED
data/spec/lib/hybag/ingester_spec.rb
CHANGED
@@ -112,6 +112,49 @@ describe Hybag::Ingester do
|
|
112
112
|
it "should not be persisted" do
|
113
113
|
expect(built_model).not_to be_persisted
|
114
114
|
end
|
115
|
+
context "when there is a file datastream and no matching datastream defined" do
|
116
|
+
before(:each) do
|
117
|
+
# Add the hydra.png from fixture
|
118
|
+
FakeFS.deactivate!
|
119
|
+
hybag_content = File.read(File.join(FIXTURE_PATH,"hydra.png"))
|
120
|
+
FakeFS.activate!
|
121
|
+
File.open(File.join(bag.data_dir,"new_content.png"),'wb') {|f| f.puts hybag_content}
|
122
|
+
end
|
123
|
+
it "should add that datastream" do
|
124
|
+
expect(built_model.datastreams.keys).to include("new_content")
|
125
|
+
end
|
126
|
+
end
|
127
|
+
context "when there is a metadata stream and no matching datastream defined" do
|
128
|
+
before(:each) do
|
129
|
+
# Add the example_datastream.nt from fixture
|
130
|
+
FakeFS.deactivate!
|
131
|
+
@hybag_content = File.read(File.join(FIXTURE_PATH,"example_datastream.nt"))
|
132
|
+
FakeFS.activate!
|
133
|
+
bag.add_tag_file("example_datastream.nt") do |f|
|
134
|
+
f.write @hybag_content
|
135
|
+
end
|
136
|
+
end
|
137
|
+
it "should add that file as a datastream" do
|
138
|
+
expect(built_model.datastreams.keys).to include("example_datastream")
|
139
|
+
expect(built_model.datastreams.values.find{|x| x.dsid == "example_datastream"}.content).to eq @hybag_content.strip
|
140
|
+
end
|
141
|
+
context "and it's an RDF datastream" do
|
142
|
+
it "should replace the subject"
|
143
|
+
end
|
144
|
+
end
|
145
|
+
context "when there is an unregistered tag file and no matching datastream defined" do
|
146
|
+
before(:each) do
|
147
|
+
FakeFS.deactivate!
|
148
|
+
@hybag_content = File.read(File.join(FIXTURE_PATH,"example_datastream.nt"))
|
149
|
+
FakeFS.activate!
|
150
|
+
File.open(File.join(bag.bag_dir, "new_tag_file.nt"), 'wb') {|f| f.puts @hybag_content}
|
151
|
+
end
|
152
|
+
# TODO: Write this when bagit supports returning unmarked tag files.
|
153
|
+
xit "should add that datastream" do
|
154
|
+
expect(built_model.datastreams.keys).to include("new_tag_file")
|
155
|
+
expect(built_model.datastreams.values.find{|x| x.dsid == "new_tag_file"}.content).to eq @hybag_content.strip
|
156
|
+
end
|
157
|
+
end
|
115
158
|
end
|
116
159
|
end
|
117
160
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hybag
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Trey Terrell
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-09-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|