hybag 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/hybag/ingester.rb +44 -38
- data/lib/hybag/version.rb +1 -1
- data/spec/lib/hybag/ingester_spec.rb +43 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MjEwZDk1ODkxNDQ4OThiMzhhM2U0ZDI3MDdkM2M3YTAwZjM0MzI1Yw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
OTlmYmU4Y2UzMjBjY2Y1ODg0NmJhN2VjMzFkZTkyYjEyYzM3OGYzYQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NjhhZWY3NDczZmE2MzM4MDE2NjkyZGJjOWYxYzA0ZjgwYjhlMDAxM2Y0NWM0
|
10
|
+
ZmVhNjYyYmVkNzM1NzMyZDAzYTNhNWZkYjY3ZjEzNzRhMTM0YzA2YzVlMjcx
|
11
|
+
MTgzYjk1MzlmYzQxMTZmYTY0NmJhZWQxMjE4YmMxMWEyNjZhMWU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZTNmMzdkNjJjNmE0MGQwYzA4NTU0NjM5Yzk1OTYwZGU2YmFmOWJkNTY0YzI5
|
14
|
+
YzQ5MzRmNzM0MTg3NzhlZjhiYzI1NDJmOGRkMmRkNjc1MjhhZmYyNjhhNjY5
|
15
|
+
OWNhMjIzMjIyMGU0MzE0ZjI4OGVjZTJkMjBlZmUyZjRkOThmYmM=
|
data/lib/hybag/ingester.rb
CHANGED
@@ -11,7 +11,7 @@ module Hybag
|
|
11
11
|
raise "Unable to determine model from bag" if model_name.blank?
|
12
12
|
new_object = ActiveFedora.class_from_string(model_name.to_s).new
|
13
13
|
# Assign a pid
|
14
|
-
new_object.inner_object.pid =
|
14
|
+
new_object.inner_object.pid = new_object.inner_object.assign_pid
|
15
15
|
set_metadata_streams(new_object)
|
16
16
|
set_file_streams(new_object)
|
17
17
|
return new_object
|
@@ -19,58 +19,64 @@ module Hybag
|
|
19
19
|
|
20
20
|
private
|
21
21
|
|
22
|
-
# TODO: What to do if the bag has files that don't have model definitions?
|
23
22
|
# TODO: Add some sort of configuration to map bag filenames -> dsids.
|
24
23
|
def set_metadata_streams(object)
|
25
|
-
|
26
|
-
|
27
|
-
ds.content = bag_metastream(ds.dsid).read.strip
|
28
|
-
# Assume the first subject in the metadata is about this object.
|
29
|
-
# TODO: Move this to configuration?
|
30
|
-
first_subject = ds.graph.first_subject
|
31
|
-
new_repository = RDF::Repository.new
|
32
|
-
ds.graph.each_statement do |statement|
|
33
|
-
subject = statement.subject
|
34
|
-
subject = ds.rdf_subject if subject == first_subject
|
35
|
-
new_repository << [subject, statement.predicate, statement.object]
|
36
|
-
end
|
37
|
-
ds.instance_variable_set(:@graph,new_repository)
|
38
|
-
end
|
24
|
+
bag_tag_files.each do |tag_file|
|
25
|
+
add_bag_file_to_object(object, tag_file, false)
|
39
26
|
end
|
40
27
|
end
|
41
28
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
end
|
48
|
-
end
|
29
|
+
|
30
|
+
# Returns all registered tag files except those generated for the bag
|
31
|
+
# These includes the bag_info.txt, bagit.txt, and manifest files.
|
32
|
+
def bag_tag_files
|
33
|
+
bag.tag_files - [bag.bag_info_txt_file] - bag.manifest_files - [bag.bagit_txt_file]
|
49
34
|
end
|
50
35
|
|
51
|
-
|
52
|
-
|
53
|
-
|
36
|
+
def add_bag_file_to_object(object, bag_file, binary=true)
|
37
|
+
parsed_name = bag_filename_to_label(bag_file)
|
38
|
+
found_datastream = object.datastreams.values.find{|x| x.dsid.downcase == bag_filename_to_label(bag_file).downcase}
|
39
|
+
content = File.open(bag_file).read
|
40
|
+
content = transform_content(content) unless binary
|
41
|
+
if found_datastream
|
42
|
+
found_datastream = replace_subject(content, found_datastream)
|
43
|
+
else
|
44
|
+
object.add_file_datastream(content, :dsid => parsed_name)
|
45
|
+
end
|
54
46
|
end
|
55
47
|
|
56
|
-
def
|
57
|
-
|
48
|
+
def transform_content(content)
|
49
|
+
content = content.strip
|
58
50
|
end
|
59
51
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
52
|
+
# Replaces the subject in RDF files with the datastream's rdf_subject.
|
53
|
+
# TODO: Deal with what happens when there's no defined datastream.
|
54
|
+
def replace_subject(content, ds)
|
55
|
+
ds.content = content
|
56
|
+
if ds.respond_to?(:rdf_subject)
|
57
|
+
# Assume the first subject in the metadata is about this object.
|
58
|
+
# TODO: Move this to configuration?
|
59
|
+
first_subject = ds.graph.first_subject
|
60
|
+
new_repository = RDF::Repository.new
|
61
|
+
ds.graph.each_statement do |statement|
|
62
|
+
subject = statement.subject
|
63
|
+
subject = ds.rdf_subject if subject == first_subject
|
64
|
+
new_repository << [subject, statement.predicate, statement.object]
|
65
|
+
end
|
66
|
+
ds.instance_variable_set(:@graph,new_repository)
|
67
|
+
end
|
68
|
+
return ds
|
64
69
|
end
|
65
70
|
|
66
|
-
def
|
67
|
-
bag.
|
71
|
+
def set_file_streams(object)
|
72
|
+
bag.bag_files.each do |bag_file|
|
73
|
+
add_bag_file_to_object(object, bag_file)
|
74
|
+
end
|
68
75
|
end
|
69
76
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
return result
|
77
|
+
# TODO: Might consider decoration at some point.
|
78
|
+
def bag_filename_to_label(bag_filename)
|
79
|
+
Pathname.new(bag_filename).basename.sub_ext('').to_s
|
74
80
|
end
|
75
81
|
|
76
82
|
def model_name
|
data/lib/hybag/version.rb
CHANGED
[hunk for version.rb (+1 -1) not captured]
data/spec/lib/hybag/ingester_spec.rb
CHANGED
@@ -112,6 +112,49 @@ describe Hybag::Ingester do
|
|
112
112
|
it "should not be persisted" do
|
113
113
|
expect(built_model).not_to be_persisted
|
114
114
|
end
|
115
|
+
context "when there is a file datastream and no matching datastream defined" do
|
116
|
+
before(:each) do
|
117
|
+
# Add the hydra.png from fixture
|
118
|
+
FakeFS.deactivate!
|
119
|
+
hybag_content = File.read(File.join(FIXTURE_PATH,"hydra.png"))
|
120
|
+
FakeFS.activate!
|
121
|
+
File.open(File.join(bag.data_dir,"new_content.png"),'wb') {|f| f.puts hybag_content}
|
122
|
+
end
|
123
|
+
it "should add that datastream" do
|
124
|
+
expect(built_model.datastreams.keys).to include("new_content")
|
125
|
+
end
|
126
|
+
end
|
127
|
+
context "when there is a metadata stream and no matching datastream defined" do
|
128
|
+
before(:each) do
|
129
|
+
# Add the example_datastream.nt from fixture
|
130
|
+
FakeFS.deactivate!
|
131
|
+
@hybag_content = File.read(File.join(FIXTURE_PATH,"example_datastream.nt"))
|
132
|
+
FakeFS.activate!
|
133
|
+
bag.add_tag_file("example_datastream.nt") do |f|
|
134
|
+
f.write @hybag_content
|
135
|
+
end
|
136
|
+
end
|
137
|
+
it "should add that file as a datastream" do
|
138
|
+
expect(built_model.datastreams.keys).to include("example_datastream")
|
139
|
+
expect(built_model.datastreams.values.find{|x| x.dsid == "example_datastream"}.content).to eq @hybag_content.strip
|
140
|
+
end
|
141
|
+
context "and it's an RDF datastream" do
|
142
|
+
it "should replace the subject"
|
143
|
+
end
|
144
|
+
end
|
145
|
+
context "when there is an unregistered tag file and no matching datastream defined" do
|
146
|
+
before(:each) do
|
147
|
+
FakeFS.deactivate!
|
148
|
+
@hybag_content = File.read(File.join(FIXTURE_PATH,"example_datastream.nt"))
|
149
|
+
FakeFS.activate!
|
150
|
+
File.open(File.join(bag.bag_dir, "new_tag_file.nt"), 'wb') {|f| f.puts @hybag_content}
|
151
|
+
end
|
152
|
+
# TODO: Write this when bagit supports returning unmarked tag files.
|
153
|
+
xit "should add that datastream" do
|
154
|
+
expect(built_model.datastreams.keys).to include("new_tag_file")
|
155
|
+
expect(built_model.datastreams.values.find{|x| x.dsid == "new_tag_file"}.content).to eq @hybag_content.strip
|
156
|
+
end
|
157
|
+
end
|
115
158
|
end
|
116
159
|
end
|
117
160
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hybag
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Trey Terrell
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-09-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|