hybag 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YWRkN2Q0ZGNmODNmOTQ5NmVjOGY4ZWQ3MzhhZTc1MTU1NDFlZGYzNA==
4
+ MjEwZDk1ODkxNDQ4OThiMzhhM2U0ZDI3MDdkM2M3YTAwZjM0MzI1Yw==
5
5
  data.tar.gz: !binary |-
6
- Y2ZlMDIwYWE1OTExOGY4NzY2MTI2MTgxODliODhhOTdhZDM1Y2NkNg==
6
+ OTlmYmU4Y2UzMjBjY2Y1ODg0NmJhN2VjMzFkZTkyYjEyYzM3OGYzYQ==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- OTYwNDA0OWRlZGE3MWJkNjk5ZDM3YWMxMjA3NjRkNjM2ZmNiZGI4MmYwNTk3
10
- ZGQzMzk3NjI3ZDkxMjg3Y2UxYWM5ZWM4NThmMjA4NjM3YWY0NmJhYzBiZWRj
11
- ZWNjNzJiYTdmZDI4ZTM4NDZkNTc3YjNiZmRjYjhlNzI5OTE0ZjQ=
9
+ NjhhZWY3NDczZmE2MzM4MDE2NjkyZGJjOWYxYzA0ZjgwYjhlMDAxM2Y0NWM0
10
+ ZmVhNjYyYmVkNzM1NzMyZDAzYTNhNWZkYjY3ZjEzNzRhMTM0YzA2YzVlMjcx
11
+ MTgzYjk1MzlmYzQxMTZmYTY0NmJhZWQxMjE4YmMxMWEyNjZhMWU=
12
12
  data.tar.gz: !binary |-
13
- NWUwMTFjMDM0NTc5ZTdlOWE4YjYyNjgwNDI4M2JlMGZkY2IzMjZkNzUzOTE2
14
- ZDNlYjZiMzNjODk5ODRjM2IwNzQ0YTM4OGFkM2JkNmZjZTI4M2Q3MzUxMjIx
15
- MzNlYjYyZTU3Y2RhZDYzY2Q5MzcxMjc4MGJiYmM0ZGNkNDI1YmE=
13
+ ZTNmMzdkNjJjNmE0MGQwYzA4NTU0NjM5Yzk1OTYwZGU2YmFmOWJkNTY0YzI5
14
+ YzQ5MzRmNzM0MTg3NzhlZjhiYzI1NDJmOGRkMmRkNjc1MjhhZmYyNjhhNjY5
15
+ OWNhMjIzMjIyMGU0MzE0ZjI4OGVjZTJkMjBlZmUyZjRkOThmYmM=
@@ -11,7 +11,7 @@ module Hybag
11
11
  raise "Unable to determine model from bag" if model_name.blank?
12
12
  new_object = ActiveFedora.class_from_string(model_name.to_s).new
13
13
  # Assign a pid
14
- new_object.inner_object.pid = ActiveFedora::Base.assign_pid(new_object)
14
+ new_object.inner_object.pid = new_object.inner_object.assign_pid
15
15
  set_metadata_streams(new_object)
16
16
  set_file_streams(new_object)
17
17
  return new_object
@@ -19,58 +19,64 @@ module Hybag
19
19
 
20
20
  private
21
21
 
22
- # TODO: What to do if the bag has files that don't have model definitions?
23
22
  # TODO: Add some sort of configuration to map bag filenames -> dsids.
24
23
  def set_metadata_streams(object)
25
- object.metadata_streams.each do |ds|
26
- if bag_has_metastream?(ds.dsid)
27
- ds.content = bag_metastream(ds.dsid).read.strip
28
- # Assume the first subject in the metadata is about this object.
29
- # TODO: Move this to configuration?
30
- first_subject = ds.graph.first_subject
31
- new_repository = RDF::Repository.new
32
- ds.graph.each_statement do |statement|
33
- subject = statement.subject
34
- subject = ds.rdf_subject if subject == first_subject
35
- new_repository << [subject, statement.predicate, statement.object]
36
- end
37
- ds.instance_variable_set(:@graph,new_repository)
38
- end
24
+ bag_tag_files.each do |tag_file|
25
+ add_bag_file_to_object(object, tag_file, false)
39
26
  end
40
27
  end
41
28
 
42
- def set_file_streams(object)
43
- file_streams = object.datastreams.select{|k, ds| !ds.metadata?}.values
44
- file_streams.each do |ds|
45
- if bag_has_datastream?(ds.dsid)
46
- ds.content = bag_datastream(ds.dsid).read
47
- end
48
- end
29
+
30
+ # Returns all registered tag files except those generated for the bag.
31
+ # These include the bag_info.txt, bagit.txt, and manifest files.
32
+ def bag_tag_files
33
+ bag.tag_files - [bag.bag_info_txt_file] - bag.manifest_files - [bag.bagit_txt_file]
49
34
  end
50
35
 
51
- # TODO: Might consider decoration at some point.
52
- def bag_filename_to_label(bag_filename)
53
- Pathname.new(bag_filename).basename.sub_ext('').to_s
36
+ def add_bag_file_to_object(object, bag_file, binary=true)
37
+ parsed_name = bag_filename_to_label(bag_file)
38
+ found_datastream = object.datastreams.values.find{|x| x.dsid.downcase == bag_filename_to_label(bag_file).downcase}
39
+ content = File.open(bag_file).read
40
+ content = transform_content(content) unless binary
41
+ if found_datastream
42
+ found_datastream = replace_subject(content, found_datastream)
43
+ else
44
+ object.add_file_datastream(content, :dsid => parsed_name)
45
+ end
54
46
  end
55
47
 
56
- def bag_has_datastream?(label)
57
- bag.bag_files.any?{|x| bag_filename_to_label(x) == label}
48
+ def transform_content(content)
49
+ content = content.strip
58
50
  end
59
51
 
60
- def bag_datastream(label)
61
- bag_file = bag.bag_files.select{|x| bag_filename_to_label(x) == label}.first
62
- result = File.open(bag_file) unless bag_file.blank?
63
- return result
52
+ # Replaces the subject in RDF files with the datastream's rdf_subject.
53
+ # TODO: Deal with what happens when there's no defined datastream.
54
+ def replace_subject(content, ds)
55
+ ds.content = content
56
+ if ds.respond_to?(:rdf_subject)
57
+ # Assume the first subject in the metadata is about this object.
58
+ # TODO: Move this to configuration?
59
+ first_subject = ds.graph.first_subject
60
+ new_repository = RDF::Repository.new
61
+ ds.graph.each_statement do |statement|
62
+ subject = statement.subject
63
+ subject = ds.rdf_subject if subject == first_subject
64
+ new_repository << [subject, statement.predicate, statement.object]
65
+ end
66
+ ds.instance_variable_set(:@graph,new_repository)
67
+ end
68
+ return ds
64
69
  end
65
70
 
66
- def bag_has_metastream?(label)
67
- bag.tag_files.any?{|x| bag_filename_to_label(x) == label}
71
+ def set_file_streams(object)
72
+ bag.bag_files.each do |bag_file|
73
+ add_bag_file_to_object(object, bag_file)
74
+ end
68
75
  end
69
76
 
70
- def bag_metastream(label)
71
- tag_file = bag.tag_files.select{|x| bag_filename_to_label(x) == label}.first
72
- result = File.open(tag_file) unless tag_file.blank?
73
- return result
77
+ # TODO: Might consider decoration at some point.
78
+ def bag_filename_to_label(bag_filename)
79
+ Pathname.new(bag_filename).basename.sub_ext('').to_s
74
80
  end
75
81
 
76
82
  def model_name
data/lib/hybag/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Hybag
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.8"
3
3
  end
@@ -112,6 +112,49 @@ describe Hybag::Ingester do
112
112
  it "should not be persisted" do
113
113
  expect(built_model).not_to be_persisted
114
114
  end
115
+ context "when there is a file datastream and no matching datastream defined" do
116
+ before(:each) do
117
+ # Add the hydra.png from fixture
118
+ FakeFS.deactivate!
119
+ hybag_content = File.read(File.join(FIXTURE_PATH,"hydra.png"))
120
+ FakeFS.activate!
121
+ File.open(File.join(bag.data_dir,"new_content.png"),'wb') {|f| f.puts hybag_content}
122
+ end
123
+ it "should add that datastream" do
124
+ expect(built_model.datastreams.keys).to include("new_content")
125
+ end
126
+ end
127
+ context "when there is a metadata stream and no matching datastream defined" do
128
+ before(:each) do
129
+ # Add the example_datastream.nt from fixture
130
+ FakeFS.deactivate!
131
+ @hybag_content = File.read(File.join(FIXTURE_PATH,"example_datastream.nt"))
132
+ FakeFS.activate!
133
+ bag.add_tag_file("example_datastream.nt") do |f|
134
+ f.write @hybag_content
135
+ end
136
+ end
137
+ it "should add that file as a datastream" do
138
+ expect(built_model.datastreams.keys).to include("example_datastream")
139
+ expect(built_model.datastreams.values.find{|x| x.dsid == "example_datastream"}.content).to eq @hybag_content.strip
140
+ end
141
+ context "and it's an RDF datastream" do
142
+ it "should replace the subject"
143
+ end
144
+ end
145
+ context "when there is an unregistered tag file and no matching datastream defined" do
146
+ before(:each) do
147
+ FakeFS.deactivate!
148
+ @hybag_content = File.read(File.join(FIXTURE_PATH,"example_datastream.nt"))
149
+ FakeFS.activate!
150
+ File.open(File.join(bag.bag_dir, "new_tag_file.nt"), 'wb') {|f| f.puts @hybag_content}
151
+ end
152
+ # TODO: Write this when bagit supports returning unmarked tag files.
153
+ xit "should add that datastream" do
154
+ expect(built_model.datastreams.keys).to include("new_tag_file")
155
+ expect(built_model.datastreams.values.find{|x| x.dsid == "new_tag_file"}.content).to eq @hybag_content.strip
156
+ end
157
+ end
115
158
  end
116
159
  end
117
160
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hybag
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Trey Terrell
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-08-08 00:00:00.000000000 Z
11
+ date: 2013-09-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler