lyber-core 0.9.6.2
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README.rdoc +58 -0
- data/lib/dlss_service.rb +82 -0
- data/lib/dor/base.rb +18 -0
- data/lib/dor/suri_service.rb +28 -0
- data/lib/dor/workflow_service.rb +111 -0
- data/lib/dor_service.rb +535 -0
- data/lib/lyber_core.rb +14 -0
- data/lib/lyber_core/connection.rb +97 -0
- data/lib/lyber_core/destroyer.rb +74 -0
- data/lib/lyber_core/exceptions/empty_queue.rb +9 -0
- data/lib/lyber_core/log.rb +105 -0
- data/lib/lyber_core/rake/dlss_release.rb +126 -0
- data/lib/lyber_core/robots/robot.rb +214 -0
- data/lib/lyber_core/robots/work_item.rb +103 -0
- data/lib/lyber_core/robots/work_queue.rb +154 -0
- data/lib/lyber_core/robots/workflow.rb +104 -0
- data/lib/lyber_core/robots/workspace.rb +77 -0
- data/lib/lyber_core/utils.rb +4 -0
- data/lib/lyber_core/utils/bagit_bag.rb +100 -0
- data/lib/lyber_core/utils/checksum_validate.rb +65 -0
- data/lib/lyber_core/utils/file_utilities.rb +168 -0
- data/lib/roxml_models/identity_metadata/dublin_core.rb +46 -0
- data/lib/roxml_models/identity_metadata/identity_metadata.rb +118 -0
- data/lib/tasks/rdoc.rake +32 -0
- metadata +371 -0
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'dor_service'
|
2
|
+
require "roxml_models/identity_metadata/identity_metadata"
|
3
|
+
require "roxml_models/identity_metadata/dublin_core"
|
4
|
+
|
5
|
+
# Represents a single object being processed as part of a workflow queue
|
6
|
+
module LyberCore
  module Robots
    # Represents a single object being processed as part of a workflow queue.
    #
    # A WorkItem is created by its parent WorkQueue, carries the identifiers of
    # the object being worked on, and reports the outcome of the workflow step
    # (success or error) back to the queue's counters and to the workflow
    # service.
    class WorkItem
      # The queue that this workitem is a member of
      attr_reader :work_queue
      # The primary id for the object being processed
      attr_accessor :druid
      # Inject an IdentityMetadata object (currently used for unit testing only).
      # Only the writer is generated here; the reader is written out below
      # because it loads the object on first use.
      attr_writer :identity_metadata
      # Timings for this workitem's processing
      attr_reader :start_time, :end_time, :elapsed_time

      # Create a new WorkItem object, save a pointer to the parent WorkQueue,
      # and start the timer.
      #
      # @param work_queue the queue this item belongs to; it must respond to
      #   success_count, error_count, workflow and workflow_step
      def initialize(work_queue)
        @work_queue = work_queue
        @start_time = Time.new
      end

      # Save the IdentityMetadata object to the identityMetadata datastream.
      #
      # The datastream is added when DorService.get_datastream returns a falsy
      # value for it and updated otherwise.
      def identity_metadata_save
        if DorService.get_datastream(@druid, 'identityMetadata')
          # Trailing positional arguments are the content type and the
          # versionable flag.
          DorService.update_datastream(@druid, 'identityMetadata', identity_metadata.to_xml.to_xml,
                                       'application/xml', false)
        else
          DorService.add_datastream(@druid, 'identityMetadata', 'identityMetadata',
                                    identity_metadata.to_xml.to_xml)
        end
      end

      # Return the IdentityMetadata object bound to identityMetadata datastream XML.
      #
      # The object is built on first access and cached: an empty
      # IdentityMetadata when no druid is set, otherwise one parsed from the
      # datastream fetched through DorService.
      #
      # @return the cached or newly built IdentityMetadata object
      def identity_metadata
        return @identity_metadata unless @identity_metadata.nil?

        @identity_metadata =
          if @druid.nil?
            IdentityMetadata.new
          else
            IdentityMetadata.from_xml(DorService.get_datastream(@druid, 'identityMetadata'))
          end
      end

      # Return the identifier value for the specified identifier name
      def identifier(key)
        identity_metadata.get_identifier_value(key)
      end

      # Add a new name,value pair to the set of identifiers
      def identifier_add(key, value)
        identity_metadata.add_identifier(key, value)
      end

      # Return an array of strings where each entry consists of name:value
      def id_pairs
        identity_metadata.get_id_pairs
      end

      # Return the druid for the work item if it exists, else the first
      # identifier pair.
      #
      # @return the druid, the first entry of get_id_pairs, or nil when neither
      #   is available
      def item_id
        return @druid if @druid

        identity_metadata.get_id_pairs.first
      end

      # Record the successful outcome of the workstep operation for this workitem.
      # Bumps the queue's success counter, stops the timer, logs the elapsed
      # time and, when a druid is set, marks the step 'completed' in the
      # workflow service.
      def set_success
        @work_queue.success_count += 1
        stop_timer
        LyberCore::Log.info("#{item_id} completed in #{@elapsed_time} seconds")
        return unless @druid

        workflow = @work_queue.workflow
        Dor::WorkflowService.update_workflow_status(workflow.repository, @druid, workflow.workflow_id,
                                                    @work_queue.workflow_step, 'completed', @elapsed_time)
      end

      # Record the unsuccessful outcome of the workstep operation for this workitem.
      # Bumps the queue's error counter, stops the timer, logs the exception
      # (class, message and backtrace) and, when a druid is set, records the
      # error message against the workflow step.
      #
      # The exception is deliberately not re-raised: it has been caught and
      # processed at this point.
      #
      # @param e [Exception] the error that stopped processing of this item
      def set_error(e)
        @work_queue.error_count += 1
        stop_timer
        # An exception that was never raised has a nil backtrace; Array() turns
        # that into an empty list. One frame per line keeps the log readable.
        trace = Array(e.backtrace).join("\n")
        LyberCore::Log.error("#{item_id} error - #{e.class}: #{e.message}\n#{trace}")
        return unless @druid

        workflow = @work_queue.workflow
        DorService.update_workflow_error_status(workflow.repository, @druid, workflow.workflow_id,
                                                @work_queue.workflow_step, e.message)
      end

      private

      # Stop the timer and compute the elapsed processing time in seconds.
      def stop_timer
        @end_time = Time.new
        @elapsed_time = @end_time - @start_time
      end
    end
  end
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
require 'dor_service'
|
2
|
+
require 'dlss_service'
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
module LyberCore
  module Robots
    # Represents a set of workitem objects to be processed by a given step of a workflow
    class WorkQueue
      # The workflow that this queue is a part of
      attr_reader :workflow
      # The step in the workflow that is being processed against this queue
      attr_reader :workflow_step
      # The workflow step that should have already been completed for the workitem object
      attr_reader :prerequisite
      # The maximum number of workitem objects to process in one run of a robot
      attr_reader :batch_limit
      # The maximum number of errors to allow before terminating the batch run
      attr_reader :error_limit
      # The array of primary identifiers for the workitem objects to be processed
      attr_reader :druids
      # An alternative identifier to be used when druids are not yet available (e.g. at registration)
      attr_reader :identifier_name
      attr_reader :identifier_values
      # The tally of how many items have been processed
      attr_reader :item_count
      attr_accessor :success_count
      attr_accessor :error_count
      # The timings for the batch run
      attr_reader :start_time
      attr_reader :end_time
      attr_reader :elapsed_time
      # Path of the process-config.yaml file read for this queue
      attr_reader :config_file

      # Create a new WorkQueue object for the specified step,
      # save a pointer to the parent Workflow,
      # start the timer,
      # and read in the configuration information for the work step.
      #
      # nil arguments should only be used in test mode: the config file is then
      # skipped and the limits default to a batch of 2 items and 1 error.
      #
      # @param workflow the parent workflow; must respond to workflow_config_dir
      # @param workflow_step [String] name of the step this queue feeds
      def initialize(workflow=nil, workflow_step=nil)
        LyberCore::Log.debug("Initializing work queue with workflow #{workflow} and workflow_step #{workflow_step}")
        @start_time = Time.new
        LyberCore::Log.info("Starting #{workflow_step} at #{@start_time}")
        @workflow = workflow
        @workflow_step = workflow_step
        @item_count = 0
        @success_count = 0
        @error_count = 0

        if workflow.nil? || workflow_step.nil?
          @batch_limit = 2
          @error_limit = 1
          return
        end

        process_config_file
      end

      # Read process-config.yaml from the workflow's config directory and pick
      # up this step's prerequisite, batch_limit and error_limit.
      #
      # @raise [RuntimeError] if the file does not exist, or if it holds no
      #   settings for this workflow step
      def process_config_file
        LyberCore::Log.debug("Processing config file ... ")
        LyberCore::Log.debug("@workflow.workflow_config_dir = #{@workflow.workflow_config_dir}")

        @config_file = File.join(@workflow.workflow_config_dir, 'process-config.yaml')
        LyberCore::Log.debug("I'm opening the config file at #{@config_file}")

        # Does the file exist?
        raise "Can't open process-config file #{@config_file}" unless File.file?(@config_file)

        process_config = YAML.load_file(@config_file)
        LyberCore::Log.debug("process_config: #{process_config.inspect}")

        # Fail with a readable message instead of a NoMethodError on nil when
        # the file is empty or has no entry for this step.
        step_config = process_config.is_a?(Hash) ? process_config[@workflow_step] : nil
        unless step_config.is_a?(Hash)
          raise "No configuration for workflow step #{@workflow_step} in process-config file #{@config_file}"
        end

        @prerequisite = step_config['prerequisite']
        LyberCore::Log.debug("@prerequisite: #{@prerequisite}")

        @batch_limit = step_config['batch_limit']
        LyberCore::Log.debug("@batch_limit: #{@batch_limit}")

        @error_limit = step_config['error_limit']
        LyberCore::Log.debug("@error_limit: #{@error_limit}")
      end

      # Explicitly specify a set of druids to be processed by the workflow step
      #
      # @param druid_array [Array] the druids to process, in order
      def enqueue_druids(druid_array)
        LyberCore::Log.debug("\nEnqueing an array of druids...")
        @druids = druid_array
        LyberCore::Log.debug("\n@druids = #{@druids}")
      end

      # Obtain the set of druids to be processed by asking DorService for the
      # repository objects that are awaiting this step, keeping at most
      # batch_limit of them. Any error raised by the services propagates
      # unchanged to the caller.
      def enqueue_workstep_waiting()
        LyberCore::Log.debug("\nEnqueing workstep waiting...")
        object_list_xml = DorService.get_objects_for_workstep(workflow.repository, workflow.workflow_id,
                                                              @prerequisite, @workflow_step)
        LyberCore::Log.debug("\nobject_list_xml = #{object_list_xml}")
        @druids = DlssService.get_some_druids_from_object_list(object_list_xml, batch_limit)
        LyberCore::Log.debug("\n@druids = #{@druids}")
      end

      # Use an alternative set of identifiers as the basis of this queue
      # e.g. use array of barcodes as basis for google register-object robot
      #
      # @param identifier_name [String] the kind of identifier (e.g. a barcode)
      # @param identifier_values [Array] one value per object to process
      def enqueue_identifiers(identifier_name, identifier_values)
        @identifier_name = identifier_name
        @identifier_values = identifier_values
      end

      # Get the next WorkItem to be processed by the robot for the workflow step.
      #
      # @return [LyberCore::Robots::WorkItem, nil] the next item, or nil when
      #   the batch limit or error limit has been reached, when the enqueued
      #   identifiers are exhausted, or when nothing was enqueued
      def next_item()
        if @item_count >= @batch_limit
          LyberCore::Log.info "Batch limit of #{@batch_limit} items reached"
          return nil
        end
        if @error_count >= @error_limit
          LyberCore::Log.info "Error limit of #{@error_limit} items reached"
          return nil
        end

        work_item = LyberCore::Robots::WorkItem.new(self)
        # Druids take precedence over alternative identifiers when both are set.
        if @druids
          return nil if @item_count >= @druids.length
          work_item.druid = @druids[@item_count]
        elsif @identifier_values
          return nil if @item_count >= @identifier_values.length
          work_item.identifier_add(@identifier_name, @identifier_values[@item_count])
        else
          return nil
        end

        @item_count += 1
        work_item
      end

      # Output the batch's timings and other statistics to the main log file
      def print_stats
        stop_timer
        LyberCore::Log.info "Total time: #{@elapsed_time}\n"
        LyberCore::Log.info "Completed objects: #{success_count}\n"
        LyberCore::Log.info "Errors: #{error_count}\n"
      end

      # Output the batch's timings for a run that found nothing to process
      def print_empty_stats
        stop_timer
        LyberCore::Log.info "Total time: #{@elapsed_time}\n"
        LyberCore::Log.info "Empty queue"
      end

      private

      # Stop the batch timer and compute the elapsed time in seconds.
      def stop_timer
        @end_time = Time.new
        @elapsed_time = @end_time - @start_time
      end
    end
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
module LyberCore
  module Robots
    # Describes one workflow: where its configuration lives on disk, which
    # repository it operates against, and (optionally) which collection it is
    # being run for.
    class Workflow
      attr_reader :workflow_name
      attr_reader :workflow_config_dir
      attr_reader :workflow_config_file
      attr_reader :workflow_config

      attr_reader :collection_name
      attr_reader :collection_config_dir

      # @param [String] workflow_name name of the workflow
      # @param [Hash] options a hash of optional arguments
      # @option options [String] :collection_name name of the collection, if any
      # @return [LyberCore::Robots::Workflow] a workflow object
      # @raise [RuntimeError] if ROBOT_ROOT is not defined or the workflow
      #   config file cannot be found
      # @example Create a new workflow object with a collection_name
      #   @wf = LyberCore::Robots::Workflow.new(workflow_name, {:collection_name => collection})
      def initialize(workflow_name, options = {})
        # ROBOT_ROOT must be set before invoking a robot
        raise "ROBOT_ROOT isn't set. Please set it to point to where your config files live." unless defined? ROBOT_ROOT

        @workflow_name = workflow_name
        @collection_name = options[:collection_name]
        load_workflow_config
      end

      # Work out the config directories for this workflow and load
      # workflow-config.yaml from the workflow's directory.
      #
      # The config home is ROBOT_ROOT/config/workflows. Only when ROBOT_ROOT is
      # defined but nil/false does the WORKFLOW_CONFIG_HOME environment
      # variable (or, failing that, a config directory relative to this file)
      # take over.
      #
      # @raise [RuntimeError] if workflow-config.yaml does not exist
      def load_workflow_config
        config_home =
          if ROBOT_ROOT
            File.join(ROBOT_ROOT, "config", "workflows")
          else
            ENV['WORKFLOW_CONFIG_HOME'] || File.join(File.dirname(__FILE__), "..", "..", "config")
          end

        @workflow_config_dir = File.join(config_home, @workflow_name)
        LyberCore::Log.debug("@workflow_config_dir = #{@workflow_config_dir}")
        @collection_config_dir = File.join(@workflow_config_dir, @collection_name) if @collection_name
        @workflow_config_file = File.join(@workflow_config_dir, 'workflow-config.yaml')

        unless File.exist?(@workflow_config_file)
          raise "Workflow config not found!\n" \
                "ROBOT_ROOT = #{ROBOT_ROOT}\n" \
                "expecting to find workflow_config_file in #{@workflow_config_file}\n"
        end

        @workflow_config = YAML.load_file(@workflow_config_file)
      end

      # The workflow name and collection name joined by an underscore.
      # NOTE(review): raises TypeError when no collection name was given,
      # because nil cannot be concatenated to a String -- confirm whether
      # callers ever use this without a collection.
      #
      # @return [String]
      def workflow_collection
        @workflow_name + "_" + @collection_name
      end

      # @return [String] the workflow name, used as the workflow's id
      def workflow_id
        @workflow_name
      end

      # Which repository are we operating against?
      # Should return either "dor" or "sdr"
      #
      # @return the 'repository' entry of workflow-config.yaml
      def repository
        @workflow_config['repository']
      end

      # Construct the fully qualified filename and see if
      # a file exists there. If it doesn't exist or isn't
      # a file, raise an error.
      #
      # @return [String] path of <workflow_name>.xml in the workflow config dir
      # @raise [RuntimeError] if nothing at that path is a regular file
      def workflow_process_xml_filename
        file = File.join(@workflow_config_dir, @workflow_name + '.xml')
        raise "#{file} is not a file" unless File.file?(file)

        file
      end

      # Return the contents of the file at workflow_process_xml_filename
      #
      # @return [String]
      def workflow_process_xml
        IO.read(workflow_process_xml_filename)
      end

      # Find the object template: the first file whose name ends in
      # "ObjectTemplate.xml", looked for in the collection's config directory
      # and then in the workflow's config directory.
      #
      # The collection directory is skipped when no collection was given (or
      # when it does not exist on disk), so workflow-level templates are found
      # for collection-less workflows instead of failing on a nil path.
      #
      # @return [String] full path of the template file
      # @raise [RuntimeError] if neither directory holds a template
      def object_template_filepath
        [@collection_config_dir, @workflow_config_dir].each do |dir|
          next unless dir && File.directory?(dir)

          Dir.foreach(dir) do |file|
            # Literal suffix match; a regexp with a bare "." would also
            # accept names such as "ObjectTemplate_xml".
            return File.join(dir, file) if file.end_with?('ObjectTemplate.xml')
          end
        end
        raise "Object Template not found"
      end

      # Receives a workflow step and returns the queue of work for that step.
      #
      # @param workflow_step [String] name of the step
      # @return [LyberCore::Robots::WorkQueue]
      def queue(workflow_step)
        WorkQueue.new(self, workflow_step)
      end
    end
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
module LyberCore
  module Robots
    # Locates (and creates on demand) the directories on disk in which a
    # workflow, optionally scoped to one collection, keeps its working files.
    class Workspace
      attr_reader :workflow_name
      attr_reader :collection_name
      attr_reader :workspace_base

      # Work out the base directory for this workflow/collection and make sure
      # it exists.
      #
      # @param workflow_name [String] name of the workflow
      # @param collection_name [String, nil] optional collection name
      # @raise [NameError] if neither the WORKSPACE_HOME environment variable
      #   nor the WORKSPACE_HOME constant is available
      def initialize(workflow_name, collection_name=nil)
        @workflow_name = workflow_name
        @collection_name = collection_name
        @workspace_base = set_workspace_base
        ensure_workspace_exists(@workspace_base)
      end

      # Usually WORKSPACE_HOME is set in your environment config file,
      # but you can override the default location of workspace files
      # by setting a WORKSPACE_HOME environment variable
      #
      # @return [String] the environment variable when set, else the constant
      # @raise [NameError] if the environment variable is unset and the
      #   WORKSPACE_HOME constant is not defined
      def set_workspace_home
        ENV['WORKSPACE_HOME'] || WORKSPACE_HOME
      rescue NameError
        LyberCore::Log.fatal("WORKSPACE_HOME is undefined. Do you need to set it in your config file?")
        raise
      end

      # Compute and remember the base directory:
      # <workspace home>/<workflow name>[/<collection name>].
      #
      # @return [String] the base directory path
      def set_workspace_base
        path_parts = [set_workspace_home, @workflow_name]
        path_parts << @collection_name if @collection_name
        @workspace_base = File.join(*path_parts)
      end

      # Create the given directory (and any missing parents).
      # mkdir_p leaves an existing directory alone, so no separate existence
      # check is needed.
      #
      # @param workspace [String] directory path
      # @raise whatever error the directory creation raised, after logging it
      def ensure_workspace_exists(workspace)
        FileUtils.mkdir_p(workspace)
      rescue
        LyberCore::Log.fatal("Can't create workspace_base #{workspace}")
        raise
      end

      # Remove the leading "druid:" prefix of the druid, if present.
      # The pattern is anchored so that only a prefix is stripped, never an
      # occurrence further inside the identifier.
      #
      # @param [String] druid
      # @return [String]
      def normalized_druid(druid)
        druid.sub(/\Adruid:/, '')
      end

      # The directory of the given type for one object, created if necessary.
      #
      # @param dir_type [String] sub-directory name under the workspace base
      # @param druid [String] the object's druid
      # @return [String] <workspace base>/<dir_type>/<druid without prefix>
      def object_dir(dir_type, druid)
        dir_name = File.join(@workspace_base, dir_type, normalized_druid(druid))
        ensure_workspace_exists(dir_name)
        dir_name
      end

      # The place where the original tar file from google is stored
      def original_dir(druid)
        object_dir('original', druid)
      end

      # The 'content' directory for the object
      def content_dir(druid)
        object_dir('content', druid)
      end

      # The 'metadata' directory for the object
      def metadata_dir(druid)
        object_dir('metadata', druid)
      end
    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'find'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'bagit' # http://github.com/flazz/bagit
|
4
|
+
require 'dor_service'
|
5
|
+
|
6
|
+
module LyberCore
  module Utils
    # Builds a BagIt bag on disk: copies or links payload files into it, adds
    # metadata files, writes bag-info.txt and the manifests, and validates
    # the result. Wraps a BagIt::Bag from the bagit gem.
    class BagitBag
      # Unit labels indexed by the number of divisions by 1000 applied to a byte count.
      SIZE_UNITS = %w[B KB MB GB TB].freeze

      # Start a fresh bag at bag_dir. Anything already at that path is removed first.
      #
      # @param bag_dir [String] directory that will hold the bag
      def initialize(bag_dir)
        @bag_dir = bag_dir
        FileUtils.rm_r(@bag_dir) if File.exist?(@bag_dir)
        @bag = BagIt::Bag.new @bag_dir
      end

      # Copy (or hard-link) everything under source_dir into the bag's
      # data/content directory.
      #
      # @param source_dir [String] directory holding the content files
      # @param use_links truthy to hard-link files instead of copying them
      def add_content_files(source_dir, use_links)
        data_content_dir = File.join(@bag_dir, 'data', 'content')
        copy_dir(source_dir, data_content_dir, use_links)
      end

      # Recursively reproduce source_dir under target_dir.
      # Sub-directories are recreated; each file is either hard-linked or
      # copied, depending on use_links.
      #
      # @param source_dir [String] directory to read from
      # @param target_dir [String] directory to create and fill
      # @param use_links truthy to hard-link files instead of copying them
      def copy_dir(source_dir, target_dir, use_links)
        FileUtils.mkdir_p(target_dir)
        Dir.foreach(source_dir) do |file|
          next if file == '.' || file == '..'

          source_file = File.join(source_dir, file)
          target_file = File.join(target_dir, file)
          if File.directory?(source_file)
            copy_dir(source_file, target_file, use_links)
          elsif use_links
            File.link(source_file, target_file)
          else
            # File.copy came from the long-removed ftools extension;
            # FileUtils.cp is the standard-library way to copy a file.
            FileUtils.cp(source_file, target_file)
          end
        end
      end

      # Write metadata_string to metadata/<file_name> inside the bag.
      # Does nothing when metadata_string is nil.
      #
      # @param metadata_string [String, nil] the file's content
      # @param file_name [String] name of the file under metadata/
      def add_metadata_file_from_string(metadata_string, file_name)
        return if metadata_string.nil?

        data_file_path = "metadata/#{file_name}"
        @bag.add_file(data_file_path) do |io|
          io.puts metadata_string
        end
      end

      # Write the bag info file from md_hash plus the computed Bag-Size and
      # Payload-Oxum entries (the computed entries win on key clashes), then
      # make sure the file is named bag-info.txt in the bag directory.
      #
      # @param md_hash [Hash] caller-supplied bag-info entries
      def write_metadata_info(md_hash)
        byte_count, file_count = bag_payload
        bag_info_hash = {
          'Bag-Size' => bag_size_human(byte_count),
          'Payload-Oxum' => "#{byte_count}.#{file_count}",
        }
        @bag.write_bag_info(md_hash.merge(bag_info_hash))
        File.rename(@bag.bag_info_txt_file, File.join(@bag.bag_dir, 'bag-info.txt'))
      end

      # Total up the payload under the bag's data directory.
      #
      # @return [Array(Integer, Integer)] total bytes and number of files
      def bag_payload()
        bytes = 0
        files = 0
        Find.find(@bag.data_dir) do |filepath|
          next if File.directory?(filepath)

          bytes += File.size(filepath)
          files += 1
        end
        [bytes, files]
      end

      # Format a byte count for humans using decimal (1000-based) units,
      # e.g. "999 B", "1.00 KB", "1.50 MB". Values are capped at TB.
      #
      # @param bytes [Integer] the byte count
      # @return [String]
      def bag_size_human(bytes)
        count = 0
        size = bytes
        while size >= 1000 && count < SIZE_UNITS.length - 1
          size /= 1000.0
          count += 1
        end
        return sprintf("%d B", size) if count == 0

        sprintf("%.2f %s", size, SIZE_UNITS[count])
      end

      # Write the bag's payload manifest and tag manifest.
      def write_manifests()
        @bag.manifest!
        @bag.tagmanifest!
      end

      # Check the bag and fail loudly when it is not valid.
      #
      # @raise [RuntimeError] if the bag does not validate
      def validate()
        raise "bag not valid: #{@bag_dir}" unless @bag.valid?
      end
    end
  end
end
|