lyber-core 0.9.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README.rdoc +58 -0
- data/lib/dlss_service.rb +82 -0
- data/lib/dor/base.rb +18 -0
- data/lib/dor/suri_service.rb +28 -0
- data/lib/dor/workflow_service.rb +111 -0
- data/lib/dor_service.rb +535 -0
- data/lib/lyber_core.rb +14 -0
- data/lib/lyber_core/connection.rb +97 -0
- data/lib/lyber_core/destroyer.rb +74 -0
- data/lib/lyber_core/exceptions/empty_queue.rb +9 -0
- data/lib/lyber_core/log.rb +105 -0
- data/lib/lyber_core/rake/dlss_release.rb +126 -0
- data/lib/lyber_core/robots/robot.rb +214 -0
- data/lib/lyber_core/robots/work_item.rb +103 -0
- data/lib/lyber_core/robots/work_queue.rb +154 -0
- data/lib/lyber_core/robots/workflow.rb +104 -0
- data/lib/lyber_core/robots/workspace.rb +77 -0
- data/lib/lyber_core/utils.rb +4 -0
- data/lib/lyber_core/utils/bagit_bag.rb +100 -0
- data/lib/lyber_core/utils/checksum_validate.rb +65 -0
- data/lib/lyber_core/utils/file_utilities.rb +168 -0
- data/lib/roxml_models/identity_metadata/dublin_core.rb +46 -0
- data/lib/roxml_models/identity_metadata/identity_metadata.rb +118 -0
- data/lib/tasks/rdoc.rake +32 -0
- metadata +371 -0
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'dor_service'
|
2
|
+
require "roxml_models/identity_metadata/identity_metadata"
|
3
|
+
require "roxml_models/identity_metadata/dublin_core"
|
4
|
+
|
5
|
+
# Represents a single object being processed as part of a workflow queue
|
6
|
+
module LyberCore
|
7
|
+
module Robots
|
8
|
+
# A single object being processed by one step of a workflow queue.
#
# Collaborators referenced here but defined elsewhere: DorService,
# Dor::WorkflowService, IdentityMetadata and LyberCore::Log.
class WorkItem
  # The queue that this workitem is a member of
  attr_reader :work_queue
  # The primary id for the object being processed
  attr_accessor :druid
  # Timings for this workitem's processing. start_time is set by #initialize;
  # end_time and elapsed_time (seconds) are set by #set_success / #set_error.
  attr_reader :start_time, :end_time, :elapsed_time
  # Inject an IdentityMetadata object (currently used for unit testing only).
  # The reader is written out below because it loads the object lazily.
  attr_writer :identity_metadata

  # Create a new WorkItem object, save a pointer to the parent WorkQueue,
  # and start the timer.
  #
  # @param work_queue the queue that owns this item
  def initialize(work_queue)
    @work_queue = work_queue
    @start_time = Time.new
  end

  # Save the IdentityMetadata object to the identityMetadata datastream.
  # The datastream is added when DorService reports none for this druid,
  # and updated (content type application/xml, not versionable) otherwise.
  def identity_metadata_save
    existing = DorService.get_datastream(@druid, 'identityMetadata')
    xml = identity_metadata.to_xml.to_xml
    if existing
      DorService.update_datastream(@druid, 'identityMetadata', xml, 'application/xml', false)
    else
      DorService.add_datastream(@druid, 'identityMetadata', 'identityMetadata', xml)
    end
  end

  # Return the IdentityMetadata object bound to the identityMetadata
  # datastream XML. It is built on first access: empty when there is no
  # druid, otherwise parsed from the datastream fetched through DorService.
  def identity_metadata
    if @identity_metadata.nil?
      @identity_metadata =
        if @druid.nil?
          IdentityMetadata.new
        else
          IdentityMetadata.from_xml(DorService.get_datastream(@druid, 'identityMetadata'))
        end
    end
    @identity_metadata
  end

  # Return the identifier value for the specified identifier name
  def identifier(key)
    identity_metadata.get_identifier_value(key)
  end

  # Add a new name,value pair to the set of identifiers
  def identifier_add(key, value)
    identity_metadata.add_identifier(key, value)
  end

  # Return an array of strings where each entry consists of name:value
  def id_pairs
    identity_metadata.get_id_pairs
  end

  # Return the druid for the work item if it exists, else the first
  # identifier pair, or nil when there are no identifiers at all.
  def item_id
    return @druid if @druid

    identity_metadata.get_id_pairs.first
  end

  # Record the successful outcome of the workstep operation for this workitem.
  # The workflow service is only notified when the item has a druid.
  def set_success
    @work_queue.success_count += 1
    stop_timer
    LyberCore::Log.info("#{item_id} completed in #{@elapsed_time} seconds")
    return unless @druid

    Dor::WorkflowService.update_workflow_status(
      @work_queue.workflow.repository, @druid, @work_queue.workflow.workflow_id,
      @work_queue.workflow_step, 'completed', @elapsed_time
    )
  end

  # Record the unsuccessful outcome of the workstep operation for this workitem.
  # The error is logged and reported; it is deliberately not re-raised.
  #
  # @param e [Exception] the error that stopped processing of this item
  def set_error(e)
    @work_queue.error_count += 1
    stop_timer
    # Log the message as well as the trace, one frame per line. Array() guards
    # against an exception that was never raised and so has a nil backtrace.
    trace = Array(e.backtrace).join("\n")
    LyberCore::Log.error("#{item_id} error - #{e.message}\n#{trace}")
    return unless @druid

    DorService.update_workflow_error_status(
      @work_queue.workflow.repository, @druid, @work_queue.workflow.workflow_id,
      @work_queue.workflow_step, e.message
    )
  end

  private

  # Stamp the end time and compute the elapsed seconds since #initialize.
  def stop_timer
    @end_time = Time.new
    @elapsed_time = @end_time - @start_time
  end
end
|
102
|
+
end
|
103
|
+
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
require 'dor_service'
|
2
|
+
require 'dlss_service'
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
module LyberCore
|
6
|
+
module Robots
|
7
|
+
# Represents a set of workitem objects to be processed by a given step of a workflow
|
8
|
+
# A set of workitem objects to be processed by a given step of a workflow.
#
# Collaborators referenced here but defined elsewhere: DorService,
# DlssService, LyberCore::Log and LyberCore::Robots::WorkItem.
class WorkQueue
  # The workflow that this queue is a part of
  attr_reader :workflow
  # The step in the workflow that is being processed against this queue
  attr_reader :workflow_step
  # The workflow step that should have already been completed for the workitem object
  attr_reader :prerequisite
  # The maximum number of workitem objects to process in one run of a robot
  attr_reader :batch_limit
  # The maximum number of errors to allow before terminating the batch run
  attr_reader :error_limit
  # The array of primary identifiers for the workitem objects to be processed
  attr_reader :druids
  # An alternative identifier to be used when druids are not yet available (e.g. at registration)
  attr_reader :identifier_name, :identifier_values
  # The tally of how many items have been processed
  attr_reader :item_count
  attr_accessor :success_count, :error_count
  # The timings for the batch run
  attr_reader :start_time, :end_time, :elapsed_time
  # Path of the process-config.yaml file read by #process_config_file
  attr_reader :config_file

  # Create a new WorkQueue object for the specified step,
  # save a pointer to the parent Workflow,
  # start the timer,
  # and read in the configuration information for the work step.
  #
  # nil arguments should only be used in test mode: the config file is then
  # skipped and the limits default to batch_limit 2 and error_limit 1.
  def initialize(workflow = nil, workflow_step = nil)
    LyberCore::Log.debug("Initializing work queue with workflow #{workflow} and workflow_step #{workflow_step}")
    @start_time = Time.new
    LyberCore::Log.info("Starting #{workflow_step} at #{@start_time}")
    @workflow = workflow
    @workflow_step = workflow_step
    @item_count = 0
    @success_count = 0
    @error_count = 0

    if workflow.nil? || workflow_step.nil?
      @batch_limit = 2
      @error_limit = 1
      return
    end

    process_config_file
  end

  # Read prerequisite, batch_limit and error_limit for this step from
  # process-config.yaml in the workflow's config directory.
  #
  # @raise [RuntimeError] if the file is missing or has no entry for the step
  def process_config_file
    LyberCore::Log.debug("Processing config file ... ")
    LyberCore::Log.debug("@workflow.workflow_config_dir = #{@workflow.workflow_config_dir}")

    @config_file = File.join(@workflow.workflow_config_dir, 'process-config.yaml')
    LyberCore::Log.debug("I'm opening the config file at #{@config_file}")

    # Does the file exist?
    raise "Can't open process-config file #{@config_file}" unless File.file?(@config_file)

    process_config = YAML.load_file(@config_file)
    LyberCore::Log.debug("process_config: #{process_config.inspect}")

    # Fail with a readable message rather than a NoMethodError on nil when
    # the file is empty or the step is not configured.
    step_config = process_config[@workflow_step] if process_config.is_a?(Hash)
    unless step_config.is_a?(Hash)
      raise "No configuration for workflow step #{@workflow_step} in #{@config_file}"
    end

    @prerequisite = step_config['prerequisite']
    LyberCore::Log.debug("@prerequisite: #{@prerequisite}")

    @batch_limit = step_config['batch_limit']
    LyberCore::Log.debug("@batch_limit: #{@batch_limit}")

    @error_limit = step_config['error_limit']
    LyberCore::Log.debug("@error_limit: #{@error_limit}")
  end

  # Explicitly specify a set of druids to be processed by the workflow step
  def enqueue_druids(druid_array)
    LyberCore::Log.debug("\nEnqueing an array of druids...")
    @druids = druid_array
    LyberCore::Log.debug("\n@druids = #{@druids}")
  end

  # Obtain the set of druids to be processed by asking DorService for the
  # objects awaiting this step, keeping at most batch_limit of them.
  # Errors from the services propagate to the caller unchanged.
  def enqueue_workstep_waiting
    LyberCore::Log.debug("\nEnqueing workstep waiting...")
    object_list_xml = DorService.get_objects_for_workstep(
      workflow.repository, workflow.workflow_id, @prerequisite, @workflow_step
    )
    LyberCore::Log.debug("\nobject_list_xml = #{object_list_xml}")
    @druids = DlssService.get_some_druids_from_object_list(object_list_xml, batch_limit)
    LyberCore::Log.debug("\n@druids = #{@druids}")
  end

  # Use an alternative set of identifiers as the basis of this queue
  # e.g. use array of barcodes as basis for google register-object robot
  def enqueue_identifiers(identifier_name, identifier_values)
    @identifier_name = identifier_name
    @identifier_values = identifier_values
  end

  # Get the next WorkItem to be processed by the robot for the workflow step.
  #
  # @return the next work item, or nil when the batch limit or error limit
  #   has been reached, the queue is exhausted, or nothing was enqueued
  def next_item
    if @item_count >= @batch_limit
      LyberCore::Log.info "Batch limit of #{@batch_limit} items reached"
      return nil
    end
    if @error_count >= @error_limit
      LyberCore::Log.info "Error limit of #{@error_limit} items reached"
      return nil
    end

    work_item = LyberCore::Robots::WorkItem.new(self)
    if @druids
      return nil if @item_count >= @druids.length

      work_item.druid = @druids[@item_count]
    elsif @identifier_values
      return nil if @item_count >= @identifier_values.length

      work_item.identifier_add(@identifier_name, @identifier_values[@item_count])
    else
      return nil
    end
    @item_count += 1
    work_item
  end

  # Output the batch's timings and other statistics to the main log file
  def print_stats
    stop_timer
    LyberCore::Log.info "Total time: #{@elapsed_time}\n"
    LyberCore::Log.info "Completed objects: #{success_count}\n"
    LyberCore::Log.info "Errors: #{error_count}\n"
  end

  # Output the timings for a run that found nothing to process
  def print_empty_stats
    stop_timer
    LyberCore::Log.info "Total time: #{@elapsed_time}\n"
    LyberCore::Log.info "Empty queue"
  end

  private

  # Stamp the end time and compute the elapsed seconds since #initialize.
  def stop_timer
    @end_time = Time.new
    @elapsed_time = @end_time - @start_time
  end
end
|
153
|
+
end
|
154
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
module LyberCore
|
2
|
+
module Robots
|
3
|
+
# The configuration of a named workflow, optionally scoped to a collection.
#
# Relies on the ROBOT_ROOT constant, LyberCore::Log and WorkQueue being
# defined elsewhere. NOTE(review): YAML is used here but this file does not
# require 'yaml' itself -- confirm that it is always loaded first.
class Workflow
  attr_reader :workflow_name
  attr_reader :workflow_config_dir
  attr_reader :workflow_config_file
  attr_reader :workflow_config

  attr_reader :collection_name
  attr_reader :collection_config_dir

  # @param [String] workflow_name name of the workflow
  # @param [Hash] options a hash of optional arguments
  # @return [LyberCore::Robots::Workflow] a workflow object
  # @raise [RuntimeError] if ROBOT_ROOT is undefined or the config is missing
  # @example Create a new workflow object with a collection_name
  #   @wf = LyberCore::Robots::Workflow.new(workflow_name, {:collection_name => collection})
  def initialize(workflow_name, options = {})
    # ROBOT_ROOT must be set before invoking a robot
    raise "ROBOT_ROOT isn't set. Please set it to point to where your config files live." unless defined? ROBOT_ROOT

    @workflow_name = workflow_name
    @collection_name = options[:collection_name]
    load_workflow_config
  end

  # Locate the workflow's config directory and load workflow-config.yaml.
  #
  # With a truthy ROBOT_ROOT the directory is ROBOT_ROOT/config/workflows.
  # Otherwise the WORKFLOW_CONFIG_HOME environment variable is used, falling
  # back to a config directory two levels above this file.
  #
  # @raise [RuntimeError] if workflow-config.yaml does not exist
  def load_workflow_config
    config_home =
      if ROBOT_ROOT
        File.join(ROBOT_ROOT, "config", "workflows")
      else
        ENV['WORKFLOW_CONFIG_HOME'] || File.join(File.dirname(__FILE__), "..", "..", "config")
      end

    @workflow_config_dir = File.join(config_home, @workflow_name)
    LyberCore::Log.debug("@workflow_config_dir = #{@workflow_config_dir}")
    @collection_config_dir = File.join(@workflow_config_dir, @collection_name) if @collection_name
    @workflow_config_file = File.join(@workflow_config_dir, 'workflow-config.yaml')

    unless File.exist?(@workflow_config_file)
      raise "Workflow config not found!\n" \
            "ROBOT_ROOT = #{ROBOT_ROOT}\n" \
            "expecting to find workflow_config_file in #{@workflow_config_file}\n"
    end

    @workflow_config = YAML.load_file(workflow_config_file)
  end

  # The workflow name and collection name joined by an underscore.
  # Only meaningful when a collection name was supplied.
  def workflow_collection
    @workflow_name + "_" + @collection_name
  end

  # The workflow identifier, which is the workflow name.
  def workflow_id
    @workflow_name
  end

  # Which repository are we operating against?
  # Should return either "dor" or "sdr"
  def repository
    @workflow_config['repository']
  end

  # Construct the fully qualified filename and see if
  # a file exists there. If it doesn't exist or isn't
  # a file, raise an error.
  def workflow_process_xml_filename
    file = File.join(@workflow_config_dir, @workflow_name + '.xml')
    raise "#{file} is not a file" unless File.file?(file)

    file
  end

  # Return the contents of the file at workflow_process_xml_filename
  def workflow_process_xml
    IO.read(workflow_process_xml_filename)
  end

  # Find a file whose name ends in "ObjectTemplate.xml", looking in the
  # collection config directory first (when there is one) and then in the
  # workflow config directory.
  #
  # @return [String] path of the first matching file
  # @raise [RuntimeError] if neither directory contains a template
  def object_template_filepath
    # compact drops the collection directory when no collection was given,
    # so the workflow directory is still searched instead of Dir.foreach(nil).
    [@collection_config_dir, @workflow_config_dir].compact.each do |dir|
      Dir.foreach(dir) do |file|
        return File.join(dir, file) if file =~ /ObjectTemplate\.xml\z/
      end
    end
    raise "Object Template not found"
  end

  # Receives a workflow step and returns a WorkQueue for that step
  def queue(workflow_step)
    WorkQueue.new(self, workflow_step)
  end
end
|
103
|
+
end
|
104
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
module LyberCore
|
3
|
+
module Robots
|
4
|
+
# The on-disk working area for a workflow, optionally scoped to a collection.
# Directories are created on demand under the workspace home.
class Workspace
  attr_reader :workflow_name
  attr_reader :collection_name
  attr_reader :workspace_base

  # Work out the workspace base directory and make sure it exists.
  #
  # @param [String] workflow_name
  # @param [String, nil] collection_name optional sub-directory of the workflow
  def initialize(workflow_name, collection_name = nil)
    @workflow_name = workflow_name
    @collection_name = collection_name
    set_workspace_base
    ensure_workspace_exists(@workspace_base)
  end

  # Usually WORKSPACE_HOME is set in your environment config file,
  # but you can override the default location of workspace files
  # by setting a WORKSPACE_HOME environment variable.
  #
  # @return [String] the environment variable if set, else the constant
  # @raise [NameError] if neither the variable nor the constant is defined
  def set_workspace_home
    ENV['WORKSPACE_HOME'] || WORKSPACE_HOME
  rescue NameError
    LyberCore::Log.fatal("WORKSPACE_HOME is undefined. Do you need to set it in your config file?")
    raise
  end

  # Compute, store and return the base directory:
  # home/workflow_name, plus /collection_name when a collection was given.
  def set_workspace_base
    segments = [set_workspace_home, @workflow_name]
    segments << @collection_name if @collection_name
    @workspace_base = File.join(*segments)
  end

  # Create the directory (and any missing parents) unless it already exists.
  # A failure is logged as fatal and then re-raised.
  def ensure_workspace_exists(workspace)
    FileUtils.mkdir_p(workspace) unless File.directory?(workspace)
  rescue
    LyberCore::Log.fatal("Can't create workspace_base #{workspace}")
    raise
  end

  # Remove the leading "druid:" prefix from the druid, if present
  # @param [String] druid
  # @return [String]
  def normalized_druid(druid)
    # Anchored so that only a prefix is stripped, never text inside the id.
    druid.sub(/\Adruid:/, '')
  end

  # The directory of the given type for an object, created if necessary.
  #
  # @param [String] dir_type sub-directory of the workspace base
  # @param [String] druid
  # @return [String] the directory path
  def object_dir(dir_type, druid)
    dir_name = File.join(@workspace_base, dir_type, normalized_druid(druid))
    ensure_workspace_exists(dir_name)
    dir_name
  end

  # The place where the original tar file from google is stored
  def original_dir(druid)
    object_dir('original', druid)
  end

  # The 'content' directory for the object
  def content_dir(druid)
    object_dir('content', druid)
  end

  # The 'metadata' directory for the object
  def metadata_dir(druid)
    object_dir('metadata', druid)
  end
end
|
76
|
+
end
|
77
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'find'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'bagit' # http://github.com/flazz/bagit
|
4
|
+
require 'dor_service'
|
5
|
+
|
6
|
+
module LyberCore
|
7
|
+
module Utils
|
8
|
+
# Builds a BagIt bag in a directory, wrapping BagIt::Bag from the bagit gem.
class BagitBag
  # Start a fresh bag at bag_dir. Anything already at that path is removed
  # first, so the bag always starts empty.
  #
  # @param [String] bag_dir directory that will hold the bag
  def initialize(bag_dir)
    @bag_dir = bag_dir
    FileUtils.rm_r(@bag_dir) if File.exist?(@bag_dir)
    @bag = BagIt::Bag.new @bag_dir
  end

  # Copy (or hard-link) the tree under source_dir into data/content of the bag.
  #
  # @param [String] source_dir directory whose contents are added
  # @param [Boolean] use_links hard-link files instead of copying them
  def add_content_files(source_dir, use_links)
    data_content_dir = File.join(@bag_dir, 'data', 'content')
    copy_dir(source_dir, data_content_dir, use_links)
  end

  # Recursively reproduce source_dir under target_dir, creating target_dir
  # as needed. Regular files are hard-linked when use_links is true and
  # copied otherwise.
  def copy_dir(source_dir, target_dir, use_links)
    FileUtils.mkdir_p(target_dir)
    Dir.foreach(source_dir) do |entry|
      next if entry == '.' || entry == '..'

      source_file = File.join(source_dir, entry)
      target_file = File.join(target_dir, entry)
      if File.directory?(source_file)
        copy_dir(source_file, target_file, use_links)
      elsif use_links
        File.link(source_file, target_file)
      else
        # File.copy came from the old ftools library and is not defined on
        # core File; FileUtils.cp is the supported equivalent.
        FileUtils.cp(source_file, target_file)
      end
    end
  end

  # Write metadata_string to metadata/<file_name> in the bag.
  # Does nothing when metadata_string is nil.
  def add_metadata_file_from_string(metadata_string, file_name)
    return if metadata_string.nil?

    @bag.add_file("metadata/#{file_name}") do |io|
      io.puts metadata_string
    end
  end

  # Write the bag info, adding Bag-Size and Payload-Oxum computed from the
  # current payload (these override same-named keys in md_hash), then rename
  # the file the bag wrote to bag-info.txt in the bag directory.
  #
  # @param [Hash] md_hash caller-supplied bag-info entries
  def write_metadata_info(md_hash)
    bytes, files = bag_payload
    bag_info_hash = {
      'Bag-Size' => bag_size_human(bytes),
      'Payload-Oxum' => "#{bytes}.#{files}"
    }
    @bag.write_bag_info(md_hash.merge(bag_info_hash))
    File.rename(@bag.bag_info_txt_file, File.join(@bag.bag_dir, 'bag-info.txt'))
  end

  # Total the payload under the bag's data directory.
  #
  # @return [Array(Integer, Integer)] byte count and file count
  def bag_payload
    bytes = 0
    files = 0
    Find.find(@bag.data_dir) do |filepath|
      next if File.directory?(filepath)

      bytes += File.size(filepath)
      files += 1
    end
    [bytes, files]
  end

  # Format a byte count using decimal (power of 1000) units up to TB.
  # Values below 1000 are shown as whole bytes, larger ones with two decimals.
  #
  # @param [Numeric] bytes
  # @return [String] e.g. "999 B", "1.50 KB"
  def bag_size_human(bytes)
    units = %w[B KB MB GB TB]
    size = bytes
    exponent = 0
    while size >= 1000 && exponent < 4
      size /= 1000.0
      exponent += 1
    end
    return sprintf("%d B", size) if exponent == 0

    sprintf("%.2f %s", size, units[exponent])
  end

  # Generate the payload manifest and the tag manifest
  def write_manifests
    @bag.manifest!
    @bag.tagmanifest!
  end

  # @raise [RuntimeError] if the bag does not report itself valid
  def validate
    raise "bag not valid: #{@bag_dir}" unless @bag.valid?
  end
end
|
98
|
+
|
99
|
+
end
|
100
|
+
end
|