lyber-core 1.3.0 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,104 +0,0 @@
1
- module LyberCore
2
- module Robots
3
- class Workflow
4
-
5
- attr_reader :workflow_name
6
- attr_reader :workflow_config_dir
7
- attr_reader :workflow_config_file
8
- attr_reader :workflow_config
9
-
10
- attr_reader :collection_name
11
- attr_reader :collection_config_dir
12
-
13
-
14
- # @param [String] workflow_name name of the workflow
15
- # @param [Hash] options a hash of optional arguments
16
- # @return [LyberCore::Robots::Workflow] a workflow object
17
- # @example Create a new workflow object with a collection_name
18
- # @wf = LyberCore::Robots::Workflow.new(workflow_name, {:collection_name => collection})
19
- def initialize(workflow_name, options = {})
20
- # ROBOT_ROOT must be set before invoking a robot
21
- raise "ROBOT_ROOT isn't set. Please set it to point to where your config files live." unless defined? ROBOT_ROOT
22
-
23
- @workflow_name = workflow_name
24
- @collection_name = options[:collection_name]
25
- self.load_workflow_config
26
- end
27
-
28
- def load_workflow_config
29
- # # can override the default location of workflow config files
30
- # # by setting the WORKFLOW_CONFIG_HOME environment variable
31
- unless ROBOT_ROOT
32
- if not (config_home = ENV['WORKFLOW_CONFIG_HOME'] )
33
- config_home = File.join(File.dirname(__FILE__), "..", "..", "config")
34
- end
35
- else
36
- config_home = File.join(ROBOT_ROOT, "config", "workflows")
37
- end
38
-
39
- @workflow_config_dir = File.join(config_home, @workflow_name )
40
- LyberCore::Log.debug("@workflow_config_dir = #{@workflow_config_dir}")
41
- @collection_config_dir = File.join(@workflow_config_dir, @collection_name ) if(@collection_name)
42
- @workflow_config_file = File.join(@workflow_config_dir, 'workflow-config.yaml')
43
- if (File.exist?(@workflow_config_file))
44
- @workflow_config = YAML.load_file(workflow_config_file)
45
- else
46
- raise "Workflow config not found!
47
- ROBOT_ROOT = #{ROBOT_ROOT}
48
- expecting to find workflow_config_file in #{@workflow_config_file}
49
- "
50
- end
51
- end
52
-
53
- def workflow_collection
54
- return @workflow_name + "_" + @collection_name
55
- end
56
-
57
- def workflow_id
58
- return @workflow_name
59
- end
60
-
61
- # Which repository are we operating against?
62
- # Should return either "dor" or "sdr"
63
- def repository
64
- return @workflow_config['repository']
65
- end
66
-
67
- # Construct the fully qualified filename and see if
68
- # a file exists there. If it doesn't exist or isn't
69
- # a file, raise an error.
70
- def workflow_process_xml_filename
71
- file = File.join(@workflow_config_dir, @workflow_name + '.xml')
72
- if File.file? file
73
- return file
74
- else
75
- raise "#{file} is not a file"
76
- end
77
- end
78
-
79
- # Return the contents of the file at workflow_process_xml_filename
80
- def workflow_process_xml
81
- return IO.read(workflow_process_xml_filename)
82
- end
83
-
84
- def object_template_filepath
85
- Dir.foreach(@collection_config_dir) do |file|
86
- if file.match(/ObjectTemplate.xml$/)
87
- return File.join(@collection_config_dir, file)
88
- end
89
- end
90
- Dir.foreach(@workflow_config_dir) do |file|
91
- if file.match(/ObjectTemplate.xml$/)
92
- return File.join(@workflow_config_dir, file)
93
- end
94
- end
95
- raise "Object Template not found"
96
- end
97
-
98
- # receives a workflow step and returns a WorkQueue for that step
99
- def queue(workflow_step)
100
- return WorkQueue.new(self, workflow_step)
101
- end
102
- end
103
- end
104
- end
@@ -1,77 +0,0 @@
1
- require 'fileutils'
2
- module LyberCore
3
- module Robots
4
- class Workspace
5
-
6
- attr_reader :workflow_name
7
- attr_reader :collection_name
8
- attr_reader :workspace_base
9
-
10
- def initialize(workflow_name, collection_name=nil)
11
- @workflow_name = workflow_name
12
- @collection_name = collection_name
13
- @workspace_base = set_workspace_base
14
- ensure_workspace_exists(@workspace_base)
15
- end
16
-
17
- # Usually WORKSPACE_HOME is set in your environment config file,
18
- # but you can override the default location of workspace files
19
- # by setting a WORKSPACE_HOME environment variable
20
- def set_workspace_home
21
- begin
22
- if not (workspace_home = ENV['WORKSPACE_HOME'] )
23
- workspace_home = Dor::Config.robots.workspace
24
- end
25
- rescue NameError => e
26
- LyberCore::Log.fatal("WORKSPACE_HOME is undefined. Do you need to set it in your config file?")
27
- raise e
28
- end
29
- end
30
-
31
- def set_workspace_base
32
- workspace_home = set_workspace_home
33
- if (@collection_name)
34
- @workspace_base = File.join(workspace_home, @workflow_name, @collection_name)
35
- else
36
- @workspace_base = File.join(workspace_home, @workflow_name)
37
- end
38
- end
39
-
40
- def ensure_workspace_exists(workspace)
41
- begin
42
- FileUtils.mkdir_p(workspace) unless File.directory?(workspace)
43
- rescue
44
- LyberCore::Log.fatal("Can't create workspace_base #{workspace}")
45
- raise
46
- end
47
- end
48
-
49
- # Remove the first part of the druid
50
- # @param [String] druid
51
- # @return [String]
52
- def normalized_druid(druid)
53
- druid.sub(/druid:/, '')
54
- end
55
-
56
- def object_dir(dir_type, druid)
57
- dir_name = File.join(@workspace_base, dir_type, normalized_druid(druid))
58
- ensure_workspace_exists(dir_name)
59
- return dir_name
60
- end
61
-
62
- # The place where the original tar file from google is stored
63
- def original_dir(druid)
64
- object_dir('original', druid)
65
- end
66
-
67
- def content_dir(druid)
68
- return object_dir('content', druid)
69
- end
70
-
71
- def metadata_dir(druid)
72
- return object_dir('metadata', druid)
73
- end
74
-
75
- end
76
- end
77
- end
@@ -1,4 +0,0 @@
1
-
2
- require 'lyber_core/utils/file_utilities'
3
- require 'lyber_core/utils/checksum_validate'
4
- require 'lyber_core/utils/bagit_bag'
@@ -1,100 +0,0 @@
1
- require 'find'
2
- require 'fileutils'
3
- require 'bagit' # http://github.com/flazz/bagit
4
- require 'dor_service'
5
-
6
- module LyberCore
7
- module Utils
8
- class BagitBag
9
-
10
- def initialize(bag_dir)
11
- @bag_dir = bag_dir
12
- if (File.exist?(@bag_dir))
13
- FileUtils.rm_r(@bag_dir)
14
- end
15
- @bag = BagIt::Bag.new @bag_dir
16
- end
17
-
18
- def add_content_files(source_dir, use_links)
19
- data_content_dir = File.join(@bag_dir, 'data', 'content')
20
- copy_dir(source_dir,data_content_dir, use_links)
21
- end
22
-
23
- def copy_dir(source_dir, target_dir, use_links)
24
- FileUtils.mkdir_p(target_dir)
25
- Dir.foreach(source_dir) do |file|
26
- unless (file == '.' or file == '..')
27
- source_file = File.join(source_dir, file)
28
- target_file = File.join(target_dir, file)
29
- if File.directory?(source_file)
30
- copy_dir(source_file, target_file, use_links)
31
- elsif (use_links)
32
- File.link(source_file, target_file)
33
- else
34
- File.copy(source_file, target_file)
35
- end
36
- end
37
- end
38
- end
39
-
40
- def add_metadata_file_from_string( metadata_string, file_name)
41
- if (not metadata_string.nil? )
42
- data_file_path = "metadata/#{file_name}"
43
- @bag.add_file(data_file_path) do |io|
44
- io.puts metadata_string
45
- end
46
- end
47
- end
48
-
49
- def write_metadata_info(md_hash)
50
- payload = bag_payload()
51
- bag_info_hash = {
52
- 'Bag-Size' => bag_size_human(payload[0]),
53
- 'Payload-Oxum' => "#{payload[0]}.#{payload[1]}",
54
- }
55
- @bag.write_bag_info(md_hash.merge(bag_info_hash))
56
- File.rename(@bag.bag_info_txt_file, File.join(@bag.bag_dir,'bag-info.txt'))
57
- end
58
-
59
- def bag_payload()
60
- bytes = 0
61
- files = 0
62
- Find.find(@bag.data_dir) do |filepath|
63
- if (not File.directory?(filepath))
64
- bytes += File.size(filepath)
65
- files += 1
66
- end
67
- end
68
- return [bytes, files]
69
- end
70
-
71
- def bag_size_human(bytes)
72
- count = 0
73
- size = bytes
74
- while ( size >= 1000 and count < 4 )
75
- size /= 1000.0
76
- count += 1
77
- end
78
- if (count == 0)
79
- return sprintf("%d B", size)
80
- else
81
- return sprintf("%.2f %s", size, %w[B KB MB GB TB][count] )
82
- end
83
- end
84
-
85
- def write_manifests()
86
- @bag.manifest!
87
- @bag.tagmanifest!
88
- end
89
-
90
- def validate()
91
- if not @bag.valid?
92
- raise "bag not valid: #{@bag_dir}"
93
- end
94
-
95
- end
96
-
97
- end
98
-
99
- end
100
- end
@@ -1,65 +0,0 @@
1
- require 'nokogiri'
2
- require 'active_support'
3
-
4
- module LyberCore
5
- module Utils
6
- class ChecksumValidate
7
- #Code here
8
-
9
- def self.compare_hashes(hash1, hash2)
10
- return (hash1 == hash2)
11
- end
12
-
13
- def self.get_hash_differences(hash1, hash2)
14
- return hash1.diff(hash2)
15
- end
16
-
17
- def self.md5_hash_from_md5sum(md5sum)
18
- checksum_hash = {}
19
- md5sum.each do |line|
20
- line.chomp!
21
- digest,filename = line.split(/[ *]{2}/)
22
- checksum_hash[filename] = digest.downcase
23
- end
24
- return checksum_hash
25
- end
26
-
27
- def self.md5_hash_from_mets(mets)
28
- mets_checksum_hash = {}
29
- doc = Nokogiri::XML(mets)
30
- doc.xpath('/mets:mets/mets:fileSec//mets:file', {'mets' => 'http://www.loc.gov/METS/'}).each do |filenode|
31
- digest = filenode.attribute('CHECKSUM')
32
- if (digest)
33
- flocat = filenode.xpath('mets:FLocat', {'mets' => 'http://www.loc.gov/METS/'}).first
34
- if (flocat)
35
- filename = flocat.attribute_with_ns('href', 'http://www.w3.org/1999/xlink')
36
- if (filename)
37
- mets_checksum_hash[filename.text] = digest.text.downcase
38
- end
39
- end
40
- end
41
- end
42
- return mets_checksum_hash
43
- end
44
-
45
- def self.md5_hash_from_content_metadata(content_md)
46
- content_md_checksum_hash = {}
47
- doc = Nokogiri::XML(content_md)
48
- doc.xpath('/contentMetadata/resource[@type="page"]/file').each do |filenode|
49
- filename = filenode.attribute('id')
50
- if (filename)
51
- md5_element = filenode.xpath('checksum[@type="MD5"]').first
52
- if (md5_element)
53
- digest = md5_element.text
54
- if (digest)
55
- content_md_checksum_hash[filename.text] = digest.downcase
56
- end
57
- end
58
- end
59
- end
60
- return content_md_checksum_hash
61
-
62
- end
63
- end
64
- end
65
- end
@@ -1,168 +0,0 @@
1
- require 'fileutils'
2
- require 'systemu'
3
-
4
- # File Utilities for use in transferring filesystem objects,
5
- # decrypting a file, unpacking a targz archive, and validating checksums
6
- # @author rnanders@stanford.edu
7
- module LyberCore
8
- module Utils
9
- class FileUtilities
10
-
11
-
12
- # Executes a system command in a subprocess.
13
- # The method will return stdout from the command if execution was successful.
14
- # The method will raise an exception if execution fails.
15
- # The exception's message will contain the explanation of the failure.
16
- # @param [String] command the command to be executed
17
- # @return [String] stdout from the command if execution was successful
18
- def FileUtilities.execute(command)
19
- status, stdout, stderr = systemu(command)
20
- if (status.exitstatus != 0)
21
- raise stderr
22
- end
23
- return stdout
24
- rescue
25
- msg = "Command failed to execute: [#{command}] caused by <STDERR = #{stderr.split($/).join('; ')}>"
26
- msg << " STDOUT = #{stdout.split($/).join('; ')}" if (stdout && (stdout.length > 0))
27
- raise msg
28
-
29
- end
30
-
31
- # Generates a dirname for storing or retrieving a file in
32
- # "pair tree" hierarchical structure, where the path is derived
33
- # from segments of a barcode string
34
- #
35
- # = Input:
36
- # * barcode = barcode string
37
- #
38
- # = Return value:
39
- # * A string containing a slash-delimited dirname derived from the barcode
40
- def FileUtilities.pair_tree_from_barcode(barcode)
41
- if (barcode.class != String)
42
- raise "Barcode must be a String"
43
- end
44
- # figure out if this is a SUL barcode or from coordinate library
45
- library_prefix=barcode[0..4]
46
- if ( library_prefix == '36105' )
47
- pair_tree=barcode[5..10].gsub(/(..)/, '\1/')
48
- else
49
- library_prefix=barcode[0..2]
50
- pair_tree=barcode[3..8].gsub(/(..)/, '\1/')
51
- end
52
- return "#{library_prefix}/#{pair_tree}"
53
- end
54
-
55
- # Transfers a filesystem object (file or directory)
56
- # from a source to a target location. Uses rsync in "archive" mode
57
- # over an ssh connection.
58
- #
59
- # = Inputs:
60
- # * filename = basename of the filesystem object to be transferred
61
- # * source_dir = dirname of the source location from which the object is read
62
- # * dest_dir = dirname of the target location to which the object is written
63
- # If one of the locations is on a remote server, then the dirname should be
64
- # prefixed with user@hostname:
65
- #
66
- # = Return value:
67
- # * The method will return true if the transfer is successful.
68
- # * The method will raise an exception if either the rsync command fails,
69
- # or a test for the existence of the transferred object fails.
70
- # The exception's message will contain the explanation of the failure
71
- #
72
- # Network transfers will only succeed if the appropriate public key
73
- # authentication has been previously set up.
74
- def FileUtilities.transfer_object(filename, source_dir, dest_dir)
75
- source_path=File.join(source_dir, filename)
76
- rsync='rsync -a -e ssh '
77
- rsync_cmd = rsync + "'" + source_path + "' " + dest_dir
78
- LyberCore::Log.debug("rsync command is: #{rsync_cmd}")
79
- self.execute(rsync_cmd)
80
- if not File.exists?(File.join(dest_dir, filename))
81
- raise "#{filename} is not found in #{dest_dir}"
82
- end
83
- return true
84
- end
85
-
86
- # Decrypts a GPG encrypted file using the "gpg" command
87
- #
88
- # = Inputs:
89
- # * workspace_dir = dirname containing the file
90
- # * targzgpg = the filename of the GPG encrypted file
91
- # * targz = the filename of the unencrypted file
92
- # * passphrase = the string used to decrypt the file
93
- #
94
- # = Return value:
95
- # * The method will return true if the decryption is successful.
96
- # * The method will raise an exception if either the decryption command fails,
97
- # or a test for the existence of the decrypted file fails.
98
- # The exception's message will contain the explanation of the failure
99
- def FileUtilities.gpgdecrypt(workspace_dir, targzgpg, targz, passphrase)
100
- LyberCore::Log.debug("decrypting #{targzgpg}")
101
- gpg_cmd="/usr/bin/gpg --passphrase '#{passphrase}' " +
102
- "--batch --no-mdc-warning --no-secmem-warning " +
103
- " --output " + File.join(workspace_dir, targz) +
104
- " --decrypt " + File.join(workspace_dir, targzgpg)
105
- self.execute(gpg_cmd)
106
- if not File.exists?(File.join(workspace_dir, targz))
107
- raise "#{targz} was not created in #{workspace_dir}"
108
- end
109
- return true
110
- end
111
-
112
- # Unpacks a TAR-ed, GZipped archive using a "tar -xzf" command
113
- #
114
- # = Inputs:
115
- # * original_dir = dirname containing the archive file
116
- # * targz = the filename of the archive file
117
- # * destination_dir = the target directory into which the contents are written
118
- #
119
- # = Return value:
120
- # * The method will return true if the unpacking is successful.
121
- # * The method will raise an exception if either the unpack command fails,
122
- # or a test for the existence of files in the target directory fails.
123
- # The exception's message will contain the explanation of the failure.
124
- def FileUtilities.unpack(original_dir, targz, destination_dir)
125
- LyberCore::Log.debug("unpacking #{targz}")
126
- FileUtils.mkdir_p(destination_dir)
127
- dir_save = Dir.pwd
128
- Dir.chdir(destination_dir)
129
- unpack_cmd="tar -xzf " + File.join(original_dir, targz)
130
- self.execute(unpack_cmd)
131
- if not (Dir.entries(destination_dir).length > 0)
132
- raise "#{destination_dir} is empty"
133
- end
134
- return true
135
- ensure
136
- Dir.chdir(dir_save)
137
- end
138
-
139
- # Verifies MD5 checksums for the files in a directory
140
- # against the checksum values in the supplied file
141
- # (Uses md5sum command)
142
- #
143
- # = Inputs:
144
- # * directory = dirname containing the file to be checked
145
- # * checksum_file = the name of the file containing the expected checksums
146
- #
147
- # = Return value:
148
- # * The method will return true if the verification is successful.
149
- # * The method will raise an exception if either the md5sum command fails,
150
- # or a test of the md5sum output indicates a checksum mismatch.
151
- # The exception's message will contain the explanation of the failure.
152
- def FileUtilities.verify_checksums(directory, checksum_file)
153
- LyberCore::Log.debug("verifying checksums in #{directory}")
154
- dir_save = Dir.pwd
155
- Dir.chdir(directory)
156
- checksum_cmd = 'md5sum -c ' + checksum_file + ' | grep -v OK | wc -l'
157
- badcount = self.execute(checksum_cmd).to_i
158
- if not (badcount==0)
159
- raise "#{badcount} files had bad checksums"
160
- end
161
- return true
162
- ensure
163
- Dir.chdir(dir_save)
164
- end
165
- end
166
-
167
- end
168
- end