sdr-replication 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,55 @@
1
+ require "rubygems"
2
+ require "amqp"
3
+ require "amqp/extensions/rabbitmq"
4
+ require "yaml"
5
+
6
# Consumer for the "sdr.replication.notification" queue.
#
# For every "sdr_replication_notification" message a detached child process is
# started that runs ./dpn_check_rep.rb with two arguments: the raw message
# payload and the message attributes serialized as YAML. Every message is
# acknowledged once the case statement has finished, whatever its type.
EventMachine.run do

  connection_params = {
    :host      => "127.0.0.1",
    :port      => 5672,
    :user      => "guest",
    :pass      => "guest",
    :vhost     => "/",
    :ssl       => false,
    :frame_max => 131072
  }

  connection = AMQP.connect(connection_params)
  channel    = AMQP::Channel.new(connection, :auto_recovery => true)

  # Set the channel prefetch count to 1.
  channel.prefetch(1)

  # Acknowledgements are good for letting the server know
  # that the task is finished. If the consumer doesn't send
  # the acknowledgement, then the task is considered to be unfinished
  # and will be requeued when consumer closes AMQP connection (because of a crash, for example).
  queue = channel.queue("sdr.replication.notification", :durable => true, :auto_delete => false)

  queue.subscribe(:ack => true) do |metadata, payload|
    case metadata.type
    when "sdr_replication_notification"
      # NOTE(review): the payload comes from the broker; YAML.load can
      # instantiate arbitrary objects on older Psych versions. Consider
      # YAML.safe_load once the expected payload schema is confirmed.
      data = YAML.load(payload)
      puts "sdr_replication_notification request with #{data.inspect}"

      header = metadata.attributes.to_yaml

      if (pid = fork)
        # Parent: do not wait for the child, but make sure it is reaped.
        Process.detach(pid)
      else
        # Child: pass payload and header as separate argv entries. The
        # multi-argument form of exec does not go through a shell, so quotes
        # or shell metacharacters inside the message cannot break the command
        # line or inject commands; the script still sees them as ARGV[0] and
        # ARGV[1].
        exec("./dpn_check_rep.rb", payload, header)
      end

      puts "Call to SDR DPN replication request done"
      puts

    when "dpn_replication_notification"
      # Recognised message type; no action is taken for it here.
    else
      puts "[commands] Unknown command: #{metadata.type}"
    end

    # message is processed, acknowledge it so that broker discards it
    metadata.ack
  end

  # Ctrl-C: close the broker connection, then stop the reactor.
  Signal.trap("INT") { connection.close { EventMachine.stop } }
end
@@ -0,0 +1,105 @@
1
+ require "amqp"
2
+ require "amqp/extensions/rabbitmq"
3
+ require "yaml"
4
+ require "json"
5
+ require "securerandom"
6
+ require "time"
7
+
8
+
9
# Broker connection settings used by both event loops in this script.
connection_params = {
  :host      => "127.0.0.1",
  :port      => 5672,
  :user      => "guest",
  :pass      => "guest",
  :vhost     => "/",
  :ssl       => false,
  :frame_max => 131072
}

# usually send one message only, rely on message broker to recover
doit = 1

# Publisher: every 2 seconds publish one "sdr_replication_notification"
# message to the default exchange; after `doit` messages, close the
# connection and stop the reactor.
EventMachine.run do
  sleep(0.5)

  connection = AMQP.connect(connection_params)
  channel    = AMQP::Channel.new(connection)

  count = 0
  timer = EventMachine.add_periodic_timer(2.0) do
    count += 1
    puts "count = #{count}"
    puts "Messaging that sdr has content for replication, parameters are name and size."
    payload = { :name => "12345.tar", :size => 4096 }.to_yaml

    correlation_id = SecureRandom.random_number(10000)

    # NOTE(review): "x-message-ttl" is documented by RabbitMQ as a queue
    # declaration argument; per-message TTL is normally set through the
    # :expiration property. Left unchanged here -- confirm the intent.
    channel.default_exchange.publish(payload,
      :arguments      => { "x-message-ttl" => 10 },
      :type           => "sdr_replication_notification",
      :routing_key    => "sdr.replication.notification",
      :reply_to       => "dpn.replication.reply",
      :correlation_id => correlation_id,
      :timestamp      => Time.now.utc.to_i
    )

    # Publish first, then shut down. Closing the connection before stopping
    # the reactor lets the publish frames queued above go out ahead of the
    # close request instead of relying on the reactor's last pass.
    if count >= doit
      timer.cancel
      connection.close { EventMachine.stop }
    end
  end

  puts "Sending notification message"
  Signal.trap("INT") { connection.close { EventMachine.stop } }
end

# The script ends here; everything below this line is never executed.
exit

# listen for response to previous message
# NOTE(review): unreachable because of the `exit` above. It subscribes to
# "dpn.replication.notification" while the publisher sets :reply_to to
# "dpn.replication.reply" -- confirm the queue name before enabling it.
EventMachine.run do
  sleep(0.5)

  connection = AMQP.connect(connection_params)
  channel    = AMQP::Channel.new(connection, :auto_recovery => true)
  channel.prefetch(1)

  # Give up after the 10-second timer has fired more than doit + 1 times.
  count = 0
  EventMachine.add_periodic_timer(10.0) do
    count += 1
    puts "waiting....."
    EM.stop_event_loop if count > doit + 1
  end

  queue = channel.queue("dpn.replication.notification", :durable => true, :auto_delete => false)
  queue.subscribe(:ack => true) do |metadata, payload|
    case metadata.type
    when "dpn_replication_ack"
      message = YAML.load(payload)
      if message[:message] == "dpn_available_reply" && message[:acknak] == 'ack'
        puts "[sdr dpn says to go ahead and replicate content to temp area."
        # (pid = fork) ? Process.detach(pid) : exec("start_replication_workflow.rb
        puts "Start workflow to copy content to sdr dpn for replication. "
      else
        puts "no replication possible, DPN say's no!"
        # (pid = fork) ? Process.detach(pid) : exec("replication_denied_workflow.rb
      end

    when "dpn_replication_notification"
      puts "DPN wants to replicate"
    else
      puts "[commands] Unknown command: #{metadata.type}"
    end

    # message is processed, acknowledge it so that broker discards it
    metadata.ack
  end

  puts "Waiting for 'ack' notification message"
  Signal.trap("INT") { connection.close { EventMachine.stop } }
end
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+ $: << "./"
4
+ require 'securerandom'
5
+ require "rubygems"
6
+ require 'time'
7
+ require 'date'
8
+ require "eventmachine"
9
+ require "amqp"
10
+ require "amqp/extensions/rabbitmq"
11
+ require "yaml"
12
+
13
+
14
# Broker connection settings (only used by the reply loop at the bottom).
connection_params = {
  :host      => "127.0.0.1",
  :port      => 5672,
  :user      => "guest",
  :pass      => "guest",
  :vhost     => "/",
  :ssl       => false,
  :frame_max => 131072
}

# Arguments passed are YAML strings: ARGV[0] is the message payload,
# ARGV[1] is the message header (attributes).
#
# NOTE(review): process exit statuses are limited to 0-255 on POSIX systems,
# so the 5001 / 5002 codes below will not reach the parent verbatim --
# confirm which values callers are expected to see.
puts "*************************************"
puts "In dpn_check_rep.rb"
unless ARGV.length == 2
  warn "dpn_check_rep.rb: expected 2 arguments (payload, header), got #{ARGV.length}"
  exit 5001 #should not get here, but this is an error condition
end

# NOTE(review): both arguments originate from a broker message; YAML.load can
# instantiate arbitrary objects on older Psych versions. Consider
# YAML.safe_load once the expected schema is confirmed.
message = YAML.load(ARGV[0])
header  = YAML.load(ARGV[1])

# A header that did not decode to a Hash cannot carry a :type, so treat it as
# a bad message type instead of failing with NoMethodError on header[:type].
unless header.is_a?(Hash) && header[:type] == "sdr_replication_notification"
  warn "dpn_check_rep.rb: unexpected message header #{header.inspect}"
  exit 5002 #bad message type
end

puts "sdr_replication_notification message #{message.inspect}"
puts "sdr_replication_notification header #{header.inspect}"

puts "checking to see if DPN can replicate content from SDR"

#
#Do checking!
#

sleep 4.5 #simulate checking

# The script ends here; the reply loop below is never executed.
exit

# Unreachable: would publish a "dpn_replication_ack" reply to the queue named
# in the request's reply_to header, then stop the reactor once the 2-second
# timer has fired more than twice.
EventMachine.run do
  sleep(0.5)
  connection = AMQP.connect(connection_params)
  channel    = AMQP::Channel.new(connection)

  count = 0
  EventMachine.add_periodic_timer(2.0) do
    count += 1
    EM.stop_event_loop if count > 2
  end

  EM.add_timer(1.0) do
    puts "Working: #{Time.now}"
    STDOUT.flush
  end

  puts "Acking that content can be replicated"
  payload = { :message => "dpn_available_reply", :acknak => 'ack' }.to_yaml

  channel.default_exchange.publish(payload,
    :type           => "dpn_replication_ack",
    :routing_key    => header[:reply_to],
    :reply_to       => "dpn.err.notification",
    :correlation_id => header[:correlation_id],
    :timestamp      => header[:timestamp])

  puts "Sending ack notification message"
  Signal.trap("INT") { connection.close { EventMachine.stop } }
end
@@ -0,0 +1,98 @@
1
+ require File.join(File.dirname(__FILE__),'../libdir')
2
+ require 'sdr_replication'
3
+
4
module Replication

  # The fixity properties of a file, used to determine file content equivalence.
  # Placing this data in a class by itself facilitates using the MD5, SHA1, etc checksums (and optionally the file size)
  # as a single key when doing comparisons against other file instances. The design assumes that this file fixity
  # is sufficiently unique to act as a comparator for determining file equality or verifying checksum manifests.
  #
  # @note Copyright (c) 2014 by The Board of Trustees of the Leland Stanford Junior University.
  #   All rights reserved. See {file:LICENSE.rdoc} for details.
  class FileFixity

    # @param [Hash<Symbol,Object>] options Key,Value pairs specifying initial values of attributes.
    #   Each key must name a writable attribute of this class, because the value is assigned
    #   through the matching "key=" setter.
    def initialize(options=nil)
      @checksums = {}
      (options || {}).each do |key, value|
        send("#{key}=", value)
      end
    end

    # @return [String] The name of the file, relative to its base directory
    #   (for payload files, path relative to the data folder.  For tag files, path relative to the bag home folder)
    attr_accessor :file_id

    # @return [Integer] The size of the file in bytes
    attr_accessor :bytes

    # @return [Hash<Symbol,String>] The MD5, SHA1, SHA256, etc checksum values of the file
    attr_accessor :checksums

    # @param [Symbol,String] type The type of checksum (e.g. :md5, :sha1, :sha256); matched case-insensitively
    # @return [String,nil] The value of the file digest, or nil when no checksum of that type is stored
    def get_checksum(type)
      checksums[type.to_s.downcase.to_sym]
    end

    # @param type [Symbol,String] The type of checksum; matched case-insensitively
    # @param value [String] value of the file digest
    # @return [void] Set the value for the specified checksum type in the checksum hash.
    #   The type is first passed to Fixity.validate_checksum_types.
    def set_checksum(type, value)
      checksum_type = type.to_s.downcase.to_sym
      Fixity.validate_checksum_types(checksum_type)
      checksums[checksum_type] = value
    end

    # @param other [FileFixity] The other file fixity being compared to this fixity
    # @return [Boolean] Returns true if self and other share at least one checksum type
    #   and agree on the value of every shared type. Objects that do not expose
    #   checksums (nil, strings, ...) are never equal instead of raising NoMethodError.
    def eql?(other)
      return false unless other.respond_to?(:checksums)
      shared_types = checksums.keys & other.checksums.keys
      return false if shared_types.empty?
      shared_types.all? { |type| checksums[type] == other.checksums[type] }
    end

    # (see #eql?)
    def ==(other)
      eql?(other)
    end

    # @return [Integer] A hash-code computed from the file_id.
    # @note The hash and eql? methods override the methods inherited from Object so that
    #   instances of this class can be used as Hash keys. See
    #   * {http://www.paulbutcher.com/2007/10/navigating-the-equality-maze/}
    #   * {http://techbot.me/2011/05/ruby-basics-equality-operators-ruby/}
    #   Also overriden is {#==} so that equality tests in other contexts will also return the expected result.
    # @note NOTE(review): the hash code is derived from file_id while eql? compares checksums,
    #   so two instances that are eql? can still produce different hash codes when their
    #   file ids differ -- confirm this is the intended Hash-key behaviour.
    def hash
      [file_id].hash
    end

    # @param [FileFixity] other The other FileFixity object being compared to this one
    # @param [String] left The label to use for values from this base FileFixity object
    # @param [String] right The label to use for values from the other FileFixity object
    # @return [Hash<Symbol,Hash<String,String>>,nil] details of the checksum differences between
    #   fixity objects, or nil when there are none. Only checksum types present on both sides are
    #   compared; if the two objects share no type, every type from either side is reported.
    def diff(other, left='base', right='other')
      types = checksums.keys & other.checksums.keys
      types = checksums.keys | other.checksums.keys if types.empty?
      differences = {}
      types.each do |type|
        base_checksum  = checksums[type]
        other_checksum = other.checksums[type]
        next if base_checksum == other_checksum
        differences[type] = { left => base_checksum, right => other_checksum }
      end
      differences.empty? ? nil : differences
    end

  end

end
@@ -0,0 +1,155 @@
1
+ require File.join(File.dirname(__FILE__),'../libdir')
2
+ require 'sdr_replication'
3
+
4
module Replication

  # A Struct to hold properties of a given checksum digest type
  ChecksumType = Struct.new(:id, :hex_length, :names)

  # A helper class that facilitates the generation and processing of checksums
  #
  # @note Copyright (c) 2014 by The Board of Trustees of the Leland Stanford Junior University.
  #   All rights reserved. See {file:LICENSE.rdoc} for details.
  class Fixity

    @@default_checksum_types = [:sha1, :sha256]

    # @return [Array<Symbol>] The list of checksum types to be used when generating fixity data
    def self.default_checksum_types
      @@default_checksum_types
    end

    # @param [Array<Symbol>] types The list of checksum types to be used when generating fixity data
    # @return [void] Set the list of checksum types to be used when generating fixity data
    # @raise [RuntimeError] if any of the types is not a valid checksum id
    def self.default_checksum_types=(*types)
      @@default_checksum_types = Fixity.validate_checksum_types(*types)
    end

    @@valid_checksum_types = [
      ChecksumType.new(:md5,     32, ['MD5']),
      ChecksumType.new(:sha1,    40, ['SHA-1', 'SHA1']),
      ChecksumType.new(:sha256,  64, ['SHA-256', 'SHA256']),
      ChecksumType.new(:sha384,  96, ['SHA-384', 'SHA384']),
      ChecksumType.new(:sha512, 128, ['SHA-512', 'SHA512'])
    ]

    # @return [Array<ChecksumType>] The list of allowed ChecksumType structs containing the type's properties
    def self.valid_checksum_types
      @@valid_checksum_types
    end

    # @return [Array<Symbol>] The list of allowed checksum types
    def self.valid_checksum_ids
      @@valid_checksum_types.map { |type| type.id }
    end

    # @param [Array<Symbol>] types The list of checksum types being specified by the caller
    #   (nested arrays are flattened)
    # @return [Array<Symbol>] The list of specified checksum types after being checked for validity
    # @raise [RuntimeError] if any type is not one of the valid checksum ids
    def self.validate_checksum_types(*types)
      checksum_types = types.flatten
      invalid_types = checksum_types - valid_checksum_ids
      raise "Invalid digest type specified: #{invalid_types.inspect}" unless invalid_types.empty?
      checksum_types
    end

    # @param [Array<Symbol>] checksum_types The list of checksum types being specified by the caller
    # @return [Hash<Symbol,Digest::Class>] The digest implementation objects that will generate the
    #   checksums, keyed by checksum type
    # @raise [RuntimeError] if a checksum type has no digest implementation
    def self.get_digesters(checksum_types=@@default_checksum_types)
      checksum_types.each_with_object({}) do |checksum_type, digesters|
        digesters[checksum_type] =
          case checksum_type
          when :md5    then Digest::MD5.new
          when :sha1   then Digest::SHA1.new
          when :sha256 then Digest::SHA2.new(256)
          when :sha384 then Digest::SHA2.new(384)
          when :sha512 then Digest::SHA2.new(512)
          else raise "Unrecognized checksum type: #{checksum_type}"
          end
      end
    end

    # @param pathname [Pathname] The location of the file to be digested
    # @param [Pathname] base_pathname The base directory from which relative paths (file IDS) will be derived
    # @param [Array<Symbol>] checksum_types The list of checksum types being specified by the caller (or default list)
    # @return [FileFixity] Generate a FileFixity instance containing fixity properties measured from a physical file
    def self.fixity_from_file(pathname, base_pathname, checksum_types=@@default_checksum_types)
      file_fixity = FileFixity.new
      file_fixity.file_id = pathname.relative_path_from(base_pathname).to_s
      file_fixity.bytes = pathname.size
      digesters = Fixity.get_digesters(checksum_types)
      # Binary mode: digests must be computed over the exact bytes on disk,
      # without any newline or encoding translation.
      pathname.open("rb") do |stream|
        # Feed the file to every digester in 8 KiB chunks; read returns nil at EOF.
        while (buffer = stream.read(8192))
          digesters.each_value { |digest| digest.update(buffer) }
        end
      end
      digesters.each { |checksum_type, digest| file_fixity.checksums[checksum_type] = digest.hexdigest }
      file_fixity
    end

    # @param [Pathname] base_pathname The directory path used as the base for deriving relative paths (file IDs)
    # @param [Array<Pathname>,nil] path_list The list of pathnames for files whose fixity will be generated;
    #   when nil, everything found under base_pathname is used. Entries that are not regular files are skipped.
    # @param [Array<Symbol>] checksum_types The list of checksum types to generate (or default list)
    # @return [Hash<String,FileFixity>] A hash containing file ids and fixity data derived from the actual files
    def self.generate_checksums(base_pathname, path_list, checksum_types=@@default_checksum_types)
      path_list = base_pathname.find if path_list.nil?
      file_fixity_hash = {}
      path_list.select { |pathname| pathname.file? }.each do |file|
        # Forward the caller's checksum types; they were previously dropped,
        # so the default types were always used.
        file_fixity = Fixity.fixity_from_file(file, base_pathname, checksum_types)
        file_fixity_hash[file_fixity.file_id] = file_fixity
      end
      file_fixity_hash
    end

    # @param [Integer] length The length of the checksum value in hex format
    # @return [ChecksumType,nil] The ChecksumType struct that contains the properties of the matching
    #   checksum type, or nil if no valid type has that hex length
    def self.type_for_length(length)
      @@valid_checksum_types.find { |type| type.hex_length == length }
    end

    # @param [String] file_id The filename or relative path of the file from its base directory
    # @param [Array<String>] checksum_values The digest values of the file; the type of each value is
    #   inferred from its length
    # @return [FileFixity] Generate a FileFixity instance containing fixity properties supplied by the caller
    # @raise [RuntimeError] if a digest value's length matches no valid checksum type
    def self.fixity_from_checksum_values(file_id, checksum_values)
      file_fixity = FileFixity.new
      file_fixity.file_id = file_id
      checksum_values.each do |digest|
        checksum_type = Fixity.type_for_length(digest.length)
        if checksum_type.nil?
          raise "Unrecognized checksum length (#{digest.length}) for #{file_id}: #{digest.inspect}"
        end
        file_fixity.checksums[checksum_type.id] = digest
      end
      file_fixity
    end

    # @param [Hash<String,FileFixity>] file_fixity_hash A hash containing file ids and fixity data derived from the manifest files
    # @return [Hash<String,Hash<Symbol,String>>] A hash containing file ids and checksum data derived from the file_fixity_hash
    def self.file_checksum_hash(file_fixity_hash)
      file_fixity_hash.values.each_with_object({}) do |file, checksum_hash|
        checksum_hash[file.file_id] = file.checksums
      end
    end

    # @param [Symbol,String] checksum_type The type of checksum digest to be generated
    # @param [Pathname,String] file_pathname The location of the file to digest
    # @return [String] The operating system shell command that will generate the checksum digest value
    # @note NOTE(review): the pathname is interpolated unquoted, so a path containing spaces or
    #   shell metacharacters yields a broken (or unsafe) command -- confirm callers only pass
    #   trusted, shell-safe paths.
    def self.openssl_digest_command(checksum_type, file_pathname)
      "openssl dgst -#{checksum_type} #{file_pathname}"
    end

    # @param [Symbol,String] checksum_type The type of checksum digest to be generated
    # @param [Pathname,String] file_pathname The location of the file to digest
    # @return [String] The checksum digest value for the file, taken as the last alphanumeric
    #   token of the command output
    def self.openssl_digest(checksum_type, file_pathname)
      command = openssl_digest_command(checksum_type, file_pathname)
      stdout = OperatingSystem.execute(command)
      stdout.scan(/[A-Za-z0-9]+/).last
    end

  end

end