lvmsync 1.0.0
- data/LICENCE +674 -0
- data/README.md +310 -0
- data/bin/lvmsync +256 -0
- data/lib/lvm.rb +5 -0
- data/lib/lvm/helpers.rb +18 -0
- data/lib/lvm/lv_config.rb +39 -0
- data/lib/lvm/pv_config.rb +7 -0
- data/lib/lvm/snapshot.rb +113 -0
- data/lib/lvm/thin_snapshot.rb +186 -0
- data/lib/lvm/vg_config.rb +72 -0
- data/lib/vgcfgbackup.rb +67 -0
- data/lib/vgcfgbackup.treetop +37 -0
- metadata +240 -0
data/lib/lvm.rb
ADDED
data/lib/lvm/helpers.rb
ADDED
@@ -0,0 +1,18 @@
module LVM; end

module LVM::Helpers
  # Are we on a big-endian system?  Needed for our htonq/ntohq methods
  def big_endian?
    @bigendian ||= [1].pack("s") == [1].pack("n")
  end

  def htonq val
    # This won't work on a nUxi byte-order machine, but if you have one of
    # those, I'm guessing you've got bigger problems
    big_endian? ? ([val].pack("Q").reverse.unpack("Q").first) : val
  end

  def ntohq val
    htonq val
  end
end
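A minimal sketch of how these helpers behave when mixed into a class (the Demo class and the values below are illustrative only, not part of the gem): as written, htonq/ntohq leave a value untouched on a little-endian host and byte-swap it on a big-endian one.

require 'lvm/helpers'

class Demo
  include LVM::Helpers
end

d = Demo.new
d.big_endian?         # => false on x86/x86_64
d.htonq(1)            # => 1 on a little-endian host; 72057594037927936 (0x0100000000000000) on big-endian
d.ntohq(d.htonq(42))  # => 42 on either architecture, since ntohq just calls htonq again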
data/lib/lvm/lv_config.rb
ADDED
@@ -0,0 +1,39 @@
module LVM; end

class LVM::LVConfig
  attr_reader :name

  def initialize(tree, name, vgcfg)
    @root = tree
    @name = name
    @vgcfg = vgcfg
  end

  def thin?
    @root.groups['segment1'].variable_value('type') == 'thin'
  end

  def snapshot?
    thin? ? !origin.nil? : !@vgcfg.logical_volumes.values.find { |lv| lv.cow_store == name }.nil?
  end

  def thin_pool
    @root.groups['segment1'].variable_value('thin_pool')
  end

  def device_id
    @root.groups['segment1'].variable_value('device_id')
  end

  def origin
    @root.groups['segment1'].variable_value('origin')
  end

  def cow_store
    @root.groups['segment1'].variable_value('cow_store')
  end

  def chunk_size
    @root.groups['segment1'].variable_value('chunk_size') * 512
  end
end
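A hedged sketch of how LVM::LVConfig objects are typically obtained and queried, via LVM::VGConfig#logical_volumes (defined later in this release). The VG, snapshot, and pool names are hypothetical, and the example assumes `require 'lvm'` pulls these classes onto the load path:

require 'lvm'

# Hypothetical names: a VG "vg0" with a thin snapshot "snap0" allocated out of pool "pool0".
vgcfg = LVM::VGConfig.new("vg0")
snap  = vgcfg.logical_volumes["snap0"]

snap.thin?       # => true when segment1's type is "thin"
snap.snapshot?   # => true: a thin LV with an origin, or an LV that another LV uses as cow_store
snap.origin      # => name of the origin LV the snapshot was taken from
snap.thin_pool   # => "pool0"

pool = vgcfg.logical_volumes[snap.thin_pool]
pool.chunk_size  # => pool chunk size, converted from 512-byte sectors to bytes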
data/lib/lvm/snapshot.rb
ADDED
@@ -0,0 +1,113 @@
require 'rexml/document'
require 'lvm/helpers'

module LVM; end

class LVM::Snapshot
  include LVM::Helpers

  def initialize(vg, lv)
    @vg = vg
    @lv = lv
  end

  # Return an array of ranges which are the bytes which are different
  # between the origin and the snapshot.
  def differences
    @differences ||= begin
      # For a regular, old-skool snapshot, getting the differences is
      # pretty trivial -- just read through the snapshot metadata, and
      # the list of changed blocks is right there.
      #
      diff_block_list = []

      File.open(metadata_device, 'r') do |metafd|
        in_progress = true

        # The first chunk of the metadata LV is the header, which we
        # don't care for at all
        metafd.seek chunk_size, IO::SEEK_SET

        while in_progress
          # The snapshot on-disk format is a stream of <blocklist>, <blockdata>
          # sets; within each <blocklist>, it's network-byte-order 64-bit block
          # IDs -- the first is the location (chunk_size * offset) in the origin
          # LV that the data has been changed, the second is the location (again,
          # chunk_size * offset) in the metadata LV where the changed data is
          # being stored.
          (chunk_size / 16).times do
            origin_offset, snap_offset = metafd.read(16).unpack("QQ")
            origin_offset = ntohq(origin_offset)
            snap_offset = ntohq(snap_offset)

            # A snapshot offset of 0 would point back to the metadata
            # device header, so that's clearly invalid -- hence it's the
            # "no more blocks" indicator.
            if snap_offset == 0
              in_progress = false
              break
            end

            diff_block_list << origin_offset
          end

          # We've read through a set of origin => data mappings; now we need
          # to take a giant leap over the data blocks that follow it.
          metafd.seek chunk_size * chunk_size / 16, IO::SEEK_CUR
        end
      end

      # Block-to-byte-range is pretty trivial, and we're done!
      diff_block_list.map do |b|
        ((b*chunk_size)..(((b+1)*chunk_size)-1))
      end

      # There is one optimisation we could make here that we haven't --
      # coalescing adjacent byte ranges into single larger ranges.  I haven't
      # done it for two reasons: Firstly, I don't have any idea how much of a
      # real-world benefit it would be, and secondly, I couldn't work out how
      # to do it elegantly.  So I punted.
    end
  end

  def origin
    # Man old-skool snapshots are weird
    vgcfg.logical_volumes.values.find { |lv| lv.cow_store == @lv }.origin
  end

  private
  def vgcfg
    @vgcfg ||= LVM::VGConfig.new(@vg)
  end

  def chunk_size
    @chunk_size ||= metadata_header[:chunk_size]
  end

  def metadata_header
    @metadata_header ||= begin
      magic, valid, version, chunk_size = File.read(metadata_device, 16).unpack("VVVV")

      unless magic == 0x70416e53
        raise RuntimeError,
              "#{@vg}/#{@lv}: Invalid snapshot magic number"
      end

      unless valid == 1
        raise RuntimeError,
              "#{@vg}/#{@lv}: Snapshot is marked as invalid"
      end

      unless version == 1
        raise RuntimeError,
              "#{@vg}/#{@lv}: Incompatible snapshot metadata version"
      end

      { :chunk_size => chunk_size * 512 }
    end
  end

  def metadata_device
    "/dev/mapper/#{@vg}-#{@lv}-cow"
  end
end
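A sketch of how the byte ranges returned by #differences could be consumed. The VG and snapshot names are hypothetical, it assumes `require 'lvm'` loads the classes in this release, and calling #differences reads the snapshot's -cow device, so it needs real LVM devices and sufficient privileges:

require 'lvm'

# Hypothetical names: "vg0" is the VG, "snap0" an old-style snapshot LV.
snap  = LVM::Snapshot.new("vg0", "snap0")
total = 0

# Each element is an inclusive byte range (first..last) within the origin LV
# that the snapshot records as having been overwritten since it was taken.
snap.differences.each do |range|
  total += range.last - range.first + 1
end

puts "#{total} bytes differ between vg0/#{snap.origin} and vg0/snap0"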
data/lib/lvm/thin_snapshot.rb
ADDED
@@ -0,0 +1,186 @@
require 'rexml/document'

module LVM; end

class LVM::ThinSnapshot
  def initialize(vg, lv)
    @vg = vg
    @lv = lv
  end

  # Return an array of ranges which are the bytes which are different
  # between the origin and the snapshot.
  def differences
    # This is a relatively complicated multi-step process.  We have two
    # piles of <lv block> => <pool block> mappings, one for the "origin"
    # (the LV that's changing) and one for the "snapshot" (the LV that
    # represents some past point-in-time).  What we need to get out at the
    # end is an array of (<first byte>..<last byte>) ranges which cover
    # the parts of the volumes which are different (or that at least point
    # to different blocks within the data pool).
    #
    # This is going to take a few steps to accomplish.
    #
    # First, we translate each of the hashes into a list of two-element
    # arrays, expanding out ranges, because it means we don't have to
    # handle ranges differently in later steps (a worthwhile optimisation,
    # in my opinion -- if you think differently, I'd *really* welcome a
    # patch that handles ranges in-place without turning into a complete
    # mind-fuck, because I couldn't manage it).
    #
    # Next, we work out which mappings are "different" in all the possible
    # ways.  There's four cases we might come across:
    #
    # 1. Both origin and snapshot map the same LV block to the same data
    #    block.  This is a mapping we can discard from the set of
    #    differences, because, well, it isn't a difference.
    #
    # 2. Both origin and snapshot map the same LV block, but they point
    #    to different data blocks.  That's the easiest sort of difference
    #    to understand, and we *could* catch that just by comparing all
    #    of the mappings in the origin with the mappings in the snapshot,
    #    and listing those whose value differs.  But that wouldn't catch
    #    these next two cases...
    #
    # 3. The origin maps a particular LV block to a data block, but the
    #    snapshot doesn't have any mapping for that LV block.  This would
    #    occur quite commonly -- whenever a location in the origin LV was
    #    written to for the first time after the snapshot is taken.  You
    #    would catch all these (as well as the previous case) by taking
    #    the origin block map and removing any mappings which were
    #    identical in the snapshot block map.  However, that would fail to
    #    identify...
    #
    # 4. A block in the snapshot is mapped, when the corresponding origin
    #    block is *not* mapped.  Given the assumption that the snapshot
    #    was never written to, how could this possibly happen?  One word:
    #    "discard".  Mappings in the origin block list are removed if
    #    the block to which they refer is discarded.  We find *these* (and also
    #    all mappings of type 2) by the reverse process to that in case
    #    3 -- simply remove from the snapshot block list all mappings which
    #    appear identically in the origin block list.
    #
    # In order to get all of 2, 3, and 4 together, we can simply do the
    # operations described in steps 3 & 4 and add the results together.  Sure,
    # we'll get two copies of all "type 2" block maps, but #uniq is good at
    # fixing that.
    #
    @differences ||= begin
      diff_maps = ((flat_origin_blocklist - flat_snapshot_blocklist) +
                   (flat_snapshot_blocklist - flat_origin_blocklist)
                  ).uniq

      # At this point, we're off to a good start -- we've got the mappings
      # that are different.  But we're not actually interested in the
      # mappings themselves -- all we want is "the list of LV blocks which
      # are different" (we'll translate LV blocks into byte ranges next).
      #
      changed_blocks = diff_maps.map { |m| m[0] }.uniq

      # Block-to-byte-range is pretty trivial, and we're done!
      changed_blocks.map do |b|
        ((b*chunk_size)..(((b+1)*chunk_size)-1))
      end

      # There is one optimisation we could make here that we haven't --
      # coalescing adjacent byte ranges into single larger ranges.  I haven't
      # done it for two reasons: Firstly, I don't have any idea how much of a
      # real-world benefit it would be, and secondly, I couldn't work out how
      # to do it elegantly.  So I punted.
    end
  end

  def origin
    @origin ||= vgcfg.logical_volumes[@lv].origin
  end

  private
  def vgcfg
    @vgcfg ||= LVM::VGConfig.new(@vg)
  end

  def flat_origin_blocklist
    @flat_origin_blocklist ||= flatten_blocklist(origin_blocklist)
  end

  def flat_snapshot_blocklist
    @flat_snapshot_blocklist ||= flatten_blocklist(snapshot_blocklist)
  end

  def origin_blocklist
    @origin_blocklist ||= vg_block_dump[@vgcfg.logical_volumes[origin].device_id]
  end

  def snapshot_blocklist
    @snapshot_blocklist ||= vg_block_dump[@vgcfg.logical_volumes[@lv].device_id]
  end

  def thin_pool_name
    @thin_pool_name ||= vgcfg.logical_volumes[@lv].thin_pool
  end

  def thin_pool
    @thin_pool ||= vgcfg.logical_volumes[thin_pool_name]
  end

  def chunk_size
    @chunk_size ||= thin_pool.chunk_size
  end

  # Take a hash of <block-or-range> => <block-or-range> elements and turn
  # it into an array of [block, block] pairs -- any <range> => <range>
  # elements get expanded out into their constituent <block> => <block>
  # parts.
  #
  def flatten_blocklist(bl)
    bl.to_a.map do |elem|
      # Ranges are *hard*, let's go shopping
      if elem[0].is_a? Range
        lv_blocks = elem[0].to_a
        data_blocks = elem[1].to_a

        # This will now produce an array of two-element arrays, which
        # will itself be inside the top-level array that we're mapping.
        # A flatten(1) at the end will take care of that problem,
        # though.
        lv_blocks.inject([]) { |a, v| a << [v, data_blocks[a.length]] }
      elsif elem[0].is_a? Fixnum
        # We wrap the [lv, data] pair that is `elem` into another array,
        # so that the coming #flatten call doesn't de-array our matched
        # pair
        [elem]
      else
        raise ArgumentError,
              "CAN'T HAPPEN: Unknown key type (#{elem.class}) found in blocklist"
      end
    end.flatten(1)
  end

  def vg_block_dump
    @vg_block_dump ||= begin
      doc = REXML::Document.new(`thin_dump /dev/mapper/#{@vg.gsub('-', '--')}-#{thin_pool_name.gsub('-','--')}_tmeta`)

      doc.elements['superblock'].inject({}) do |h, dev|
        next h unless dev.node_type == :element

        maps = dev.elements[''].inject({}) do |h2, r|
          next h2 unless r.node_type == :element

          if r.name == 'single_mapping'
            h2[r.attribute('origin_block').value.to_i] = r.attribute('data_block').value.to_i
          else
            len = r.attribute('length').value.to_i
            ori = r.attribute('origin_begin').value.to_i
            dat = r.attribute('data_begin').value.to_i
            h2[(dat..dat+len-1)] = (ori..ori+len-1)
          end

          h2
        end

        h[dev.attribute('dev_id').value.to_i] = maps
        h
      end
    end
  end
end
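The four-case analysis in the comments above boils down to a symmetric difference of the two flattened block lists. A small self-contained illustration of that core step, using made-up block maps and an assumed chunk size rather than real thin_dump output:

# Flattened [lv_block, data_block] pairs -- hypothetical values.
origin   = [[0, 10], [1, 11], [2, 12]]   # LV block 3 was discarded from the origin
snapshot = [[0, 10], [1, 99], [3, 13]]   # LV block 2 was first written after the snapshot

diff_maps = ((origin - snapshot) + (snapshot - origin)).uniq
# => [[1, 11], [2, 12], [1, 99], [3, 13]]  -- cases 2, 3 and 4; the identical case-1 mapping drops out

changed_blocks = diff_maps.map { |m| m[0] }.uniq
# => [1, 2, 3]

chunk_size = 65_536  # assumed 64 KiB pool chunk size, purely for illustration
changed_blocks.map { |b| ((b * chunk_size)..(((b + 1) * chunk_size) - 1)) }
# => [65536..131071, 131072..196607, 196608..262143]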
data/lib/lvm/vg_config.rb
ADDED
@@ -0,0 +1,72 @@
require 'tempfile'
require 'open3'
require 'treetop'
require File.expand_path('../../vgcfgbackup', __FILE__)

Treetop.load(File.expand_path('../../vgcfgbackup.treetop', __FILE__))

require 'lvm/lv_config'
require 'lvm/pv_config'
require 'lvm/snapshot'
require 'lvm/thin_snapshot'

module LVM; end

class LVM::VGConfig
  def initialize(vg_name, opts = {})
    @vgcfgbackup_cmd = opts[:vgcfgbackup_command] || 'vgcfgbackup'
    @vg_name = vg_name
    @parser = VgCfgBackupParser.new
    @root = @parser.parse(vgcfgbackup_output)
    if @root.nil?
      raise RuntimeError,
            "Cannot parse vgcfgbackup output: #{@parser.failure_reason}"
    end
  end

  def version
    @version ||= @root.variable_value('version')
  end

  def description
    @description ||= @root.variable_value('description')
  end

  def uuid
    @uuid ||= volume_group.variable_value('id')
  end

  def volume_group
    @volume_group ||= @root.groups[@vg_name]
  end

  def physical_volumes
    @physical_volumes ||= volume_group.groups['physical_volumes'].groups.to_a.inject({}) { |h,v| h[v[0]] = LVM::PVConfig.new(v[1]); h }
  end

  def logical_volumes
    @logical_volumes ||= volume_group.groups['logical_volumes'].groups.to_a.inject({}) { |h,v| h[v[0]] = LVM::LVConfig.new(v[1], v[0], self); h }
  end

  private
  def vgcfgbackup_output
    @vgcfgbackup_output ||= begin
      Tempfile.open('vg_config') do |tmpf|
        cmd = "#{@vgcfgbackup_cmd} -f #{tmpf.path} #{@vg_name}"
        Open3.popen3(cmd) do |stdin_fd, stdout_fd, stderr_fd, thr|
          stdin_fd.close
          stdout = stdout_fd.read
          stderr = stderr_fd.read
          exit_status = thr.value

          if exit_status != 0
            raise RuntimeError,
                  "Failed to run vgcfgbackup: #{stdout}\n#{stderr}"
          end
        end

        File.read(tmpf.path)
      end
    end
  end
end
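A brief sketch of how LVM::VGConfig ties the pieces together. The VG name is hypothetical, and constructing the object shells out to vgcfgbackup, so it needs the LVM tools installed and enough privilege to run them:

require 'lvm'

vgcfg = LVM::VGConfig.new("vg0")

vgcfg.uuid                    # => the VG's id from the vgcfgbackup dump
vgcfg.physical_volumes.keys   # => e.g. ["pv0", "pv1"]
vgcfg.logical_volumes.each do |name, lv|
  puts "#{name}: thin=#{lv.thin?} snapshot=#{lv.snapshot?}"
end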