lvmsync 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/lvm.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'lvm/helpers'
2
+ require 'lvm/thin_snapshot'
3
+ require 'lvm/snapshot'
4
+ require 'lvm/lv_config'
5
+ require 'lvm/vg_config'
@@ -0,0 +1,18 @@
1
module LVM; end

# Byte-order helpers for reading LVM snapshot metadata.
#
# Note the naming: htonq/ntohq only byte-swap on a *big-endian* host, so
# in practice they convert between host order and the (little-endian)
# layout the metadata is read in -- the names are kept for compatibility.
module LVM::Helpers
  # Are we on a big-endian system?  Needed for our htonq/ntohq methods.
  #
  # pack("s") emits a native-endian 16-bit value and pack("n") a
  # big-endian one; they are equal only on a big-endian host.
  def big_endian?
    # Memoise with defined?() rather than ||= -- the cached value is a
    # boolean, and ||= would silently re-run the check on every call
    # whenever the answer is false (i.e. on every little-endian host).
    return @bigendian if defined?(@bigendian)
    @bigendian = ([1].pack("s") == [1].pack("n"))
  end

  # Byte-swap a 64-bit integer on big-endian hosts; identity elsewhere.
  def htonq(val)
    # This won't work on a nUxi byte-order machine, but if you have one of
    # those, I'm guessing you've got bigger problems
    big_endian? ? ([val].pack("Q").reverse.unpack("Q").first) : val
  end

  # Inverse of #htonq (byte-swapping is its own inverse).
  def ntohq(val)
    htonq val
  end
end
@@ -0,0 +1,39 @@
1
module LVM; end

# Wraps the parsed config subtree for a single logical volume, exposing
# the handful of attributes lvmsync cares about.
class LVM::LVConfig
  attr_reader :name

  # tree  -- parsed config node for this LV.
  # name  -- the LV's name within the volume group.
  # vgcfg -- the enclosing LVM::VGConfig; needed for questions that
  #          require looking at the VG's other LVs (see #snapshot?).
  def initialize(tree, name, vgcfg)
    @root  = tree
    @name  = name
    @vgcfg = vgcfg
  end

  # True if this LV lives in a thin pool.
  def thin?
    segment_value('type') == 'thin'
  end

  # True if this LV is a snapshot.  Thin LVs record their origin
  # directly; old-style snapshots are detected by scanning the VG for an
  # LV whose COW store is this one.
  def snapshot?
    if thin?
      !origin.nil?
    else
      @vgcfg.logical_volumes.values.any? { |lv| lv.cow_store == name }
    end
  end

  def thin_pool
    segment_value('thin_pool')
  end

  def device_id
    segment_value('device_id')
  end

  def origin
    segment_value('origin')
  end

  def cow_store
    segment_value('cow_store')
  end

  # Chunk size in bytes (the config stores it in 512-byte sectors).
  def chunk_size
    segment_value('chunk_size') * 512
  end

  private

  # Fetch a variable from this LV's first segment definition.
  def segment_value(key)
    @root.groups['segment1'].variable_value(key)
  end
end
@@ -0,0 +1,7 @@
1
module LVM; end

# Minimal wrapper around the parsed config subtree for a physical
# volume.  It currently just retains the tree; no PV attributes are
# exposed yet.
class LVM::PVConfig
  # tree -- parsed config node for this PV.
  def initialize(tree)
    @root = tree
  end
end
@@ -0,0 +1,113 @@
1
+ require 'rexml/document'
2
+ require 'lvm/helpers'
3
+
4
module LVM; end

# Reads the on-disk exception-store metadata of a classic (non-thin)
# LVM snapshot to determine which parts of the origin LV have changed
# since the snapshot was taken.
class LVM::Snapshot
  include LVM::Helpers

  # vg -- volume group name; lv -- name of the snapshot LV.
  def initialize(vg, lv)
    @vg = vg
    @lv = lv
  end

  # Return an array of ranges which are the bytes which are different
  # between the origin and the snapshot.
  def differences
    @differences ||= begin
      # For a regular, old-skool snapshot, getting the differences is
      # pretty trivial -- just read through the snapshot metadata, and
      # the list of changed blocks is right there.
      #
      diff_block_list = []

      File.open(metadata_device, 'r') do |metafd|
        in_progress = true

        # The first chunk of the metadata LV is the header, which we
        # don't care for at all
        metafd.seek chunk_size, IO::SEEK_SET

        while in_progress
          # The snapshot on-disk format is a stream of <blocklist>, <blockdata>
          # sets; within each <blocklist>, it's network-byte-order 64-bit block
          # IDs -- the first is the location (chunk_size * offset) in the origin
          # LV that the data has been changed, the second is the location (again,
          # chunk_size * offset) in the metadata LV where the changed data is
          # being stored.
          #
          # Each blocklist chunk holds chunk_size/16 entries (two 8-byte
          # IDs per entry).
          (chunk_size / 16).times do
            # NOTE(review): read(16) returns nil (or a short string) at
            # EOF; this assumes the metadata always contains a
            # zero-terminator entry before the device ends -- confirm.
            origin_offset, snap_offset = metafd.read(16).unpack("QQ")
            # ntohq only swaps on big-endian hosts, so the entries are
            # effectively being treated as little-endian on disk.
            origin_offset = ntohq(origin_offset)
            snap_offset = ntohq(snap_offset)

            # A snapshot offset of 0 would point back to the metadata
            # device header, so that's clearly invalid -- hence it's the
            # "no more blocks" indicator.
            if snap_offset == 0
              in_progress = false
              break
            end

            diff_block_list << origin_offset
          end

          # We've read through a set of origin => data mappings; now we need
          # to take a giant leap over the data blocks that follow it.
          # (chunk_size/16 entries, each referencing one chunk_size-byte
          # data chunk.)
          metafd.seek chunk_size * chunk_size / 16, IO::SEEK_CUR
        end
      end

      # Block-to-byte-range is pretty trivial, and we're done!
      diff_block_list.map do |b|
        ((b*chunk_size)..(((b+1)*chunk_size)-1))
      end

      # There is one optimisation we could make here that we haven't --
      # coalescing adjacent byte ranges into single larger ranges. I haven't
      # done it for two reasons: Firstly, I don't have any idea how much of a
      # real-world benefit it would be, and secondly, I couldn't work out how
      # to do it elegantly. So I punted.
    end
  end

  # Name of the origin LV this snapshot was taken from, found by
  # locating the LV in the VG config whose COW store is this LV.
  def origin
    # Man old-skool snapshots are weird
    vgcfg.logical_volumes.values.find { |lv| lv.cow_store == @lv }.origin
  end

  private
  # Memoised parsed VG configuration (shells out to vgcfgbackup).
  def vgcfg
    @vgcfg ||= LVM::VGConfig.new(@vg)
  end

  # Snapshot chunk size in bytes, taken from the metadata header.
  def chunk_size
    @chunk_size ||= metadata_header[:chunk_size]
  end

  # Parse and validate the 16-byte snapshot metadata header:
  # magic, valid flag, version, chunk size (in 512-byte sectors).
  #
  # Raises RuntimeError if the magic number, valid flag, or version is
  # not what we expect.
  def metadata_header
    @metadata_header ||= begin
      # NOTE(review): "V" unpacks little-endian 32-bit values; assumes
      # the header is little-endian on disk regardless of host -- confirm
      # on big-endian hardware.
      magic, valid, version, chunk_size = File.read(metadata_device, 16).unpack("VVVV")

      # 0x70416e53 is "SnAp" read as a little-endian 32-bit value.
      unless magic == 0x70416e53
        raise RuntimeError,
          "#{@vg}/#{@lv}: Invalid snapshot magic number"
      end

      unless valid == 1
        raise RuntimeError,
          "#{@vg}/#{@lv}: Snapshot is marked as invalid"
      end

      unless version == 1
        raise RuntimeError,
          "#{@vg}/#{@lv}: Incompatible snapshot metadata version"
      end

      { :chunk_size => chunk_size * 512 }
    end
  end

  # Path of the device-mapper node holding the snapshot's COW store.
  def metadata_device
    "/dev/mapper/#{@vg}-#{@lv}-cow"
  end
end
@@ -0,0 +1,186 @@
1
+ require 'rexml/document'
2
+
3
module LVM; end

# Computes the changed-block list between a thin LV and a thin snapshot
# of it, by comparing their block mappings as reported by `thin_dump`.
class LVM::ThinSnapshot
  # vg -- volume group name; lv -- name of the thin snapshot LV.
  def initialize(vg, lv)
    @vg = vg
    @lv = lv
  end

  # Return an array of ranges which are the bytes which are different
  # between the origin and the snapshot.
  def differences
    # This is a relatively complicated multi-step process. We have two
    # piles of <lv block> => <pool block> mappings, one for the "origin"
    # (the LV that's changing) and one for the "snapshot" (the LV that
    # represents some past point-in-time). What we need to get out at the
    # end is an array of (<first byte>..<last byte>) ranges which cover
    # the parts of the volumes which are different (or that at least point
    # to different blocks within the data pool).
    #
    # This is going to take a few steps to accomplish.
    #
    # First, we translate each of the hashes into a list of two-element
    # arrays, expanding out ranges, because it means we don't have to
    # handle ranges differently in later steps.
    #
    # Next, we work out which mappings are "different" in all the possible
    # ways. There's four cases we might come across:
    #
    # 1. Both origin and snapshot map the same LV block to the same data
    #    block. Not a difference; discarded.
    #
    # 2. Both origin and snapshot map the same LV block, but they point
    #    to different data blocks. The easiest sort of difference.
    #
    # 3. The origin maps a particular LV block to a data block, but the
    #    snapshot doesn't have any mapping for that LV block -- the
    #    common "first write after snapshot" case. Caught (along with
    #    case 2) by removing from the origin block map every mapping
    #    that appears identically in the snapshot map.
    #
    # 4. A block in the snapshot is mapped while the corresponding origin
    #    block is *not* -- this happens when an origin block is
    #    discarded. Caught (along with case 2) by the reverse operation:
    #    removing from the snapshot map everything that appears
    #    identically in the origin map.
    #
    # Adding the results of 3 and 4 together gets us all of cases 2, 3,
    # and 4; #uniq disposes of the duplicated "case 2" entries.
    #
    @differences ||= begin
      diff_maps = ((flat_origin_blocklist - flat_snapshot_blocklist) +
                   (flat_snapshot_blocklist - flat_origin_blocklist)
                  ).uniq

      # We only care about *which* LV blocks differ, not the mappings
      # themselves.
      changed_blocks = diff_maps.map { |m| m[0] }.uniq

      # Block-to-byte-range is pretty trivial, and we're done!
      changed_blocks.map do |b|
        ((b*chunk_size)..(((b+1)*chunk_size)-1))
      end

      # There is one optimisation we could make here that we haven't --
      # coalescing adjacent byte ranges into single larger ranges. I haven't
      # done it for two reasons: Firstly, I don't have any idea how much of a
      # real-world benefit it would be, and secondly, I couldn't work out how
      # to do it elegantly. So I punted.
    end
  end

  # Name of the thin origin LV this snapshot was taken from.
  def origin
    @origin ||= vgcfg.logical_volumes[@lv].origin
  end

  private
  # Memoised parsed VG configuration (shells out to vgcfgbackup).
  def vgcfg
    @vgcfg ||= LVM::VGConfig.new(@vg)
  end

  def flat_origin_blocklist
    @flat_origin_blocklist ||= flatten_blocklist(origin_blocklist)
  end

  def flat_snapshot_blocklist
    @flat_snapshot_blocklist ||= flatten_blocklist(snapshot_blocklist)
  end

  # <lv block> => <data block> map for the origin LV.
  #
  # Use the vgcfg accessor, not the @vgcfg ivar: the ivar is only set
  # once the memoising method has run, so reading it directly here
  # worked only by evaluation-order accident (vg_block_dump happened to
  # populate it first).
  def origin_blocklist
    @origin_blocklist ||= vg_block_dump[vgcfg.logical_volumes[origin].device_id]
  end

  # <lv block> => <data block> map for the snapshot LV.
  def snapshot_blocklist
    @snapshot_blocklist ||= vg_block_dump[vgcfg.logical_volumes[@lv].device_id]
  end

  def thin_pool_name
    @thin_pool_name ||= vgcfg.logical_volumes[@lv].thin_pool
  end

  def thin_pool
    @thin_pool ||= vgcfg.logical_volumes[thin_pool_name]
  end

  # Pool chunk size in bytes.
  def chunk_size
    @chunk_size ||= thin_pool.chunk_size
  end

  # Take a hash of <block-or-range> => <block-or-range> elements and turn
  # it into an array of [block, block] pairs -- any <range> => <range>
  # elements get expanded out into their constituent <block> => <block>
  # parts.
  #
  def flatten_blocklist(bl)
    bl.to_a.map do |elem|
      if elem[0].is_a? Range
        # Pair up corresponding LV and data blocks; this yields an array
        # of two-element arrays nested inside the array we're mapping,
        # which the flatten(1) at the end takes care of.
        elem[0].to_a.zip(elem[1].to_a)
      elsif elem[0].is_a? Integer
        # (Integer, not Fixnum: Fixnum was removed in Ruby 3.2.)
        # Wrap the [lv, data] pair that is `elem` in another array so
        # the coming #flatten call doesn't de-array our matched pair.
        [elem]
      else
        raise ArgumentError,
          "CAN'T HAPPEN: Unknown key type (#{elem.class}) found in blocklist"
      end
    end.flatten(1)
  end

  # Parse `thin_dump` output for the pool's metadata device into a hash
  # of <device_id> => { <lv block or range> => <data block or range> }.
  def vg_block_dump
    @vg_block_dump ||= begin
      doc = REXML::Document.new(`thin_dump /dev/mapper/#{@vg.gsub('-', '--')}-#{thin_pool_name.gsub('-','--')}_tmeta`)

      doc.elements['superblock'].inject({}) do |h, dev|
        next h unless dev.node_type == :element

        # Iterate the raw children of the <device> element; text nodes
        # are skipped by the node_type guard, mirroring the outer loop.
        # (Was `dev.elements['']`, which is not a valid lookup.)
        maps = dev.inject({}) do |h2, r|
          next h2 unless r.node_type == :element

          if r.name == 'single_mapping'
            h2[r.attribute('origin_block').value.to_i] = r.attribute('data_block').value.to_i
          else
            len = r.attribute('length').value.to_i
            ori = r.attribute('origin_begin').value.to_i
            dat = r.attribute('data_begin').value.to_i
            # Key on the origin (LV) blocks with the data blocks as the
            # value, matching the single_mapping case above and the
            # key/value orientation flatten_blocklist expects.  (The
            # original had these reversed for range mappings.)
            h2[(ori..ori+len-1)] = (dat..dat+len-1)
          end

          h2
        end

        h[dev.attribute('dev_id').value.to_i] = maps
        h
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
+ require 'tempfile'
2
+ require 'open3'
3
+ require 'treetop'
4
+ require File.expand_path('../../vgcfgbackup', __FILE__)
5
+
6
+ Treetop.load(File.expand_path('../../vgcfgbackup.treetop', __FILE__))
7
+
8
+ require 'lvm/lv_config'
9
+ require 'lvm/pv_config'
10
+ require 'lvm/snapshot'
11
+ require 'lvm/thin_snapshot'
12
+
13
module LVM; end

# Runs `vgcfgbackup` for a volume group and parses its output, exposing
# the volume group's LVs, PVs and metadata as objects.
class LVM::VGConfig
  # vg_name -- name of the volume group to inspect.
  # opts:
  #   :vgcfgbackup_command -- override the vgcfgbackup executable
  #                           (default: 'vgcfgbackup' from $PATH).
  #
  # Raises RuntimeError if vgcfgbackup fails or its output cannot be
  # parsed.
  def initialize(vg_name, opts = {})
    @vgcfgbackup_cmd = opts[:vgcfgbackup_command] || 'vgcfgbackup'
    @vg_name = vg_name
    @parser = VgCfgBackupParser.new
    @root = @parser.parse(vgcfgbackup_output)
    if @root.nil?
      raise RuntimeError,
        "Cannot parse vgcfgbackup output: #{@parser.failure_reason}"
    end
  end

  def version
    @version ||= @root.variable_value('version')
  end

  def description
    @description ||= @root.variable_value('description')
  end

  def uuid
    @uuid ||= volume_group.variable_value('id')
  end

  # The parsed config subtree for the volume group itself.
  def volume_group
    @volume_group ||= @root.groups[@vg_name]
  end

  # Hash of PV name => LVM::PVConfig.
  def physical_volumes
    @physical_volumes ||= volume_group.groups['physical_volumes'].groups.to_a.each_with_object({}) do |(name, tree), h|
      h[name] = LVM::PVConfig.new(tree)
    end
  end

  # Hash of LV name => LVM::LVConfig.
  def logical_volumes
    @logical_volumes ||= volume_group.groups['logical_volumes'].groups.to_a.each_with_object({}) do |(name, tree), h|
      h[name] = LVM::LVConfig.new(tree, name, self)
    end
  end

  private
  # Run vgcfgbackup, writing the VG config to a tempfile, and return the
  # config text.
  #
  # Raises RuntimeError (including the command's captured stdout and
  # stderr) if the command exits non-zero.
  def vgcfgbackup_output
    @vgcfgbackup_output ||= Tempfile.open('vg_config') do |tmpf|
      cmd = "#{@vgcfgbackup_cmd} -f #{tmpf.path} #{@vg_name}"
      Open3.popen3(cmd) do |stdin_fd, stdout_fd, stderr_fd, thr|
        stdin_fd.close
        stdout = stdout_fd.read
        stderr = stderr_fd.read
        exit_status = thr.value

        # Use Process::Status#success? -- comparing a Status directly to
        # an Integer is deprecated (Ruby 3.3 warns) and compares the raw
        # wait status rather than the exit code.
        unless exit_status.success?
          raise RuntimeError,
            "Failed to run vgcfgbackup: #{stdout}\n#{stderr}"
        end
      end

      # Tempfile.open returns the block's value, i.e. the config text.
      File.read(tmpf.path)
    end
  end
end