vcdiff.rb 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Gem dependencies are declared in vcdiff.rb.gemspec; the gemspec directive below refers to that file
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Adam Prescott
2
+
3
+ (MIT License)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,73 @@
1
+ # VCDIFF
2
+
3
+ A pure-Ruby implementation of a VCDIFF encoder/decoder. Aims to provide similar
4
+ functionality to Google's [open-vcdiff](https://code.google.com/p/open-vcdiff)
5
+ (which the [vcdiff](https://github.com/romanbsd/vcdiff) gem wraps), but without
6
+ the C.
7
+
8
+ Some important notes and to-be-implemented things:
9
+
10
+ * Encoding isn't implemented yet, although the plan is to use the
11
+ [`bentley_mcilroy`](https://github.com/aprescott/bentley_mcilroy) gem,
12
+ following the same strategy as open-vcdiff. There is a question of what block
13
+ size to use for windows when finding common substrings.
14
+ * The decoder can't handle custom code tables or any sort of compression flags.
15
+ Compression is probably a won't-fix on account of there being no compressor
16
+ ID standards and the RFC for VCDIFF doesn't specify one. Custom code table
17
+ support is desirable so every VCDIFF encoding is supported when decoding.
18
+ * The decoder doesn't handle any window where the `VCD_TARGET` bit is set in
19
+ the window indicator (`Win_indicator`). As with custom code tables, it would
20
+ be good to have this. It's currently omitted for simplicity.
21
+
22
+ Further reading:
23
+
24
+ * [RFC3284](http://tools.ietf.org/html/rfc3284#section-7) — The VCDIFF Generic Differencing and Compression Data Format
25
+
26
+ # Installation
27
+
28
+ Add this line to your application's Gemfile:
29
+
30
+ ```
31
+ gem "vcdiff.rb"
32
+ ```
33
+
34
+ And then execute:
35
+
36
+ ```bash
37
+ $ bundle install
38
+ ```
39
+
40
+ Or install it yourself as:
41
+
42
+ ```bash
43
+ $ gem install vcdiff.rb
44
+ ```
45
+
46
+ # Usage
47
+
48
+ ## Encoding
49
+
50
+ Not yet implemented.
51
+
52
+ ## Decoding
53
+
54
+ ```ruby
55
+ decoder = VCDIFF::Decoder.new("path/to/dictionary_source")
56
+ original_target = decoder.decode("path/to/delta_file")
57
+ ```
58
+
59
+ # Contributing
60
+
61
+ 1. Fork it
62
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
63
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
64
+ 4. Push to the branch (`git push origin my-new-feature`)
65
+ 5. Create new Pull Request
66
+
67
+ The issue tracker is [on GitHub](https://github.com/aprescott/vcdiff.rb/issues).
68
+ If you find any bugs, just open an issue.
69
+
70
+ # License
71
+
72
+ Copyright (c) Adam Prescott, released under the MIT license. See the license file.
73
+ Any contributions will be assumed to be under the same terms.
@@ -0,0 +1,65 @@
1
module VCDIFF
  # Default code table as defined in RFC 3284 (5.6)
  #
  #         TYPE      SIZE     MODE    TYPE     SIZE     MODE     INDEX
  #        ---------------------------------------------------------------
  #     1.  RUN         0        0     NOOP       0        0        0
  #     2.  ADD    0, [1,17]     0     NOOP       0        0      [1,18]
  #     3.  COPY   0, [4,18]     0     NOOP       0        0     [19,34]
  #     4.  COPY   0, [4,18]     1     NOOP       0        0     [35,50]
  #     5.  COPY   0, [4,18]     2     NOOP       0        0     [51,66]
  #     6.  COPY   0, [4,18]     3     NOOP       0        0     [67,82]
  #     7.  COPY   0, [4,18]     4     NOOP       0        0     [83,98]
  #     8.  COPY   0, [4,18]     5     NOOP       0        0     [99,114]
  #     9.  COPY   0, [4,18]     6     NOOP       0        0    [115,130]
  #    10.  COPY   0, [4,18]     7     NOOP       0        0    [131,146]
  #    11.  COPY   0, [4,18]     8     NOOP       0        0    [147,162]
  #    12.  ADD       [1,4]      0     COPY     [4,6]      0    [163,174]
  #    13.  ADD       [1,4]      0     COPY     [4,6]      1    [175,186]
  #    14.  ADD       [1,4]      0     COPY     [4,6]      2    [187,198]
  #    15.  ADD       [1,4]      0     COPY     [4,6]      3    [199,210]
  #    16.  ADD       [1,4]      0     COPY     [4,6]      4    [211,222]
  #    17.  ADD       [1,4]      0     COPY     [4,6]      5    [223,234]
  #    18.  ADD       [1,4]      0     COPY       4        6    [235,238]
  #    19.  ADD       [1,4]      0     COPY       4        7    [239,242]
  #    20.  ADD       [1,4]      0     COPY       4        8    [243,246]
  #    21.  COPY        4      [0,8]   ADD        1        0    [247,255]
  #        ---------------------------------------------------------------
  #
  # Each entry of DEFAULT_TABLE is a six-element array
  # [type1, size1, mode1, type2, size2, mode2]. A size of 0 on a non-NOOP
  # instruction means the real size is stored separately in the
  # instruction stream.
  class CodeTable
    NOOP, ADD, RUN, COPY = 0, 1, 2, 3

    entries = []

    # row 1: a single RUN whose size is always read from the stream
    entries << [RUN, 0, 0, NOOP, 0, 0]

    # row 2: single ADD, size 0 (read from stream) or 1..17
    (0..17).each do |size|
      entries << [ADD, size, 0, NOOP, 0, 0]
    end

    # rows 3-11: single COPY for each of the 9 address modes,
    # size 0 (read from stream) or 4..18
    (0..8).each do |mode|
      ([0] + (4..18).to_a).each do |size|
        entries << [COPY, size, mode, NOOP, 0, 0]
      end
    end

    # rows 12-17: ADD followed by COPY, for modes 0..5
    (0..5).each do |mode|
      (1..4).each do |add_size|
        (4..6).each do |copy_size|
          entries << [ADD, add_size, 0, COPY, copy_size, mode]
        end
      end
    end

    # rows 18-20: ADD followed by a COPY of size 4, for modes 6..8
    (6..8).each do |mode|
      (1..4).each do |add_size|
        entries << [ADD, add_size, 0, COPY, 4, mode]
      end
    end

    # row 21: COPY of size 4 followed by an ADD of size 1
    (0..8).each do |mode|
      entries << [COPY, 4, mode, ADD, 1, 0]
    end

    # The table is shared by every decoder, so freeze it (and each entry)
    # to guard against accidental modification.
    DEFAULT_TABLE = entries.each { |entry| entry.freeze }.freeze
  end
end
@@ -0,0 +1,92 @@
1
+ require "bindata"
2
+
3
module VCDIFF
  # As described in RFC 3284 (http://tools.ietf.org/html/rfc3284)
  # unsigned integers are treated as a number in base 128.
  # Each digit in this representation is encoded in the lower
  # 7 bits of a byte. Runs of bytes b_1, b_2, b_3, ..., b_n
  # for one integer have the most significant bit set to 1
  # for each b_i, i = 1, ..., n-1, and set to 0 for b_n.
  #
  # So 123456789 encodes to four 7-bit digits with values
  # 58, 111, 26, 21:
  #
  #   +-------------------------------------------+
  #   | 10111010 | 11101111 | 10011010 | 00010101 |
  #   +-------------------------------------------+
  #     MSB+58     MSB+111    MSB+26     0+21
  #
  class VCDIFFInt < BinData::BasePrimitive
    # Returns the lowest multiple of m
    # greater than or equal to n.
    #
    # Defined ahead of the `private` keyword below: `private` has no
    # effect on `def self.` methods, so this has always been public.
    def self.next_multiple(n, m)
      n + (m - n % m) % m
    end

    # Encodes a non-negative Integer as a binary string of base-128
    # digits, most significant digit first.
    #
    # Raises ArgumentError for negative values: the format is unsigned,
    # and right-shifting a negative number never reaches zero, so the
    # encoding loop would otherwise never terminate.
    def value_to_binary_string(value)
      raise ArgumentError, "VCDIFF integers are unsigned, got #{value}" if value < 0

      bytes = []

      loop do
        # get the value of the lowest 7 bits
        digit = value & 0b01111111
        value >>= 7

        # digits are produced least significant first; every byte except
        # that final (least significant) one carries the continuation bit
        digit |= 0b10000000 unless bytes.empty?

        bytes.unshift(digit)

        break if value == 0
      end

      bytes.pack("C*")
    end

    # Reads bytes from io up to and including the first byte whose most
    # significant bit is 0, and returns the Integer they encode.
    def read_and_return_value(io)
      value = 0

      loop do
        byte = next_byte(io)

        # shift the digits read so far up by one base-128 place and
        # append the new digit
        value = (value << 7) | (byte & 0b01111111)

        break if byte[7] == 0
      end

      value
    end

    def sensible_default
      0
    end

    # Returns the encoded bytes of this integer as a string of "0" and
    # "1" characters, 8 characters per byte.
    #
    # TODO: a non-awful method name and non-awful implementation
    def value_to_zero_one_string
      to_binary_s.unpack("C*").map { |e| e.to_s(2).rjust(8, "0") }.join("")
    end

    # Gives the VCDIFF integer as a regular Ruby Integer
    def to_i
      snapshot
    end

    private

    # next byte as a fixnum
    def next_byte(io)
      io.readbytes(1).unpack("C")[0]
    end
  end
end
@@ -0,0 +1,90 @@
1
+ require "bindata"
2
+ require "vcdiff/integer"
3
+
4
module VCDIFF
  # The fixed file header plus the optional fields announced by the
  # header indicator byte.
  class VCDIFFHeader < BinData::Record
    endian :big

    # the four magic bytes: "VCD\0" with the upper bit turned on for "V", "C", "D"
    uint8 :header_v,    initial_value: 0xD6, assert: 0xD6
    uint8 :header_c,    initial_value: 0xC3, assert: 0xC3
    uint8 :header_d,    initial_value: 0xC4, assert: 0xC4
    uint8 :header_zero, initial_value: 0x00, assert: 0x00

    uint8      :header_indicator, initial_value: 0
    uint8      :secondary_compressor_id, onlyif: :secondary_compressor?
    vcdiff_int :custom_codetable_length, onlyif: :custom_codetable?
    array      :code_table_data, type: :uint8, onlyif: :custom_codetable?, initial_length: :custom_codetable_length

    # true when bit 0 of the header indicator is set
    def secondary_compressor?
      !header_indicator[0].zero?
    end

    # true when bit 1 of the header indicator is set
    def custom_codetable?
      !header_indicator[1].zero?
    end
  end

  # One window's delta encoding: the length fields followed by the three
  # data sections they describe.
  class VCDIFFDeltaEncoding < BinData::Record
    endian :big

    vcdiff_int :bytes_remaining        # bytes remaining for the delta encoding
    vcdiff_int :target_length          # the size of the decoded target file
    uint8      :delta_indicator, initial_value: 0
    vcdiff_int :add_run_data_length
    vcdiff_int :instructions_length
    vcdiff_int :copy_addresses_length

    array :add_run_data,      type: :uint8, initial_length: :add_run_data_length
    array :instructions,      type: :uint8, initial_length: :instructions_length
    array :copy_address_data, type: :uint8, initial_length: :copy_addresses_length

    # VCD_DATACOMP bit value, for unmatched ADD and RUN data
    def data_compressed?
      !delta_indicator[0].zero?
    end

    # VCD_INSTCOMP bit value, for the delta instructions and accompanying
    # sizes
    def instructions_compressed?
      !delta_indicator[1].zero?
    end

    # VCD_ADDRCOMP bit value, for the addresses for the COPY instructions
    def addresses_compressed?
      !delta_indicator[2].zero?
    end
  end

  # A single window: the indicator byte, the optional source segment
  # description, and the delta encoding.
  class VCDIFFWindow < BinData::Record
    endian :big

    # bits 0 and 1 of the indicator must not both be set
    uint8      :window_indicator, initial_value: 0, assert: lambda { value[0] != 1 || value[1] != 1 }
    vcdiff_int :source_data_length,   onlyif: lambda { !compressed_only? }
    vcdiff_int :source_data_position, onlyif: lambda { !compressed_only? }
    vcdiff_delta_encoding :delta_encoding

    # Returns true if VCD_SOURCE is set
    def source_data?
      !window_indicator[0].zero?
    end

    # Returns true if VCD_TARGET is set
    def target_data?
      !window_indicator[1].zero?
    end

    # If VCD_SOURCE and VCD_TARGET are both 0, then the target file was
    # compressed by itself.
    def compressed_only?
      !(source_data? || target_data?)
    end
  end

  # A whole delta file: the header, then every remaining byte kept as one
  # undivided blob of window data.
  class DeltaFile < BinData::Record
    endian :big

    vcdiff_header :header
    rest :windows
  end
end
data/lib/vcdiff.rb ADDED
@@ -0,0 +1,209 @@
1
+ require "bentley_mcilroy"
2
+
3
+ require "vcdiff/integer"
4
+ require "vcdiff/records"
5
+ require "vcdiff/code_table"
6
+
7
module VCDIFF
  class Encoder
    #### Implement me ####
  end

  # Decodes VCDIFF delta files against a dictionary (source) file.
  #
  # Only the default code table and VCD_SOURCE windows without secondary
  # compression are supported; anything else raises NotImplementedError.
  class Decoder
    attr_accessor :dictionary

    # dictionary is the path of the source file the deltas were made against.
    def initialize(dictionary)
      # read as raw bytes: window positions and lengths are byte offsets
      @dictionary = File.open(dictionary, "rb") { |io| io.read }

      @s_near = 4
      @s_same = 3

      reset_address_caches
    end

    # Decodes a delta file using the dictionary given to the decoder.
    #
    # file is the path of the delta file. Returns the reconstructed target
    # as a binary (ASCII-8BIT) string.
    #
    # Raises NotImplementedError for unsupported features and
    # ArgumentError for malformed input.
    def decode(file)
      # block form so the handle is closed, binary mode so bytes are untouched
      delta_file = File.open(file, "rb") { |io| DeltaFile.read(io) }

      # checked first so that this more specific message is reachable
      if delta_file.header.custom_codetable?
        raise NotImplementedError, "Unable to handle a custom codetable"
      end

      if delta_file.header.header_indicator != 0
        raise NotImplementedError, "Header indicator of #{delta_file.header.header_indicator} can't be handled"
      end

      # there's no simple way to determine the number of windows, since the
      # count isn't given ahead of time, so we'll need to manually iterate
      # through the windows
      window_stream = StringIO.new(delta_file.windows)

      target_file = String.new # String.new with no arguments is ASCII-8BIT

      until window_stream.eof?
        # reads only one window's worth of bytes from the stream
        next_window = VCDIFFWindow.read(window_stream)

        if next_window.compressed_only? || next_window.target_data?
          raise NotImplementedError, "Can only handle VCD_SOURCE windows"
        end

        if next_window.delta_encoding.delta_indicator != 0
          raise NotImplementedError, "Delta indicator of #{next_window.delta_encoding.delta_indicator} can't be handled"
        end

        length = next_window.source_data_length.to_i
        position = next_window.source_data_position.to_i

        source_window = binary(@dictionary)[position, length]

        if source_window.nil? || source_window.bytesize < length
          raise ArgumentError, "Source segment (position #{position}, length #{length}) lies outside the dictionary"
        end

        target_file << process_delta_encoding(source_window, next_window.delta_encoding)
      end

      target_file
    end

    # takes a delta encoding and processes it against the source
    # window, returning the decoded target window as a binary string.
    #
    # this corresponds to section (6) in RFC3284, which outlines
    # processing the instructions, data and addresses arrays.
    #
    # delta_encoding must respond to #instructions, #add_run_data and
    # #copy_address_data, each returning something whose #to_a yields
    # byte values.
    def process_delta_encoding(source_window, delta_encoding)
      # unwrap into plain arrays of Integers so they can be consumed with #shift
      instructions = delta_encoding.instructions.to_a.map { |b| b.to_i }
      add_run_data = delta_encoding.add_run_data.to_a.map { |b| b.to_i }
      copy_address_data = delta_encoding.copy_address_data.to_a.map { |b| b.to_i }

      code_table = CodeTable::DEFAULT_TABLE

      # the address caches start out zeroed for every window
      reset_address_caches

      source = binary(source_window)

      # the final string for this window
      target_window = String.new

      until instructions.empty?
        # instructions is a sequence of tuples (index, [size1], [size2]),
        # where size1 and size2 existence depends on the instruction entry
        # which _index_ points to.
        index = instructions.shift

        # instruction pair looked up in the code table
        type1, size1, mode1, type2, size2, mode2 = code_table[index]

        # a table size of 0 means the real size follows in the stream
        size1 = read_int(instructions) if type1 != CodeTable::NOOP && size1 == 0
        size2 = read_int(instructions) if type2 != CodeTable::NOOP && size2 == 0

        # both halves of the pair are executed, in order
        [[type1, size1, mode1], [type2, size2, mode2]].each do |type, size, mode|
          execute_instruction(type, size, mode, index, source, target_window, add_run_data, copy_address_data)
        end
      end

      target_window
    end

    # shifts a VCDIFF integer off the given array of bytes, modifying
    # the array argument in-place.
    #
    # Raises ArgumentError if the array ends before the integer does.
    def read_int(array)
      # first index where the MSB = index-7 bit is 0
      zero_msb_index = array.index { |e| e.to_i[7] == 0 }

      raise ArgumentError, "Invalid file format, truncated integer" if zero_msb_index.nil?

      # each byte contributes its lower 7 bits, most significant digit first
      array.shift(zero_msb_index + 1).inject(0) { |value, byte| (value << 7) | (byte.to_i & 0b01111111) }
    end

    private

    # zeroes the "near" and "same" address caches
    def reset_address_caches
      @near_cache = Array.new(@s_near, 0)
      @same_cache = Array.new(@s_same * 256, 0)
      @next_slot = 0
    end

    # a copy of string tagged as raw bytes, so indexing is by byte
    def binary(string)
      string.dup.force_encoding(Encoding::BINARY)
    end

    # runs one half of an instruction pair, appending its output to target
    # and consuming from add_run_data / copy_address_data as needed.
    def execute_instruction(type, size, mode, index, source, target, add_run_data, copy_address_data)
      case type
      when CodeTable::NOOP
        nil
      when CodeTable::RUN
        if mode != 0
          warn "Warning: RUN found with mode #{mode} -- value will be ignored"
        end

        if size == 0
          raise ArgumentError, "File contains a RUN instruction of size 0, must be > 0"
        end

        byte = add_run_data.shift
        raise ArgumentError, "Invalid file format, RUN instruction has no data byte" if byte.nil?

        # repeat the single byte size times
        target << ([byte] * size).pack("C*")
      when CodeTable::ADD
        if mode != 0
          warn "Warning: ADD found with mode #{mode} -- value will be ignored"
        end

        if size == 0
          raise ArgumentError, "File contains an ADD instruction of size 0, must be > 0"
        end

        bytes = add_run_data.shift(size)

        if bytes.length < size
          raise ArgumentError, "Invalid file format, ADD of size #{size} has only #{bytes.length} data bytes"
        end

        target << bytes.pack("C*")
      when CodeTable::COPY
        # "here" is the current location in the combined address space:
        # the source segment followed by the target data decoded so far
        here = source.bytesize + target.bytesize

        addr = decode_address(mode, here, copy_address_data)

        if addr < 0 || addr >= here
          raise ArgumentError, "Invalid file format, COPY address #{addr} is outside [0, #{here})"
        end

        copy_bytes(addr, size, source, target)
        update_address_caches(addr)
      else
        raise ArgumentError, "Invalid file format, instruction of #{type} (found at index #{index}) doesn't exist"
      end
    end

    # decodes the address of a COPY instruction. from (5.3) of RFC3284:
    #
    #   VCD_SELF:   mode 0. The address was encoded by itself as an integer.
    #   VCD_HERE:   mode 1. The address was encoded as the integer value
    #               "here - addr".
    #   Near modes: modes [2,s_near+1]. The address was encoded as the
    #               integer value "addr - near[m-2]".
    #   Same modes: modes [s_near+2,s_near+s_same+1]. The address was
    #               encoded as a single byte b such that
    #               "addr == same[(m - (s_near+2))*256 + b]".
    def decode_address(mode, here, copy_address_data)
      case mode
      when 0 # VCD_SELF
        read_int(copy_address_data)
      when 1 # VCD_HERE
        here - read_int(copy_address_data)
      when 2..(@s_near + 1) # near modes
        @near_cache[mode - 2] + read_int(copy_address_data)
      when (@s_near + 2)..(@s_near + @s_same + 1) # same modes
        # address is encoded as a single byte
        b = copy_address_data.shift
        raise ArgumentError, "Invalid file format, missing address byte for COPY" if b.nil?

        @same_cache[(mode - (@s_near + 2)) * 256 + b]
      else
        raise ArgumentError, "invalid mode #{mode}"
      end
    end

    # appends size bytes starting at addr in the combined source + target
    # address space to target.
    def copy_bytes(addr, size, source, target)
      source_size = source.bytesize

      if addr + size <= source_size
        # entirely inside the source segment
        target << source[addr, size]
      else
        # the copy reads from the target, and may overlap the bytes it is
        # itself producing, so it has to proceed one byte at a time
        size.times do |offset|
          position = addr + offset

          byte = if position < source_size
            source.getbyte(position)
          else
            target.getbyte(position - source_size)
          end

          target << byte.chr
        end
      end
    end

    # records addr in the "near" and "same" caches
    def update_address_caches(addr)
      if @s_near > 0
        @near_cache[@next_slot] = addr
        @next_slot = (@next_slot + 1) % @s_near
      end

      if @s_same > 0
        @same_cache[addr % (@s_same * 256)] = addr
      end
    end
  end
end
data/rakefile ADDED
@@ -0,0 +1,11 @@
1
require "rake"
require "rspec/core/rake_task"

# `rake test` (also the default task) runs the files under test/ that
# match *_spec.rb through RSpec, with test/ on the load path.
RSpec::Core::RakeTask.new(:test) do |spec_task|
  spec_task.pattern       = "test/**/*_spec.rb"
  spec_task.rspec_opts    = "-I test --color --format nested"
  spec_task.fail_on_error = true
  spec_task.verbose       = false
end

task :default => :test
data/test/data/delta ADDED
Binary file
data/test/data/source ADDED
@@ -0,0 +1,16 @@
1
+ some header some header some header
2
+ some header some header some header
3
+ some header some header some header
4
+ some header some header some header
5
+ some header some header some header
6
+ some header some header some header
7
+
8
+ dynamic CONTENT
9
+
10
+ some footer some footer some footer
11
+ some footer some footer some footer
12
+ some footer some footer some footer
13
+ some footer some footer some footer
14
+ some footer some footer some footer
15
+ some footer some footer some footer
16
+
data/test/data/target ADDED
@@ -0,0 +1,16 @@
1
+ some header some header some header
2
+ some header some header some header
3
+ some header some header some header
4
+ some header some header some header
5
+ some header some header some header
6
+ some header some header some header
7
+
8
+ only the best dynamic content
9
+
10
+ some footer some footer some footer
11
+ some footer some footer some footer
12
+ some footer some footer some footer
13
+ some footer some footer some footer
14
+ some footer some footer some footer
15
+ some footer some footer some footer
16
+
@@ -0,0 +1,3 @@
1
+ require "tempfile"
2
+
3
+ require "vcdiff"
@@ -0,0 +1,9 @@
1
require "test_helper"

describe VCDIFF::CodeTable do
  describe "DEFAULT_TABLE" do
    it "has 256 entries" do
      # a bare `==` is not an expectation and can never fail the example
      VCDIFF::CodeTable::DEFAULT_TABLE.length.should == 256
    end
  end
end
@@ -0,0 +1,31 @@
1
require "test_helper"

describe VCDIFF::Decoder do
  subject { VCDIFF::Decoder.new("test/data/source") }

  # Writes a copy of test/data/delta whose header indicator byte
  # (offset 4) is replaced by value, yields the copy's path, and removes
  # the copy afterwards.
  def with_header_indicator(value)
    delta = Tempfile.new("header_indicator")
    delta.binmode

    content = File.open("test/data/delta", "rb") { |io| io.read }
    content.setbyte(4, value)

    delta.write(content)
    delta.flush

    yield delta.path
  ensure
    # close! both closes and unlinks the temporary file
    delta.close! if delta
  end

  describe "#decode" do
    it "can decode delta files, given the source, to derive the target" do
      expected = File.open("test/data/target", "rb") { |io| io.read }

      subject.decode("test/data/delta").should == expected
    end

    it "cannot handle a header indicator with the secondary compressor bit set" do
      with_header_indicator(0x01) do |path|
        expect { subject.decode(path) }.to raise_error(NotImplementedError)
      end
    end

    it "cannot handle a header indicator with the custom codetable bit set" do
      with_header_indicator(0x02) do |path|
        expect { subject.decode(path) }.to raise_error(NotImplementedError)
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
require "test_helper"

# Integer => the bits of its encoded form, written as a string of 0s and 1s
KNOWN_ENCODINGS = {
  266478  => "100100001010000101101110",
  488908  => "100111011110101101001100",
  1311959 => "110100001000100101010111",
  290936  => "100100011110000001111000",
  1306432 => "110011111101111001000000",
  1318485 => "110100001011110001010101",
  983071  => "101111001000000000011111",
  918966  => "101110001000101100110110",
  1119947 => "110001001010110101001011",
  1186056 => "110010001011001000001000"
}

describe VCDIFF::VCDIFFInt do
  it "converts between different representations" do
    KNOWN_ENCODINGS.each_pair do |expected, bits|
      wire_bytes = [bits].pack("B*")
      decoded = VCDIFF::VCDIFFInt.read(wire_bytes)

      decoded.snapshot.should == expected
      decoded.to_binary_s.should == wire_bytes
      decoded.to_i.should == decoded.snapshot
    end
  end
end
@@ -0,0 +1,33 @@
1
require "test_helper"

describe VCDIFF::VCDIFFHeader do
  it "requires a valid header" do
    # requires VCD\0 with the uppermost bits set to 1 for "V","C","D".
    # The valid input includes the header indicator byte so the whole
    # header can be read, and the expectation rejects *any* error: a
    # negative expectation on one specific error class would still pass
    # if the read failed for some other reason.
    expect { VCDIFF::VCDIFFHeader.read("\xD6\xC3\xC4\x00\x00") }.to_not raise_error
    expect { VCDIFF::VCDIFFHeader.read("\x01\x02\x03\x00") }.to raise_error(BinData::ValidityError)
  end

  describe "#secondary_compressor?" do
    it "is true if the header_indicator has the appropriate bit set" do
      # VCD\0 + 0b01 for secondary compressor, plus a bunch of zeroes to have enough bytes to read
      header = VCDIFF::VCDIFFHeader.read("\xD6\xC3\xC4\x00\x01\x00\x00\x00\x00\x00\x00")
      header.secondary_compressor?.should be_true
      header.header_indicator[0].should == 1
    end
  end

  describe "#custom_codetable?" do
    it "is true if the header_indicator has the appropriate bit set" do
      # VCD\0 + 0b10 for custom code table, plus a bunch of zeroes to have enough bytes to read
      header = VCDIFF::VCDIFFHeader.read("\xD6\xC3\xC4\x00\x02\x00\x00\x00\x00\x00\x00")
      header.custom_codetable?.should be_true
      header.header_indicator[1].should == 1
    end
  end
end

describe VCDIFF::DeltaFile do
  it "requires a valid header" do
    # as above: a complete header, and no error of any kind is tolerated
    expect { VCDIFF::DeltaFile.read("\xD6\xC3\xC4\x00\x00") }.to_not raise_error
    expect { VCDIFF::DeltaFile.read("\x01\x02\x03\x00") }.to raise_error(BinData::ValidityError)
  end
end
data/vcdiff.rb.gemspec ADDED
@@ -0,0 +1,26 @@
1
Gem::Specification.new do |gem|
  gem.name        = "vcdiff.rb"
  gem.version     = "0.0.1"
  gem.authors     = ["Adam Prescott"]
  gem.email       = ["adam@aprescott.com"]
  gem.description = "Pure-Ruby VCDIFF encoder/decoder."
  gem.summary     = "Pure-Ruby encoder and decoder for the VCDIFF format."
  gem.homepage    = "https://github.com/aprescott/vcdiff.rb"
  gem.license     = "MIT"

  gem.files        = Dir["{lib/**/*,test/**/*,*.gemspec}"] + %w[rakefile LICENSE Gemfile README.md]
  gem.require_path = "lib"

  # needed by anything that uses the library
  gem.add_runtime_dependency "bindata", "~> 1.6.0"
  gem.add_runtime_dependency "bentley_mcilroy", ">= 0"

  # only needed to run the test suite, so these must not be installed
  # for users of the gem
  gem.add_development_dependency "rake", "~> 10.0.0"
  gem.add_development_dependency "rspec", "~> 2.5"
end
metadata ADDED
@@ -0,0 +1,126 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: vcdiff.rb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Adam Prescott
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-09-11 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bindata
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 1.6.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 1.6.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: bentley_mcilroy
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 10.0.0
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 10.0.0
62
+ - !ruby/object:Gem::Dependency
63
+ name: rspec
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '2.5'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '2.5'
78
+ description: Pure-Ruby VCDIFF encoder/decoder.
79
+ email:
80
+ - adam@aprescott.com
81
+ executables: []
82
+ extensions: []
83
+ extra_rdoc_files: []
84
+ files:
85
+ - lib/vcdiff.rb
86
+ - lib/vcdiff/integer.rb
87
+ - lib/vcdiff/records.rb
88
+ - lib/vcdiff/code_table.rb
89
+ - test/test_helper.rb
90
+ - test/vcdiff_code_table_spec.rb
91
+ - test/vcdiff_decoder_spec.rb
92
+ - test/vcdiff_integer_spec.rb
93
+ - test/data/delta
94
+ - test/data/target
95
+ - test/data/source
96
+ - test/vcdiff_records_spec.rb
97
+ - vcdiff.rb.gemspec
98
+ - rakefile
99
+ - LICENSE
100
+ - Gemfile
101
+ - README.md
102
+ homepage: https://github.com/aprescott/vcdiff.rb
103
+ licenses: []
104
+ post_install_message:
105
+ rdoc_options: []
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ none: false
110
+ requirements:
111
+ - - ! '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ required_rubygems_version: !ruby/object:Gem::Requirement
115
+ none: false
116
+ requirements:
117
+ - - ! '>='
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ requirements: []
121
+ rubyforge_project:
122
+ rubygems_version: 1.8.24
123
+ signing_key:
124
+ specification_version: 3
125
+ summary: Pure-Ruby encoder and decoder for the VCDIFF format.
126
+ test_files: []