vcdiff.rb 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +73 -0
- data/lib/vcdiff/code_table.rb +65 -0
- data/lib/vcdiff/integer.rb +92 -0
- data/lib/vcdiff/records.rb +90 -0
- data/lib/vcdiff.rb +209 -0
- data/rakefile +11 -0
- data/test/data/delta +0 -0
- data/test/data/source +16 -0
- data/test/data/target +16 -0
- data/test/test_helper.rb +3 -0
- data/test/vcdiff_code_table_spec.rb +9 -0
- data/test/vcdiff_decoder_spec.rb +31 -0
- data/test/vcdiff_integer_spec.rb +28 -0
- data/test/vcdiff_records_spec.rb +33 -0
- data/vcdiff.rb.gemspec +26 -0
- metadata +126 -0
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Adam Prescott
|
2
|
+
|
3
|
+
(MIT License)
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
# VCDIFF
|
2
|
+
|
3
|
+
A pure-Ruby implementation of a VCDIFF encoder/decoder. Aims to provide similar
|
4
|
+
functionality to Google's [open-vcdiff](https://code.google.com/p/open-vcdiff)
|
5
|
+
(which the [vcdiff](https://github.com/romanbsd/vcdiff) gem wraps), but without
|
6
|
+
the C.
|
7
|
+
|
8
|
+
Some important notes and to-be-implemented things:
|
9
|
+
|
10
|
+
* Encoding isn't implemented yet, although the plan is to use the
|
11
|
+
[`bentley_mcilroy`](https://github.com/aprescott/bentley_mcilroy) gem,
|
12
|
+
following the same strategy as open-vcdiff. There is a question of what block
|
13
|
+
size to use for windows when finding common substrings.
|
14
|
+
* The decoder can't handle custom code tables or any sort of compression flags.
|
15
|
+
Compression is probably a won't-fix on account of there being no compressor
|
16
|
+
ID standards and the RFC for VCDIFF doesn't specify one. Custom code table
|
17
|
+
support is desirable so every VCDIFF encoding is supported when decoding.
|
18
|
+
* The decoder doesn't handle any window where the `VCD_TARGET` bit is set in
|
19
|
+
the window indicator (`Win_indicator`). As with custom code tables, it would
|
20
|
+
be good to have this. It's currently omitted for simplicity.
|
21
|
+
|
22
|
+
Further reading:
|
23
|
+
|
24
|
+
* [RFC3284](http://tools.ietf.org/html/rfc3284#section-7) — The VCDIFF Generic Differencing and Compression Data Format
|
25
|
+
|
26
|
+
# Installation
|
27
|
+
|
28
|
+
Add this line to your application's Gemfile:
|
29
|
+
|
30
|
+
```
|
31
|
+
gem "vcdiff.rb"
|
32
|
+
```
|
33
|
+
|
34
|
+
And then execute:
|
35
|
+
|
36
|
+
```bash
|
37
|
+
$ bundle install
|
38
|
+
```
|
39
|
+
|
40
|
+
Or install it yourself as:
|
41
|
+
|
42
|
+
```bash
|
43
|
+
$ gem install vcdiff.rb
|
44
|
+
```
|
45
|
+
|
46
|
+
# Usage
|
47
|
+
|
48
|
+
## Encoding
|
49
|
+
|
50
|
+
Not yet implemented.
|
51
|
+
|
52
|
+
## Decoding
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
decoder = VCDIFF::Decoder.new("path/to/dictionary_source")
|
56
|
+
original_target = decoder.decode("path/to/delta_file")
|
57
|
+
```
|
58
|
+
|
59
|
+
# Contributing
|
60
|
+
|
61
|
+
1. Fork it
|
62
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
63
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
64
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
65
|
+
5. Create new Pull Request
|
66
|
+
|
67
|
+
The issue tracker is [on GitHub](https://github.com/aprescott/vcdiff.rb/issues).
|
68
|
+
If you find any bugs, just open an issue.
|
69
|
+
|
70
|
+
# License
|
71
|
+
|
72
|
+
Copyright (c) Adam Prescott, released under the MIT license. See the license file.
|
73
|
+
Any contributions will be assumed to be under the same terms.
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module VCDIFF
|
2
|
+
# Default code table as defined in RFC 3284 (5.6)
|
3
|
+
#
|
4
|
+
# TYPE SIZE MODE TYPE SIZE MODE INDEX
|
5
|
+
# ---------------------------------------------------------------
|
6
|
+
# 1. RUN 0 0 NOOP 0 0 0
|
7
|
+
# 2. ADD 0, [1,17] 0 NOOP 0 0 [1,18]
|
8
|
+
# 3. COPY 0, [4,18] 0 NOOP 0 0 [19,34]
|
9
|
+
# 4. COPY 0, [4,18] 1 NOOP 0 0 [35,50]
|
10
|
+
# 5. COPY 0, [4,18] 2 NOOP 0 0 [51,66]
|
11
|
+
# 6. COPY 0, [4,18] 3 NOOP 0 0 [67,82]
|
12
|
+
# 7. COPY 0, [4,18] 4 NOOP 0 0 [83,98]
|
13
|
+
# 8. COPY 0, [4,18] 5 NOOP 0 0 [99,114]
|
14
|
+
# 9. COPY 0, [4,18] 6 NOOP 0 0 [115,130]
|
15
|
+
# 10. COPY 0, [4,18] 7 NOOP 0 0 [131,146]
|
16
|
+
# 11. COPY 0, [4,18] 8 NOOP 0 0 [147,162]
|
17
|
+
# 12. ADD [1,4] 0 COPY [4,6] 0 [163,174]
|
18
|
+
# 13. ADD [1,4] 0 COPY [4,6] 1 [175,186]
|
19
|
+
# 14. ADD [1,4] 0 COPY [4,6] 2 [187,198]
|
20
|
+
# 15. ADD [1,4] 0 COPY [4,6] 3 [199,210]
|
21
|
+
# 16. ADD [1,4] 0 COPY [4,6] 4 [211,222]
|
22
|
+
# 17. ADD [1,4] 0 COPY [4,6] 5 [223,234]
|
23
|
+
# 18. ADD [1,4] 0 COPY 4 6 [235,238]
|
24
|
+
# 19. ADD [1,4] 0 COPY 4 7 [239,242]
|
25
|
+
# 20. ADD [1,4] 0 COPY 4 8 [243,246]
|
26
|
+
# 21. COPY 4 [0,8] ADD 1 0 [247,255]
|
27
|
+
# ---------------------------------------------------------------
|
28
|
+
# Holds the instruction type constants and the default VCDIFF code table.
#
# Each entry of DEFAULT_TABLE is a six-element array
#
#   [type1, size1, mode1, type2, size2, mode2]
#
# describing a pair of instructions. A size of 0 on a non-NOOP instruction
# means the size is stored separately in the instructions section.
class CodeTable
  NOOP, ADD, RUN, COPY = 0, 1, 2, 3

  # index 0: RUN with an explicitly encoded size
  table = [
    [RUN, 0, 0, NOOP, 0, 0]
  ]

  # indices 1..18: single ADD, size 0 (explicit) and sizes 1..17
  (0..17).each do |size|
    table << [ADD, size, 0, NOOP, 0, 0]
  end

  # indices 19..162: single COPY for each of the 9 address modes, with
  # size 0 (explicit) followed by sizes 4..18
  (0..8).each do |mode|
    table << [COPY, 0, mode, NOOP, 0, 0]

    (4..18).each do |size|
      table << [COPY, size, mode, NOOP, 0, 0]
    end
  end

  # indices 163..234: ADD (sizes 1..4) followed by COPY (sizes 4..6),
  # for address modes 0..5
  (0..5).each do |mode|
    (1..4).each do |add_size|
      (4..6).each do |copy_size|
        table << [ADD, add_size, 0, COPY, copy_size, mode]
      end
    end
  end

  # indices 235..246: ADD (sizes 1..4) followed by COPY of size 4,
  # for address modes 6..8
  (6..8).each do |mode|
    (1..4).each do |add_size|
      table << [ADD, add_size, 0, COPY, 4, mode]
    end
  end

  # indices 247..255: COPY of size 4 followed by ADD of size 1,
  # for each of the 9 address modes
  (0..8).each do |mode|
    table << [COPY, 4, mode, ADD, 1, 0]
  end

  # The table is shared by every decoder, so it (and each entry) is frozen
  # to guard against accidental modification.
  DEFAULT_TABLE = table.each(&:freeze).freeze
end
|
65
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require "bindata"
|
2
|
+
|
3
|
+
module VCDIFF
|
4
|
+
# As described in RFC 3284 (http://tools.ietf.org/html/rfc3284)
|
5
|
+
# unsigned integers are treated as a number in base 128.
|
6
|
+
# Each digit in this representation is encoded in the lower
|
7
|
+
# 7 bits of a byte. Runs of bytes b_1, b_2, b_3, ..., b_n
|
8
|
+
# for one integer have the most significant bit set to 1
|
9
|
+
# for each b_i, i = 1, ..., n-1, and set to 0 for b_n.
|
10
|
+
#
|
11
|
+
# So 123456789 encodes to four 7-bit digits with values
|
12
|
+
# 58, 111, 26, 21:
|
13
|
+
#
|
14
|
+
# +-------------------------------------------+
|
15
|
+
# | 10111010 | 11101111 | 10011010 | 00010101 |
|
16
|
+
# +-------------------------------------------+
|
17
|
+
# MSB+58 MSB+111 MSB+26 0+21
|
18
|
+
#
|
19
|
+
# BinData primitive for the variable-length unsigned integers used by
# VCDIFF: base-128 digits, most significant digit first, with the high bit
# of every byte except the last one set.
class VCDIFFInt < BinData::BasePrimitive
  # Returns the lowest multiple of m
  # greater than or equal to n.
  #
  # Defined with `def self.`, so this is a public class method; a bare
  # `private` marker does not apply to it.
  def self.next_multiple(n, m)
    n + (m - n % m) % m
  end

  # Serialises a non-negative Integer into its VCDIFF byte string.
  #
  # Raises ArgumentError for negative values: the format is unsigned, and
  # an arithmetic right shift of a negative number never reaches 0, so the
  # encoding loop below would not terminate.
  def value_to_binary_string(value)
    if value < 0
      raise ArgumentError, "VCDIFF integers are unsigned, cannot encode #{value}"
    end

    bytes = []

    loop do
      # take the lowest 7 bits as the next base-128 digit
      digit = value & 0b01111111
      value >>= 7

      # digits are produced least significant first, so the first one
      # produced is the final byte of the encoding; every other byte gets
      # its continuation (8th) bit set
      digit |= 0b10000000 unless bytes.empty?

      bytes.unshift(digit)

      break if value == 0
    end

    bytes.pack("C*")
  end

  # Reads bytes from io up to and including the first byte whose high bit
  # is clear, and returns the decoded Integer.
  def read_and_return_value(io)
    value = 0

    loop do
      byte = next_byte(io)

      # shift in the next base-128 digit (most significant digit first)
      value = (value << 7) | (byte & 0b01111111)

      break if byte[7] == 0
    end

    value
  end

  def sensible_default
    0
  end

  # Returns the encoded bytes as a string of "0" and "1" characters,
  # eight per byte.
  #
  # TODO: a non-awful method name and non-awful implementation
  def value_to_zero_one_string
    to_binary_s.unpack("C*").map { |e| e.to_s(2).rjust(8, "0") }.join("")
  end

  # Gives the VCDIFF integer as a regular Ruby Integer
  def to_i
    snapshot
  end

  private

  # next byte as a fixnum
  def next_byte(io)
    io.readbytes(1).unpack("C")[0]
  end
end
|
92
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require "bindata"
|
2
|
+
require "vcdiff/integer"
|
3
|
+
|
4
|
+
module VCDIFF
|
5
|
+
# The header section at the start of a delta file: four magic bytes, the
# header indicator, and the optional fields the indicator announces.
class VCDIFFHeader < BinData::Record
  endian :big

  # magic bytes: "V", "C" and "D" with their uppermost bit set, then a zero byte
  uint8 :header_v, initial_value: 0xD6, assert: 0xD6
  uint8 :header_c, initial_value: 0xC3, assert: 0xC3
  uint8 :header_d, initial_value: 0xC4, assert: 0xC4
  uint8 :header_zero, initial_value: 0x00, assert: 0x00

  uint8 :header_indicator, initial_value: 0

  # only present when the matching header_indicator bit is set
  uint8 :secondary_compressor_id, onlyif: :secondary_compressor?
  vcdiff_int :custom_codetable_length, onlyif: :custom_codetable?
  array :code_table_data,
        type: :uint8,
        onlyif: :custom_codetable?,
        initial_length: :custom_codetable_length

  # True when bit 0 of the header indicator is set.
  def secondary_compressor?
    header_indicator[0] == 1
  end

  # True when bit 1 of the header indicator is set.
  def custom_codetable?
    header_indicator[1] == 1
  end
end
|
27
|
+
|
28
|
+
# The delta encoding of a single window: the length fields, the delta
# indicator, and the three data sections (ADD/RUN data, instructions and
# COPY addresses).
class VCDIFFDeltaEncoding < BinData::Record
  endian :big

  # bytes remaining for the delta encoding
  vcdiff_int :bytes_remaining
  # the size of the decoded target file
  vcdiff_int :target_length
  uint8 :delta_indicator, initial_value: 0

  # lengths of the three sections that follow
  vcdiff_int :add_run_data_length
  vcdiff_int :instructions_length
  vcdiff_int :copy_addresses_length

  array :add_run_data, type: :uint8, initial_length: :add_run_data_length
  array :instructions, type: :uint8, initial_length: :instructions_length
  array :copy_address_data, type: :uint8, initial_length: :copy_addresses_length

  # VCD_DATACOMP bit value, for unmatched ADD and RUN data
  def data_compressed?
    delta_indicator[0] == 1
  end

  # VCD_INSTCOMP bit value, for the delta instructions and accompanying
  # sizes
  def instructions_compressed?
    delta_indicator[1] == 1
  end

  # VCD_ADDRCOMP bit value, for the addresses for the COPY instructions
  def addresses_compressed?
    delta_indicator[2] == 1
  end
end
|
58
|
+
|
59
|
+
# A single window of a delta file: the window indicator, the optional
# source segment description, and the delta encoding itself.
class VCDIFFWindow < BinData::Record
  endian :big

  # bits 0 and 1 of the indicator must not both be set
  uint8 :window_indicator,
        initial_value: 0,
        assert: -> { value[0] != 1 || value[1] != 1 }

  # the source segment length and position are only present when at least
  # one of the two indicator bits is set
  vcdiff_int :source_data_length, onlyif: -> { !compressed_only? }
  vcdiff_int :source_data_position, onlyif: -> { !compressed_only? }

  vcdiff_delta_encoding :delta_encoding

  # Returns true if VCD_SOURCE is set
  def source_data?
    window_indicator[0] == 1
  end

  # Returns true if VCD_TARGET is set
  def target_data?
    window_indicator[1] == 1
  end

  # If VCD_SOURCE and VCD_TARGET are both 0, then the target file was
  # compressed by itself.
  def compressed_only?
    !(source_data? || target_data?)
  end
end
|
83
|
+
|
84
|
+
# A whole delta file: the header followed by the raw bytes of all windows.
class DeltaFile < BinData::Record
  endian :big

  vcdiff_header :header

  # everything after the header is kept as unparsed bytes
  rest :windows
end
|
90
|
+
end
|
data/lib/vcdiff.rb
ADDED
@@ -0,0 +1,209 @@
|
|
1
|
+
require "bentley_mcilroy"
|
2
|
+
|
3
|
+
require "vcdiff/integer"
|
4
|
+
require "vcdiff/records"
|
5
|
+
require "vcdiff/code_table"
|
6
|
+
|
7
|
+
module VCDIFF
|
8
|
+
# Placeholder for the VCDIFF encoder, which is not implemented yet.
class Encoder
  #### Implement me ####
end
|
11
|
+
|
12
|
+
# Decodes VCDIFF delta files against a dictionary (source) file.
#
# Only the default code table is supported, a non-zero header indicator is
# rejected, and only windows with VCD_SOURCE set and VCD_TARGET clear can
# be decoded.
class Decoder
  attr_accessor :dictionary

  # dictionary - path of the dictionary (source) file, which is read into
  #              memory immediately.
  def initialize(dictionary)
    @dictionary = File.read(dictionary)

    @s_near = 4
    @s_same = 3

    reset_address_caches
  end

  # Decodes a delta file using the dictionary given to the decoder
  #
  # file - path of the delta file (anything File.open accepts as a path).
  #
  # Returns the reconstructed target as a String. All offsets are handled
  # as byte offsets; the result is tagged with the dictionary's encoding.
  #
  # Raises NotImplementedError for unsupported features and ArgumentError
  # for malformed input.
  def decode(file)
    # block form so the handle is always closed; binary mode because the
    # delta is raw bytes
    delta_file = File.open(file, "rb") { |io| DeltaFile.read(io) }

    if delta_file.header.header_indicator != 0
      raise NotImplementedError, "Header indicator of #{delta_file.header.header_indicator} can't be handled"
    end

    if delta_file.header.custom_codetable?
      raise NotImplementedError, "Unable to handle a custom codetable"
    end

    # there's no simple way to determine the number of windows, since the
    # count isn't given ahead of time, so we'll need to manually iterate
    # through the windows
    window_stream = StringIO.new(delta_file.windows)

    target_file = String.new.force_encoding(Encoding::BINARY)

    until window_stream.eof?
      # reads only one window's worth of bytes from the stream
      next_window = VCDIFFWindow.read(window_stream)

      if next_window.compressed_only? || next_window.target_data?
        raise NotImplementedError, "Can only handle VCD_SOURCE windows"
      end

      length = next_window.source_data_length.to_i
      position = next_window.source_data_position.to_i

      # byteslice, not String#[]: the segment is described in bytes, and
      # String#[] counts characters for multi-byte encodings
      source_window = @dictionary.byteslice(position, length)

      if source_window.nil? || source_window.bytesize != length
        raise ArgumentError, "Invalid file format, source segment (position #{position}, length #{length}) is outside the dictionary"
      end

      target_file << process_delta_encoding(source_window, next_window.delta_encoding)
    end

    target_file.force_encoding(@dictionary.encoding)
  end

  # takes a delta encoding and processes it against the source
  # window.
  #
  # this corresponds to section (6) in RFC3284, which outlines
  # processing the instructions, data and addresses arrays.
  #
  # source_window  - String holding the bytes of the source segment.
  # delta_encoding - object responding to #instructions, #add_run_data and
  #                  #copy_address_data, each giving a sequence of byte
  #                  values.
  #
  # Returns the decoded target window as a binary String.
  def process_delta_encoding(source_window, delta_encoding)
    # the address caches start from their initial state for every window
    reset_address_caches

    # to_a unwraps the BinData::Array, and to_i turns each element into a
    # plain Integer so the arrays can be shifted and packed freely
    instructions = delta_encoding.instructions.to_a.map { |byte| byte.to_i }
    add_run_data = delta_encoding.add_run_data.to_a.map { |byte| byte.to_i }
    copy_address_data = delta_encoding.copy_address_data.to_a.map { |byte| byte.to_i }

    code_table = CodeTable::DEFAULT_TABLE

    source_bytes = source_window.unpack("C*")

    # the bytes of this window's target data produced so far
    target_bytes = []

    until instructions.empty?
      # instructions is a sequence of tuples (index, [size1], [size2]),
      # where size1 and size2 existence depends on the instruction entry
      # which _index_ points to.
      index = instructions.shift

      # instruction pair looked up in the code table
      type1, size1, mode1, type2, size2, mode2 = code_table[index]

      # a table size of 0 means the real size follows in the instruction
      # stream; both sizes are read before either instruction runs
      size1 = read_int(instructions) if type1 != CodeTable::NOOP && size1 == 0
      size2 = read_int(instructions) if type2 != CodeTable::NOOP && size2 == 0

      # both halves of the pair are executed, in order
      [[type1, size1, mode1], [type2, size2, mode2]].each do |type, size, mode|
        apply_instruction(type, size, mode, index, source_bytes, target_bytes, add_run_data, copy_address_data)
      end
    end

    target_bytes.pack("C*")
  end

  # shifts a VCDIFF integer off the given array of bytes, modifying
  # the array argument in-place.
  #
  # Raises ArgumentError if the array ends before the integer does.
  def read_int(array)
    # the final byte of an integer is the first one whose MSB (bit 7) is 0
    zero_msb_index = array.index { |e| e.to_i[7] == 0 }

    if zero_msb_index.nil?
      raise ArgumentError, "Invalid file format, truncated integer"
    end

    int_bytes = array.shift(zero_msb_index + 1)

    # base-128 digits, most significant first (the VCDIFFInt encoding)
    int_bytes.inject(0) { |value, byte| (value << 7) | (byte.to_i & 0b01111111) }
  end

  private

  # puts the "near" and "same" address caches into their initial state:
  # all cache values initialize to 0
  def reset_address_caches
    @near_cache = Array.new(@s_near, 0)
    @same_cache = Array.new(@s_same * 256, 0)
    @next_slot = 0
  end

  # executes one instruction, appending what it produces to target_bytes
  # and consuming add_run_data / copy_address_data as needed.
  def apply_instruction(type, size, mode, index, source_bytes, target_bytes, add_run_data, copy_address_data)
    case type
    when CodeTable::NOOP
      nil
    when CodeTable::RUN
      if mode != 0
        warn "Warning: RUN found with mode #{mode} -- value will be ignored"
      end

      if size == 0
        raise ArgumentError, "File contains a RUN instruction of size 0, must be > 0"
      end

      byte = add_run_data.shift

      if byte.nil?
        raise ArgumentError, "Invalid file format, RUN instruction without a data byte"
      end

      # repeat the single byte size times
      target_bytes.concat([byte] * size)
    when CodeTable::ADD
      if mode != 0
        warn "Warning: ADD found with mode #{mode} -- value will be ignored"
      end

      if size == 0
        raise ArgumentError, "File contains an ADD instruction of size 0, must be > 0"
      end

      bytes = add_run_data.shift(size)

      if bytes.length != size
        raise ArgumentError, "Invalid file format, ADD instruction of size #{size} runs past the end of the data"
      end

      target_bytes.concat(bytes)
    when CodeTable::COPY
      # "here" is the current location in the combined address space made
      # of the source segment followed by the target data produced so far
      here = source_bytes.length + target_bytes.length

      addr = decode_copy_address(mode, here, copy_address_data)

      if addr < 0 || addr >= here
        raise ArgumentError, "Invalid file format, COPY address #{addr} is out of range"
      end

      copy_bytes(addr, size, source_bytes, target_bytes)
      update_address_caches(addr)
    else
      raise ArgumentError, "Invalid file format, instruction of #{type} (found at index #{index}) doesn't exist"
    end
  end

  # decodes the address of a COPY instruction, from (5.3) of RFC3284:
  #
  #   VCD_SELF (mode 0): the address was encoded by itself as an integer.
  #   VCD_HERE (mode 1): the address was encoded as "here - addr".
  #   Near modes [2, s_near+1]: encoded as "addr - near[m-2]".
  #   Same modes [s_near+2, s_near+s_same+1]: encoded as a single byte b
  #     such that "addr == same[(m - (s_near+2))*256 + b]".
  def decode_copy_address(mode, here, copy_address_data)
    case mode
    when 0 # VCD_SELF
      read_int(copy_address_data)
    when 1 # VCD_HERE
      here - read_int(copy_address_data)
    when 2..(@s_near + 1) # near modes
      @near_cache[mode - 2] + read_int(copy_address_data)
    when (@s_near + 2)..(@s_near + @s_same + 1) # same modes
      # address is encoded as a single byte
      b = copy_address_data.shift

      if b.nil?
        raise ArgumentError, "Invalid file format, missing address byte for COPY"
      end

      @same_cache[(mode - (@s_near + 2)) * 256 + b]
    else
      raise ArgumentError, "invalid mode #{mode}"
    end
  end

  # appends size bytes starting at addr in the combined (source, then
  # target) address space to target_bytes. The copy may extend into bytes
  # that this same instruction is producing, hence the byte-by-byte loop.
  def copy_bytes(addr, size, source_bytes, target_bytes)
    source_length = source_bytes.length

    # fast path: the whole range lies inside the source segment
    if addr + size <= source_length
      target_bytes.concat(source_bytes[addr, size])
      return
    end

    size.times do |offset|
      position = addr + offset

      byte = if position < source_length
        source_bytes[position]
      else
        target_bytes[position - source_length]
      end

      target_bytes << byte
    end
  end

  # records a decoded COPY address in the "near" and "same" caches.
  def update_address_caches(addr)
    if @s_near > 0
      @near_cache[@next_slot] = addr
      @next_slot = (@next_slot + 1) % @s_near
    end

    if @s_same > 0
      @same_cache[addr % (@s_same * 256)] = addr
    end
  end
end
|
209
|
+
end
|
data/rakefile
ADDED
data/test/data/delta
ADDED
Binary file
|
data/test/data/source
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
some header some header some header
|
2
|
+
some header some header some header
|
3
|
+
some header some header some header
|
4
|
+
some header some header some header
|
5
|
+
some header some header some header
|
6
|
+
some header some header some header
|
7
|
+
|
8
|
+
dynamic CONTENT
|
9
|
+
|
10
|
+
some footer some footer some footer
|
11
|
+
some footer some footer some footer
|
12
|
+
some footer some footer some footer
|
13
|
+
some footer some footer some footer
|
14
|
+
some footer some footer some footer
|
15
|
+
some footer some footer some footer
|
16
|
+
|
data/test/data/target
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
some header some header some header
|
2
|
+
some header some header some header
|
3
|
+
some header some header some header
|
4
|
+
some header some header some header
|
5
|
+
some header some header some header
|
6
|
+
some header some header some header
|
7
|
+
|
8
|
+
only the best dynamic content
|
9
|
+
|
10
|
+
some footer some footer some footer
|
11
|
+
some footer some footer some footer
|
12
|
+
some footer some footer some footer
|
13
|
+
some footer some footer some footer
|
14
|
+
some footer some footer some footer
|
15
|
+
some footer some footer some footer
|
16
|
+
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
|
3
|
+
describe VCDIFF::Decoder do
  subject { VCDIFF::Decoder.new("test/data/source") }

  describe "#decode" do
    it "can decode delta files, given the source, to derive the target" do
      decoded = subject.decode(File.new("test/data/delta"))

      decoded.should == File.read("test/data/target")
    end

    it "cannot handle a non-zero header indicator" do
      # byte 4 of the delta is the header indicator; each case copies the
      # fixture and sets one indicator bit
      [
        ["secondary_compressor_bit_set", 0x01], # secondary compressor
        ["custom_codetable_bit_set", 0x02]      # custom codetable
      ].each do |tempfile_name, indicator|
        delta = Tempfile.new(tempfile_name)
        content = File.read("test/data/delta")
        content.setbyte(4, indicator)
        delta.write(content)
        delta.rewind

        expect { subject.decode(delta) }.to raise_error(NotImplementedError)
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
|
3
|
+
KNOWN_ENCODINGS = {
|
4
|
+
266478 => "100100001010000101101110",
|
5
|
+
488908 => "100111011110101101001100",
|
6
|
+
1311959 => "110100001000100101010111",
|
7
|
+
290936 => "100100011110000001111000",
|
8
|
+
1306432 => "110011111101111001000000",
|
9
|
+
1318485 => "110100001011110001010101",
|
10
|
+
983071 => "101111001000000000011111",
|
11
|
+
918966 => "101110001000101100110110",
|
12
|
+
1119947 => "110001001010110101001011",
|
13
|
+
1186056 => "110010001011001000001000"
|
14
|
+
}
|
15
|
+
|
16
|
+
describe VCDIFF::VCDIFFInt do
  it "converts between different representations" do
    # each known encoding is given as a string of bits, one byte per 8 characters
    KNOWN_ENCODINGS.each_pair do |expected_integer, bit_string|
      packed = [bit_string].pack("B*")

      vcdiff_int = VCDIFF::VCDIFFInt.read(packed)

      vcdiff_int.snapshot.should == expected_integer
      vcdiff_int.to_binary_s.should == packed
      vcdiff_int.to_i.should == vcdiff_int.snapshot
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
|
3
|
+
describe VCDIFF::VCDIFFHeader do
  it "requires a valid header" do
    # requires VCD\0 with the uppermost bits set to 1 for "V","C","D"
    valid_read = lambda { VCDIFF::VCDIFFHeader.read("\xD6\xC3\xC4\x00") }
    invalid_read = lambda { VCDIFF::VCDIFFHeader.read("\x01\x02\x03\x00") }

    expect(&valid_read).to_not raise_error(BinData::ValidityError)
    expect(&invalid_read).to raise_error(BinData::ValidityError)
  end

  describe "#secondary_compressor?" do
    it "is true if the header_indicator has the appropriate bit set" do
      # VCD\0 + 0b01 for the secondary compressor, padded with zero bytes
      header = VCDIFF::VCDIFFHeader.read("\xD6\xC3\xC4\x00\x01\x00\x00\x00\x00\x00\x00")

      header.secondary_compressor?.should be_true
      header.header_indicator[0].should == 1
    end
  end

  describe "#custom_codetable?" do
    it "is true if the header_indicator has the appropriate bit set" do
      # VCD\0 + 0b10 for custom code table, plus a bunch of zeroes to have enough bytes to read
      header = VCDIFF::VCDIFFHeader.read("\xD6\xC3\xC4\x00\x02\x00\x00\x00\x00\x00\x00")

      header.custom_codetable?.should be_true
      header.header_indicator[1].should == 1
    end
  end
end
|
27
|
+
|
28
|
+
describe VCDIFF::DeltaFile do
  it "requires a valid header" do
    # the same magic-byte check as the header record, reached through DeltaFile
    valid_read = lambda { VCDIFF::DeltaFile.read("\xD6\xC3\xC4\x00") }
    invalid_read = lambda { VCDIFF::DeltaFile.read("\x01\x02\x03\x00") }

    expect(&valid_read).to_not raise_error(BinData::ValidityError)
    expect(&invalid_read).to raise_error(BinData::ValidityError)
  end
end
|
data/vcdiff.rb.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
Gem::Specification.new do |gem|
  gem.name = "vcdiff.rb"
  gem.version = "0.0.1"
  gem.authors = ["Adam Prescott"]
  gem.email = ["adam@aprescott.com"]
  gem.description = "Pure-Ruby VCDIFF encoder/decoder."
  gem.summary = "Pure-Ruby encoder and decoder for the VCDIFF format."
  gem.homepage = "https://github.com/aprescott/vcdiff.rb"
  # matches the bundled LICENSE file
  gem.license = "MIT"

  gem.files = Dir["{lib/**/*,test/**/*,*.gemspec}"] + %w[rakefile LICENSE Gemfile README.md]
  gem.require_path = "lib"

  # needed by anyone using the library
  [
    "bindata", "~> 1.6.0",
    "bentley_mcilroy", ">= 0"
  ].each_slice(2) do |name, version|
    gem.add_runtime_dependency(name, version)
  end

  # only needed to build and test the gem, so these must not be installed
  # alongside it as runtime dependencies
  [
    "rake", "~> 10.0.0",
    "rspec", "~> 2.5"
  ].each_slice(2) do |name, version|
    gem.add_development_dependency(name, version)
  end
end
|
metadata
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: vcdiff.rb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Adam Prescott
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-09-11 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bindata
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.6.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.6.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: bentley_mcilroy
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rake
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ~>
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 10.0.0
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 10.0.0
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rspec
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ~>
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '2.5'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '2.5'
|
78
|
+
description: Pure-Ruby VCDIFF encoder/decoder.
|
79
|
+
email:
|
80
|
+
- adam@aprescott.com
|
81
|
+
executables: []
|
82
|
+
extensions: []
|
83
|
+
extra_rdoc_files: []
|
84
|
+
files:
|
85
|
+
- lib/vcdiff.rb
|
86
|
+
- lib/vcdiff/integer.rb
|
87
|
+
- lib/vcdiff/records.rb
|
88
|
+
- lib/vcdiff/code_table.rb
|
89
|
+
- test/test_helper.rb
|
90
|
+
- test/vcdiff_code_table_spec.rb
|
91
|
+
- test/vcdiff_decoder_spec.rb
|
92
|
+
- test/vcdiff_integer_spec.rb
|
93
|
+
- test/data/delta
|
94
|
+
- test/data/target
|
95
|
+
- test/data/source
|
96
|
+
- test/vcdiff_records_spec.rb
|
97
|
+
- vcdiff.rb.gemspec
|
98
|
+
- rakefile
|
99
|
+
- LICENSE
|
100
|
+
- Gemfile
|
101
|
+
- README.md
|
102
|
+
homepage: https://github.com/aprescott/vcdiff.rb
|
103
|
+
licenses: []
|
104
|
+
post_install_message:
|
105
|
+
rdoc_options: []
|
106
|
+
require_paths:
|
107
|
+
- lib
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
109
|
+
none: false
|
110
|
+
requirements:
|
111
|
+
- - ! '>='
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: '0'
|
114
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
115
|
+
none: false
|
116
|
+
requirements:
|
117
|
+
- - ! '>='
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0'
|
120
|
+
requirements: []
|
121
|
+
rubyforge_project:
|
122
|
+
rubygems_version: 1.8.24
|
123
|
+
signing_key:
|
124
|
+
specification_version: 3
|
125
|
+
summary: Pure-Ruby encoder and decoder for the VCDIFF format.
|
126
|
+
test_files: []
|