vcdiff.rb 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +73 -0
- data/lib/vcdiff/code_table.rb +65 -0
- data/lib/vcdiff/integer.rb +92 -0
- data/lib/vcdiff/records.rb +90 -0
- data/lib/vcdiff.rb +209 -0
- data/rakefile +11 -0
- data/test/data/delta +0 -0
- data/test/data/source +16 -0
- data/test/data/target +16 -0
- data/test/test_helper.rb +3 -0
- data/test/vcdiff_code_table_spec.rb +9 -0
- data/test/vcdiff_decoder_spec.rb +31 -0
- data/test/vcdiff_integer_spec.rb +28 -0
- data/test/vcdiff_records_spec.rb +33 -0
- data/vcdiff.rb.gemspec +26 -0
- metadata +126 -0
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Adam Prescott
|
2
|
+
|
3
|
+
(MIT License)
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
# VCDIFF
|
2
|
+
|
3
|
+
A pure-Ruby implementation of a VCDIFF encoder/decoder. Aims to provide similar
|
4
|
+
functionality to Google's [open-vcdiff](https://code.google.com/p/open-vcdiff)
|
5
|
+
(which the [vcdiff](https://github.com/romanbsd/vcdiff) gem wraps), but without
|
6
|
+
the C.
|
7
|
+
|
8
|
+
Some important notes and to-be-implemented things:
|
9
|
+
|
10
|
+
* Encoding isn't implemented yet, although the plan is to use the
|
11
|
+
[`bentley_mcilroy`](https://github.com/aprescott/bentley_mcilroy) gem,
|
12
|
+
following the same strategy as open-vcdiff. There is a question of what block
|
13
|
+
size to use for windows when finding common substrings.
|
14
|
+
* The decoder can't handle custom code tables or any sort of compression flags.
|
15
|
+
Compression is probably a won't-fix on account of there being no compressor
|
16
|
+
ID standards and the RFC for VCDIFF doesn't specify one. Custom code table
|
17
|
+
support is desirable so every VCDIFF encoding is supported when decoding.
|
18
|
+
* The decoder doesn't handle any window where the `VCD_TARGET` bit is set in
|
19
|
+
the window indicator (`Win_indicator`). As with custom code tables, it would
|
20
|
+
be good to have this. It's currently omitted for simplicity.
|
21
|
+
|
22
|
+
Further reading:
|
23
|
+
|
24
|
+
* [RFC3284](http://tools.ietf.org/html/rfc3284#section-7) — The VCDIFF Generic Differencing and Compression Data Format
|
25
|
+
|
26
|
+
# Installation
|
27
|
+
|
28
|
+
Add this line to your application's Gemfile:
|
29
|
+
|
30
|
+
```
|
31
|
+
gem "vcdiff.rb"
|
32
|
+
```
|
33
|
+
|
34
|
+
And then execute:
|
35
|
+
|
36
|
+
```bash
|
37
|
+
$ bundle install
|
38
|
+
```
|
39
|
+
|
40
|
+
Or install it yourself as:
|
41
|
+
|
42
|
+
```bash
|
43
|
+
$ gem install vcdiff.rb
|
44
|
+
```
|
45
|
+
|
46
|
+
# Usage
|
47
|
+
|
48
|
+
## Encoding
|
49
|
+
|
50
|
+
Not yet implemented.
|
51
|
+
|
52
|
+
## Decoding
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
decoder = VCDIFF::Decoder.new("path/to/dictionary_source")
|
56
|
+
original_target = decoder.decode("path/to/delta_file")
|
57
|
+
```
|
58
|
+
|
59
|
+
# Contributing
|
60
|
+
|
61
|
+
1. Fork it
|
62
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
63
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
64
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
65
|
+
5. Create new Pull Request
|
66
|
+
|
67
|
+
The issue tracker is [on GitHub](https://github.com/aprescott/vcdiff.rb/issues).
|
68
|
+
If you find any bugs, just open an issue.
|
69
|
+
|
70
|
+
# License
|
71
|
+
|
72
|
+
Copyright (c) Adam Prescott, released under the MIT license. See the license file.
|
73
|
+
Any contributions will be assumed to be under the same terms.
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module VCDIFF
|
2
|
+
# Default code table as defined in RFC 3284 (5.6)
|
3
|
+
#
|
4
|
+
# TYPE SIZE MODE TYPE SIZE MODE INDEX
|
5
|
+
# ---------------------------------------------------------------
|
6
|
+
# 1. RUN 0 0 NOOP 0 0 0
|
7
|
+
# 2. ADD 0, [1,17] 0 NOOP 0 0 [1,18]
|
8
|
+
# 3. COPY 0, [4,18] 0 NOOP 0 0 [19,34]
|
9
|
+
# 4. COPY 0, [4,18] 1 NOOP 0 0 [35,50]
|
10
|
+
# 5. COPY 0, [4,18] 2 NOOP 0 0 [51,66]
|
11
|
+
# 6. COPY 0, [4,18] 3 NOOP 0 0 [67,82]
|
12
|
+
# 7. COPY 0, [4,18] 4 NOOP 0 0 [83,98]
|
13
|
+
# 8. COPY 0, [4,18] 5 NOOP 0 0 [99,114]
|
14
|
+
# 9. COPY 0, [4,18] 6 NOOP 0 0 [115,130]
|
15
|
+
# 10. COPY 0, [4,18] 7 NOOP 0 0 [131,146]
|
16
|
+
# 11. COPY 0, [4,18] 8 NOOP 0 0 [147,162]
|
17
|
+
# 12. ADD [1,4] 0 COPY [4,6] 0 [163,174]
|
18
|
+
# 13. ADD [1,4] 0 COPY [4,6] 1 [175,186]
|
19
|
+
# 14. ADD [1,4] 0 COPY [4,6] 2 [187,198]
|
20
|
+
# 15. ADD [1,4] 0 COPY [4,6] 3 [199,210]
|
21
|
+
# 16. ADD [1,4] 0 COPY [4,6] 4 [211,222]
|
22
|
+
# 17. ADD [1,4] 0 COPY [4,6] 5 [223,234]
|
23
|
+
# 18. ADD [1,4] 0 COPY 4 6 [235,238]
|
24
|
+
# 19. ADD [1,4] 0 COPY 4 7 [239,242]
|
25
|
+
# 20. ADD [1,4] 0 COPY 4 8 [243,246]
|
26
|
+
# 21. COPY 4 [0,8] ADD 1 0 [247,255]
|
27
|
+
# ---------------------------------------------------------------
|
28
|
+
class CodeTable
  # Instruction type identifiers used in the code table entries.
  NOOP, ADD, RUN, COPY = 0, 1, 2, 3

  # The 256-entry default code table. Every entry is a six-element array
  #
  #   [type1, size1, mode1, type2, size2, mode2]
  #
  # describing a pair of instructions. A size of 0 on a non-NOOP instruction
  # means the real size is stored separately in the instruction stream.
  #
  # The table and each of its rows are frozen: the table is shared by every
  # decoder, so an accidental in-place modification would silently corrupt
  # all later decoding.
  DEFAULT_TABLE = [].tap do |table|
    # index 0: RUN, size read from the instruction stream
    table << [RUN, 0, 0, NOOP, 0, 0]

    # indices 1..18: ADD with size 0 (explicit) or 1..17
    (0..17).each do |size|
      table << [ADD, size, 0, NOOP, 0, 0]
    end

    # indices 19..162: for each of the 9 address modes, COPY with
    # size 0 (explicit) followed by sizes 4..18
    (0..8).each do |mode|
      table << [COPY, 0, mode, NOOP, 0, 0]

      (4..18).each do |size|
        table << [COPY, size, mode, NOOP, 0, 0]
      end
    end

    # indices 163..234: ADD (1..4) followed by COPY (4..6), modes 0..5
    (0..5).each do |mode|
      (1..4).each do |add_size|
        (4..6).each do |copy_size|
          table << [ADD, add_size, 0, COPY, copy_size, mode]
        end
      end
    end

    # indices 235..246: ADD (1..4) followed by COPY of size 4, modes 6..8
    (6..8).each do |mode|
      (1..4).each do |add_size|
        table << [ADD, add_size, 0, COPY, 4, mode]
      end
    end

    # indices 247..255: COPY of size 4 (each mode) followed by ADD of size 1
    (0..8).each do |mode|
      table << [COPY, 4, mode, ADD, 1, 0]
    end
  end.each(&:freeze).freeze
end
|
65
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require "bindata"
|
2
|
+
|
3
|
+
module VCDIFF
|
4
|
+
# As described in RFC 3284 (http://tools.ietf.org/html/rfc3284)
|
5
|
+
# unsigned integers are treated as a number in base 128.
|
6
|
+
# Each digit in this representation is encoded in the lower
|
7
|
+
# 7 bits of a byte. Runs of bytes b_1, b_2, b_3, ..., b_n
|
8
|
+
# for one integer have the most significant bit set to 1
|
9
|
+
# for each b_i, i = 1, ..., n-1, and set to 0 for b_n.
|
10
|
+
#
|
11
|
+
# So 123456789 encodes to four 7-bit digits with values
|
12
|
+
# 58, 111, 26, 21:
|
13
|
+
#
|
14
|
+
# +-------------------------------------------+
|
15
|
+
# | 10111010 | 11101111 | 10011010 | 00010101 |
|
16
|
+
# +-------------------------------------------+
|
17
|
+
# MSB+58 MSB+111 MSB+26 0+21
|
18
|
+
#
|
19
|
+
class VCDIFFInt < BinData::BasePrimitive
  # Returns the lowest multiple of m greater than or equal to n.
  #
  # This is a class method, so a preceding `private` keyword would not apply
  # to it; it is public, and is placed here so its visibility is not
  # misleading.
  def self.next_multiple(n, m)
    n + (m - n % m) % m
  end

  # Encodes +value+ as a base-128 byte string, most significant digit first.
  # Every byte except the last has its high bit set.
  #
  # Raises ArgumentError for negative values: the format only describes
  # unsigned integers, and a negative number never shifts down to 0, so the
  # loop below would otherwise never terminate.
  def value_to_binary_string(value)
    if value < 0
      raise ArgumentError, "VCDIFF integers are unsigned, cannot encode #{value}"
    end

    bytes = []

    loop do
      # lowest 7 bits form the next base-128 digit
      digit = value & 0b01111111
      value >>= 7

      # digits are produced least significant first; the first one produced
      # ends up last in the output and is the only one without the
      # continuation bit
      digit |= 0b10000000 unless bytes.empty?

      bytes.unshift(digit)

      break if value == 0
    end

    bytes.pack("C*")
  end

  # Reads bytes from +io+ up to and including the first byte whose high bit
  # is 0, and returns the integer they encode.
  def read_and_return_value(io)
    value = 0

    loop do
      byte = next_byte(io)

      # shift the digits read so far up by one base-128 place and append
      # the new digit
      value = (value << 7) | (byte & 0b01111111)

      break if byte[7] == 0
    end

    value
  end

  # Value used by BinData when nothing has been assigned.
  def sensible_default
    0
  end

  # Returns the encoded bytes as a string of "0" and "1" characters,
  # eight per byte.
  def value_to_zero_one_string
    to_binary_s.unpack("B*").first
  end

  # Gives the VCDIFF integer as a regular Ruby Integer
  def to_i
    snapshot
  end

  private

  # next byte of +io+ as an integer in 0..255
  def next_byte(io)
    io.readbytes(1).unpack("C")[0]
  end
end
|
92
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require "bindata"
|
2
|
+
require "vcdiff/integer"
|
3
|
+
|
4
|
+
module VCDIFF
|
5
|
+
class VCDIFFHeader < BinData::Record
  endian :big

  # Magic number: the characters "V", "C", "D" with their high bit set,
  # followed by a zero byte. Reading fails validation on any other value.
  uint8 :header_v,    initial_value: 0xD6, assert: 0xD6
  uint8 :header_c,    initial_value: 0xC3, assert: 0xC3
  uint8 :header_d,    initial_value: 0xC4, assert: 0xC4
  uint8 :header_zero, initial_value: 0x00, assert: 0x00

  # Bit field describing which of the optional fields below are present.
  uint8 :header_indicator, initial_value: 0

  # Present only when bit 0 of the indicator is set.
  uint8 :secondary_compressor_id, onlyif: :secondary_compressor?

  # Present only when bit 1 of the indicator is set: the byte length of the
  # custom code table, then that many raw bytes.
  vcdiff_int :custom_codetable_length, onlyif: :custom_codetable?
  array :code_table_data,
        type: :uint8,
        onlyif: :custom_codetable?,
        initial_length: :custom_codetable_length

  # True when bit 0 of header_indicator is set.
  def secondary_compressor?
    header_indicator[0] == 1
  end

  # True when bit 1 of header_indicator is set.
  def custom_codetable?
    header_indicator[1] == 1
  end
end
|
27
|
+
|
28
|
+
class VCDIFFDeltaEncoding < BinData::Record
  endian :big

  # Number of bytes remaining for this delta encoding.
  vcdiff_int :bytes_remaining
  # Size of the decoded target data.
  vcdiff_int :target_length
  # Bit field; see the *_compressed? predicates below.
  uint8 :delta_indicator, initial_value: 0

  # Byte lengths of the three data sections, in the order the sections
  # follow.
  vcdiff_int :add_run_data_length
  vcdiff_int :instructions_length
  vcdiff_int :copy_addresses_length

  # The three sections themselves, each read as raw bytes using the length
  # declared above.
  array :add_run_data,      type: :uint8, initial_length: :add_run_data_length
  array :instructions,      type: :uint8, initial_length: :instructions_length
  array :copy_address_data, type: :uint8, initial_length: :copy_addresses_length

  # VCD_DATACOMP (bit 0): the unmatched ADD and RUN data is compressed.
  def data_compressed?
    delta_indicator[0] == 1
  end

  # VCD_INSTCOMP (bit 1): the delta instructions and their sizes are
  # compressed.
  def instructions_compressed?
    delta_indicator[1] == 1
  end

  # VCD_ADDRCOMP (bit 2): the addresses for the COPY instructions are
  # compressed.
  def addresses_compressed?
    delta_indicator[2] == 1
  end
end
|
58
|
+
|
59
|
+
class VCDIFFWindow < BinData::Record
  endian :big

  # Bit 0 is VCD_SOURCE and bit 1 is VCD_TARGET. A window with both bits
  # set is rejected by the assertion.
  uint8 :window_indicator,
        initial_value: 0,
        assert: lambda { value[0] != 1 || value[1] != 1 }

  # Length and position of the data segment the window copies from. Both
  # fields are absent when neither indicator bit is set.
  vcdiff_int :source_data_length,   onlyif: lambda { !compressed_only? }
  vcdiff_int :source_data_position, onlyif: lambda { !compressed_only? }

  vcdiff_delta_encoding :delta_encoding

  # True when the VCD_SOURCE bit is set.
  def source_data?
    window_indicator[0] == 1
  end

  # True when the VCD_TARGET bit is set.
  def target_data?
    window_indicator[1] == 1
  end

  # True when neither VCD_SOURCE nor VCD_TARGET is set, i.e. the target
  # file was compressed by itself.
  def compressed_only?
    !(source_data? || target_data?)
  end
end
|
83
|
+
|
84
|
+
class DeltaFile < BinData::Record
  endian :big

  # The file header, validated while it is read.
  vcdiff_header :header

  # Everything after the header, kept as unparsed bytes. The number of
  # windows is not recorded anywhere, so they are parsed one at a time by
  # whoever consumes this field.
  rest :windows
end
|
90
|
+
end
|
data/lib/vcdiff.rb
ADDED
@@ -0,0 +1,209 @@
|
|
1
|
+
require "bentley_mcilroy"
|
2
|
+
|
3
|
+
require "vcdiff/integer"
|
4
|
+
require "vcdiff/records"
|
5
|
+
require "vcdiff/code_table"
|
6
|
+
|
7
|
+
module VCDIFF
|
8
|
+
class Encoder
|
9
|
+
#### Implement me ####
|
10
|
+
end
|
11
|
+
|
12
|
+
class Decoder
  attr_accessor :dictionary

  # dictionary - path of the source ("dictionary") file that deltas were
  #              computed against.
  #
  # The dictionary is read as raw bytes, because all positions and lengths
  # in a delta are byte offsets, not character offsets.
  def initialize(dictionary)
    @dictionary = File.binread(dictionary)

    # sizes of the "near" and "same" address caches
    @s_near = 4
    @s_same = 3

    reset_address_caches
  end

  # Decodes a delta file using the dictionary given to the decoder.
  #
  # file - path of the delta file.
  #
  # Returns the reconstructed target as a binary (ASCII-8BIT) String.
  #
  # Raises NotImplementedError for a non-zero header indicator, a custom
  # code table, or any window that is not a plain VCD_SOURCE window.
  # Raises ArgumentError when the delta's contents are inconsistent.
  def decode(file)
    # the block form closes the handle as soon as the records are read
    delta_file = File.open(file, "rb") { |io| DeltaFile.read(io) }

    if delta_file.header.header_indicator != 0
      raise NotImplementedError, "Header indicator of #{delta_file.header.header_indicator} can't be handled"
    end

    if delta_file.header.custom_codetable?
      raise NotImplementedError, "Unable to handle a custom codetable"
    end

    # there's no simple way to determine the number of windows, since the
    # count isn't given ahead of time, so we iterate until the bytes after
    # the header are used up
    window_stream = StringIO.new(delta_file.windows)

    # String.new gives an empty binary string
    target_file = String.new

    until window_stream.eof?
      # reads only one window's worth of bytes from the stream
      next_window = VCDIFFWindow.read(window_stream)

      if next_window.compressed_only? || next_window.target_data?
        raise NotImplementedError, "Can only handle VCD_SOURCE windows"
      end

      length = next_window.source_data_length.to_i
      position = next_window.source_data_position.to_i

      source_window = @dictionary.byteslice(position, length)

      if source_window.nil? || source_window.bytesize < length
        raise ArgumentError, "Source segment (position #{position}, length #{length}) lies outside the dictionary"
      end

      target_file << process_delta_encoding(source_window, next_window.delta_encoding)
    end

    target_file
  end

  # Takes a delta encoding and processes it against the source window.
  #
  # This corresponds to section (6) in RFC3284, which outlines processing
  # the instructions, data and addresses arrays.
  #
  # source_window  - String holding the bytes of the source segment.
  # delta_encoding - object responding to #instructions, #add_run_data and
  #                  #copy_address_data, each convertible with #to_a into
  #                  a list of byte values.
  #
  # The address caches are reset before the first instruction, so every
  # window is decoded with fresh caches.
  #
  # Returns the decoded target window as a binary String.
  # Raises ArgumentError when an instruction is invalid or refers to data
  # that does not exist.
  def process_delta_encoding(source_window, delta_encoding)
    # to_a unwraps the BinData::Array, which doesn't know about method
    # calls like #shift; to_i turns every element into a plain Integer
    instructions = delta_encoding.instructions.to_a.map(&:to_i)
    add_run_data = delta_encoding.add_run_data.to_a.map(&:to_i)
    copy_address_data = delta_encoding.copy_address_data.to_a.map(&:to_i)

    code_table = CodeTable::DEFAULT_TABLE

    # work on bytes regardless of the encoding the caller's string carries
    source = source_window.dup.force_encoding(Encoding::BINARY)

    # the final string for this window
    target_window = String.new

    reset_address_caches

    until instructions.empty?
      # instructions is a sequence of tuples (index, [size1], [size2]),
      # where size1 and size2 existence depends on the instruction entry
      # which _index_ points to.
      index = instructions.shift

      # instruction pair looked up in the code table
      type1, size1, mode1, type2, size2, mode2 = code_table[index]

      # a table size of 0 means the size follows in the instruction stream;
      # both sizes of the tuple precede the next index, so read them before
      # executing anything
      size1 = read_int(instructions) if type1 != CodeTable::NOOP && size1 == 0
      size2 = read_int(instructions) if type2 != CodeTable::NOOP && size2 == 0

      # run the first instruction of the pair, then the second
      [[type1, size1, mode1], [type2, size2, mode2]].each do |type, size, mode|
        case type
        when CodeTable::NOOP
          next
        when CodeTable::RUN
          warn "Warning: RUN found with mode #{mode} -- value will be ignored" if mode != 0

          if size == 0
            raise ArgumentError, "File contains a RUN instruction of size 0, must be > 0"
          end

          byte = add_run_data.shift
          raise ArgumentError, "RUN instruction found with no data byte left to repeat" if byte.nil?

          # repeat a single byte _size_ times
          target_window << ([byte] * size).pack("C*")
        when CodeTable::ADD
          warn "Warning: ADD found with mode #{mode} -- value will be ignored" if mode != 0

          if size == 0
            raise ArgumentError, "File contains an ADD instruction of size 0, must be > 0"
          end

          bytes = add_run_data.shift(size)

          if bytes.size < size
            raise ArgumentError, "ADD instruction of size #{size} runs past the end of the data section"
          end

          target_window << bytes.pack("C*")
        when CodeTable::COPY
          # "here" is the current position in the address space formed by
          # the source segment followed by the target decoded so far
          here = source.bytesize + target_window.bytesize

          addr = decode_copy_address(mode, here, copy_address_data)
          append_copy(source, target_window, addr, size)
          update_address_caches(addr)
        else
          raise ArgumentError, "Invalid file format, instruction of #{type} (found at index #{index}) doesn't exist"
        end
      end
    end

    target_window
  end

  # Shifts a VCDIFF integer off the given array of bytes, modifying the
  # array argument in-place.
  #
  # Returns the decoded Integer.
  # Raises ArgumentError when the array holds no byte with a clear high
  # bit, i.e. the integer is cut short.
  def read_int(array)
    # first index where the most significant bit (bit 7) is 0; that byte
    # is the last one of the integer
    zero_msb_index = array.index { |e| e.to_i[7] == 0 }

    if zero_msb_index.nil?
      raise ArgumentError, "Truncated VCDIFF integer: no terminating byte found"
    end

    # each byte carries one base-128 digit, most significant first
    array.shift(zero_msb_index + 1).inject(0) do |value, byte|
      (value << 7) | (byte.to_i & 0b01111111)
    end
  end

  private

  # Puts both address caches back into their initial state: every slot 0.
  def reset_address_caches
    @near_cache = Array.new(@s_near, 0)
    @same_cache = Array.new(@s_same * 256, 0)
    @next_slot = 0
  end

  # Decodes the address of one COPY instruction, consuming bytes from
  # copy_address_data. From (5.3) of RFC3284:
  #
  #   VCD_SELF (mode 0): the address was encoded by itself as an integer.
  #   VCD_HERE (mode 1): the address was encoded as "here - addr".
  #   Near modes, [2, s_near+1]: encoded as "addr - near[m-2]".
  #   Same modes, [s_near+2, s_near+s_same+1]: encoded as a single byte b
  #     such that "addr == same[(m - (s_near+2))*256 + b]".
  def decode_copy_address(mode, here, copy_address_data)
    case mode
    when 0 # VCD_SELF
      read_int(copy_address_data)
    when 1 # VCD_HERE
      here - read_int(copy_address_data)
    when 2..(@s_near + 1) # near modes
      read_int(copy_address_data) + @near_cache[mode - 2]
    when (@s_near + 2)..(@s_near + @s_same + 1) # same modes
      # address is encoded as a single byte
      b = copy_address_data.shift
      raise ArgumentError, "COPY instruction found with no address byte left to read" if b.nil?

      @same_cache[(mode - (@s_near + 2)) * 256 + b]
    else
      raise ArgumentError, "invalid mode #{mode}"
    end
  end

  # Appends _size_ bytes starting at _addr_ to _target_. Addresses below
  # source.bytesize refer to the source segment, the rest to the target
  # decoded so far.
  def append_copy(source, target, addr, size)
    source_size = source.bytesize

    if addr < 0 || addr >= source_size + target.bytesize
      raise ArgumentError, "COPY address #{addr} lies outside the data decoded so far"
    end

    # common case: the whole range lies inside the source segment
    if addr + size <= source_size
      target << source.byteslice(addr, size)
      return
    end

    # the range reaches into the target, possibly into bytes this very
    # instruction produces, so it has to be copied one byte at a time
    size.times do |offset|
      position = addr + offset

      byte = if position < source_size
               source.getbyte(position)
             else
               target.getbyte(position - source_size)
             end

      target << byte
    end
  end

  # Records a decoded COPY address in the "near" and "same" caches.
  def update_address_caches(addr)
    if @s_near > 0
      @near_cache[@next_slot] = addr
      @next_slot = (@next_slot + 1) % @s_near
    end

    if @s_same > 0
      @same_cache[addr % (@s_same * 256)] = addr
    end
  end
end
|
209
|
+
end
|
data/rakefile
ADDED
data/test/data/delta
ADDED
Binary file
|
data/test/data/source
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
some header some header some header
|
2
|
+
some header some header some header
|
3
|
+
some header some header some header
|
4
|
+
some header some header some header
|
5
|
+
some header some header some header
|
6
|
+
some header some header some header
|
7
|
+
|
8
|
+
dynamic CONTENT
|
9
|
+
|
10
|
+
some footer some footer some footer
|
11
|
+
some footer some footer some footer
|
12
|
+
some footer some footer some footer
|
13
|
+
some footer some footer some footer
|
14
|
+
some footer some footer some footer
|
15
|
+
some footer some footer some footer
|
16
|
+
|
data/test/data/target
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
some header some header some header
|
2
|
+
some header some header some header
|
3
|
+
some header some header some header
|
4
|
+
some header some header some header
|
5
|
+
some header some header some header
|
6
|
+
some header some header some header
|
7
|
+
|
8
|
+
only the best dynamic content
|
9
|
+
|
10
|
+
some footer some footer some footer
|
11
|
+
some footer some footer some footer
|
12
|
+
some footer some footer some footer
|
13
|
+
some footer some footer some footer
|
14
|
+
some footer some footer some footer
|
15
|
+
some footer some footer some footer
|
16
|
+
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
|
3
|
+
describe VCDIFF::Decoder do
  subject { VCDIFF::Decoder.new("test/data/source") }

  # Yields the path of a temporary copy of the reference delta whose header
  # indicator byte (offset 4, right after the four magic bytes) has been
  # replaced by +indicator+. The temporary file is closed and removed once
  # the block returns, even if the example fails.
  def with_header_indicator(name, indicator)
    delta = Tempfile.new(name)
    delta.binmode

    # the delta is binary data, so read it without any text-mode handling
    content = File.binread("test/data/delta")
    content.setbyte(4, indicator)
    delta.write(content)
    delta.flush

    yield delta.path
  ensure
    delta.close! if delta
  end

  describe "#decode" do
    it "can decode delta files, given the source, to derive the target" do
      subject.decode("test/data/delta").should == File.read("test/data/target")
    end

    it "cannot handle a delta with the secondary compressor bit set" do
      with_header_indicator("secondary_compressor_bit_set", 0x01) do |path|
        expect { subject.decode(path) }.to raise_error(NotImplementedError)
      end
    end

    it "cannot handle a delta with the custom codetable bit set" do
      with_header_indicator("custom_codetable_bit_set", 0x02) do |path|
        expect { subject.decode(path) }.to raise_error(NotImplementedError)
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
|
3
|
+
# Integers paired with their VCDIFF encoding, written as a string of bits.
KNOWN_ENCODINGS = {
  266478  => "100100001010000101101110",
  488908  => "100111011110101101001100",
  1311959 => "110100001000100101010111",
  290936  => "100100011110000001111000",
  1306432 => "110011111101111001000000",
  1318485 => "110100001011110001010101",
  983071  => "101111001000000000011111",
  918966  => "101110001000101100110110",
  1119947 => "110001001010110101001011",
  1186056 => "110010001011001000001000"
}

describe VCDIFF::VCDIFFInt do
  it "converts between different representations" do
    KNOWN_ENCODINGS.each_pair do |expected, bits|
      # turn the bit string into the actual encoded bytes
      encoded = [bits].pack("B*")

      vcdiff_int = VCDIFF::VCDIFFInt.read(encoded)

      vcdiff_int.snapshot.should == expected
      vcdiff_int.to_binary_s.should == encoded
      vcdiff_int.to_i.should == vcdiff_int.snapshot
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
|
3
|
+
describe VCDIFF::VCDIFFHeader do
  it "requires a valid header" do
    # the magic number is VCD\0 with the uppermost bit of "V", "C" and "D" set
    expect do
      VCDIFF::VCDIFFHeader.read("\xD6\xC3\xC4\x00")
    end.to_not raise_error(BinData::ValidityError)

    expect do
      VCDIFF::VCDIFFHeader.read("\x01\x02\x03\x00")
    end.to raise_error(BinData::ValidityError)
  end

  describe "#secondary_compressor?" do
    it "is true if the header_indicator has the appropriate bit set" do
      # magic number + 0b01 as the indicator, padded with zero bytes so
      # there is enough left to read
      parsed = VCDIFF::VCDIFFHeader.read("\xD6\xC3\xC4\x00\x01\x00\x00\x00\x00\x00\x00")

      parsed.secondary_compressor?.should be_true
      parsed.header_indicator[0].should == 1
    end
  end

  describe "#custom_codetable?" do
    it "is true if the header_indicator has the appropriate bit set" do
      # magic number + 0b10 as the indicator, padded with zero bytes so
      # there is enough left to read
      parsed = VCDIFF::VCDIFFHeader.read("\xD6\xC3\xC4\x00\x02\x00\x00\x00\x00\x00\x00")

      parsed.custom_codetable?.should be_true
      parsed.header_indicator[1].should == 1
    end
  end
end
|
27
|
+
|
28
|
+
describe VCDIFF::DeltaFile do
  it "requires a valid header" do
    # a delta file starts with the same magic number as the header record
    expect do
      VCDIFF::DeltaFile.read("\xD6\xC3\xC4\x00")
    end.to_not raise_error(BinData::ValidityError)

    expect do
      VCDIFF::DeltaFile.read("\x01\x02\x03\x00")
    end.to raise_error(BinData::ValidityError)
  end
end
|
data/vcdiff.rb.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
Gem::Specification.new do |gem|
  gem.name = "vcdiff.rb"
  gem.version = "0.0.1"
  gem.authors = ["Adam Prescott"]
  gem.email = ["adam@aprescott.com"]
  gem.description = "Pure-Ruby VCDIFF encoder/decoder."
  gem.summary = "Pure-Ruby encoder and decoder for the VCDIFF format."
  gem.homepage = "https://github.com/aprescott/vcdiff.rb"

  # matches the LICENSE file shipped with the gem
  gem.license = "MIT"

  gem.files = Dir["{lib/**/*,test/**/*,*.gemspec}"] + %w[rakefile LICENSE Gemfile README.md]
  gem.require_path = "lib"

  # needed by anything that loads the library
  [
    "bindata", "~> 1.6.0",
    "bentley_mcilroy", ">= 0"
  ].each_slice(2) do |name, version|
    gem.add_runtime_dependency(name, version)
  end

  # only needed to run the rake tasks and the specs, so they must not be
  # installed for every consumer of the gem
  [
    "rake", "~> 10.0.0",
    "rspec", "~> 2.5"
  ].each_slice(2) do |name, version|
    gem.add_development_dependency(name, version)
  end
end
|
metadata
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: vcdiff.rb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Adam Prescott
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-09-11 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bindata
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.6.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.6.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: bentley_mcilroy
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rake
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ~>
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 10.0.0
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 10.0.0
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rspec
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ~>
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '2.5'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '2.5'
|
78
|
+
description: Pure-Ruby VCDIFF encoder/decoder.
|
79
|
+
email:
|
80
|
+
- adam@aprescott.com
|
81
|
+
executables: []
|
82
|
+
extensions: []
|
83
|
+
extra_rdoc_files: []
|
84
|
+
files:
|
85
|
+
- lib/vcdiff.rb
|
86
|
+
- lib/vcdiff/integer.rb
|
87
|
+
- lib/vcdiff/records.rb
|
88
|
+
- lib/vcdiff/code_table.rb
|
89
|
+
- test/test_helper.rb
|
90
|
+
- test/vcdiff_code_table_spec.rb
|
91
|
+
- test/vcdiff_decoder_spec.rb
|
92
|
+
- test/vcdiff_integer_spec.rb
|
93
|
+
- test/data/delta
|
94
|
+
- test/data/target
|
95
|
+
- test/data/source
|
96
|
+
- test/vcdiff_records_spec.rb
|
97
|
+
- vcdiff.rb.gemspec
|
98
|
+
- rakefile
|
99
|
+
- LICENSE
|
100
|
+
- Gemfile
|
101
|
+
- README.md
|
102
|
+
homepage: https://github.com/aprescott/vcdiff.rb
|
103
|
+
licenses: []
|
104
|
+
post_install_message:
|
105
|
+
rdoc_options: []
|
106
|
+
require_paths:
|
107
|
+
- lib
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
109
|
+
none: false
|
110
|
+
requirements:
|
111
|
+
- - ! '>='
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: '0'
|
114
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
115
|
+
none: false
|
116
|
+
requirements:
|
117
|
+
- - ! '>='
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0'
|
120
|
+
requirements: []
|
121
|
+
rubyforge_project:
|
122
|
+
rubygems_version: 1.8.24
|
123
|
+
signing_key:
|
124
|
+
specification_version: 3
|
125
|
+
summary: Pure-Ruby encoder and decoder for the VCDIFF format.
|
126
|
+
test_files: []
|