rbzip2 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +1 -1
  3. data/README.md +73 -37
  4. data/Rakefile +2 -0
  5. data/lib/core_ext/io.rb +12 -0
  6. data/lib/rbzip2.rb +13 -9
  7. data/lib/rbzip2/adapter.rb +17 -0
  8. data/lib/rbzip2/ffi.rb +33 -0
  9. data/lib/rbzip2/ffi/compressor.rb +85 -0
  10. data/lib/rbzip2/ffi/constants.rb +30 -0
  11. data/lib/rbzip2/ffi/decompressor.rb +163 -0
  12. data/lib/rbzip2/ffi/errors.rb +14 -0
  13. data/lib/rbzip2/io.rb +19 -5
  14. data/lib/rbzip2/java.rb +23 -0
  15. data/lib/rbzip2/java/compressor.rb +38 -0
  16. data/lib/rbzip2/java/decompressor.rb +65 -0
  17. data/lib/rbzip2/ruby.rb +18 -0
  18. data/lib/rbzip2/{compressor.rb → ruby/compressor.rb} +141 -191
  19. data/lib/rbzip2/{constants.rb → ruby/constants.rb} +2 -2
  20. data/lib/rbzip2/ruby/crc.rb +70 -0
  21. data/lib/rbzip2/{decompressor.rb → ruby/decompressor.rb} +107 -127
  22. data/lib/rbzip2/{input_data.rb → ruby/input_data.rb} +8 -18
  23. data/lib/rbzip2/{output_data.rb → ruby/output_data.rb} +6 -9
  24. data/lib/rbzip2/version.rb +2 -2
  25. data/spec/common/compressor_spec.rb +68 -0
  26. data/spec/common/decompressor_spec.rb +63 -0
  27. data/spec/ffi/compressor_spec.rb +12 -0
  28. data/spec/ffi/decompressor_spec.rb +12 -0
  29. data/spec/java/compressor_spec.rb +12 -0
  30. data/spec/java/decompressor_spec.rb +12 -0
  31. data/spec/ruby/compressor_spec.rb +12 -0
  32. data/spec/ruby/decompressor_spec.rb +12 -0
  33. metadata +56 -149
  34. data/.gemtest +0 -0
  35. data/.gitignore +0 -3
  36. data/.travis.yml +0 -9
  37. data/Gemfile +0 -3
  38. data/Gemfile.lock +0 -28
  39. data/lib/rbzip2/crc.rb +0 -105
  40. data/rbzip2.gemspec +0 -27
  41. data/spec/compressor_spec.rb +0 -42
  42. data/spec/decompressor_spec.rb +0 -41
  43. data/spec/fixtures/big_test.bz2 +0 -0
  44. data/spec/fixtures/big_test.txt +0 -2018
  45. data/spec/fixtures/test.bz2 +0 -0
  46. data/spec/fixtures/test.txt +0 -11
  47. data/spec/helper.rb +0 -12
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5211bc090f23faf0813dcea8e9c404e4cf178823
4
+ data.tar.gz: 5694495d649a64221b17a89cf9d0c27527cd955e
5
+ SHA512:
6
+ metadata.gz: 171681ece3ad8fa0266f4754e3cfff9be61fcc82d447d01d0195d7a12d62e2cccabb4b43806922fb37fee06aafef1a0488c91259cdb2833deeb0bd692da4e6b4
7
+ data.tar.gz: d17e1d0989d1105f38709d9a6b941729e63bef120764c6262f879228df852a963ea485d0376979d781eb4534c68d6d584f471dc6ee7c4233852233bbee47a4d9
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2011, Sebastian Staudt
1
+ Copyright (c) 2011-2017, Sebastian Staudt
2
2
  All rights reserved.
3
3
 
4
4
  Redistribution and use in source and binary forms, with or without modification,
data/README.md CHANGED
@@ -1,37 +1,54 @@
1
1
  RBzip2
2
2
  ======
3
3
 
4
- RBzip2 is a gem providing a pure Ruby implementation of the [bzip2][1]
5
- algorithm used for compression and decompression.
4
+ RBzip2 is a gem providing various implementations of the [bzip2][bzip2]
5
+ algorithm used for compression and decompression. Currently, it includes a
6
+ [FFI][ffi]-based implementation and a pure Ruby implementation that's slower
7
+ but works on any Ruby VM. Additionally, there's a JRuby specific implementation
8
+ that's based on Commons Compress.
6
9
 
7
- It is based on the code of the [Apache Commons Compress][2] project and adds
8
- a straight Ruby-like API. There are no external dependencies like other gems or
9
- libraries. Therefore it will run on any Ruby implementation and the respective
10
- operating systems supported by those implementations.
10
+ The pure Ruby implementation is based on the code of the
11
+ [Apache Commons Compress][commons] project and adds a straight Ruby-like API.
12
+ There are no external dependencies like other gems or libraries. Therefore it
13
+ will run on any Ruby implementation and the respective operating systems
14
+ supported by those implementations.
15
+
16
+ The FFI implementation is using `libbz2` and provides fast performance on
17
+ platforms where both `libbz2` and FFI are available. It is derived from this
18
+ [Gist by Brian Lopez][gist].
19
+
20
+ The Java-based implementation can use the
21
+ [Commons Compress Java library][commons] if it is available in the classpath.
11
22
 
12
23
  ## Features
13
24
 
14
- * Compression of raw data into bzip2 compressed `IO`s (like `File` or
15
- `StringIO`)
16
- * Decompression of bzip2 compressed `IO`s (like `File` or `StringIO`)
25
+ * Compression of raw data into bzip2 compressed `IO`s (like `File` or
26
+ `StringIO`)
27
+ * Decompression of bzip2 compressed `IO`s (like `File` or `StringIO`)
17
28
 
18
29
  ## Usage
19
30
 
20
- require 'rbzip2'
21
-
22
- ### Compression
23
-
24
- data = some_data
25
- file = File.new 'somefile.bz2' # open the target file
26
- bz2 = RBzip2::Compressor.new file # wrap the file into the compressor
27
- bz2.write data # write the raw data to the compressor
28
- bz2.close # finish compression (important!)
29
-
30
- ### Decompression
31
-
32
- file = File.new 'somefile.bz2' # open a compressed file
33
- bz2 = RBzip2::Decompressor.new file # wrap the file into the decompressor
34
- data = io.read # read data into a string
31
+ ```ruby
32
+ require 'rbzip2'
33
+ ```
34
+
35
+ ### Compression
36
+
37
+ ```ruby
38
+ data = some_data
39
+ file = File.new 'somefile.bz2', 'wb' # open the target file for writing
40
+ bz2 = RBzip2::Compressor.new file # wrap the file into the compressor
41
+ bz2.write data # write the raw data to the compressor
42
+ bz2.close # finish compression (important!)
43
+ ```
44
+
45
+ ### Decompression
46
+
47
+ ```ruby
48
+ file = File.new 'somefile.bz2' # open a compressed file
49
+ bz2 = RBzip2::Decompressor.new file # wrap the file into the decompressor
50
+ data = bz2.read # read data into a string
51
+ ```
35
52
 
36
53
  ## Future plans
37
54
 
@@ -43,21 +60,37 @@ operating systems supported by those implementations.
43
60
 
44
61
  To install RBzip2 as a Ruby gem use the following command:
45
62
 
46
- gem install rbzip2
63
+ ```sh
64
+ $ gem install rbzip2
65
+ ```
47
66
 
48
67
  To use it as a dependency managed by Bundler add the following to your
49
68
  `Gemfile`:
50
69
 
51
- gem 'rbzip2'
70
+ ```ruby
71
+ gem 'rbzip2'
72
+ ```
73
+
74
+ If you want to use the FFI implementation on any non-JRuby VM, be sure to also
75
+ install the `ffi` gem.
76
+
77
+ ## Performance
78
+
79
+ The `bzip2-ruby` gem is a Ruby binding to `libbz2` and offers best performance,
80
+ but it is only available for MRI < 2.0.0 and Rubinius.
81
+
82
+ The FFI implementation binds to `libbz2` as well and has almost the same
83
+ performance as `bzip2-ruby`.
84
+
85
+ The Java implementation uses a native Java library and is slower by a factor of
86
+ about 2/10 while compressing/decompressing.
87
+
88
+ The pure Ruby implementation of RBzip2 is inherently slower than `bzip2-ruby`.
89
+ Currently, this is a plain port of Apache Commons' Java code to Ruby and no
90
+ effort has been made to optimize it. That's why the Ruby implementation of
91
+ RBzip2 is slower by a factor of about 130/100 while compressing/decompressing
92
+ (on Ruby 1.9.3). Ruby 1.8.7 is even slower.
52
93
 
53
- ## Performance
54
-
55
- Due to its pure Ruby implementation RBzip2 is inherently slower than
56
- bzip2-ruby, which is a Ruby binding to libbzip2. Currently, RBzip2 is a plain
57
- port of Apache Commons' Java code to Ruby and no effort has been made to
58
- optimize it. That's why RBzip2 is slower by a factor of about 140/1000 while
59
- compressing/decompressing (on Ruby 1.9.3). Ruby 1.8.7 is even slower.
60
-
61
94
  ## License
62
95
 
63
96
  This code is free software; you can redistribute it and/or modify it under the
@@ -67,12 +100,15 @@ included LICENSE file.
67
100
  ## Credits
68
101
 
69
102
  * Sebastian Staudt -- koraktor(at)gmail.com
103
+ * Brian Lopez -- seniorlopez(at)gmail.com
70
104
 
71
105
  ## See Also
72
106
 
73
107
  * [Documentation](http://rubydoc.info/gems/rbzip2)
74
108
  * [GitHub project page](https://github.com/koraktor/rbzip2)
75
- * [bzip2 project page][1]
109
+ * [bzip2 project page][bzip2]
76
110
 
77
- [1]: http://bzip.org
78
- [2]: http://commons.apache.org/compress
111
+ [bzip2]: http://bzip.org
112
+ [commons]: http://commons.apache.org/compress
113
+ [ffi]: https://github.com/ffi/ffi/wiki
114
+ [gist]: https://gist.github.com/brianmario/5833373
data/Rakefile CHANGED
@@ -5,10 +5,12 @@
5
5
  #
6
6
  # Copyright (c) 2011, Sebastian Staudt
7
7
 
8
+ require 'bundler/gem_tasks'
8
9
  require 'rspec/core/rake_task'
9
10
  require 'rubygems/package_task'
10
11
 
11
12
  task :default => :spec
13
+ task :test => :spec
12
14
 
13
15
  spec = Gem::Specification.load 'rbzip2.gemspec'
14
16
  Gem::PackageTask.new(spec) do |pkg|
@@ -0,0 +1,12 @@
1
+ # This code is free software; you can redistribute it and/or modify it under
2
+ # the terms of the new BSD License.
3
+ #
4
+ # Copyright (c) 2011-2013, Sebastian Staudt
5
+
6
# Fallback for Ruby VMs whose IO class does not provide #readbyte.
#
# The guard checks for the *instance* method, so the fallback has to be
# defined as an instance method of IO as well (a singleton `IO.readbyte`
# would never be reachable from an IO object).
unless IO.method_defined? :readbyte

  class IO

    # Reads a single byte from the stream.
    #
    # Returns the byte as an Integer in the range 0..255.
    # Raises EOFError when the end of the stream has been reached, which
    # matches the behavior of the built-in IO#readbyte.
    def readbyte
      byte = read(1)
      raise EOFError, 'end of file reached' if byte.nil?

      # unpack works regardless of whether String#[] yields a String or an
      # Integer on the running Ruby version.
      byte.unpack('C').first
    end

  end

end
@@ -1,17 +1,21 @@
1
1
  # This code is free software; you can redistribute it and/or modify it under
2
2
  # the terms of the new BSD License.
3
3
  #
4
- # Copyright (c) 2011, Sebastian Staudt
4
+ # Copyright (c) 2011-2013, Sebastian Staudt
5
5
 
6
6
  module RBzip2
7
7
 
8
- autoload :CRC, 'rbzip2/crc'
9
- autoload :Compressor, 'rbzip2/compressor'
10
- autoload :Constants, 'rbzip2/constants'
11
- autoload :Decompressor, 'rbzip2/decompressor'
12
- autoload :IO, 'rbzip2/io'
13
- autoload :InputData, 'rbzip2/input_data'
14
- autoload :OutputData, 'rbzip2/output_data'
15
- autoload :VERSION, 'rbzip2/version'
8
+ autoload :Adapter, 'rbzip2/adapter'
9
+ autoload :FFI, 'rbzip2/ffi'
10
+ autoload :IO, 'rbzip2/io'
11
+ autoload :Java, 'rbzip2/java'
12
+ autoload :Ruby, 'rbzip2/ruby'
13
+ autoload :VERSION, 'rbzip2/version'
14
+
15
+ def self.default_adapter
16
+ return FFI if FFI.available?
17
+ return Java if Java.available?
18
+ Ruby
19
+ end
16
20
 
17
21
  end
@@ -0,0 +1,17 @@
1
+ # This code is free software; you can redistribute it and/or modify it under
2
+ # the terms of the new BSD License.
3
+ #
4
+ # Copyright (c) 2013, Sebastian Staudt
5
+
6
# Mixin for the implementation modules of RBzip2. A module that extends this
# adapter gets an availability flag and the +available?+ query method.
module RBzip2::Adapter

  # Hook that runs when a module extends this adapter.
  #
  # The extending module is marked as available first; afterwards its own
  # +init+ method (if it defines one) is called, which gives it the chance to
  # change the flag again.
  def self.extended(mod)
    mod.send(:class_variable_set, :@@available, true)

    if mod.respond_to?(:init)
      mod.init
    end
  end

  # Returns the current value of the availability flag of the extending
  # module.
  def available?
    class_variable_get(:@@available)
  end

end
@@ -0,0 +1,33 @@
1
+ # This code is free software; you can redistribute it and/or modify it under
2
+ # the terms of the new BSD License.
3
+ #
4
+ # Copyright (c) 2013, Sebastian Staudt
5
+
6
+ begin
7
+ require 'ffi'
8
+ rescue LoadError
9
+ end
10
+
11
# Namespace of the implementation that binds to libbz2 through the ffi gem.
module RBzip2::FFI

  # Called by RBzip2::Adapter once this module extends it.
  #
  # Tries to load libbz2. If the ffi gem is not loaded (NameError) or the
  # library cannot be loaded (LoadError), the module is flagged as
  # unavailable.
  def self.init
    extend ::FFI::Library
    ffi_lib 'bz2'
  rescue NameError, LoadError
    @@available = false
  end

  # Has to follow the definition of init, because extending the adapter
  # invokes it right away.
  extend RBzip2::Adapter

  # Constants of this namespace and the files that define them; they are
  # loaded lazily on first access.
  [
    [:BufferError,  'rbzip2/ffi/errors'],
    [:Compressor,   'rbzip2/ffi/compressor'],
    [:ConfigError,  'rbzip2/ffi/errors'],
    [:CorruptError, 'rbzip2/ffi/errors'],
    [:Decompressor, 'rbzip2/ffi/decompressor'],
    [:Error,        'rbzip2/ffi/errors']
  ].each do |const_name, path|
    autoload const_name, path
  end

end
32
+
33
+ require 'rbzip2/ffi/constants'
@@ -0,0 +1,85 @@
1
+ # This code is free software; you can redistribute it and/or modify it under
2
+ # the terms of the new BSD License.
3
+ #
4
+ # Copyright (c) 2013, Brian Lopez
5
+ # Copyright (c) 2013-2017, Sebastian Staudt
6
+
7
# Compressor that uses libbz2 through FFI and writes the compressed data to a
# wrapped IO object.
#
# Every call to #write compresses the given bytes on its own via .compress
# and writes the result to the wrapped IO.
class RBzip2::FFI::Compressor

  extend ::FFI::Library

  ffi_lib 'bz2'
  attach_function :BZ2_bzBuffToBuffCompress,
                  [:pointer, :buffer_inout, :pointer, :uint32, :int, :int, :int],
                  :int

  # Compresses a string in memory.
  #
  # data        - String containing the raw bytes to compress
  # blksize     - block size, clamped to 1..9
  # verbosity   - verbosity of libbz2, clamped to 0..4
  # work_factor - work factor of libbz2, clamped to 0..250
  #
  # Returns a String containing the compressed bytes.
  #
  # Raises ArgumentError, NoMemoryError, RBzip2::FFI::BufferError,
  # RBzip2::FFI::ConfigError or RBzip2::FFI::Error depending on the return
  # code of libbz2.
  def self.compress(data, blksize = RBzip2::FFI::DEFAULT_BLK_SIZE, verbosity = 0, work_factor = 30)
    blksize = 1 if blksize < 1
    blksize = 9 if blksize > 9
    verbosity = 0 if verbosity < 0
    verbosity = 4 if verbosity > 4
    work_factor = 0 if work_factor < 0
    work_factor = 250 if work_factor > 250

    # Input size + 1% + 600 bytes, computed with integer arithmetic and
    # rounded up, so that a whole number of bytes is handed to FFI and the
    # buffer is never smaller than intended.
    out_len = data.bytesize + (data.bytesize / 100) + 601
    dst_buf = ::FFI::MemoryPointer.new :char, out_len
    dst_len = ::FFI::MemoryPointer.new :uint32
    dst_len.write_uint out_len

    src_buf = ::FFI::MemoryPointer.new :char, data.bytesize
    src_buf.put_bytes 0, data

    ret = BZ2_bzBuffToBuffCompress dst_buf, dst_len, src_buf, data.bytesize,
                                   blksize, verbosity, work_factor

    case ret
    when RBzip2::FFI::BZ_OK
      # dst_len now holds the number of bytes that have actually been written
      dst_buf.read_bytes dst_len.read_uint
    when RBzip2::FFI::BZ_PARAM_ERROR
      raise ArgumentError, 'One of blksize, verbosity or work_factor is out of range'
    when RBzip2::FFI::BZ_MEM_ERROR
      raise NoMemoryError, 'Out of memory'
    when RBzip2::FFI::BZ_OUTBUFF_FULL
      raise RBzip2::FFI::BufferError, "Output buffer isn't large enough"
    when RBzip2::FFI::BZ_CONFIG_ERROR
      raise RBzip2::FFI::ConfigError, 'libbz2 has been mis-compiled'
    else
      raise RBzip2::FFI::Error, "Unhandled error code: #{ret}"
    end
  end

  # Creates a compressor that writes to the given IO object.
  def initialize(io)
    @io = io
  end

  # Flushes the wrapped IO (no-op once the compressor has been closed).
  def flush
    @io.flush unless @io.nil?
  end

  # Flushes and closes the wrapped IO. Calling it again is a no-op.
  def close
    flush
    unless @io.nil?
      @io.close
      @io = nil
    end
  end

  # Writes a single character.
  #
  # int - either a Numeric, whose lowest byte is written, or any other
  #       object, of which the first character of its string representation
  #       is written
  def putc(int)
    if int.is_a? Numeric
      # #write expects a String, so the byte value has to be converted first
      write((int & 0xff).chr)
    else
      write int.to_s[0].chr
    end
  end

  # Writes the given line followed by the record separator ($/).
  def puts(line)
    write line + $/
  end

  # Compresses the given bytes with block size 9 and writes the result to the
  # wrapped IO.
  #
  # Raises RuntimeError if the compressor has already been closed.
  def write(bytes)
    raise 'stream closed' if @io.nil?

    @io.write self.class.compress(bytes, 9)
  end

end
@@ -0,0 +1,30 @@
1
+ # This code is free software; you can redistribute it and/or modify it under
2
+ # the terms of the new BSD License.
3
+ #
4
+ # Copyright (c) 2013, Brian Lopez
5
+ # Copyright (c) 2013, Sebastian Staudt
6
+
7
# Numeric constants used by the FFI implementation.
# NOTE(review): the BZ_* values are expected to match the definitions in
# libbz2's bzlib.h -- confirm against the header when updating them.
module RBzip2::FFI

  # Block size used by Compressor.compress when none is given
  DEFAULT_BLK_SIZE = 3

  # Actions
  BZ_RUN    = 0
  BZ_FLUSH  = 1
  BZ_FINISH = 2

  # Non-negative return codes
  BZ_OK         = 0
  BZ_RUN_OK     = 1
  BZ_FLUSH_OK   = 2
  BZ_FINISH_OK  = 3
  BZ_STREAM_END = 4

  # Negative return codes
  BZ_SEQUENCE_ERROR   = -1
  BZ_PARAM_ERROR      = -2
  BZ_MEM_ERROR        = -3
  BZ_DATA_ERROR       = -4
  BZ_DATA_ERROR_MAGIC = -5
  BZ_IO_ERROR         = -6
  BZ_UNEXPECTED_EOF   = -7
  BZ_OUTBUFF_FULL     = -8
  BZ_CONFIG_ERROR     = -9

end
@@ -0,0 +1,163 @@
1
+ # This code is free software; you can redistribute it and/or modify it under
2
+ # the terms of the new BSD License.
3
+ #
4
+ # Copyright (c) 2013, Brian Lopez
5
+ # Copyright (c) 2013-2017, Sebastian Staudt
6
+
7
# Decompressor that uses libbz2 through FFI.
#
# Reading without a length decompresses the whole content of the wrapped IO
# in memory. Reading with a length is only supported for File objects and
# uses the file based API of libbz2 (BZ2_bzReadOpen / BZ2_bzRead).
class RBzip2::FFI::Decompressor

  extend ::FFI::Library

  ffi_lib ::FFI::Platform::LIBC
  attach_function :fopen,
                  [:string, :string],
                  :pointer
  attach_function :fclose,
                  [:pointer],
                  :int

  ffi_lib 'bz2'
  attach_function :BZ2_bzRead,
                  [:pointer, :pointer, :pointer, :int],
                  :int
  attach_function :BZ2_bzReadClose,
                  [:pointer, :pointer],
                  :void
  attach_function :BZ2_bzReadOpen,
                  [:pointer, :pointer, :int, :int, :pointer, :int],
                  :pointer
  attach_function :BZ2_bzBuffToBuffDecompress,
                  [:pointer, :buffer_inout, :pointer, :uint32, :int, :int],
                  :int

  # Decompresses a string in memory.
  #
  # data      - String containing the compressed bytes
  # factor    - the output buffer is allocated with +factor+ times the size
  #             of the compressed data
  # small     - passed to libbz2 as its +small+ argument
  # verbosity - passed to libbz2 as its +verbosity+ argument
  #
  # Returns a String containing the decompressed bytes.
  #
  # Raises RBzip2::FFI::BufferError if the output buffer is too small, and
  # ArgumentError, NoMemoryError, RBzip2::FFI::CorruptError,
  # RBzip2::FFI::ConfigError or RBzip2::FFI::Error for the other error codes
  # of libbz2.
  def self.decompress(data, factor = 2, small = 0, verbosity = 0)
    out_len = data.bytesize * factor
    dst_buf = ::FFI::MemoryPointer.new :char, out_len
    dst_len = ::FFI::MemoryPointer.new :uint32
    dst_len.write_uint out_len

    src_buf = ::FFI::MemoryPointer.new :char, data.bytesize
    src_buf.put_bytes 0, data

    ret = BZ2_bzBuffToBuffDecompress dst_buf, dst_len, src_buf, data.bytesize,
                                     small, verbosity

    case ret
    when RBzip2::FFI::BZ_OK
      # dst_len now holds the number of bytes that have actually been written
      dst_buf.read_bytes dst_len.read_uint
    when RBzip2::FFI::BZ_PARAM_ERROR
      raise ArgumentError, 'One of small or verbosity is out of range'
    when RBzip2::FFI::BZ_MEM_ERROR
      raise NoMemoryError, 'Out of memory'
    when RBzip2::FFI::BZ_OUTBUFF_FULL
      raise RBzip2::FFI::BufferError, "Output buffer isn't large enough"
    when RBzip2::FFI::BZ_DATA_ERROR, RBzip2::FFI::BZ_DATA_ERROR_MAGIC,
         RBzip2::FFI::BZ_UNEXPECTED_EOF
      raise RBzip2::FFI::CorruptError, 'Compressed data appears to be corrupt or unreadable'
    when RBzip2::FFI::BZ_CONFIG_ERROR
      raise RBzip2::FFI::ConfigError, 'libbz2 has been mis-compiled'
    else
      raise RBzip2::FFI::Error, "Unhandled error code: #{ret}"
    end
  end

  # Reads up to +length+ decompressed bytes from the wrapped file, opening
  # the libbz2 file handle on first use.
  #
  # Returns a String with the bytes that have actually been read (possibly
  # fewer than +length+), or nil once the end of the compressed stream has
  # been reached.
  #
  # NOTE(review): reading stops at the end of the first bzip2 stream; data of
  # further concatenated streams is not read -- confirm whether that is
  # acceptable for callers.
  def read_file(length)
    open_file if @bz_file.nil?

    return '' if length == 0
    return nil if @bz_eof

    error   = ::FFI::MemoryPointer.new :int
    dst_buf = ::FFI::MemoryPointer.new :char, length

    count = BZ2_bzRead error, @bz_file, dst_buf, length

    code = error.read_int
    if code == RBzip2::FFI::BZ_STREAM_END
      # libbz2 must not be asked for more data after the end of the stream
      @bz_eof = true
    elsif code != RBzip2::FFI::BZ_OK
      raise_bz_error code
    end

    return nil if count == 0 && @bz_eof

    # Only the bytes reported by libbz2 are valid, the rest of the buffer is
    # uninitialized
    dst_buf.read_bytes count
  end

  # Creates a decompressor that reads from the given IO object.
  def initialize(io)
    @io      = io
    @file    = nil
    @bz_file = nil
    @bz_eof  = false
  end

  # Detaches the wrapped IO (unless it is $stdin) and releases the libbz2
  # file handle if one has been opened.
  def close
    if @io != $stdin
      @io = nil
      @data = nil
    end

    close_file unless @bz_file.nil?
  end

  # Releases the libbz2 file handle and the underlying C file handle. The
  # handles are reset afterwards, so closing twice does not free them twice.
  #
  # Returns the result of fclose, or nil if no file has been open.
  def close_file
    unless @bz_file.nil?
      error = ::FFI::MemoryPointer.new :int
      BZ2_bzReadClose error, @bz_file
    end
    result = @file.nil? ? nil : fclose(@file)

    @bz_file = nil
    @file    = nil
    @bz_eof  = false

    result
  end

  # Reads a single decompressed character, or nil at the end of the stream.
  def getc
    read 1
  end

  # Reads a line including its trailing newline.
  #
  # Returns the line, a final line without newline if the stream ends before
  # one is found, or nil if the end of the stream has already been reached.
  def gets
    line = ''
    loop do
      char = getc
      # Without this check the loop would never terminate at the end of the
      # stream
      break if char.nil? || char.empty?
      line += char
      break if char == "\n"
    end
    line.empty? ? nil : line
  end

  # Opens the wrapped file with the file based API of libbz2.
  #
  # verbosity - verbosity of libbz2, clamped to 0..4
  # small     - passed to libbz2 as its +small+ argument, negative values
  #             are replaced by 0
  #
  # Raises RuntimeError if the wrapped IO is not a File, RBzip2::FFI::Error
  # if the file cannot be opened, and the error matching the libbz2 error
  # code if the stream cannot be initialized.
  def open_file(verbosity = 0, small = 0)
    raise 'IO not a file' unless @io.is_a? File

    small = 0 if small < 0
    verbosity = 0 if verbosity < 0
    verbosity = 4 if verbosity > 4

    # Binary mode, so that the compressed data is not altered on platforms
    # that distinguish between text and binary streams
    file = fopen @io.path, 'rb'
    raise RBzip2::FFI::Error, "Unable to open file: #{@io.path}" if file.null?

    error   = ::FFI::MemoryPointer.new :int
    bz_file = BZ2_bzReadOpen error, file, verbosity, small, nil, 0

    code = error.read_int
    unless code == RBzip2::FFI::BZ_OK
      fclose file
      raise_bz_error code
    end

    @file    = file
    @bz_eof  = false
    @bz_file = bz_file
  end

  # Reads decompressed data.
  #
  # length - number of bytes to read; if nil, the remaining content of the
  #          wrapped IO is read and decompressed in memory
  #
  # Returns a String with the decompressed data (or nil at the end of the
  # stream when a length is given).
  #
  # Raises RuntimeError if the decompressor has been closed and
  # NotImplementedError if a length is given for anything but a File.
  def read(length = nil)
    raise 'stream closed' if @io.nil?

    if length.nil?
      factor = 4
      compressed_data = @io.read
      data = nil
      # The size of the decompressed data is unknown, so retry with a larger
      # output buffer until it fits
      while data.nil?
        begin
          data = self.class.decompress compressed_data, factor
        rescue RBzip2::FFI::BufferError
          factor = factor ** 2
        end
      end
    else
      if @io.is_a? File
        data = read_file length
      else
        raise NotImplementedError
      end
    end

    data
  end

  # Returns the size of the wrapped StringIO or File, nil for any other IO.
  def size
    if @io.is_a? StringIO
      @io.size
    elsif @io.is_a? File
      @io.stat.size
    end
  end

  # Returns the size of the decompressed data.
  #
  # The data is read once and kept afterwards, because the wrapped IO is
  # consumed by reading it and cannot be decompressed a second time.
  def uncompressed
    @data ||= read
    @data.size
  end

  def inspect
    "#<#{self.class}: @io=#{@io.inspect} size=#{size} uncompressed=#{uncompressed}>"
  end

  private

  # Raises the Ruby error matching an error code reported by the file based
  # API of libbz2.
  def raise_bz_error(code)
    case code
    when RBzip2::FFI::BZ_PARAM_ERROR
      raise ArgumentError, 'Invalid parameter passed to libbz2'
    when RBzip2::FFI::BZ_MEM_ERROR
      raise NoMemoryError, 'Out of memory'
    when RBzip2::FFI::BZ_IO_ERROR
      raise IOError, 'Error reading from the compressed file'
    when RBzip2::FFI::BZ_DATA_ERROR, RBzip2::FFI::BZ_DATA_ERROR_MAGIC,
         RBzip2::FFI::BZ_UNEXPECTED_EOF
      raise RBzip2::FFI::CorruptError, 'Compressed data appears to be corrupt or unreadable'
    when RBzip2::FFI::BZ_CONFIG_ERROR
      raise RBzip2::FFI::ConfigError, 'libbz2 has been mis-compiled'
    else
      raise RBzip2::FFI::Error, "Unhandled error code: #{code}"
    end
  end

end