extlz4 0.2.4.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,12 @@
1
+
2
# Exposes the block-level API under additional "raw" names.
#
# Every +raw_*+ module method is an alias of the +block_*+ method with the
# same suffix, and the Raw* constants point at the Block* classes.
# NOTE(review): presumably retained for callers written against an older
# "raw" naming scheme - confirm before removing.
module LZ4
  RawStreamEncoder = BlockEncoder
  RawStreamDecoder = BlockDecoder

  class << self
    %i[encode decode stream_encode stream_decode].each do |suffix|
      alias_method :"raw_#{suffix}", :"block_#{suffix}"
    end
  end
end
@@ -0,0 +1,96 @@
1
+ require_relative "../extlz4"
2
+
3
+ module LZ4
4
# Copies an LZ4 stream from the wrapped input to another output while
# repairing the size field of uncompressed (literal) blocks.
#
# A literal block whose recorded size is larger than the number of bytes
# that can still belong to a block (bounded by the remaining input and by
# the block maximum size) is rewritten with the corrected size.
# NOTE(review): judging by the class name this targets streams produced by
# extlz4 0.1 - confirm against the release notes.
#
# The input given to the constructor must respond to +read+, +pos+ and
# +size+ (for example a File or a StringIO).
class StreamFixerForBug_0_1 < LZ4::StreamDecoder
  # Number of bytes reserved at the tail of the input when computing where
  # block data must end.
  # NOTE(review): presumably the 4-byte end-of-blocks mark - confirm.
  BLOCK_PIVOT_SIZE = 4

  # Writes the repaired stream (header, blocks, optional stream checksum)
  # to +output+.
  #
  # [output]  Object that accepts binary strings through +<<+.
  # [block]   Optional progress callback, called with (message, position, total).
  # [RETURN]  self
  def fix(output, &block)
    export_header(output)
    export_fixedblocks(output, &block)
    export_streamsum(output) if @streamchecksum

    yield("fixed block translation is acompleshed", @io.pos, @io.size) if block

    self
  end

  # Re-creates the stream header from the values parsed by the decoder
  # and writes it to +output+.
  #
  # The flag bits mirror what was read: the independence bit is set when
  # the source had it set, and the stream-size / dictionary-id fields are
  # written together with their flags. The optional fields are placed before
  # the checksum byte and are covered by it.
  #
  # Raises LZ4::Error for any stream version other than 1.
  def export_header(output)
    raise LZ4::Error, "un-supported version" unless @version == 1

    blocksize = BLOCK_MAXIMUM_SIZES.rassoc(@blockmaximum)[0]
    sd = VERSION_NUMBER |
         (@blockindependence ? BLOCK_INDEPENDENCY : 0) |
         (@blockchecksum ? BLOCK_CHECKSUM : 0) |
         (@streamsize ? STREAM_SIZE : 0) |
         (@streamchecksum ? STREAM_CHECKSUM : 0) |
         (@presetdict ? PRESET_DICTIONARY : 0)
    bd = blocksize << 4

    desc = [sd, bd].pack("CC")
    desc << [@streamsize].pack("Q<") if @streamsize
    desc << [@presetdict].pack("V") if @presetdict

    header = [MAGIC_NUMBER].pack("V")
    header << desc
    # The checksum byte is the second byte of xxh32 over the whole descriptor.
    header << [XXhash.xxh32(desc, 0) >> 8].pack("C")
    output << header
  end

  # Copies every block to +output+, correcting oversized literal blocks,
  # and stops after copying the zero-sized end mark.
  #
  # Yields progress messages ("reading block", "correct block size") when a
  # block is given. Raises IOError when the input ends prematurely.
  def export_fixedblocks(output)
    # The base of this loop is copied from LZ4::StreamDecoder#getnextblock.

    total = @io.size
    canyield = block_given?

    # Position after which no block payload may extend.
    endofblock = total - BLOCK_PIVOT_SIZE
    endofblock -= 4 if @blockchecksum
    endofblock -= 4 if @streamchecksum

    loop do
      yield("reading block", @io.pos, total) if canyield

      head = @io.read(4)
      unless head && head.bytesize == 4
        raise IOError, "can not read block header (unexpected end of stream)"
      end

      flags = head.unpack("V")[0]
      iscomp = (flags >> 31) == 0
      blocksize = flags & 0x7fffffff
      unless blocksize > 0
        # End mark: copy it through unchanged and finish.
        output << [flags].pack("V")
        break
      end

      unless iscomp
        limit = endofblock - @io.pos
        limit = @blockmaximum if limit > @blockmaximum

        if blocksize > limit
          unless limit > 0
            raise IOError, "can not correct block size (unexpected end of stream)"
          end
          blocksize = limit
          flags = LITERAL_DATA_BLOCK_FLAG | blocksize
          yield("correct block size", @io.pos - 4, total) if canyield
        end
      end

      w = @io.read(blocksize, @readbuf)
      readsize = w ? w.bytesize : 0
      unless readsize == blocksize
        raise IOError, "can not read block (readsize=#{readsize}, needsize=#{blocksize} (#{"0x%x" % blocksize}))"
      end
      output << [flags].pack("V") << w
      output << @io.read(4) if @blockchecksum
    end
  end

  # Copies the 4 bytes following the end mark (the stream checksum) to +output+.
  def export_streamsum(output)
    output << @io.read(4)
  end
end
85
+
86
# Runs LZ4::StreamFixerForBug_0_1 over the file at +inpath+ and writes the
# repaired stream to +outpath+.
#
# Both files are opened in binary mode through +open_file+, which is
# defined outside this file.
# NOTE(review): +open_file+ is presumably responsible for closing the
# files when its block returns - confirm.
#
# [inpath]  Path of the stream to read.
# [outpath] Path of the file to write.
# [block]   Optional progress callback forwarded to the fixer.
# [RETURN]  nil
def self.fix_extlz4_0_1_bug(inpath, outpath, &block)
  open_file(inpath, "rb") do |source|
    open_file(outpath, "wb") do |sink|
      LZ4::StreamFixerForBug_0_1.new(source).fix(sink, &block)
    end
  end

  nil
end
96
+ end
@@ -0,0 +1,529 @@
1
+ #
2
+ # This code is under public domain (CC0)
3
+ # <http://creativecommons.org/publicdomain/zero/1.0/>.
4
+ #
5
+ # To the extent possible under law, dearblue has waived all copyright
6
+ # and related or neighboring rights to this work.
7
+ #
8
+ # dearblue <dearblue@users.noreply.github.com>
9
+ #
10
+
11
+ require_relative "../extlz4"
12
+ require "stringio"
13
+
14
+ require "rubygems"
15
+ gem "xxhash", "~> 0.3"
16
+ require "xxhash"
17
+
18
+ module LZ4
19
# call-seq:
#   encode_old(string, level = nil, opts = {}) -> encoded string
#   encode_old(output, level = nil, opts = {}) -> StreamEncoder
#   encode_old(output, level = nil, opts = {}) { |encoder| ... } -> block result
#
# Front end of StreamEncoder.
#
# When +first+ is a String it is encoded in one shot and the encoded
# binary string is returned. Otherwise +first+ is used as the output
# object; the encoder is yielded to the block (and closed afterwards) or,
# without a block, returned to the caller.
#
# The optional arguments are a compression level and/or an options hash
# whose keys must all exist in StreamEncoder::OPTIONS. A single argument
# that responds to +to_hash+ is treated as the options hash.
#
# Raises ArgumentError for too many arguments or for unknown option keys.
def self.encode_old(first, *args)
  if args.size > 2
    raise ArgumentError, "wrong number of arguments (#{args.size + 1} for 1 .. 3)"
  end

  defaults = StreamEncoder::OPTIONS
  level = nil
  opts = defaults
  if args.size == 2
    level = args[0].to_i
    opts = defaults.merge(args[1])
  elsif args.size == 1
    if args[0].respond_to?(:to_hash)
      opts = defaults.merge(args[0])
    else
      level = args[0].to_i
    end
  end

  unknown = opts.keys - defaults.keys
  unless unknown.empty?
    if unknown.size > 10
      raise ArgumentError, "unknown key - #{unknown[0]} (for #{StreamEncoder::OPTIONS.keys.slice(0, 10).join(", ")} and more...)"
    end
    raise ArgumentError, "unknown key - #{unknown[0]} (for #{StreamEncoder::OPTIONS.keys.join(", ")})"
  end

  # A String input is compressed into an in-memory binary buffer.
  src = first.kind_of?(String) ? first : nil
  dest = src ? StringIO.new("".b) : first

  lz4 = StreamEncoder.new(dest, level || 1,
                          opts[:blocksize], opts[:block_dependency],
                          opts[:block_checksum], opts[:stream_checksum])

  if src
    lz4 << src
    lz4.close
    dest.string
  elsif block_given?
    begin
      yield(lz4)
    ensure
      lz4.close
    end
  else
    lz4
  end
end
76
+
77
# call-seq:
#   decode_old(string) -> decoded string
#   decode_old(input) -> StreamDecoder
#   decode_old(input) { |decoder| ... } -> block result
#
# Front end of StreamDecoder.
#
# A String argument is decoded completely and the result of
# StreamDecoder#read is returned. Any other object is handed to
# StreamDecoder as the input; the decoder is yielded to the block (and
# closed afterwards) or, without a block, returned to the caller.
def self.decode_old(io, &block)
  unless io.kind_of?(String)
    decoder = StreamDecoder.new(io)
    return decoder unless block

    begin
      return yield(decoder)
    ensure
      decoder.close
    end
  end

  decoder = StreamDecoder.new(StringIO.new(io))
  decoded = decoder.read
  decoder.close
  decoded
end
94
+
95
+
96
# Constants and small header structures shared by the stream encoder and
# decoder.
module BasicStream
  MAGIC_NUMBER = 0x184D2204
  MAGIC_NUMBER_LEGACY = 0x184C2102

  # Block-maximum-size code (as stored in the BD byte) => size in bytes.
  BLOCK_MAXIMUM_SIZES = {
    # 0 => not available
    # 1 => not available
    # 2 => not available
    # 3 => not available
    4 => 1 << 16, # 64 KiB
    5 => 1 << 18, # 256 KiB
    6 => 1 << 20, # 1 MiB
    7 => 1 << 22, # 4 MiB
  }.freeze

  # Bit 31 of a block header: set when the block is stored uncompressed.
  LITERAL_DATA_BLOCK_FLAG = 0x80000000

  # Bit fields of the FLG byte.
  VERSION_NUMBER = 1 << 6
  VERSION_NUMBER_MASK = 0x03 << 6
  BLOCK_INDEPENDENCY = 1 << 5
  BLOCK_CHECKSUM = 1 << 4
  STREAM_SIZE = 1 << 3
  STREAM_CHECKSUM = 1 << 2
  PRESET_DICTIONARY = 1 << 0

  Header = Struct.new(:magic,
                      :version,
                      :blockindependence,
                      :blockchecksum,
                      :streamchecksum,
                      :blocksize,
                      :streamsize,
                      :predictid)

  # Stream header.
  #
  # +version+ is 1 for the current format and -1 for the legacy format;
  # +blocksize+ is the block maximum size in bytes; +streamsize+ and
  # +predictid+ are nil when the header does not carry them.
  class Header
    # Reads and parses a stream header from +io+.
    #
    # +io+ must respond to +read+ and +getbyte+. The header checksum byte
    # is consumed but not verified.
    #
    # Raises RuntimeError for an unreadable or unknown magic number and for
    # an unexpected version, and Error for an invalid block maximum size.
    def self.load(io)
      head = io.read(4)
      unless head && head.bytesize == 4
        raise "could not read magic number (unexpected end of stream)"
      end

      # String#unpack returns an Array; the number itself is its first element.
      magic = head.unpack("V")[0]
      case magic
      when MAGIC_NUMBER_LEGACY
        new(magic, -1, true, false, false, 8 * 1024 * 1024, nil, nil)
      when MAGIC_NUMBER
        (sf, bd) = io.read(2).unpack("CC")
        version = (sf >> 6) & 0x03
        raise "stream header error - wrong version number" unless version == 0x01
        blockindependence = ((sf >> 5) & 0x01) != 0
        blockchecksum = ((sf >> 4) & 0x01) != 0
        has_streamsize = ((sf >> 3) & 0x01) != 0
        streamchecksum = ((sf >> 2) & 0x01) != 0
        # reserved = (sf >> 1) & 0x01
        has_predictid = ((sf >> 0) & 0x01) != 0

        # reserved = (bd >> 7) & 0x01
        blockmax = (bd >> 4) & 0x07
        # reserved = (bd >> 0) & 0x0f

        blocksize = BLOCK_MAXIMUM_SIZES[blockmax]
        raise Error, "stream header error - wrong block maximum size (#{blockmax} for 4 .. 7)" unless blocksize

        streamsize = has_streamsize ? io.read(8).unpack("Q<")[0] : nil
        predictid = has_predictid ? io.read(4).unpack("V")[0] : nil

        io.getbyte # header checksum: consumed, not verified

        new(magic, version, blockindependence, blockchecksum, streamchecksum, blocksize, streamsize, predictid)
      else
        raise "could not recognized magic number (0x%08x)" % magic
      end
    end

    # Builds a header from +args+ (same order as the struct members) and
    # returns its packed binary form.
    def self.pack(*args)
      new(*args).pack
    end

    # Returns the binary representation of this header.
    #
    # +version+ may be either the decoded version number (1, as produced by
    # Header.load) or the VERSION_NUMBER bit pattern. The checksum byte
    # covers the FLG and BD bytes plus the optional stream-size and
    # dictionary-id fields.
    #
    # Raises RuntimeError when the magic number or the version is not the
    # current format's.
    def pack
      raise "wrong magic number" unless magic == MAGIC_NUMBER
      raise "wrong version number" unless version == 0x01 || version == VERSION_NUMBER

      sd = VERSION_NUMBER |
           (blockindependence ? BLOCK_INDEPENDENCY : 0) |
           (blockchecksum ? BLOCK_CHECKSUM : 0) |
           (streamsize ? STREAM_SIZE : 0) |
           (streamchecksum ? STREAM_CHECKSUM : 0) |
           (predictid ? PRESET_DICTIONARY : 0)
      bd = (BLOCK_MAXIMUM_SIZES.rassoc(blocksize)[0] << 4)

      desc = [sd, bd].pack("CC")
      desc << [streamsize].pack("Q<") if streamsize
      desc << [predictid].pack("V") if predictid

      header = [magic].pack("V")
      header << desc
      header << [XXhash.xxh32(desc) >> 8].pack("C")
    end
  end

  BlockHeader = Struct.new(:iscompress,
                           :packedsize)

  # Header of a single block: whether the payload is compressed and how
  # many bytes it occupies in the stream. Instances are read-only.
  class BlockHeader
    alias compress? iscompress
    undef iscompress
    undef iscompress=
    undef packedsize=

    # Returns the 4-byte little-endian block header.
    def pack
      [(compress? ? 0 : LITERAL_DATA_BLOCK_FLAG) | packedsize].pack("V")
    end

    # Shortcut for new(iscompress, packedsize).pack.
    def self.pack(iscompress, packedsize)
      new(iscompress, packedsize).pack
    end

    # Parses a 4-byte little-endian block header.
    def self.unpack(data)
      d = data.unpack("V")[0]
      new((d & LITERAL_DATA_BLOCK_FLAG) == 0, d & 0x7fffffff)
    end

    # Reads 4 bytes from +io+ and parses them as a block header.
    def self.load(io)
      unpack io.read(4)
    end
  end
end
216
+
217
#
# LZ4 stream encoder
#
# Buffers written data, emits it block by block to the output object and
# terminates the stream on #close.
#
class StreamEncoder
  include BasicStream

  # Default option values; the keys are the option names accepted by
  # LZ4.encode_old.
  OPTIONS = {
    legacy: false,
    blocksize: 7,
    block_dependency: false,
    block_checksum: false,
    stream_checksum: true,
  }

  #
  # Writes the stream header to +io+ and prepares the block encoder.
  #
  # [io]
  #   Output object; must accept binary strings through +<<+.
  #
  # [level]
  #   Compression level. nil and values below 4 are handed to the block
  #   encoder as nil; values above 16 are clamped to 16.
  #
  # [blocksize]
  #   Key of BLOCK_MAXIMUM_SIZES (4 .. 7). Raises ArgumentError otherwise.
  #
  # [block_dependency]
  #   When true, blocks are produced by a single LZ4::BlockEncoder instance;
  #   otherwise each block is encoded on its own with LZ4.block_encode.
  #
  # [block_checksum]
  #   When true, an xxh32 checksum is appended to every block.
  #
  # [stream_checksum]
  #   When true, an xxh32 checksum of all input is appended on #close.
  #
  def initialize(io, level, blocksize, block_dependency,
                 block_checksum, stream_checksum)
    @block_checksum = !!block_checksum
    @stream_checksum = XXhash::XXhashInternal::StreamingHash32.new(0) if stream_checksum

    @blocksize = BLOCK_MAXIMUM_SIZES[blocksize]
    raise ArgumentError, "wrong blocksize (#{blocksize})" unless @blocksize

    @block_dependency = !!block_dependency

    level = level.to_i if level
    if level && level < 4
      level = nil
    elsif level && level > 16
      level = 16
    end

    @encoder = get_encoder(level, @block_dependency)
    @io = io
    @buf = "".b

    header = [MAGIC_NUMBER].pack("V")
    sd = VERSION_NUMBER |
         (@block_dependency ? 0 : BLOCK_INDEPENDENCY) |
         (@block_checksum ? BLOCK_CHECKSUM : 0) |
         (false ? STREAM_SIZE : 0) |
         (@stream_checksum ? STREAM_CHECKSUM : 0) |
         (false ? PRESET_DICTIONARY : 0)
    bd = (blocksize << 4)
    desc = [sd, bd].pack("CC")
    header << desc
    # TODO: header << [stream_size].pack("Q<") if stream_size
    # TODO: header << [XXhash.xxh32(predict)].pack("V") if predict # preset dictionary
    header << [XXhash.xxh32(desc) >> 8].pack("C")
    @io << header
  end

  #
  # call-seq:
  #   write(data) -> nil or self
  #
  # Write data to lz4 stream.
  #
  # The data is appended to an internal buffer; every time the buffer
  # reaches the block maximum size one block is written to the output.
  #
  # [RETURN (self)]
  #   Success write process.
  #
  # [RETURN (nil)]
  #   Given nil to data (nothing is processed).
  #
  # [data (String)]
  #   Converted with Kernel#String before use.
  #
  def write(data)
    return nil if data.nil?

    @slicebuf ||= ""
    @inputproxy ||= StringIO.new
    @inputproxy.string = String(data)
    until @inputproxy.eof?
      # Read only as much as is needed to complete the current block.
      slicesize = @blocksize - @buf.bytesize
      @buf << @inputproxy.read(slicesize, @slicebuf)
      export_block if @buf.bytesize >= @blocksize
    end

    self
  end

  #
  # Same as `write` method, but return self always.
  #
  def <<(data)
    write data
    self
  end

  #
  # Flushes the buffered data as a final block, writes the end mark and
  # (if enabled) the stream checksum, then detaches the output object.
  #
  # Calling close again after the stream was closed does nothing, so an
  # explicit close inside the block given to LZ4.encode_old is safe.
  #
  # [RETURN (nil)]
  #
  def close
    return nil unless @io

    export_block unless @buf.empty?
    @io << [0].pack("V")
    @io << [@stream_checksum.digest].pack("V") if @stream_checksum
    @io.flush if @io.respond_to?(:flush)
    @io = nil
  end

  private

  # Returns a callable that compresses one block. A single work buffer is
  # reused as the destination of every call.
  def get_encoder(level, block_dependency)
    workencbuf = "".b
    if block_dependency
      streamencoder = LZ4::BlockEncoder.new(level)
      ->(src) { streamencoder.update(src, workencbuf) }
    else
      ->(src) { LZ4.block_encode(level, src, workencbuf) }
    end
  end

  # Writes the buffered data as one block and empties the buffer.
  def export_block
    w = @encoder.(@buf)
    @stream_checksum.update(@buf) if @stream_checksum
    if w.bytesize < @buf.bytesize
      # Compression made the data smaller: emit the compressed block.
      @io << [w.bytesize].pack("V") << w
    else
      # Compression did not shrink the data: emit it uncompressed.
      @io << [@buf.bytesize | LITERAL_DATA_BLOCK_FLAG].pack("V") << @buf
      w = @buf
    end

    # The block checksum covers the bytes as they were written.
    @io << [XXhash.xxh32(w)].pack("V") if @block_checksum
    @buf.clear
  end
end
344
+
345
#
# Class for decompressing LZ4 streams.
#
# The stream header is parsed by the constructor; the decoded data is then
# obtained through #read and #getbyte.
#
# NOTE: the stream checksum that may follow the end mark is neither
# consumed nor verified, and block checksums are skipped without being
# compared.
#
class StreamDecoder
  include BasicStream

  attr_reader :version
  attr_reader :blockindependence
  attr_reader :blockchecksum
  attr_reader :streamchecksum
  attr_reader :blockmaximum
  attr_reader :streamsize
  attr_reader :presetdict

  #
  # Parses the stream header from +io+.
  #
  # [io]
  #   Input object; must respond to +read+, +getbyte+ and (for legacy
  #   streams) +eof?+.
  #
  # Raises Error for an unknown or unreadable magic number and for an
  # invalid block maximum size, and RuntimeError for an unexpected version.
  #
  def initialize(io)
    head = io.read(4)
    # A missing or short magic number falls through to the "wrong magic" error.
    magic = head && head.bytesize == 4 ? head.unpack("V")[0] : nil
    case magic
    when MAGIC_NUMBER
      sf = io.getbyte
      @version = (sf >> 6) & 0x03
      raise "stream header error - wrong version number" unless @version == 0x01
      @blockindependence = ((sf >> 5) & 0x01) != 0
      @blockchecksum = ((sf >> 4) & 0x01) != 0
      streamsize = ((sf >> 3) & 0x01) != 0
      @streamchecksum = ((sf >> 2) & 0x01) != 0
      # reserved = (sf >> 1) & 0x01
      presetdict = ((sf >> 0) & 0x01) != 0

      bd = io.getbyte
      # reserved = (bd >> 7) & 0x01
      blockmax = (bd >> 4) & 0x07
      # reserved = (bd >> 0) & 0x0f

      @blockmaximum = BLOCK_MAXIMUM_SIZES[blockmax]
      raise Error, "stream header error - wrong block maximum size (#{blockmax} for 4 .. 7)" unless @blockmaximum

      @streamsize = io.read(8).unpack("Q<")[0] if streamsize
      @presetdict = io.read(4).unpack("V")[0] if presetdict

      io.getbyte # header checksum: consumed, not verified

      if @blockindependence
        @decoder = LZ4.method(:block_decode)
      else
        # Dependent blocks are decoded by one BlockDecoder instance.
        @decoder = LZ4::BlockDecoder.new.method(:update)
      end
    when MAGIC_NUMBER_LEGACY
      @version = -1
      @blockindependence = true
      @blockchecksum = false
      @streamchecksum = false
      @blockmaximum = 1 << 23 # 8 MiB
      @streamsize = nil
      @presetdict = nil
      @decoder = LZ4.method(:block_decode)
    else
      raise Error, "stream header error - wrong magic number"
    end

    @io = io
    @pos = 0 # set to nil once the end of the stream has been reached

    @readbuf = "".b
    @decodebuf = "".b
  end

  # Detaches the input object. The input object itself is not closed.
  def close
    @io = nil
  end

  #
  # call-seq:
  #   read -> string or nil
  #   read(size) -> string or nil
  #   read(size, dest) -> string or nil
  #
  # Without arguments, returns all remaining decoded data ("" once the end
  # of the stream has been reached).
  #
  # With +size+, returns up to +size+ bytes, or nil when the end of the
  # stream was already reached. +dest+, when given, is cleared and used as
  # the result buffer.
  #
  def read(*args)
    case args.size
    when 0
      read_all
    when 1
      read_part(args[0].to_i, "")
    when 2
      read_part(args[0].to_i, args[1])
    else
      raise ArgumentError, "wrong number of arguments (#{args.size} for 0 .. 2)"
    end
  end

  # Returns the next decoded byte as an Integer, or nil at end of stream.
  def getbyte
    w = read(1) or return nil
    w.getbyte(0)
  end

  # Returns true once the end of the stream has been reached by a read.
  def eof
    !@pos
  end

  alias eof? eof

  def tell
    raise NotImplementedError
  end

  def seek(off, cur)
    raise NotImplementedError
  end

  def pos
    raise NotImplementedError
  end

  def pos=(pos)
    raise NotImplementedError
  end

  private

  # Returns the rest of the partially consumed block plus all following blocks.
  def read_all
    # Past the end mark there are no more blocks; reading on would
    # misinterpret whatever follows in the input as a block header.
    return "" unless @pos

    dest = @buf ? @buf.read : ""
    @buf = nil
    w = nil
    dest << w while w = getnextblock
    @pos = nil
    dest
  end

  # Fills +dest+ with up to +size+ decoded bytes, fetching blocks as needed.
  def read_part(size, dest)
    dest.clear
    return dest unless size > 0
    return nil unless @pos

    @slicebuf ||= ""

    loop do
      unless @buf && !@buf.eof?
        unless w = getnextblock
          @pos = nil
          return dest.empty? ? nil : dest
        end

        # NOTE: why StringIO is used here:
        #       on ruby-2.1, creating new strings with the String#slice
        #       family allocates an enormous amount of heap memory.
        #       Passing a read buffer to StringIO#read mitigates this.

        @buf ||= StringIO.new
        @buf.string = w
      end

      # A block that decoded to nothing yields nil here; move on to the next one.
      chunk = @buf.read(size, @slicebuf) or next
      dest << chunk
      size -= @slicebuf.bytesize
      break unless size > 0
    end

    dest
  end

  # Reads one block from the input and returns its decoded content, or nil
  # at the end mark (or, for legacy streams, at the end of the input).
  #
  # Raises LZ4::Error when the block is larger than the block maximum size
  # or the input ends inside a block.
  def getnextblock
    return nil if @version == -1 && @io.eof?

    head = @io.read(4)
    unless head && head.bytesize == 4
      raise LZ4::Error, "can not read block header (unexpected end of stream)"
    end

    flags = head.unpack("V")[0]
    iscomp = (flags >> 31) == 0
    blocksize = flags & 0x7fffffff
    return nil unless blocksize > 0
    unless blocksize <= @blockmaximum
      raise LZ4::Error, "block size is too big (blocksize is #{blocksize}, but blockmaximum is #{@blockmaximum}. may have damaged)."
    end

    w = @io.read(blocksize, @readbuf)
    readsize = w ? w.bytesize : 0
    unless readsize == blocksize
      raise LZ4::Error, "can not read block (readsize=#{readsize}, needsize=#{blocksize} (#{"0x%x" % blocksize}))"
    end

    w = @decoder.(w, @blockmaximum, @decodebuf) if iscomp
    @io.read(4) if @blockchecksum # TODO: IMPLEMENT ME! compare checksum
    w
  end
end
529
+ end