extlz4 0.2.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+
2
# Exposes the block_* API of LZ4 under raw_* names as well.
module LZ4
  class << self
    # Each raw_* singleton method is an alias of the block_* method of the
    # same suffix.
    alias_method :raw_encode, :block_encode
    alias_method :raw_decode, :block_decode
    alias_method :raw_stream_encode, :block_stream_encode
    alias_method :raw_stream_decode, :block_stream_decode
  end

  # RawStream* constants refer to the very same classes as Block*.
  RawStreamEncoder = BlockEncoder
  RawStreamDecoder = BlockDecoder
end
@@ -0,0 +1,96 @@
1
+ require_relative "../extlz4"
2
+
3
+ module LZ4
4
# Copies an LZ4 stream to another output while correcting the size field of
# uncompressed (literal) blocks whose recorded size is larger than what can
# actually be present in the input.
#
# The stream header is parsed by LZ4::StreamDecoder#initialize; this class
# re-emits the header, every block and, when present, the stream checksum.
class StreamFixerForBug_0_1 < LZ4::StreamDecoder
  # Number of bytes reserved at the tail of the input when computing where
  # block data may end (presumably the 4-byte end mark - TODO confirm).
  BLOCK_PIVOT_SIZE = 4

  # Write the corrected stream to +output+.
  #
  # [output]
  #   Any object that accepts binary strings through <<.
  #
  # [block]
  #   Optional progress callback, called with (message, position, total).
  #
  # [RETURN (self)]
  def fix(output, &block)
    export_header(output)
    export_fixedblocks(output, &block)
    export_streamsum(output) if @streamchecksum

    yield("fixed block translation is acompleshed", @io.pos, @io.size) if block

    self
  end

  # Re-emit the stream header from the values parsed by the decoder.
  #
  # The descriptor mirrors what StreamDecoder#initialize reads: every flag
  # bit is set exactly when the corresponding value was present in the
  # input, the optional stream size and dictionary id follow the BD byte,
  # and the one-byte header checksum comes last and covers the whole
  # descriptor.
  #
  # Raises LZ4::Error for any version other than 1.
  def export_header(output)
    raise LZ4::Error, "un-supported version" unless @version == 1

    blocksize = BLOCK_MAXIMUM_SIZES.rassoc(@blockmaximum)[0]
    sd = VERSION_NUMBER |
         (@blockindependence ? BLOCK_INDEPENDENCY : 0) |
         (@blockchecksum ? BLOCK_CHECKSUM : 0) |
         (@streamsize ? STREAM_SIZE : 0) |
         (@streamchecksum ? STREAM_CHECKSUM : 0) |
         (@presetdict ? PRESET_DICTIONARY : 0)
    bd = blocksize << 4

    desc = [sd, bd].pack("CC")
    desc << [@streamsize].pack("Q<") if @streamsize
    desc << [@presetdict].pack("V") if @presetdict

    header = [MAGIC_NUMBER].pack("V")
    header << desc
    # pack("C") keeps only the low 8 bits, i.e. the second byte of the hash.
    header << [XXhash.xxh32(desc, 0) >> 8].pack("C")
    output << header
  end

  # Copy every block to +output+, shrinking oversized literal blocks.
  #
  # The loop is based on LZ4::StreamDecoder#getnextblock.  A literal block
  # is limited to the smaller of the block maximum and the number of bytes
  # left before the trailing end mark / checksums.
  #
  # Yields (message, position, total) progress reports when a block is
  # given.  Raises IOError when the input ends in the middle of a block.
  def export_fixedblocks(output)
    total = @io.size
    canyield = block_given?
    endofblock = total - BLOCK_PIVOT_SIZE
    endofblock -= 4 if @blockchecksum
    endofblock -= 4 if @streamchecksum

    while true
      yield("reading block", @io.pos, total) if canyield

      rawflags = @io.read(4)
      unless rawflags && rawflags.bytesize == 4
        raise IOError, "can not read block header (readsize=#{rawflags ? rawflags.bytesize : 0}, needsize=4)"
      end

      flags = rawflags.unpack("V")[0]
      iscomp = (flags >> 31) == 0
      blocksize = flags & 0x7fffffff
      unless blocksize > 0
        # A zero-sized block header terminates the block sequence.
        output << [flags].pack("V")
        break
      end

      unless iscomp
        limit = endofblock - @io.pos
        limit = @blockmaximum if limit > @blockmaximum

        if blocksize > limit
          blocksize = limit
          flags = LITERAL_DATA_BLOCK_FLAG | blocksize
          yield("correct block size", @io.pos - 4, total) if canyield
        end
      end

      w = @io.read(blocksize, @readbuf)
      # IO#read with a length returns nil at end of file.
      readsize = w ? w.bytesize : 0
      unless readsize == blocksize
        raise IOError, "can not read block (readsize=#{readsize}, needsize=#{blocksize} (#{"0x%x" % blocksize}))"
      end
      output << [flags].pack("V") << w
      output << @io.read(4) if @blockchecksum
    end
  end

  # Copy the 4 bytes following the block sequence (the stream checksum)
  # to +output+ unchanged.
  def export_streamsum(output)
    output << @io.read(4)
  end
end
85
+
86
# Run LZ4::StreamFixerForBug_0_1 over the file at +inpath+ and write the
# result to +outpath+.
#
# Both files are opened through open_file in binary mode ("rb" / "wb").
# The optional block is forwarded to StreamFixerForBug_0_1#fix.
#
# Always returns nil.
def self.fix_extlz4_0_1_bug(inpath, outpath, &block)
  open_file(inpath, "rb") do |source|
    open_file(outpath, "wb") do |sink|
      LZ4::StreamFixerForBug_0_1.new(source).fix(sink, &block)
    end
  end

  nil
end
96
+ end
@@ -0,0 +1,529 @@
1
+ #
2
+ # This code is under public domain (CC0)
3
+ # <http://creativecommons.org/publicdomain/zero/1.0/>.
4
+ #
5
+ # To the extent possible under law, dearblue has waived all copyright
6
+ # and related or neighboring rights to this work.
7
+ #
8
+ # dearblue <dearblue@users.noreply.github.com>
9
+ #
10
+
11
+ require_relative "../extlz4"
12
+ require "stringio"
13
+
14
+ require "rubygems"
15
+ gem "xxhash", "~> 0.3"
16
+ require "xxhash"
17
+
18
+ module LZ4
19
#
# call-seq:
#   encode_old(string, level = nil, opts = {}) -> encoded string
#   encode_old(io, level = nil, opts = {}) -> stream encoder
#   encode_old(io, level = nil, opts = {}) { |encoder| ... } -> block result
#
# Build a StreamEncoder and, depending on +first+, use it immediately.
#
# [first]
#   A String is encoded in one go into a new binary string, which is
#   returned.  Any other object is handed to StreamEncoder as the output.
#
# [level]
#   Converted with to_i; when omitted, 1 is passed to StreamEncoder.
#   A single extra argument that responds to to_hash is taken as +opts+.
#
# [opts]
#   Merged over StreamEncoder::OPTIONS.  Keys that are not in
#   StreamEncoder::OPTIONS raise ArgumentError.
#
# With a block, the encoder is yielded and closed afterwards (even when the
# block raises).  Without a block and with a non-String +first+, the open
# encoder is returned and the caller has to close it.
#
def self.encode_old(first, *args)
  defaults = StreamEncoder::OPTIONS

  case args.size
  when 0
    level = nil
    opts = defaults
  when 1
    if args[0].respond_to?(:to_hash)
      level = nil
      opts = defaults.merge(args[0])
    else
      level = args[0].to_i
      opts = defaults
    end
  when 2
    level = args[0].to_i
    opts = defaults.merge(args[1])
  else
    raise ArgumentError, "wrong number of arguments (#{args.size + 1} for 1 .. 3)"
  end

  known = defaults.keys
  unknown = opts.keys - known
  unless unknown.empty?
    # Abbreviate the list of valid keys only when that list itself is long;
    # the number of unknown keys says nothing about it.
    if known.size > 10
      raise ArgumentError, "unknown key - #{unknown[0]} (for #{known.slice(0, 10).join(", ")} and more...)"
    else
      raise ArgumentError, "unknown key - #{unknown[0]} (for #{known.join(", ")})"
    end
  end

  if first.kind_of?(String)
    src = first
    dest = StringIO.new("".b)
  else
    src = nil
    dest = first
  end

  lz4 = StreamEncoder.new(dest, level || 1,
                          opts[:blocksize], opts[:block_dependency],
                          opts[:block_checksum], opts[:stream_checksum])

  if src
    lz4 << src
    lz4.close
    dest.string
  elsif block_given?
    begin
      yield(lz4)
    ensure
      lz4.close
    end
  else
    lz4
  end
end
76
+
77
#
# call-seq:
#   decode_old(string) -> decoded string
#   decode_old(io) -> stream decoder
#   decode_old(io) { |decoder| ... } -> block result
#
# A String is wrapped in a StringIO, read completely through a
# StreamDecoder, and the decoded data is returned.
#
# Any other object is handed to StreamDecoder.new directly.  Without a block
# the open decoder is returned; with a block the decoder is yielded and
# closed afterwards, even when the block raises.
#
def self.decode_old(io, &block)
  unless io.kind_of?(String)
    decoder = StreamDecoder.new(io)
    return decoder unless block

    begin
      return yield(decoder)
    ensure
      decoder.close
    end
  end

  decoder = StreamDecoder.new(StringIO.new(io))
  decoded = decoder.read
  decoder.close
  decoded
end
94
+
95
+
96
# Constants and small value classes shared by the stream encoder and decoder.
module BasicStream
  MAGIC_NUMBER = 0x184D2204
  MAGIC_NUMBER_LEGACY = 0x184C2102

  # Block maximum size code (as stored in the BD byte) => size in bytes.
  BLOCK_MAXIMUM_SIZES = {
    # 0 => not available
    # 1 => not available
    # 2 => not available
    # 3 => not available
    4 => 1 << 16, # 64 KiB
    5 => 1 << 18, # 256 KiB
    6 => 1 << 20, # 1 MiB
    7 => 1 << 22, # 4 MiB
  }

  # High bit of a block header: the block holds uncompressed data.
  LITERAL_DATA_BLOCK_FLAG = 0x80000000

  # Bit layout of the first descriptor byte.
  VERSION_NUMBER = 1 << 6
  VERSION_NUMBER_MASK = 0x03 << 6
  BLOCK_INDEPENDENCY = 1 << 5
  BLOCK_CHECKSUM = 1 << 4
  STREAM_SIZE = 1 << 3
  STREAM_CHECKSUM = 1 << 2
  PRESET_DICTIONARY = 1 << 0

  Header = Struct.new(:magic,
                      :version,
                      :blockindependence,
                      :blockchecksum,
                      :streamchecksum,
                      :blocksize,
                      :streamsize,
                      :predictid)

  # Parsed representation of a stream header.
  class Header
    # Read a stream header from +io+ and return it as a Header.
    #
    # For the legacy magic number, a header with version -1 and an 8 MiB
    # block size is returned without reading anything further.
    #
    # Raises RuntimeError when the magic number is missing or unknown, or
    # the version is not 1, and Error when the block maximum size code is
    # outside 4 .. 7.  The header checksum byte is consumed but not verified.
    def self.load(io)
      raw = io.read(4)
      raise "could not read magic number" unless raw && raw.bytesize == 4

      # String#unpack returns an Array; the magic number is its only element.
      magic = raw.unpack("V")[0]
      case magic
      when MAGIC_NUMBER_LEGACY
        new(magic, -1, true, false, false, 8 * 1024 * 1024, nil, nil)
      when MAGIC_NUMBER
        (sf, bd) = io.read(2).unpack("CC")
        version = (sf >> 6) & 0x03
        raise "stream header error - wrong version number" unless version == 0x01
        blockindependence = ((sf >> 5) & 0x01) != 0
        blockchecksum = ((sf >> 4) & 0x01) != 0
        streamsize = ((sf >> 3) & 0x01) != 0
        streamchecksum = ((sf >> 2) & 0x01) != 0
        # reserved = (sf >> 1) & 0x01
        predictid = ((sf >> 0) & 0x01) != 0

        # reserved = (bd >> 7) & 0x01
        blockmax = (bd >> 4) & 0x07
        # reserved = (bd >> 0) & 0x0f

        blocksize = BLOCK_MAXIMUM_SIZES[blockmax]
        raise Error, "stream header error - wrong block maximum size (#{blockmax} for 4 .. 7)" unless blocksize

        # The flag variables are replaced by the actual values when present
        # and stay false otherwise.
        streamsize = io.read(8).unpack("Q<")[0] if streamsize
        predictid = io.read(4).unpack("V")[0] if predictid

        io.getbyte # header checksum byte (not verified)

        new(magic, version, blockindependence, blockchecksum, streamchecksum, blocksize, streamsize, predictid)
      else
        raise "could not recognized magic number (0x%08x)" % magic
      end
    end

    # Shortcut for new(*args).pack.
    def self.pack(*args)
      new(*args).pack
    end

    # Serialize this header into its binary form.
    #
    # +version+ may be given either as the plain version number that
    # Header.load produces (1) or as the pre-shifted VERSION_NUMBER bits.
    # The header checksum is taken over the whole descriptor, optional
    # stream size and dictionary id included.
    #
    # Raises RuntimeError for a wrong magic or version number.
    def pack
      raise "wrong magic number" unless magic == MAGIC_NUMBER
      unless version == VERSION_NUMBER || version == (VERSION_NUMBER >> 6)
        raise "wrong version number"
      end

      sd = VERSION_NUMBER |
           (blockindependence ? BLOCK_INDEPENDENCY : 0) |
           (blockchecksum ? BLOCK_CHECKSUM : 0) |
           (streamsize ? STREAM_SIZE : 0) |
           (streamchecksum ? STREAM_CHECKSUM : 0) |
           (predictid ? PRESET_DICTIONARY : 0)
      bd = (BLOCK_MAXIMUM_SIZES.rassoc(blocksize)[0] << 4)

      desc = [sd, bd].pack("CC")
      desc << [streamsize].pack("Q<") if streamsize
      desc << [predictid].pack("V") if predictid

      header = [magic].pack("V")
      header << desc
      # pack("C") keeps only the low 8 bits, i.e. the second byte of the hash.
      header << [XXhash.xxh32(desc) >> 8].pack("C")
    end
  end

  BlockHeader = Struct.new(:iscompress,
                           :packedsize)

  # The 4-byte header in front of every block: a "stored uncompressed" flag
  # in the high bit and the stored size in the remaining bits.
  class BlockHeader
    # Only compress? and packedsize remain as readers; the writers are removed.
    alias compress? iscompress
    undef iscompress
    undef iscompress=
    undef packedsize=

    # Serialize into the 4-byte little-endian block header.
    def pack
      [(compress? ? 0 : LITERAL_DATA_BLOCK_FLAG) | packedsize].pack("V")
    end

    # Shortcut for new(iscompress, packedsize).pack.
    def self.pack(iscompress, packedsize)
      new(iscompress, packedsize).pack
    end

    # Build a BlockHeader from its 4-byte binary form.
    def self.unpack(data)
      d = data.unpack("V")[0]
      new((d & LITERAL_DATA_BLOCK_FLAG) == 0, d & ~LITERAL_DATA_BLOCK_FLAG)
    end

    # Read 4 bytes from +io+ and unpack them.
    def self.load(io)
      unpack io.read(4)
    end
  end
end
216
+
217
#
# LZ4 stream encoder
#
# Buffers written data, and emits it to the output as blocks of at most the
# configured block size, preceded by a stream header and followed by an end
# mark and an optional stream checksum.
#
class StreamEncoder
  include BasicStream

  # Default values for the options accepted by LZ4.encode_old.
  OPTIONS = {
    legacy: false,
    blocksize: 7,
    block_dependency: false,
    block_checksum: false,
    stream_checksum: true,
  }

  #
  # Writes the stream header to +io+ immediately.
  #
  # [io]
  #   Output object; must accept binary strings through <<.
  #
  # [level]
  #   Converted with to_i.  nil or a value below 4 is passed on as nil to
  #   the block encoder; values above 16 are limited to 16.
  #
  # [blocksize]
  #   Block maximum size code, a key of BLOCK_MAXIMUM_SIZES (4 .. 7).
  #   Anything else raises ArgumentError.
  #
  # [block_dependency, block_checksum, stream_checksum]
  #   Treated as booleans.
  #
  def initialize(io, level, blocksize, block_dependency,
                 block_checksum, stream_checksum)
    @block_checksum = !!block_checksum
    @stream_checksum = XXhash::XXhashInternal::StreamingHash32.new(0) if stream_checksum

    @blocksize = BLOCK_MAXIMUM_SIZES[blocksize]
    raise ArgumentError, "wrong blocksize (#{blocksize})" unless @blocksize

    @block_dependency = !!block_dependency
    level = level ? level.to_i : nil
    if level.nil? || level < 4
      level = nil
    elsif level > 16
      level = 16
    end
    @encoder = get_encoder(level, @block_dependency)
    @io = io
    @buf = "".b

    header = [MAGIC_NUMBER].pack("V")
    sd = VERSION_NUMBER |
         (@block_dependency ? 0 : BLOCK_INDEPENDENCY) |
         (@block_checksum ? BLOCK_CHECKSUM : 0) |
         (false ? STREAM_SIZE : 0) |
         (@stream_checksum ? STREAM_CHECKSUM : 0) |
         (false ? PRESET_DICTIONARY : 0)
    bd = (blocksize << 4)
    desc = [sd, bd].pack("CC")
    header << desc
    # TODO: header << [stream_size].pack("Q<") if stream_size
    # TODO: header << [XXhash.xxh32(predict)].pack("V") if predict # preset dictionary
    header << [XXhash.xxh32(desc) >> 8].pack("C")
    @io << header
  end

  #
  # call-seq:
  #   write(data) -> nil or self
  #
  # Write data to lz4 stream.
  #
  # If data is nil, return to process nothing.
  #
  # [RETURN (self)]
  #   Success write process.
  #
  # [RETURN (nil)]
  #   Given nil to data.
  #
  # [data (String)]
  #
  def write(data)
    return nil if data.nil?

    @slicebuf ||= ""
    @inputproxy ||= StringIO.new
    @inputproxy.string = String(data)
    until @inputproxy.eof?
      # @buf is emptied as soon as it reaches @blocksize, so the remaining
      # room is always between 1 and @blocksize bytes.
      slicesize = @blocksize - @buf.bytesize
      @buf << @inputproxy.read(slicesize, @slicebuf)
      export_block if @buf.bytesize >= @blocksize
    end

    self
  end

  #
  # Same as `write` method, but return self always.
  #
  def <<(data)
    write data
    self
  end

  #
  # Emit the buffered data as a final block, then the end mark and, when
  # enabled, the stream checksum.  The output is flushed if it responds to
  # flush; it is not closed.
  #
  # Calling close again on an already closed encoder does nothing.
  #
  # [RETURN (nil)]
  #
  def close
    return nil unless @io # already closed

    export_block unless @buf.empty?
    @io << [0].pack("V")
    @io << [@stream_checksum.digest].pack("V") if @stream_checksum
    @io.flush if @io.respond_to?(:flush)
    @io = nil
  end

  private

  # Return a callable that compresses one block of source data.
  #
  # With block dependency, a single LZ4::BlockEncoder is kept and updated
  # for every block; otherwise each block goes through LZ4.block_encode on
  # its own.  Both variants reuse one work buffer for the result.
  def get_encoder(level, block_dependency)
    workencbuf = "".b
    if block_dependency
      streamencoder = LZ4::BlockEncoder.new(level)
      ->(src) { streamencoder.update(src, workencbuf) }
    else
      ->(src) { LZ4.block_encode(level, src, workencbuf) }
    end
  end

  # Compress the buffered data, write it as one block and clear the buffer.
  def export_block
    w = @encoder.(@buf)
    @stream_checksum.update(@buf) if @stream_checksum
    if w.bytesize < @buf.bytesize
      # The data was compressed without exceeding the limit.
      @io << [w.bytesize].pack("V") << w
    else
      # The compressed form exceeded the limit, so the data is written
      # uncompressed.
      @io << [@buf.bytesize | LITERAL_DATA_BLOCK_FLAG].pack("V") << @buf
      w = @buf
    end

    # The block checksum covers the bytes as they were written.
    @io << [XXhash.xxh32(w)].pack("V") if @block_checksum
    @buf.clear
  end
end
344
+
345
#
# Class for decompressing LZ4 streams.
#
# The stream header is parsed on construction; decoded data is then served
# block by block through #read and #getbyte.
#
class StreamDecoder
  include BasicStream

  attr_reader :version
  attr_reader :blockindependence
  attr_reader :blockchecksum
  attr_reader :streamchecksum
  attr_reader :blockmaximum
  attr_reader :streamsize
  attr_reader :presetdict

  #
  # Parse the stream header from +io+ (which must respond to read and
  # getbyte) and prepare for reading blocks.
  #
  # Raises Error for an unknown or missing magic number and for a block
  # maximum size code outside 4 .. 7, and RuntimeError for a wrong version
  # number.  The header checksum byte is consumed but not verified.
  #
  def initialize(io)
    # A missing or short magic number unpacks to nil and is rejected by the
    # else branch below.
    magic = (io.read(4) || "").unpack("V")[0]
    case magic
    when MAGIC_NUMBER
      sf = io.getbyte
      @version = (sf >> 6) & 0x03
      raise "stream header error - wrong version number" unless @version == 0x01
      @blockindependence = ((sf >> 5) & 0x01) != 0
      @blockchecksum = ((sf >> 4) & 0x01) != 0
      streamsize = ((sf >> 3) & 0x01) != 0
      @streamchecksum = ((sf >> 2) & 0x01) != 0
      # reserved = (sf >> 1) & 0x01
      presetdict = ((sf >> 0) & 0x01) != 0

      bd = io.getbyte
      # reserved = (bd >> 7) & 0x01
      blockmax = (bd >> 4) & 0x07
      # reserved = (bd >> 0) & 0x0f

      @blockmaximum = BLOCK_MAXIMUM_SIZES[blockmax]
      raise Error, "stream header error - wrong block maximum size (#{blockmax} for 4 .. 7)" unless @blockmaximum

      @streamsize = io.read(8).unpack("Q<")[0] if streamsize
      @presetdict = io.read(4).unpack("V")[0] if presetdict

      io.getbyte # header checksum byte (not verified)

      if @blockindependence
        @decoder = LZ4.method(:block_decode)
      else
        @decoder = LZ4::BlockDecoder.new.method(:update)
      end
    when MAGIC_NUMBER_LEGACY
      @version = -1
      @blockindependence = true
      @blockchecksum = false
      @streamchecksum = false
      @blockmaximum = 1 << 23 # 8 MiB
      @streamsize = nil
      @presetdict = nil
      @decoder = LZ4.method(:block_decode)
    else
      raise Error, "stream header error - wrong magic number"
    end

    @io = io
    # @pos only serves as an end-of-stream marker: it becomes nil once the
    # last block has been consumed.
    @pos = 0

    @readbuf = "".b
    @decodebuf = "".b
  end

  # Drop the reference to the input object.  The input itself is not closed.
  def close
    @io = nil
  end

  #
  # call-seq:
  #   read -> string or nil
  #   read(size) -> string or nil
  #   read(size, dest) -> string or nil
  #
  # Without arguments, return all remaining decoded data (an empty string
  # at end of stream).  With +size+, return up to +size+ bytes, or nil at
  # end of stream; +dest+ is cleared and used as the result buffer.
  #
  def read(*args)
    case args.size
    when 0
      read_all
    when 1
      read_part(args[0].to_i, "")
    when 2
      read_part(args[0].to_i, args[1])
    else
      raise ArgumentError, "wrong number of arguments (#{args.size} for 0 .. 2)"
    end
  end

  # Return the next decoded byte as an Integer, or nil at end of stream.
  def getbyte
    w = read(1) or return nil
    w.getbyte(0)
  end

  # true once the end of the stream has been reached by a read.
  def eof
    !@pos
  end

  alias eof? eof

  def tell
    raise NotImplementedError
  end

  def seek(off, cur)
    raise NotImplementedError
  end

  def pos
    raise NotImplementedError
  end

  def pos=(pos)
    raise NotImplementedError
  end

  private

  # Return everything that is left: the unread rest of the current block
  # followed by all remaining blocks.
  def read_all
    # After the end of the stream the input must not be touched again;
    # whatever follows the last block is not a block header.
    return "" unless @pos

    dest = @buf ? @buf.read : ""
    @buf = nil
    while w = getnextblock
      dest << w
    end
    @pos = nil
    dest
  end

  # Fill +dest+ with up to +size+ decoded bytes, crossing block boundaries
  # as needed.  Returns +dest+, or nil when the stream was already at its
  # end and nothing could be read.
  def read_part(size, dest)
    dest.clear
    return dest unless size > 0
    return nil unless @pos

    @slicebuf ||= ""

    begin
      unless @buf && !@buf.eof?
        unless w = getnextblock
          @pos = nil
          return dest.empty? ? nil : dest
        end

        # NOTE: Why StringIO is used here:
        #       On ruby-2.1, creating new strings with the String#slice
        #       family leads to an outrageous amount of heap allocation.
        #       Passing a read buffer to StringIO#read mitigates this.

        @buf ||= StringIO.new
        @buf.string = w
      end

      dest << @buf.read(size, @slicebuf)
      size -= @slicebuf.bytesize
    end while size > 0

    dest
  end

  # Read the next block from the input and return its decoded content, or
  # nil at the end of the block sequence (a zero-sized block header, or end
  # of input for a legacy stream).
  #
  # Raises LZ4::Error when the recorded block size exceeds the block
  # maximum or the input ends in the middle of a block.
  def getnextblock
    return nil if @version == -1 && @io.eof?

    rawflags = @io.read(4)
    unless rawflags && rawflags.bytesize == 4
      raise LZ4::Error, "can not read block header (readsize=#{rawflags ? rawflags.bytesize : 0}, needsize=4)"
    end

    flags = rawflags.unpack("V")[0]
    iscomp = (flags >> 31) == 0
    blocksize = flags & 0x7fffffff
    return nil unless blocksize > 0
    unless blocksize <= @blockmaximum
      raise LZ4::Error, "block size is too big (blocksize is #{blocksize}, but blockmaximum is #{@blockmaximum}. may have damaged)."
    end

    w = @io.read(blocksize, @readbuf)
    # IO#read with a length returns nil at end of file.
    readsize = w ? w.bytesize : 0
    unless readsize == blocksize
      raise LZ4::Error, "can not read block (readsize=#{readsize}, needsize=#{blocksize} (#{"0x%x" % blocksize}))"
    end
    w = @decoder.(w, @blockmaximum, @decodebuf) if iscomp
    @io.read(4) if @blockchecksum # TODO: IMPLEMENT ME! compare checksum
    w
  end
end
529
+ end