rbzip2 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
File without changes
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- rbzip2 (0.1.0)
4
+ rbzip2 (0.2.0)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
data/README.md CHANGED
@@ -11,20 +11,31 @@ operating systems supported by those implementations.
11
11
 
12
12
  ## Features
13
13
 
14
- * Decompression of bzip2 compressed IOs (like `File` or `StringIO`)
14
+ * Compression of raw data into bzip2 compressed `IO`s (like `File` or
15
+ `StringIO`)
16
+ * Decompression of bzip2 compressed `IO`s (like `File` or `StringIO`)
15
17
 
16
18
  ## Usage
17
19
 
18
20
  require 'rbzip2'
19
21
 
20
- file = File.new 'somefile.bz2' # open a compressed file
21
- io = RBzip2::IO.new file # wrap the file into RBzip2's IO
22
- data = io.read # read data into a string
22
+ ### Compression
23
+
24
+ data = some_data
25
+ file = File.new 'somefile.bz2', 'wb' # open the target file for writing
26
+ bz2 = RBzip2::Compressor.new file # wrap the file into the compressor
27
+ bz2.write data # write the raw data to the compressor
28
+ bz2.close # finish compression (important!)
29
+
30
+ ### Decompression
31
+
32
+ file = File.new 'somefile.bz2' # open a compressed file
33
+ bz2 = RBzip2::Decompressor.new file # wrap the file into the decompressor
34
+ data = bz2.read # read data into a string
23
35
 
24
36
  ## Future plans
25
37
 
26
38
  * Simple decompression of strings
27
- * Compression of raw data
28
39
  * Simple creation of compressed files
29
40
  * Two-way compressed IO that will (de)compress as you read/write
30
41
 
@@ -39,6 +50,14 @@ To use it as a dependency managed by Bundler add the following to your
39
50
 
40
51
  gem 'rbzip2'
41
52
 
53
+ ## Performance
54
+
55
+ Due to its pure Ruby implementation RBzip2 is inherently slower than
56
+ bzip2-ruby, which is a Ruby binding to libbzip2. Currently, RBzip2 is a plain
57
+ port of Apache Commons' Java code to Ruby and no effort has been made to
58
+ optimize it. That's why RBzip2 is slower by a factor of about 140 while
59
+ compressing and about 1000 while decompressing (on Ruby 1.9.3). Ruby 1.8.7 is even slower.
60
+
42
61
  ## License
43
62
 
44
63
  This code is free software; you can redistribute it and/or modify it under the
@@ -6,9 +6,12 @@
6
6
  module RBzip2
7
7
 
8
8
  autoload :CRC, 'rbzip2/crc'
9
- autoload :Data, 'rbzip2/data'
9
+ autoload :Compressor, 'rbzip2/compressor'
10
+ autoload :Constants, 'rbzip2/constants'
10
11
  autoload :Decompressor, 'rbzip2/decompressor'
11
12
  autoload :IO, 'rbzip2/io'
13
+ autoload :InputData, 'rbzip2/input_data'
14
+ autoload :OutputData, 'rbzip2/output_data'
12
15
  autoload :VERSION, 'rbzip2/version'
13
16
 
14
17
  end
@@ -0,0 +1,1310 @@
1
+ # This code is free software; you can redistribute it and/or modify it under
2
+ # the terms of the new BSD License.
3
+ #
4
+ # Copyright (c) 2011, Sebastian Staudt
5
+
6
+ class RBzip2::Compressor
7
+
8
+ include RBzip2::Constants
9
+
10
# Assigns consecutive code values to the symbols of one coding table.
#
# Code lengths are visited from +min_len+ up to +max_len+; within one length
# the symbols are visited by ascending index. Every symbol whose length (low
# eight bits) equals the current length receives the running code value,
# which is then incremented. After each length the running value is shifted
# left by one bit.
#
# @param code [Array<Integer>] receives the code of every matching symbol
# @param length [Array<Integer>] code length per symbol
# @param min_len [Integer] first code length to process
# @param max_len [Integer] last code length to process
# @param alpha_size [Integer] number of symbols to inspect
def self.assign_codes(code, length, min_len, max_len, alpha_size)
  next_code = 0
  min_len.upto(max_len) do |bits|
    (0...alpha_size).each do |symbol|
      next unless (length[symbol] & 0xff) == bits

      code[symbol] = next_code
      next_code += 1
    end
    next_code <<= 1
  end
end
22
+
23
# Suggests a block size for an input of the given length.
#
# For a positive length the result is one more than the number of whole
# 132000-byte units in the input, capped at 9. For any other length
# MAX_BLOCK_SIZE is returned.
#
# @param input_length [Integer] length of the data to compress
# @return [Integer] the suggested block size
def self.choose_block_size(input_length)
  return MAX_BLOCK_SIZE unless input_length > 0

  suggestion = (input_length / 132000) + 1
  suggestion < 9 ? suggestion : 9
end
26
+
27
# Computes a code length for every symbol from its frequency by repeatedly
# merging the two lightest nodes of a binary min-heap.
#
# The scratch arrays +heap+, +weight+ and +parent+ are taken from +data+.
# A weight keeps the frequency in its upper bits and the subtree depth in its
# low eight bits. If any resulting length exceeds +max_len+, all symbol
# frequencies are roughly halved (never below one) and the tree is rebuilt.
#
# @param len [Array<Integer>] receives the code length of symbol i at len[i]
# @param freq [Array<Integer>] frequency per symbol; zero is treated as one
# @param data [#heap, #weight, #parent] provider of the scratch arrays
# @param alpha_size [Integer] number of symbols
# @param max_len [Integer] longest permitted code length
def self.make_code_lengths(len, freq, data, alpha_size, max_len)
  heap   = data.heap
  weight = data.weight
  parent = data.parent

  weight[0] = 0
  alpha_size.downto(1) do |node|
    frequency = freq[node - 1]
    weight[node] = (frequency == 0 ? 1 : frequency) << 8
  end

  heap_size = 0

  # Moves the node stored in +slot+ towards the root while it is lighter than
  # its parent. heap[0]/weight[0] act as a sentinel that stops the climb.
  sift_up = lambda do |slot|
    node = heap[slot]
    while weight[node] < weight[heap[slot >> 1]]
      heap[slot] = heap[slot >> 1]
      slot >>= 1
    end
    heap[slot] = node
  end

  # Removes and returns the lightest node, then restores the heap order.
  pop_lightest = lambda do
    lightest = heap[1]
    heap[1] = heap[heap_size]
    heap_size -= 1

    slot = 1
    node = heap[1]
    loop do
      child = slot << 1
      break if child > heap_size

      child += 1 if child < heap_size && weight[heap[child + 1]] < weight[heap[child]]
      break if weight[node] < weight[heap[child]]

      heap[slot] = heap[child]
      slot = child
    end
    heap[slot] = node

    lightest
  end

  loop do
    node_count = alpha_size
    heap_size = 0
    heap[0] = 0
    weight[0] = 0
    parent[0] = -2

    1.upto(alpha_size) do |node|
      parent[node] = -1
      heap_size += 1
      heap[heap_size] = node
      sift_up.call(heap_size)
    end

    while heap_size > 1
      first = pop_lightest.call
      second = pop_lightest.call

      node_count += 1
      parent[first] = parent[second] = node_count

      first_weight = weight[first]
      second_weight = weight[second]
      deeper = [first_weight & 0x000000ff, second_weight & 0x000000ff].max
      weight[node_count] = ((first_weight & 0xffffff00) +
                            (second_weight & 0xffffff00)) | (1 + deeper)

      parent[node_count] = -1
      heap_size += 1
      heap[heap_size] = node_count
      sift_up.call(heap_size)
    end

    too_long = false
    1.upto(alpha_size) do |node|
      steps = 0
      cursor = node
      while (above = parent[cursor]) >= 0
        cursor = above
        steps += 1
      end

      len[node - 1] = steps
      too_long = true if steps > max_len
    end

    break unless too_long

    1.upto(alpha_size) do |node|
      scaled = 1 + ((weight[node] >> 8) >> 1)
      weight[node] = scaled << 8
    end
  end
end
155
+
156
# Returns the median of the three given values.
#
# @param a [Comparable]
# @param b [Comparable]
# @param c [Comparable]
# @return the value that is neither the smallest nor the largest
def self.med3(a, b, c)
  if a < b
    return b if b < c

    a < c ? c : a
  else
    return b if b > c

    a > c ? c : a
  end
end
159
+
160
# Exchanges +n+ consecutive entries of +fmap+ starting at +p1+ with the +n+
# entries starting at +p2+, one pair at a time in ascending order.
#
# @param fmap [Array] the array to modify in place
# @param p1 [Integer] start of the first run
# @param p2 [Integer] start of the second run
# @param n [Integer] number of entries to exchange
# @return [nil]
def self.vswap(fmap, p1, p2, n)
  n.times do |offset|
    left = p1 + offset
    right = p2 + offset
    fmap[left], fmap[right] = fmap[right], fmap[left]
  end
  nil
end
170
+
171
+ attr_reader :block_size
172
+
173
# Creates a compressor that writes to +io+ and immediately emits the stream
# header via #init.
#
# @param io [#write] the target the compressed bytes are written to
# @param block_size [Integer] block size digit, MAX_BLOCK_SIZE by default
def initialize(io, block_size = MAX_BLOCK_SIZE)
  @io         = io
  @block_size = block_size
  @crc        = RBzip2::CRC.new

  # bit buffer state
  @buff = 0
  @live = 0

  # block and run-length state
  @allowable_block_size = 0
  @combined_crc = 0
  @current_char = -1
  @last = 0
  @run_length = 0

  init
end
187
+
188
# Sorts the current block and records the position of entry 0 in the sorted
# map as @orig_ptr.
#
# A first sort runs with a work budget of WORK_FACTOR * @last. If that
# attempt ends with the budget exceeded, the block is randomized and sorted
# again without a budget.
def block_sort
  @work_limit = WORK_FACTOR * @last
  @work_done = 0
  @block_randomized = false
  @first_attempt = true
  main_sort

  if @first_attempt && @work_done > @work_limit
    randomize_block
    @work_limit = @work_done = 0
    @first_attempt = false
    main_sort
  end

  fmap = @data.fmap
  # -1 when no entry within 0..@last equals zero
  @orig_ptr = (0..@last).find { |index| fmap[index] == 0 } || -1
end
211
+
212
# Finishes the compressed stream and closes the underlying IO.
# Does nothing when the stream has already been finished.
def close
  return if @io.nil?

  # #finish clears @io, so keep a reference for closing
  target = @io
  finish
  target.close
end
219
+
220
# Completes the current block: folds the block CRC into the combined stream
# CRC and, unless the block is empty, sorts the block and writes the block
# header, the block CRC, the randomization flag and the coded block data.
#
# The combined CRC is rotated left by one bit within 32 bits before the block
# CRC is XORed in. Ruby integers are unbounded, so the rotation is masked
# explicitly; without the mask the value outgrows 32 bits and later rotations
# pick up bits that do not belong to the checksum.
def end_block
  @block_crc = @crc.final_crc
  rotated = ((@combined_crc << 1) | (@combined_crc >> 31)) & 0xffffffff
  @combined_crc = (rotated ^ @block_crc) & 0xffffffff

  # nothing to emit for an empty block
  return if @last == -1

  block_sort

  # block header magic
  [0x31, 0x41, 0x59, 0x26, 0x53, 0x59].each { |magic| put_byte magic }

  put_int @block_crc

  w 1, (@block_randomized ? 1 : 0)

  move_to_front_code_and_send
end
242
+
243
# Writes the stream trailer: six marker bytes followed by the combined CRC,
# then flushes the remaining bits of the bit buffer.
def end_compression
  [0x17, 0x72, 0x45, 0x38, 0x50, 0x90].each { |marker| put_byte marker }

  put_int @combined_crc

  finished_with_stream
end
255
+
256
# Writes any pending run, the final block and the stream trailer.
#
# The references to the IO and to the working data are dropped afterwards,
# even when writing raises. Calling it again is a no-op.
def finish
  return if @io.nil?

  begin
    write_run if @run_length > 0
    @current_char = -1
    end_block
    end_compression
  ensure
    @io = nil
    @data = nil
  end
end
269
+
270
# Writes the bits still held in the bit buffer to the IO, one byte at a time
# from the top of the 32-bit buffer, until no buffered bits remain.
def finished_with_stream
  until @live <= 0
    @io.write((@buff >> 24).chr)
    @buff = (@buff << 8) & 0xffffffff
    @live -= 8
  end
end
278
+
279
# Flushes the underlying IO, if the stream is still open.
def flush
  return if @io.nil?

  @io.flush
end
282
+
283
# Converts the sorted block into the symbol sequence that is entropy coded.
#
# Steps, all operating on arrays taken from @data:
# 1. Every byte value marked in +in_use+ gets a dense index (+unseq_to_seq+);
#    the number of used values is stored in @n_in_use.
# 2. The frequency table +mtf_freq+ is cleared for all symbols 0..eob, where
#    eob = @n_in_use + 1 is the end-of-block symbol. Clearing is required
#    because the table is reused from block to block.
# 3. Each byte block[fmap[i]] for i in 0..@last is move-to-front coded. Runs
#    of front hits are counted and written as RUNA/RUNB symbols; any other
#    position j is written as symbol j + 1.
# 4. The end-of-block symbol is appended and the symbol count is stored in
#    @n_mtf.
def generate_mtf_values
  last_shadow = @last
  data_shadow = @data
  in_use = data_shadow.in_use
  block = data_shadow.block
  fmap = data_shadow.fmap
  sfmap = data_shadow.sfmap
  mtf_freq = data_shadow.mtf_freq
  unseq_to_seq = data_shadow.unseq_to_seq
  yy = data_shadow.generate_mtf_values_yy

  n_in_use_shadow = 0
  256.times do |i|
    next unless in_use[i]

    unseq_to_seq[i] = n_in_use_shadow
    n_in_use_shadow += 1
  end
  @n_in_use = n_in_use_shadow

  eob = n_in_use_shadow + 1
  0.upto(eob) { |i| mtf_freq[i] = 0 }

  n_in_use_shadow.times { |i| yy[i] = i }

  wr = 0
  z_pend = 0

  # Writes a run of +run+ front hits as a sequence of RUNA/RUNB symbols.
  emit_zero_run = lambda do |run|
    run -= 1
    loop do
      symbol = (run & 1) == 0 ? RUNA : RUNB
      sfmap[wr] = symbol
      mtf_freq[symbol] += 1
      wr += 1

      break if run < 2

      run = (run - 2) >> 1
    end
  end

  0.upto(last_shadow) do |i|
    ll_i = unseq_to_seq[block[fmap[i]] & 0xff]
    tmp = yy[0]
    j = 0

    # shift entries back until the wanted value is found, then put it in front
    while ll_i != tmp
      j += 1
      tmp2 = tmp
      tmp = yy[j]
      yy[j] = tmp2
    end
    yy[0] = tmp

    if j == 0
      z_pend += 1
    else
      if z_pend > 0
        emit_zero_run.call(z_pend)
        z_pend = 0
      end
      sfmap[wr] = j + 1
      wr += 1
      mtf_freq[j + 1] += 1
    end
  end

  emit_zero_run.call(z_pend) if z_pend > 0

  sfmap[wr] = eob
  mtf_freq[eob] += 1
  @n_mtf = wr + 1
end
373
+
374
# Writes the stream header and prepares the first block.
#
# The header consists of the bytes 0x42, 0x5a and 0x68 followed by
# 0x30 + @block_size. The working data is allocated for @block_size after the
# first two bytes have been queued.
def init
  [0x42, 0x5a].each { |magic| put_byte magic }

  @data = RBzip2::OutputData.new @block_size

  put_byte 0x68
  put_byte 0x30 + @block_size

  @combined_crc = 0
  init_block
end
386
+
387
# Resets the per-block state: restarts the block CRC, marks the block as
# empty (@last = -1), clears the 256 in-use flags and recomputes the number
# of bytes that may be stored before the block has to be ended.
def init_block
  @crc.initialize_crc
  @last = -1

  @data.in_use.fill(false, 0, 256)

  @allowable_block_size = (@block_size * BASEBLOCKSIZE) - 20
end
396
+
397
# Sorts the rotations of the current block into @data.fmap.
#
# Outline, as far as visible in this method:
# 1. Clear the two-byte frequency table +ftab+, replicate the start of the
#    block behind its end (NUM_OVERSHOOT_BYTES entries) and clear +quadrant+.
# 2. Count every two-byte pair, turn the counts into bucket boundaries and
#    drop each position into its two-byte bucket in +fmap+.
# 3. Order the 256 first-byte ("big") buckets by size with a shell sort.
# 4. For each big bucket in that order: sort its not yet completed two-byte
#    buckets with #main_qsort3, derive the order of the matching entries in
#    other big buckets from it, mark the buckets as done and refresh
#    +quadrant+ for the bucket.
#
# During the first attempt the method returns early as soon as the work
# counter exceeds the work limit.
#
# +quadrant+ is cleared for indices 0..(@last + NUM_OVERSHOOT_BYTES)
# inclusive: the last of these indices is written further down
# (a2update + @last + 1 with a2update up to NUM_OVERSHOOT_BYTES - 1) and must
# not keep a value from an earlier block.
def main_sort
  data_shadow = @data
  running_order = data_shadow.main_sort_running_order
  copy = data_shadow.main_sort_copy
  big_done = data_shadow.main_sort_big_done
  ftab = data_shadow.ftab
  block = data_shadow.block
  fmap = data_shadow.fmap
  quadrant = data_shadow.quadrant
  last_shadow = @last
  work_limit_shadow = @work_limit
  first_attempt_shadow = @first_attempt

  ftab.fill(0, 0, 65537)

  NUM_OVERSHOOT_BYTES.times do |i|
    block[last_shadow + i + 2] = block[(i % (last_shadow + 1)) + 1]
  end
  quadrant.fill(0, 0, last_shadow + NUM_OVERSHOOT_BYTES + 1)
  block[0] = block[last_shadow + 1]

  # count all two-byte pairs
  c1 = block[0] & 0xff
  (last_shadow + 1).times do |i|
    c2 = block[i + 1] & 0xff
    ftab[(c1 << 8) + c2] += 1
    c1 = c2
  end

  # counts -> bucket end positions
  1.upto(65536) { |i| ftab[i] += ftab[i - 1] }

  c1 = block[1] & 0xff
  last_shadow.times do |i|
    c2 = block[i + 2] & 0xff
    fmap[ftab[(c1 << 8) + c2] -= 1] = i
    c1 = c2
  end

  fmap[ftab[((block[last_shadow + 1] & 0xff) << 8) + (block[1] & 0xff)] -= 1] = last_shadow

  256.times do |i|
    big_done[i] = false
    running_order[i] = i
  end

  # shell sort of the big buckets by size
  h = 364
  while h != 1
    h /= 3
    h.upto(255) do |i|
      vv = running_order[i]
      a = ftab[(vv + 1) << 8] - ftab[vv << 8]
      b = h - 1
      j = i

      ro = running_order[j - h]
      while ftab[(ro + 1) << 8] - ftab[ro << 8] > a
        running_order[j] = ro
        j -= h
        break if j <= b

        ro = running_order[j - h]
      end

      running_order[j] = vv
    end
  end

  256.times do |i|
    ss = running_order[i]

    # sort the unsorted two-byte buckets of this big bucket
    256.times do |j|
      sb = (ss << 8) + j
      ftab_sb = ftab[sb]
      next if (ftab_sb & SETMASK) == SETMASK

      lo = ftab_sb & CLEARMASK
      hi = (ftab[sb + 1] & CLEARMASK) - 1
      if hi > lo
        main_qsort3 data_shadow, lo, hi, 2
        return if first_attempt_shadow && (@work_done > work_limit_shadow)
      end
      ftab[sb] = ftab_sb | SETMASK
    end

    256.times { |j| copy[j] = ftab[(j << 8) + ss] & CLEARMASK }

    hj = ftab[(ss + 1) << 8] & CLEARMASK
    (ftab[ss << 8] & CLEARMASK).upto(hj - 1) do |j|
      fmap_j = fmap[j]
      c1 = block[fmap_j] & 0xff
      next if big_done[c1]

      fmap[copy[c1]] = (fmap_j == 0) ? last_shadow : (fmap_j - 1)
      copy[c1] += 1
    end

    255.downto(0) { |j| ftab[(j << 8) + ss] |= SETMASK }

    big_done[ss] = true

    next unless i < 255

    bb_start = ftab[ss << 8] & CLEARMASK
    bb_size = (ftab[(ss + 1) << 8] & CLEARMASK) - bb_start
    shifts = 0
    shifts += 1 while (bb_size >> shifts) > 65534

    bb_size.times do |j|
      a2update = fmap[bb_start + j]
      q_val = j >> shifts
      quadrant[a2update] = q_val
      quadrant[a2update + last_shadow + 1] = q_val if a2update < NUM_OVERSHOOT_BYTES
    end
  end
end
517
+
518
# Sorts the entries lo_st..hi_st of data_shadow.fmap by the block contents
# found +d_st+ (and more) positions behind each entry, using a three-way
# partitioning quicksort driven by an explicit stack
# (stack_ll/stack_hh/stack_dd of +data_shadow+).
#
# Ranges shorter than SMALL_THRESH or deeper than DEPTH_THRESH are handed to
# #main_simple_sort; when that reports an exceeded work budget the whole sort
# is abandoned immediately.
#
# @param data_shadow the working data (fmap, block and the three stacks)
# @param lo_st [Integer] first index of the range
# @param hi_st [Integer] last index of the range
# @param d_st [Integer] initial comparison depth
def main_qsort3(data_shadow, lo_st, hi_st, d_st)
  stack_ll = data_shadow.stack_ll
  stack_hh = data_shadow.stack_hh
  stack_dd = data_shadow.stack_dd
  fmap = data_shadow.fmap
  block = data_shadow.block

  stack_ll[0] = lo_st
  stack_hh[0] = hi_st
  stack_dd[0] = d_st
  pending = 1

  while pending > 0
    pending -= 1
    lo = stack_ll[pending]
    hi = stack_hh[pending]
    d = stack_dd[pending]

    if (hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)
      return if main_simple_sort data_shadow, lo, hi, d

      next
    end

    d1 = d + 1
    pivot = self.class.med3(block[fmap[lo] + d1],
                            block[fmap[hi] + d1],
                            block[fmap[(lo + hi) >> 1] + d1]) & 0xff

    un_lo = lt_lo = lo
    un_hi = gt_hi = hi

    loop do
      # advance from the left; entries equal to the pivot are parked at the
      # left edge
      while un_lo <= un_hi
        diff = (block[fmap[un_lo] + d1] & 0xff) - pivot
        break if diff > 0

        if diff == 0
          fmap[un_lo], fmap[lt_lo] = fmap[lt_lo], fmap[un_lo]
          lt_lo += 1
        end
        un_lo += 1
      end

      # advance from the right; entries equal to the pivot are parked at the
      # right edge
      while un_lo <= un_hi
        diff = (block[fmap[un_hi] + d1] & 0xff) - pivot
        break if diff < 0

        if diff == 0
          fmap[un_hi], fmap[gt_hi] = fmap[gt_hi], fmap[un_hi]
          gt_hi -= 1
        end
        un_hi -= 1
      end

      break if un_lo > un_hi

      fmap[un_lo], fmap[un_hi] = fmap[un_hi], fmap[un_lo]
      un_lo += 1
      un_hi -= 1
    end

    if gt_hi < lt_lo
      # every entry equals the pivot at this depth: retry one position deeper
      stack_ll[pending] = lo
      stack_hh[pending] = hi
      stack_dd[pending] = d1
      pending += 1
      next
    end

    # move the parked pivot-equal entries into the middle
    n = [lt_lo - lo, un_lo - lt_lo].min
    self.class.vswap fmap, lo, un_lo - n, n
    m = [hi - gt_hi, gt_hi - un_hi].min
    self.class.vswap fmap, un_lo, hi - m + 1, m

    n = lo + un_lo - lt_lo - 1
    m = hi - (gt_hi - un_hi) + 1

    stack_ll[pending] = lo
    stack_hh[pending] = n
    stack_dd[pending] = d
    pending += 1

    stack_ll[pending] = n + 1
    stack_hh[pending] = m - 1
    stack_dd[pending] = d1
    pending += 1

    stack_ll[pending] = m
    stack_hh[pending] = hi
    stack_dd[pending] = d
    pending += 1
  end
end
620
+
621
# Sorts the entries lo..hi of data_shadow.fmap with a shell sort using the
# increments in INCS, comparing the block contents starting +d+ positions
# behind each entry.
#
# Two entries are compared byte by byte for the first six positions and then
# in groups of four positions, each byte comparison followed by a comparison
# of the matching +quadrant+ values, wrapping around at the end of the block.
# Every group of four that compares equal adds one to the work counter. When
# the comparison runs through the whole block without finding a difference,
# the two entries are treated as equal, i.e. the first one is NOT greater and
# is therefore not moved.
#
# During the first attempt the sort is abandoned as soon as the work counter
# exceeds the work limit; the check is made after every batch of up to three
# insertions while entries remain to be inserted.
#
# @param data_shadow the working data (fmap, quadrant, block)
# @param lo [Integer] first index of the range
# @param hi [Integer] last index of the range
# @param d [Integer] comparison depth
# @return [Boolean] true when this is the first attempt and the work budget
#   has been exceeded
def main_simple_sort(data_shadow, lo, hi, d)
  big_n = hi - lo + 1
  return @first_attempt && (@work_done > @work_limit) if big_n < 2

  hp = 0
  hp += 1 while INCS[hp] < big_n

  fmap = data_shadow.fmap
  quadrant = data_shadow.quadrant
  block = data_shadow.block
  last_shadow = @last
  last_plus_1 = last_shadow + 1
  first_attempt_shadow = @first_attempt
  work_limit_shadow = @work_limit
  work_done_shadow = @work_done

  # Tells whether the rotation at i1 sorts after the rotation at i2.
  greater = lambda do |i1, i2|
    1.upto(5) do |offset|
      b1 = block[i1 + offset]
      b2 = block[i2 + offset]
      return (b1 & 0xff) > (b2 & 0xff) unless b1 == b2
    end

    i1 += 6
    i2 += 6
    b1 = block[i1]
    b2 = block[i2]
    return (b1 & 0xff) > (b2 & 0xff) unless b1 == b2

    x = last_shadow
    while x > 0
      x -= 4

      b1 = block[i1 + 1]
      b2 = block[i2 + 1]
      return (b1 & 0xff) > (b2 & 0xff) unless b1 == b2

      q1 = quadrant[i1]
      q2 = quadrant[i2]
      return q1 > q2 unless q1 == q2

      b1 = block[i1 + 2]
      b2 = block[i2 + 2]
      return (b1 & 0xff) > (b2 & 0xff) unless b1 == b2

      q1 = quadrant[i1 + 1]
      q2 = quadrant[i2 + 1]
      return q1 > q2 unless q1 == q2

      b1 = block[i1 + 3]
      b2 = block[i2 + 3]
      return (b1 & 0xff) > (b2 & 0xff) unless b1 == b2

      q1 = quadrant[i1 + 2]
      q2 = quadrant[i2 + 2]
      return q1 > q2 unless q1 == q2

      b1 = block[i1 + 4]
      b2 = block[i2 + 4]
      return (b1 & 0xff) > (b2 & 0xff) unless b1 == b2

      q1 = quadrant[i1 + 3]
      q2 = quadrant[i2 + 3]
      return q1 > q2 unless q1 == q2

      i1 += 4
      i1 -= last_plus_1 if i1 >= last_plus_1
      i2 += 4
      i2 -= last_plus_1 if i2 >= last_plus_1
      work_done_shadow += 1
    end

    # no difference found within the budget: equal, hence not greater
    false
  end

  over_budget = false
  until over_budget || (hp -= 1) < 0
    h = INCS[hp]
    mj = lo + h - 1
    i = lo + h

    while i <= hi
      # insert up to three entries before looking at the budget again
      batch_end = i + 2 < hi ? i + 2 : hi
      while i <= batch_end
        v = fmap[i]
        vd = v + d
        j = i

        loop do
          a = fmap[j - h]
          break unless greater.call(a + d, vd)

          fmap[j] = a
          j -= h
          break if j <= mj
        end

        fmap[j] = v
        i += 1
      end

      if first_attempt_shadow && i <= hi && work_done_shadow > work_limit_shadow
        over_budget = true
        break
      end
    end
  end

  @work_done = work_done_shadow

  first_attempt_shadow && (work_done_shadow > work_limit_shadow)
end
798
+
799
# Writes the 24-bit original pointer of the sorted block, then generates the
# move-to-front symbols and sends them.
def move_to_front_code_and_send
  w(24, @orig_ptr)
  generate_mtf_values
  send_mtf_values
end
804
+
805
# Writes one byte (eight bits) to the bit stream.
#
# @param c [Integer, String] the byte value, or a string whose first byte is
#   written. The byte is extracted with String#unpack so that the result is
#   the same on Ruby 1.8 and later versions (where String#[] returns a
#   one-character string). An empty string is written as 0.
def put_byte(c)
  c = c.unpack('C').first.to_i if c.is_a? String
  w 8, c
end
809
+
810
# Writes the low 32 bits of +u+ to the bit stream, most significant byte
# first.
#
# @param u [Integer] the value to write
def put_int(u)
  24.step(8, -8) { |shift| w 8, (u >> shift) & 0xff }
  w 8, u & 0xff
end
816
+
817
# Flips the lowest bit of selected block bytes and rebuilds the in-use flags.
#
# The positions 1..@last + 1 of the block are walked with a countdown that is
# reloaded from the RNUMS table (512 entries, used cyclically) whenever it
# reaches zero; the byte at the position where the countdown drops to one is
# XORed with 1. Afterwards @block_randomized is set.
def randomize_block
  in_use = @data.in_use
  block = @data.block

  in_use.fill(false, 0, 256)

  countdown = 0
  table_pos = 0
  1.upto(@last + 1) do |pos|
    if countdown == 0
      countdown = RNUMS[table_pos]
      table_pos += 1
      table_pos = 0 if table_pos == 512
    end

    countdown -= 1
    block[pos] ^= 1 if countdown == 1

    in_use[block[pos] & 0xff] = true
  end

  @block_randomized = true
end
846
+
847
# Codes and writes the move-to-front symbols of the current block.
#
# All N_GROUPS length tables are preset to GREATER_ICOST for the alphabet in
# use (@n_in_use + 2 symbols). The number of coding tables is chosen from the
# symbol count @n_mtf (2 to 6), then the eight sending stages are run in
# order.
def send_mtf_values
  len = @data.send_mtf_values_len
  alpha_size = @n_in_use + 2

  N_GROUPS.times { |t| len[t].fill(GREATER_ICOST, 0, alpha_size) }

  n_groups =
    if @n_mtf < 200 then 2
    elsif @n_mtf < 600 then 3
    elsif @n_mtf < 1200 then 4
    elsif @n_mtf < 2400 then 5
    else 6
    end

  send_mtf_values0 n_groups, alpha_size
  n_selectors = send_mtf_values1 n_groups, alpha_size
  send_mtf_values2 n_groups, n_selectors
  send_mtf_values3 n_groups, alpha_size
  send_mtf_values4
  send_mtf_values5 n_groups, n_selectors
  send_mtf_values6 n_groups, alpha_size
  send_mtf_values7
end
868
+
869
# Builds the initial length tables by splitting the alphabet into +n_groups+
# consecutive symbol ranges of roughly equal total frequency.
#
# The ranges are assigned from the last table to the first. Each range grows
# until it holds at least its share of the remaining symbols. For every
# second inner table the range is shortened again by its last symbol; the
# frequency of exactly that symbol is taken back out of the range total
# before the end index moves, so that the share left for the following ranges
# stays correct. Symbols inside a table's range get LESSER_ICOST, all others
# GREATER_ICOST.
#
# @param n_groups [Integer] number of coding tables
# @param alpha_size [Integer] number of symbols in the alphabet
def send_mtf_values0(n_groups, alpha_size)
  len = @data.send_mtf_values_len
  mtf_freq = @data.mtf_freq

  rem_f = @n_mtf
  gs = 0
  last_symbol = alpha_size - 1

  n_groups.downto(1) do |n_part|
    t_freq = rem_f / n_part
    ge = gs - 1
    a_freq = 0

    while a_freq < t_freq && ge < last_symbol
      ge += 1
      a_freq += mtf_freq[ge]
    end

    if ge > gs && n_part != n_groups && n_part != 1 &&
       ((n_groups - n_part) & 1) != 0
      a_freq -= mtf_freq[ge]
      ge -= 1
    end

    len_np = len[n_part - 1]
    alpha_size.times do |v|
      len_np[v] = (v >= gs && v <= ge) ? LESSER_ICOST : GREATER_ICOST
    end

    gs = ge + 1
    rem_f -= a_freq
  end
end
906
+
907
# Assigns a coding table to every group of G_SIZE symbols and refines the
# tables, repeating the whole procedure N_ITERS times.
#
# In each pass the cost of coding a group is summed per table from the
# current code lengths; the cheapest table (on a tie the one with the highest
# index) is recorded in +selector+ and the group's symbols are counted in
# that table's frequency row. At the end of the pass new code lengths (at
# most 20 bits) are computed for every table from its frequency row.
#
# @param n_groups [Integer] number of coding tables
# @param alpha_size [Integer] number of symbols in the alphabet
# @return [Integer] number of selectors (symbol groups)
def send_mtf_values1(n_groups, alpha_size)
  store = @data
  rfreq = store.send_mtf_values_rfreq
  fave = store.send_mtf_values_fave
  cost = store.send_mtf_values_cost
  sfmap = store.sfmap
  selector = store.selector
  len = store.send_mtf_values_len
  len_0, len_1, len_2, len_3, len_4, len_5 = len[0], len[1], len[2], len[3], len[4], len[5]
  n_mtf_shadow = @n_mtf
  n_selectors = 0

  N_ITERS.times do
    n_groups.times do |t|
      fave[t] = 0
      rfreq[t].fill(0, 0, alpha_size)
    end

    n_selectors = 0

    gs = 0
    while gs < @n_mtf
      ge = [gs + G_SIZE - 1, n_mtf_shadow - 1].min

      if n_groups == N_GROUPS
        # unrolled variant for the full set of six tables
        cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0

        gs.upto(ge) do |i|
          icv = sfmap[i]
          cost0 += len_0[icv] & 0xff
          cost1 += len_1[icv] & 0xff
          cost2 += len_2[icv] & 0xff
          cost3 += len_3[icv] & 0xff
          cost4 += len_4[icv] & 0xff
          cost5 += len_5[icv] & 0xff
        end

        cost[0] = cost0
        cost[1] = cost1
        cost[2] = cost2
        cost[3] = cost3
        cost[4] = cost4
        cost[5] = cost5
      else
        n_groups.times { |t| cost[t] = 0 }

        gs.upto(ge) do |i|
          icv = sfmap[i]
          n_groups.times { |t| cost[t] += len[t][icv] & 0xff }
        end
      end

      # cheapest table; scanning downwards keeps the highest index on a tie
      best_table = -1
      best_cost = 999999999
      (n_groups - 1).downto(0) do |t|
        next unless cost[t] < best_cost

        best_cost = cost[t]
        best_table = t
      end

      fave[best_table] += 1
      selector[n_selectors] = best_table
      n_selectors += 1

      counts = rfreq[best_table]
      gs.upto(ge) { |i| counts[sfmap[i]] += 1 }

      gs = ge + 1
    end

    n_groups.times do |t|
      self.class.make_code_lengths len[t], rfreq[t], @data, alpha_size, 20
    end
  end

  n_selectors
end
997
+
998
# Move-to-front codes the selectors: every selector is replaced by its
# current position in a list of table numbers, and the table is then moved to
# the front of that list. The positions are stored in @data.selector_mtf.
#
# @param n_groups [Integer] number of coding tables
# @param n_selectors [Integer] number of selectors to convert
def send_mtf_values2(n_groups, n_selectors)
  store = @data
  order = store.send_mtf_values2_pos

  n_groups.times { |table| order[table] = table }

  n_selectors.times do |i|
    wanted = store.selector[i]

    position = 0
    position += 1 until order[position] == wanted

    # entries in front of the match move back by one, the match goes first
    order.insert(0, order.delete_at(position))
    store.selector_mtf[i] = position
  end
end
1020
+
1021
# Derives the codes of every coding table from its code lengths.
#
# For each table the shortest and longest code length within the alphabet are
# determined (starting from 32 and 0 respectively) and passed on to
# ::assign_codes together with the table's code and length arrays.
#
# @param n_groups [Integer] number of coding tables
# @param alpha_size [Integer] number of symbols in the alphabet
def send_mtf_values3(n_groups, alpha_size)
  code = @data.send_mtf_values_code
  len = @data.send_mtf_values_len

  n_groups.times do |t|
    lengths = len[t]
    shortest = 32
    longest = 0

    alpha_size.times do |i|
      bits = lengths[i] & 0xff
      longest = bits if bits > longest
      shortest = bits if bits < shortest
    end

    self.class.assign_codes code[t], lengths, shortest, longest, alpha_size
  end
end
1038
+
1039
# Writes the map of byte values used in the block.
#
# The 256 in-use flags are split into 16 ranges of 16 values. First one bit
# per range tells whether any value of the range is used, then the 16
# individual flags of every used range follow.
def send_mtf_values4
  in_use = @data.in_use
  in_use_16 = @data.send_mtf_values4_in_use_16

  16.times do |range|
    base = range * 16
    in_use_16[range] = (0...16).any? { |offset| in_use[base + offset] }
  end

  16.times { |range| w 1, (in_use_16[range] ? 1 : 0) }

  16.times do |range|
    next unless in_use_16[range]

    base = range * 16
    16.times { |offset| w 1, (in_use[base + offset] ? 1 : 0) }
  end

  @live
end
1076
+
1077
# Writes the number of coding tables (3 bits), the number of selectors
# (15 bits) and the move-to-front coded selectors. A selector value k (low
# eight bits) is written as k one-bits followed by a single zero-bit.
#
# @param n_groups [Integer] number of coding tables
# @param n_selectors [Integer] number of selectors
def send_mtf_values5(n_groups, n_selectors)
  w 3, n_groups
  w 15, n_selectors

  selector_mtf = @data.selector_mtf

  n_selectors.times do |i|
    (selector_mtf[i] & 0xff).times { w 1, 1 }
    w 1, 0
  end

  @live
end
1112
+
1113
# Writes the code lengths of every coding table in delta form.
#
# Per table the first length is written with 5 bits. Then, for every symbol,
# the running length is stepped to the symbol's length - the two bits "10"
# for each increment, "11" for each decrement - and a single zero-bit closes
# the symbol.
#
# @param n_groups [Integer] number of coding tables
# @param alpha_size [Integer] number of symbols in the alphabet
def send_mtf_values6(n_groups, alpha_size)
  len = @data.send_mtf_values_len

  n_groups.times do |t|
    lengths = len[t]
    current = lengths[0] & 0xff
    w 5, current

    alpha_size.times do |i|
      target = lengths[i] & 0xff

      while current < target
        w 2, 2
        current += 1
      end

      while current > target
        w 2, 3
        current -= 1
      end

      w 1, 0
    end
  end

  @live
end
1174
+
1175
# Writes the coded symbols of the block.
#
# The symbols in @data.sfmap are processed in groups of G_SIZE; each group is
# coded with the table named by the next selector. The bit buffer is handled
# on local copies inside the loop and stored back at the end.
def send_mtf_values7
  store = @data
  len = store.send_mtf_values_len
  code = store.send_mtf_values_code
  selector = store.selector
  sfmap = store.sfmap
  out = @io
  total = @n_mtf

  pending = @live
  buffer = @buff

  group = 0
  pos = 0
  while pos < total
    group_end = [pos + G_SIZE - 1, total - 1].min
    table = selector[group] & 0xff
    codes = code[table]
    lengths = len[table]

    while pos <= group_end
      symbol = sfmap[pos]

      # make room: emit complete bytes from the top of the buffer
      while pending >= 8
        out.write(((buffer >> 24) & 0xffffffff).chr)
        buffer = (buffer << 8) & 0xffffffff
        pending -= 8
      end

      bits = lengths[symbol] & 0xff
      buffer |= codes[symbol] << (32 - pending - bits)
      pending += bits

      pos += 1
    end

    group += 1
  end

  @buff = buffer
  @live = pending
end
1219
+
1220
# Appends the +n+ low bits of +v+ to the bit stream.
#
# Complete bytes at the top of the 32-bit buffer are written to the IO first;
# the new bits are then placed directly behind the bits still buffered.
#
# @param n [Integer] number of bits
# @param v [Integer] value holding the bits
def w(n, v)
  buffer = @buff
  pending = @live

  until pending < 8
    @io.write(((buffer >> 24) & 0xffffffff).chr)
    buffer = (buffer << 8) & 0xffffffff
    pending -= 8
  end

  @buff = buffer | (v << (32 - pending - n))
  @live = pending + n
end
1235
+
1236
# Compresses the given data into the stream.
#
# @param bytes [#each_byte] the raw data; every byte is fed to #write0
# @raise [RuntimeError] 'stream closed' when the stream is already finished
def write(bytes)
  raise 'stream closed' if @io.nil?

  bytes.each_byte do |byte|
    write0 byte
  end
end
1241
+
1242
# Feeds a single byte into the run-length stage.
#
# Equal bytes are collected into a run. A run is handed to #write_run when a
# different byte arrives or when it has grown to 255 bytes; in the latter
# case the run state is reset so that the next byte starts a fresh run.
#
# @param b [Integer] the byte; only its low eight bits are used
def write0(b)
  byte = b & 0xff

  if @current_char == -1
    # no run in progress
    @current_char = byte
    @run_length += 1
  elsif @current_char != byte
    write_run
    @run_length = 1
    @current_char = byte
  else
    @run_length += 1
    if @run_length > 254
      write_run
      @current_char = -1
      @run_length = 0
    end
  end
end
1262
+
1263
# Stores the current run in the block.
#
# When the block has no room left, the block is ended, a new one is started
# and the run is stored there. Otherwise the run's byte is marked as used,
# the block CRC is updated, and the run is appended behind position
# @last + 1: runs of one to three bytes are stored literally, any other run
# as four copies of the byte followed by the run length minus four (which is
# marked as used as well).
def write_run
  base = @last

  unless base < @allowable_block_size
    end_block
    init_block
    return write_run
  end

  byte = @current_char
  store = @data
  store.in_use[byte] = true

  run = @run_length
  @crc.update_crc byte, run

  block = store.block
  case run
  when 1..3
    run.times { |offset| block[base + 2 + offset] = byte }
    @last = base + run
  else
    extra = run - 4
    store.in_use[extra] = true
    4.times { |offset| block[base + 2 + offset] = byte }
    block[base + 6] = extra
    @last = base + 5
  end
end
1309
+
1310
+ end