isomorfeus-ferret 0.12.7 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (164) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +54 -1
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
  9. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
  10. data/ext/isomorfeus_ferret_ext/bzip_blocksort.c +1094 -0
  11. data/ext/isomorfeus_ferret_ext/bzip_huffman.c +205 -0
  12. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  13. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  14. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  15. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  16. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  17. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  18. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  19. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  20. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  21. data/ext/isomorfeus_ferret_ext/frb_index.c +492 -474
  22. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  23. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  24. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  25. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  26. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  27. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  28. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  29. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  30. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  31. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  32. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  33. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
  34. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +105 -63
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -3
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  42. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  43. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  44. data/ext/isomorfeus_ferret_ext/frt_index.c +580 -399
  45. data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
  46. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  47. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  48. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +67 -91
  49. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  50. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  51. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  52. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  53. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  54. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  55. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
  56. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  57. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  58. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  59. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  60. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  61. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
  62. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  63. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  64. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  65. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  66. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  67. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  68. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  69. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  70. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +21 -109
  71. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  72. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  73. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  74. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  75. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  76. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  77. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  78. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  79. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  80. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  81. data/ext/isomorfeus_ferret_ext/test.c +1 -2
  82. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  83. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  84. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  85. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  86. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  87. data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
  88. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  89. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  90. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  91. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  92. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  93. data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
  94. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  95. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  96. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  97. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  98. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  99. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  100. data/ext/isomorfeus_ferret_ext/test_search.c +60 -62
  101. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  102. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  103. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  104. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  105. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  106. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  107. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  108. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  109. data/lib/isomorfeus/ferret/version.rb +1 -1
  110. metadata +27 -57
  111. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  112. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  113. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  114. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  115. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  116. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  117. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  118. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  119. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  120. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  144. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  145. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  146. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  147. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  148. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  149. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  150. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  151. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  152. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  153. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  154. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  155. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  156. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  157. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  158. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  159. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  160. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  161. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  162. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  163. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  164. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -0,0 +1,652 @@
1
+
2
+ /*-------------------------------------------------------------*/
3
+ /*--- Decompression machinery ---*/
4
+ /*--- decompress.c ---*/
5
+ /*-------------------------------------------------------------*/
6
+
7
+ /* ------------------------------------------------------------------
8
+ This file is part of bzip2/libbzip2, a program and library for
9
+ lossless, block-sorting data compression.
10
+
11
+ bzip2/libbzip2 version 1.0.8 of 13 July 2019
12
+ Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
13
+
14
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
15
+ README file.
16
+
17
+ This program is released under the terms of the license contained
18
+ in the file LICENSE.
19
+ ------------------------------------------------------------------ */
20
+
21
+
22
+ #include "bzlib_private.h"
23
+
24
+
25
+ /*---------------------------------------------------*/
26
+ static
27
+ void makeMaps_d ( DState* s )
28
+ {
29
+ Int32 i;
30
+ s->nInUse = 0;
31
+ for (i = 0; i < 256; i++)
32
+ if (s->inUse[i]) {
33
+ s->seqToUnseq[s->nInUse] = i;
34
+ s->nInUse++;
35
+ }
36
+ }
37
+
38
+
39
+ /*---------------------------------------------------*/
40
/* Leave the enclosing function with result rrr: stores rrr in the local
   retVal and jumps to the save_state_and_return label, so the caller's
   function must declare both. */
+ #define RETURN(rrr) \
41
+ { retVal = rrr; goto save_state_and_return; };
42
+
43
/* Resumable read of nnn bits into vvv.
   Expands to `case lll:` (so it must sit inside a switch on s->state) and
   records lll in s->state before reading. Then loops:
     - if at least nnn bits are buffered (s->bsLive >= nnn), the nnn most
       significant buffered bits are extracted from s->bsBuff into vvv,
       s->bsLive is reduced by nnn, and the loop ends;
     - otherwise, if s->strm->avail_in is 0 the function returns BZ_OK via
       RETURN, leaving s->state == lll so a later call resumes at this read;
     - otherwise one byte from s->strm->next_in is shifted into s->bsBuff,
       and bsLive, next_in, avail_in and the total_in_lo32/total_in_hi32
       counter (with carry from lo32 into hi32) are updated. */
+ #define GET_BITS(lll,vvv,nnn) \
44
+ case lll: s->state = lll; \
45
+ while (True) { \
46
+ if (s->bsLive >= nnn) { \
47
+ UInt32 v; \
48
+ v = (s->bsBuff >> \
49
+ (s->bsLive-nnn)) & ((1 << nnn)-1); \
50
+ s->bsLive -= nnn; \
51
+ vvv = v; \
52
+ break; \
53
+ } \
54
+ if (s->strm->avail_in == 0) RETURN(BZ_OK); \
55
+ s->bsBuff \
56
+ = (s->bsBuff << 8) | \
57
+ ((UInt32) \
58
+ (*((UChar*)(s->strm->next_in)))); \
59
+ s->bsLive += 8; \
60
+ s->strm->next_in++; \
61
+ s->strm->avail_in--; \
62
+ s->strm->total_in_lo32++; \
63
+ if (s->strm->total_in_lo32 == 0) \
64
+ s->strm->total_in_hi32++; \
65
+ }
66
+
67
/* Resumable read of 8 bits into uuu under state label lll (GET_BITS with
   nnn = 8). */
+ #define GET_UCHAR(lll,uuu) \
68
+ GET_BITS(lll,uuu,8)
69
+
70
/* Resumable read of a single bit into uuu under state label lll (GET_BITS
   with nnn = 1). */
+ #define GET_BIT(lll,uuu) \
71
+ GET_BITS(lll,uuu,1)
72
+
73
+ /*---------------------------------------------------*/
74
/* Decode the next Huffman-coded symbol into lval, using the caller's
   locals (groupNo, groupPos, nSelectors, gSel, gMinlen, gLimit, gPerm,
   gBase, zn, zvec, zj).
     - When groupPos is 0 a new group starts: groupNo is advanced, a value
       >= nSelectors is rejected with BZ_DATA_ERROR, groupPos is reset to
       BZ_G_SIZE, and the minimum length and limit/perm/base tables are
       loaded for the table chosen by s->selector[groupNo].
     - gMinlen bits are read under state label1; the code is then extended
       one bit at a time (state label2) until zvec <= gLimit[zn]. A length
       above 20 is rejected with BZ_DATA_ERROR.
     - zvec - gBase[zn] must lie in [0, BZ_MAX_ALPHA_SIZE) before it is
       used to index gPerm; otherwise BZ_DATA_ERROR is returned.
   Both reads go through GET_BITS, so the macro can return BZ_OK when input
   runs out and resume at label1 or label2 on a later call. */
+ #define GET_MTF_VAL(label1,label2,lval) \
75
+ { \
76
+ if (groupPos == 0) { \
77
+ groupNo++; \
78
+ if (groupNo >= nSelectors) \
79
+ RETURN(BZ_DATA_ERROR); \
80
+ groupPos = BZ_G_SIZE; \
81
+ gSel = s->selector[groupNo]; \
82
+ gMinlen = s->minLens[gSel]; \
83
+ gLimit = &(s->limit[gSel][0]); \
84
+ gPerm = &(s->perm[gSel][0]); \
85
+ gBase = &(s->base[gSel][0]); \
86
+ } \
87
+ groupPos--; \
88
+ zn = gMinlen; \
89
+ GET_BITS(label1, zvec, zn); \
90
+ while (1) { \
91
+ if (zn > 20 /* the longest code */) \
92
+ RETURN(BZ_DATA_ERROR); \
93
+ if (zvec <= gLimit[zn]) break; \
94
+ zn++; \
95
+ GET_BIT(label2, zj); \
96
+ zvec = (zvec << 1) | zj; \
97
+ }; \
98
+ if (zvec - gBase[zn] < 0 \
99
+ || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
100
+ RETURN(BZ_DATA_ERROR); \
101
+ lval = gPerm[zvec - gBase[zn]]; \
102
+ }
103
+
104
+
105
+ /*---------------------------------------------------*/
106
/* Resumable decoder step for a bzip2 stream, driven by s->state.

   The body is one switch on s->state. Its case labels are produced by the
   GET_UCHAR / GET_BITS / GET_BIT / GET_MTF_VAL macros, each of which stores
   its label in s->state before reading. When input runs out
   (s->strm->avail_in == 0) the macro returns BZ_OK through RETURN, and the
   next call re-enters the switch at that same read. Locals that must
   survive such a return are copied from s->save_* on entry and written
   back to s->save_* at save_state_and_return.

   Return values visible in this function:
     BZ_OK               - input exhausted mid-read, or a whole block has
                           been decoded and s->state set to BZ_X_OUTPUT;
     BZ_STREAM_END       - the endhdr_2 path completed and the stored
                           combined CRC was read (s->state = BZ_X_IDLE);
     BZ_DATA_ERROR_MAGIC - bad stream header bytes or block-size byte;
     BZ_DATA_ERROR       - any other consistency check on the data failed;
     BZ_MEM_ERROR        - BZALLOC returned NULL for a working buffer. */
+ Int32 BZ2_decompress ( DState* s )
107
+ {
108
+ UChar uc;
109
+ Int32 retVal;
110
+ Int32 minLen, maxLen;
111
+ bz_stream* strm = s->strm;
112
+
113
+ /* stuff that needs to be saved/restored */
114
+ Int32 i;
115
+ Int32 j;
116
+ Int32 t;
117
+ Int32 alphaSize;
118
+ Int32 nGroups;
119
+ Int32 nSelectors;
120
+ Int32 EOB;
121
+ Int32 groupNo;
122
+ Int32 groupPos;
123
+ Int32 nextSym;
124
+ Int32 nblockMAX;
125
+ Int32 nblock;
126
+ Int32 es;
127
+ Int32 N;
128
+ Int32 curr;
129
+ Int32 zt;
130
+ Int32 zn;
131
+ Int32 zvec;
132
+ Int32 zj;
133
+ Int32 gSel;
134
+ Int32 gMinlen;
135
+ Int32* gLimit;
136
+ Int32* gBase;
137
+ Int32* gPerm;
138
+
139
+ if (s->state == BZ_X_MAGIC_1) {
140
+ /*initialise the save area*/
141
+ s->save_i = 0;
142
+ s->save_j = 0;
143
+ s->save_t = 0;
144
+ s->save_alphaSize = 0;
145
+ s->save_nGroups = 0;
146
+ s->save_nSelectors = 0;
147
+ s->save_EOB = 0;
148
+ s->save_groupNo = 0;
149
+ s->save_groupPos = 0;
150
+ s->save_nextSym = 0;
151
+ s->save_nblockMAX = 0;
152
+ s->save_nblock = 0;
153
+ s->save_es = 0;
154
+ s->save_N = 0;
155
+ s->save_curr = 0;
156
+ s->save_zt = 0;
157
+ s->save_zn = 0;
158
+ s->save_zvec = 0;
159
+ s->save_zj = 0;
160
+ s->save_gSel = 0;
161
+ s->save_gMinlen = 0;
162
+ s->save_gLimit = NULL;
163
+ s->save_gBase = NULL;
164
+ s->save_gPerm = NULL;
165
+ }
166
+
167
+ /*restore from the save area*/
168
+ i = s->save_i;
169
+ j = s->save_j;
170
+ t = s->save_t;
171
+ alphaSize = s->save_alphaSize;
172
+ nGroups = s->save_nGroups;
173
+ nSelectors = s->save_nSelectors;
174
+ EOB = s->save_EOB;
175
+ groupNo = s->save_groupNo;
176
+ groupPos = s->save_groupPos;
177
+ nextSym = s->save_nextSym;
178
+ nblockMAX = s->save_nblockMAX;
179
+ nblock = s->save_nblock;
180
+ es = s->save_es;
181
+ N = s->save_N;
182
+ curr = s->save_curr;
183
+ zt = s->save_zt;
184
+ zn = s->save_zn;
185
+ zvec = s->save_zvec;
186
+ zj = s->save_zj;
187
+ gSel = s->save_gSel;
188
+ gMinlen = s->save_gMinlen;
189
+ gLimit = s->save_gLimit;
190
+ gBase = s->save_gBase;
191
+ gPerm = s->save_gPerm;
192
+
193
+ retVal = BZ_OK;
194
+
195
/* Each GET_UCHAR / GET_BITS / GET_BIT below expands to a `case` label of
   this switch, so re-entry jumps straight to the read that previously ran
   out of input, including reads nested inside loops. */
+ switch (s->state) {
196
+
197
+ GET_UCHAR(BZ_X_MAGIC_1, uc);
198
+ if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
199
+
200
+ GET_UCHAR(BZ_X_MAGIC_2, uc);
201
+ if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
202
+
203
+ GET_UCHAR(BZ_X_MAGIC_3, uc)
204
+ if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
205
+
206
+ GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8)
207
/* The fourth header byte must be BZ_HDR_0+1 .. BZ_HDR_0+9; subtracting
   BZ_HDR_0 turns it into the block-size multiplier 1..9. */
+ if (s->blockSize100k < (BZ_HDR_0 + 1) ||
208
+ s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC);
209
+ s->blockSize100k -= BZ_HDR_0;
210
+
211
/* Working buffers are sized from blockSize100k * 100000 entries: ll16 and
   ll4 when s->smallDecompress is set, tt otherwise. */
+ if (s->smallDecompress) {
212
+ s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
213
+ s->ll4 = BZALLOC(
214
+ ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar)
215
+ );
216
+ if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
217
+ } else {
218
+ s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
219
+ if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
220
+ }
221
+
222
+ GET_UCHAR(BZ_X_BLKHDR_1, uc);
223
+
224
/* A first marker byte of 0x17 branches to the endhdr_2 path; otherwise the
   six bytes 0x31 0x41 0x59 0x26 0x53 0x59 must appear in order. */
+ if (uc == 0x17) goto endhdr_2;
225
+ if (uc != 0x31) RETURN(BZ_DATA_ERROR);
226
+ GET_UCHAR(BZ_X_BLKHDR_2, uc);
227
+ if (uc != 0x41) RETURN(BZ_DATA_ERROR);
228
+ GET_UCHAR(BZ_X_BLKHDR_3, uc);
229
+ if (uc != 0x59) RETURN(BZ_DATA_ERROR);
230
+ GET_UCHAR(BZ_X_BLKHDR_4, uc);
231
+ if (uc != 0x26) RETURN(BZ_DATA_ERROR);
232
+ GET_UCHAR(BZ_X_BLKHDR_5, uc);
233
+ if (uc != 0x53) RETURN(BZ_DATA_ERROR);
234
+ GET_UCHAR(BZ_X_BLKHDR_6, uc);
235
+ if (uc != 0x59) RETURN(BZ_DATA_ERROR);
236
+
237
+ s->currBlockNo++;
238
+ if (s->verbosity >= 2)
239
+ VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo );
240
+
241
/* The stored block CRC is assembled from four bytes, most significant
   byte first. */
+ s->storedBlockCRC = 0;
242
+ GET_UCHAR(BZ_X_BCRC_1, uc);
243
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
244
+ GET_UCHAR(BZ_X_BCRC_2, uc);
245
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
246
+ GET_UCHAR(BZ_X_BCRC_3, uc);
247
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
248
+ GET_UCHAR(BZ_X_BCRC_4, uc);
249
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
250
+
251
+ GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
252
+
253
/* origPtr is a 24-bit value read as three bytes, most significant first;
   it gets a coarse range check here and a tighter one once nblock is
   known. */
+ s->origPtr = 0;
254
+ GET_UCHAR(BZ_X_ORIGPTR_1, uc);
255
+ s->origPtr = (s->origPtr << 8) | ((Int32)uc);
256
+ GET_UCHAR(BZ_X_ORIGPTR_2, uc);
257
+ s->origPtr = (s->origPtr << 8) | ((Int32)uc);
258
+ GET_UCHAR(BZ_X_ORIGPTR_3, uc);
259
+ s->origPtr = (s->origPtr << 8) | ((Int32)uc);
260
+
261
+ if (s->origPtr < 0)
262
+ RETURN(BZ_DATA_ERROR);
263
+ if (s->origPtr > 10 + 100000*s->blockSize100k)
264
+ RETURN(BZ_DATA_ERROR);
265
+
266
+ /*--- Receive the mapping table ---*/
267
/* Two levels: 16 coarse flags (inUse16), then 16 fine flags for each set
   coarse flag, filling the 256-entry inUse table. */
+ for (i = 0; i < 16; i++) {
268
+ GET_BIT(BZ_X_MAPPING_1, uc);
269
+ if (uc == 1)
270
+ s->inUse16[i] = True; else
271
+ s->inUse16[i] = False;
272
+ }
273
+
274
+ for (i = 0; i < 256; i++) s->inUse[i] = False;
275
+
276
+ for (i = 0; i < 16; i++)
277
+ if (s->inUse16[i])
278
+ for (j = 0; j < 16; j++) {
279
+ GET_BIT(BZ_X_MAPPING_2, uc);
280
+ if (uc == 1) s->inUse[i * 16 + j] = True;
281
+ }
282
+ makeMaps_d ( s );
283
+ if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
284
+ alphaSize = s->nInUse+2;
285
+
286
+ /*--- Now the selectors ---*/
287
+ GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
288
+ if (nGroups < 2 || nGroups > BZ_N_GROUPS) RETURN(BZ_DATA_ERROR);
289
+ GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
290
+ if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
291
/* Each selector is a unary count: the number of 1 bits before a 0 bit,
   which must stay below nGroups. */
+ for (i = 0; i < nSelectors; i++) {
292
+ j = 0;
293
+ while (True) {
294
+ GET_BIT(BZ_X_SELECTOR_3, uc);
295
+ if (uc == 0) break;
296
+ j++;
297
+ if (j >= nGroups) RETURN(BZ_DATA_ERROR);
298
+ }
299
+ /* Having more than BZ_MAX_SELECTORS doesn't make much sense
300
+ since they will never be used, but some implementations might
301
+ "round up" the number of selectors, so just ignore those. */
302
+ if (i < BZ_MAX_SELECTORS)
303
+ s->selectorMtf[i] = j;
304
+ }
305
+ if (nSelectors > BZ_MAX_SELECTORS)
306
+ nSelectors = BZ_MAX_SELECTORS;
307
+
308
+ /*--- Undo the MTF values for the selectors. ---*/
309
+ {
310
+ UChar pos[BZ_N_GROUPS], tmp, v;
311
+ for (v = 0; v < nGroups; v++) pos[v] = v;
312
+
313
+ for (i = 0; i < nSelectors; i++) {
314
+ v = s->selectorMtf[i];
315
+ tmp = pos[v];
316
+ while (v > 0) { pos[v] = pos[v-1]; v--; }
317
+ pos[0] = tmp;
318
+ s->selector[i] = tmp;
319
+ }
320
+ }
321
+
322
+ /*--- Now the coding tables ---*/
323
/* Code lengths are delta-coded per group: a 5-bit starting length, then
   for each symbol a run of two-bit steps (1,0 = increment, 1,1 =
   decrement) ended by a single 0 bit. The running length must stay within
   1..20. */
+ for (t = 0; t < nGroups; t++) {
324
+ GET_BITS(BZ_X_CODING_1, curr, 5);
325
+ for (i = 0; i < alphaSize; i++) {
326
+ while (True) {
327
+ if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
328
+ GET_BIT(BZ_X_CODING_2, uc);
329
+ if (uc == 0) break;
330
+ GET_BIT(BZ_X_CODING_3, uc);
331
+ if (uc == 0) curr++; else curr--;
332
+ }
333
+ s->len[t][i] = curr;
334
+ }
335
+ }
336
+
337
+ /*--- Create the Huffman decoding tables ---*/
338
+ for (t = 0; t < nGroups; t++) {
339
+ minLen = 32;
340
+ maxLen = 0;
341
+ for (i = 0; i < alphaSize; i++) {
342
+ if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
343
+ if (s->len[t][i] < minLen) minLen = s->len[t][i];
344
+ }
345
+ BZ2_hbCreateDecodeTables (
346
+ &(s->limit[t][0]),
347
+ &(s->base[t][0]),
348
+ &(s->perm[t][0]),
349
+ &(s->len[t][0]),
350
+ minLen, maxLen, alphaSize
351
+ );
352
+ s->minLens[t] = minLen;
353
+ }
354
+
355
+ /*--- Now the MTF values ---*/
356
+
357
+ EOB = s->nInUse+1;
358
+ nblockMAX = 100000 * s->blockSize100k;
359
/* groupNo = -1 with groupPos = 0 makes the first GET_MTF_VAL advance to
   group 0 and load its tables. */
+ groupNo = -1;
360
+ groupPos = 0;
361
+
362
+ for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
363
+
364
+ /*-- MTF init --*/
365
+ {
366
+ Int32 ii, jj, kk;
367
+ kk = MTFA_SIZE-1;
368
+ for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
369
+ for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
370
+ s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
371
+ kk--;
372
+ }
373
+ s->mtfbase[ii] = kk + 1;
374
+ }
375
+ }
376
+ /*-- end MTF init --*/
377
+
378
+ nblock = 0;
379
+ GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
380
+
381
+ while (True) {
382
+
383
+ if (nextSym == EOB) break;
384
+
385
+ if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
386
+
387
/* Run length accumulation: each BZ_RUNA adds 1*N and each BZ_RUNB adds
   2*N, with N doubling after every symbol. es starts at -1 and is
   incremented once after the loop. */
+ es = -1;
388
+ N = 1;
389
+ do {
390
+ /* Check that N doesn't get too big, so that es doesn't
391
+ go negative. The maximum value that can be
392
+ RUNA/RUNB encoded is equal to the block size (post
393
+ the initial RLE), viz, 900k, so bounding N at 2
394
+ million should guard against overflow without
395
+ rejecting any legitimate inputs. */
396
+ if (N >= 2*1024*1024) RETURN(BZ_DATA_ERROR);
397
+ if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
398
+ if (nextSym == BZ_RUNB) es = es + (1+1) * N;
399
+ N = N * 2;
400
+ GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
401
+ }
402
+ while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
403
+
404
+ es++;
405
/* The run repeats the byte mapped from the first entry of MTF segment 0;
   es copies of it are appended, each bounded by nblockMAX. */
+ uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
406
+ s->unzftab[uc] += es;
407
+
408
+ if (s->smallDecompress)
409
+ while (es > 0) {
410
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
411
+ s->ll16[nblock] = (UInt16)uc;
412
+ nblock++;
413
+ es--;
414
+ }
415
+ else
416
+ while (es > 0) {
417
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
418
+ s->tt[nblock] = (UInt32)uc;
419
+ nblock++;
420
+ es--;
421
+ };
422
+
423
+ continue;
424
+
425
+ } else {
426
+
427
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
428
+
429
+ /*-- uc = MTF ( nextSym-1 ) --*/
430
+ {
431
+ Int32 ii, jj, kk, pp, lno, off;
432
+ UInt32 nn;
433
+ nn = (UInt32)(nextSym - 1);
434
+
435
/* Fast path: the entry lies in segment 0, so the nn entries in front of it
   are shifted up by one (four at a time, then the remainder) and uc is
   written at the front. */
+ if (nn < MTFL_SIZE) {
436
+ /* avoid general-case expense */
437
+ pp = s->mtfbase[0];
438
+ uc = s->mtfa[pp+nn];
439
+ while (nn > 3) {
440
+ Int32 z = pp+nn;
441
+ s->mtfa[(z) ] = s->mtfa[(z)-1];
442
+ s->mtfa[(z)-1] = s->mtfa[(z)-2];
443
+ s->mtfa[(z)-2] = s->mtfa[(z)-3];
444
+ s->mtfa[(z)-3] = s->mtfa[(z)-4];
445
+ nn -= 4;
446
+ }
447
+ while (nn > 0) {
448
+ s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--;
449
+ };
450
+ s->mtfa[pp] = uc;
451
+ } else {
452
+ /* general case */
453
/* Shift within segment lno, then move the last entry of each earlier
   segment to the front of the following one, and finally put uc at the
   front of segment 0. When segment 0 reaches index 0 of mtfa, all
   segments are repacked at the end of the array. */
+ lno = nn / MTFL_SIZE;
454
+ off = nn % MTFL_SIZE;
455
+ pp = s->mtfbase[lno] + off;
456
+ uc = s->mtfa[pp];
457
+ while (pp > s->mtfbase[lno]) {
458
+ s->mtfa[pp] = s->mtfa[pp-1]; pp--;
459
+ };
460
+ s->mtfbase[lno]++;
461
+ while (lno > 0) {
462
+ s->mtfbase[lno]--;
463
+ s->mtfa[s->mtfbase[lno]]
464
+ = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
465
+ lno--;
466
+ }
467
+ s->mtfbase[0]--;
468
+ s->mtfa[s->mtfbase[0]] = uc;
469
+ if (s->mtfbase[0] == 0) {
470
+ kk = MTFA_SIZE-1;
471
+ for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
472
+ for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
473
+ s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
474
+ kk--;
475
+ }
476
+ s->mtfbase[ii] = kk + 1;
477
+ }
478
+ }
479
+ }
480
+ }
481
+ /*-- end uc = MTF ( nextSym-1 ) --*/
482
+
483
+ s->unzftab[s->seqToUnseq[uc]]++;
484
+ if (s->smallDecompress)
485
+ s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
486
+ s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]);
487
+ nblock++;
488
+
489
+ GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
490
+ continue;
491
+ }
492
+ }
493
+
494
+ /* Now we know what nblock is, we can do a better sanity
495
+ check on s->origPtr.
496
+ */
497
+ if (s->origPtr < 0 || s->origPtr >= nblock)
498
+ RETURN(BZ_DATA_ERROR);
499
+
500
+ /*-- Set up cftab to facilitate generation of T^(-1) --*/
501
+ /* Check: unzftab entries in range. */
502
+ for (i = 0; i <= 255; i++) {
503
+ if (s->unzftab[i] < 0 || s->unzftab[i] > nblock)
504
+ RETURN(BZ_DATA_ERROR);
505
+ }
506
+ /* Actually generate cftab. */
507
+ s->cftab[0] = 0;
508
+ for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
509
+ for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];
510
+ /* Check: cftab entries in range. */
511
+ for (i = 0; i <= 256; i++) {
512
+ if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
513
+ /* s->cftab[i] can legitimately be == nblock */
514
+ RETURN(BZ_DATA_ERROR);
515
+ }
516
+ }
517
+ /* Check: cftab entries non-descending. */
518
+ for (i = 1; i <= 256; i++) {
519
+ if (s->cftab[i-1] > s->cftab[i]) {
520
+ RETURN(BZ_DATA_ERROR);
521
+ }
522
+ }
523
+
524
+ s->state_out_len = 0;
525
+ s->state_out_ch = 0;
526
+ BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
527
+ s->state = BZ_X_OUTPUT;
528
+ if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
529
+
530
+ if (s->smallDecompress) {
531
+
532
+ /*-- Make a copy of cftab, used in generation of T --*/
533
+ for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
534
+
535
+ /*-- compute the T vector --*/
536
+ for (i = 0; i < nblock; i++) {
537
+ uc = (UChar)(s->ll16[i]);
538
+ SET_LL(i, s->cftabCopy[uc]);
539
+ s->cftabCopy[uc]++;
540
+ }
541
+
542
+ /*-- Compute T^(-1) by pointer reversal on T --*/
543
+ i = s->origPtr;
544
+ j = GET_LL(i);
545
+ do {
546
+ Int32 tmp = GET_LL(j);
547
+ SET_LL(j, i);
548
+ i = j;
549
+ j = tmp;
550
+ }
551
+ while (i != s->origPtr);
552
+
553
+ s->tPos = s->origPtr;
554
+ s->nblock_used = 0;
555
+ if (s->blockRandomised) {
556
+ BZ_RAND_INIT_MASK;
557
+ BZ_GET_SMALL(s->k0); s->nblock_used++;
558
+ BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
559
+ } else {
560
+ BZ_GET_SMALL(s->k0); s->nblock_used++;
561
+ }
562
+
563
+ } else {
564
+
565
+ /*-- compute the T^(-1) vector --*/
566
/* Each tt entry keeps its byte in the low 8 bits; the index link is OR-ed
   into the bits above (i << 8) and later recovered with >> 8. */
+ for (i = 0; i < nblock; i++) {
567
+ uc = (UChar)(s->tt[i] & 0xff);
568
+ s->tt[s->cftab[uc]] |= (i << 8);
569
+ s->cftab[uc]++;
570
+ }
571
+
572
+ s->tPos = s->tt[s->origPtr] >> 8;
573
+ s->nblock_used = 0;
574
+ if (s->blockRandomised) {
575
+ BZ_RAND_INIT_MASK;
576
+ BZ_GET_FAST(s->k0); s->nblock_used++;
577
+ BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
578
+ } else {
579
+ BZ_GET_FAST(s->k0); s->nblock_used++;
580
+ }
581
+
582
+ }
583
+
584
/* Block fully decoded: s->state was set to BZ_X_OUTPUT above, so this
   returns BZ_OK after writing the locals back to the save area. */
+ RETURN(BZ_OK);
585
+
586
+
587
+
588
+ endhdr_2:
589
+
590
+ GET_UCHAR(BZ_X_ENDHDR_2, uc);
591
+ if (uc != 0x72) RETURN(BZ_DATA_ERROR);
592
+ GET_UCHAR(BZ_X_ENDHDR_3, uc);
593
+ if (uc != 0x45) RETURN(BZ_DATA_ERROR);
594
+ GET_UCHAR(BZ_X_ENDHDR_4, uc);
595
+ if (uc != 0x38) RETURN(BZ_DATA_ERROR);
596
+ GET_UCHAR(BZ_X_ENDHDR_5, uc);
597
+ if (uc != 0x50) RETURN(BZ_DATA_ERROR);
598
+ GET_UCHAR(BZ_X_ENDHDR_6, uc);
599
+ if (uc != 0x90) RETURN(BZ_DATA_ERROR);
600
+
601
+ s->storedCombinedCRC = 0;
602
+ GET_UCHAR(BZ_X_CCRC_1, uc);
603
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
604
+ GET_UCHAR(BZ_X_CCRC_2, uc);
605
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
606
+ GET_UCHAR(BZ_X_CCRC_3, uc);
607
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
608
+ GET_UCHAR(BZ_X_CCRC_4, uc);
609
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
610
+
611
+ s->state = BZ_X_IDLE;
612
+ RETURN(BZ_STREAM_END);
613
+
614
+ default: AssertH ( False, 4001 );
615
+ }
616
+
617
+ AssertH ( False, 4002 );
618
+
619
+ save_state_and_return:
620
+
621
+ s->save_i = i;
622
+ s->save_j = j;
623
+ s->save_t = t;
624
+ s->save_alphaSize = alphaSize;
625
+ s->save_nGroups = nGroups;
626
+ s->save_nSelectors = nSelectors;
627
+ s->save_EOB = EOB;
628
+ s->save_groupNo = groupNo;
629
+ s->save_groupPos = groupPos;
630
+ s->save_nextSym = nextSym;
631
+ s->save_nblockMAX = nblockMAX;
632
+ s->save_nblock = nblock;
633
+ s->save_es = es;
634
+ s->save_N = N;
635
+ s->save_curr = curr;
636
+ s->save_zt = zt;
637
+ s->save_zn = zn;
638
+ s->save_zvec = zvec;
639
+ s->save_zj = zj;
640
+ s->save_gSel = gSel;
641
+ s->save_gMinlen = gMinlen;
642
+ s->save_gLimit = gLimit;
643
+ s->save_gBase = gBase;
644
+ s->save_gPerm = gPerm;
645
+
646
+ return retVal;
647
+ }
648
+
649
+
650
+ /*-------------------------------------------------------------*/
651
+ /*--- end decompress.c ---*/
652
+ /*-------------------------------------------------------------*/