ferret 0.11.6 → 0.11.8.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (185) hide show
  1. data/README +10 -22
  2. data/RELEASE_CHANGES +137 -0
  3. data/RELEASE_NOTES +60 -0
  4. data/Rakefile +379 -274
  5. data/TODO +100 -8
  6. data/bin/ferret-browser +0 -0
  7. data/ext/BZLIB_blocksort.c +1094 -0
  8. data/ext/BZLIB_bzlib.c +1578 -0
  9. data/ext/BZLIB_compress.c +672 -0
  10. data/ext/BZLIB_crctable.c +104 -0
  11. data/ext/BZLIB_decompress.c +626 -0
  12. data/ext/BZLIB_huffman.c +205 -0
  13. data/ext/BZLIB_randtable.c +84 -0
  14. data/ext/{api.c → STEMMER_api.c} +7 -10
  15. data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
  16. data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
  17. data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
  18. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  19. data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
  20. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  21. data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
  22. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  25. data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
  26. data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
  27. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  29. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  30. data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
  31. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  32. data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
  33. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  34. data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
  35. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  36. data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
  37. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  38. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  39. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  40. data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
  41. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  42. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  43. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  44. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  45. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  46. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  47. data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
  48. data/ext/analysis.c +276 -121
  49. data/ext/analysis.h +190 -143
  50. data/ext/api.h +3 -4
  51. data/ext/array.c +5 -3
  52. data/ext/array.h +52 -43
  53. data/ext/bitvector.c +38 -482
  54. data/ext/bitvector.h +446 -124
  55. data/ext/bzlib.h +282 -0
  56. data/ext/bzlib_private.h +503 -0
  57. data/ext/compound_io.c +23 -22
  58. data/ext/config.h +21 -11
  59. data/ext/document.c +43 -40
  60. data/ext/document.h +31 -21
  61. data/ext/except.c +20 -38
  62. data/ext/except.h +89 -76
  63. data/ext/extconf.rb +3 -2
  64. data/ext/ferret.c +49 -35
  65. data/ext/ferret.h +14 -11
  66. data/ext/field_index.c +262 -0
  67. data/ext/field_index.h +52 -0
  68. data/ext/filter.c +11 -10
  69. data/ext/fs_store.c +65 -47
  70. data/ext/global.c +245 -165
  71. data/ext/global.h +252 -54
  72. data/ext/hash.c +200 -243
  73. data/ext/hash.h +205 -163
  74. data/ext/hashset.c +118 -96
  75. data/ext/hashset.h +110 -82
  76. data/ext/header.h +19 -19
  77. data/ext/helper.c +11 -10
  78. data/ext/helper.h +14 -6
  79. data/ext/index.c +745 -366
  80. data/ext/index.h +503 -529
  81. data/ext/internal.h +1020 -0
  82. data/ext/lang.c +10 -0
  83. data/ext/lang.h +35 -15
  84. data/ext/mempool.c +5 -4
  85. data/ext/mempool.h +30 -22
  86. data/ext/modules.h +35 -7
  87. data/ext/multimapper.c +43 -2
  88. data/ext/multimapper.h +32 -23
  89. data/ext/posh.c +0 -0
  90. data/ext/posh.h +4 -38
  91. data/ext/priorityqueue.c +10 -12
  92. data/ext/priorityqueue.h +33 -21
  93. data/ext/q_boolean.c +22 -9
  94. data/ext/q_const_score.c +3 -2
  95. data/ext/q_filtered_query.c +15 -12
  96. data/ext/q_fuzzy.c +147 -135
  97. data/ext/q_match_all.c +3 -2
  98. data/ext/q_multi_term.c +28 -32
  99. data/ext/q_parser.c +451 -173
  100. data/ext/q_phrase.c +158 -79
  101. data/ext/q_prefix.c +16 -18
  102. data/ext/q_range.c +363 -31
  103. data/ext/q_span.c +130 -141
  104. data/ext/q_term.c +21 -21
  105. data/ext/q_wildcard.c +19 -23
  106. data/ext/r_analysis.c +369 -242
  107. data/ext/r_index.c +421 -434
  108. data/ext/r_qparser.c +142 -92
  109. data/ext/r_search.c +790 -407
  110. data/ext/r_store.c +44 -44
  111. data/ext/r_utils.c +264 -96
  112. data/ext/ram_store.c +29 -23
  113. data/ext/scanner.c +895 -0
  114. data/ext/scanner.h +36 -0
  115. data/ext/scanner_mb.c +6701 -0
  116. data/ext/scanner_utf8.c +4415 -0
  117. data/ext/search.c +210 -87
  118. data/ext/search.h +556 -488
  119. data/ext/similarity.c +17 -16
  120. data/ext/similarity.h +51 -44
  121. data/ext/sort.c +157 -354
  122. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  123. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  124. data/ext/stem_UTF_8_hungarian.h +16 -0
  125. data/ext/stem_UTF_8_romanian.h +16 -0
  126. data/ext/stem_UTF_8_turkish.h +16 -0
  127. data/ext/stopwords.c +287 -278
  128. data/ext/store.c +57 -51
  129. data/ext/store.h +308 -286
  130. data/ext/symbol.c +10 -0
  131. data/ext/symbol.h +23 -0
  132. data/ext/term_vectors.c +14 -293
  133. data/ext/threading.h +22 -22
  134. data/ext/win32.h +12 -4
  135. data/lib/ferret.rb +2 -1
  136. data/lib/ferret/browser.rb +1 -1
  137. data/lib/ferret/field_symbol.rb +94 -0
  138. data/lib/ferret/index.rb +221 -34
  139. data/lib/ferret/number_tools.rb +6 -6
  140. data/lib/ferret/version.rb +3 -0
  141. data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
  142. data/test/test_helper.rb +7 -2
  143. data/test/test_installed.rb +1 -0
  144. data/test/threading/thread_safety_index_test.rb +10 -1
  145. data/test/threading/thread_safety_read_write_test.rb +4 -7
  146. data/test/threading/thread_safety_test.rb +0 -0
  147. data/test/unit/analysis/tc_analyzer.rb +29 -27
  148. data/test/unit/analysis/tc_token_stream.rb +23 -16
  149. data/test/unit/index/tc_index.rb +116 -11
  150. data/test/unit/index/tc_index_reader.rb +27 -27
  151. data/test/unit/index/tc_index_writer.rb +10 -0
  152. data/test/unit/index/th_doc.rb +38 -21
  153. data/test/unit/search/tc_filter.rb +31 -10
  154. data/test/unit/search/tc_index_searcher.rb +6 -0
  155. data/test/unit/search/tm_searcher.rb +53 -1
  156. data/test/unit/store/tc_fs_store.rb +40 -2
  157. data/test/unit/store/tc_ram_store.rb +0 -0
  158. data/test/unit/store/tm_store.rb +0 -0
  159. data/test/unit/store/tm_store_lock.rb +7 -6
  160. data/test/unit/tc_field_symbol.rb +26 -0
  161. data/test/unit/ts_analysis.rb +0 -0
  162. data/test/unit/ts_index.rb +0 -0
  163. data/test/unit/ts_store.rb +0 -0
  164. data/test/unit/ts_utils.rb +0 -0
  165. data/test/unit/utils/tc_number_tools.rb +0 -0
  166. data/test/utils/content_generator.rb +226 -0
  167. metadata +262 -221
  168. data/ext/inc/lang.h +0 -48
  169. data/ext/inc/threading.h +0 -31
  170. data/ext/stem_ISO_8859_1_english.c +0 -1156
  171. data/ext/stem_ISO_8859_1_french.c +0 -1276
  172. data/ext/stem_ISO_8859_1_italian.c +0 -1091
  173. data/ext/stem_ISO_8859_1_norwegian.c +0 -296
  174. data/ext/stem_ISO_8859_1_spanish.c +0 -1119
  175. data/ext/stem_ISO_8859_1_swedish.c +0 -307
  176. data/ext/stem_UTF_8_danish.c +0 -344
  177. data/ext/stem_UTF_8_english.c +0 -1176
  178. data/ext/stem_UTF_8_french.c +0 -1296
  179. data/ext/stem_UTF_8_italian.c +0 -1113
  180. data/ext/stem_UTF_8_norwegian.c +0 -302
  181. data/ext/stem_UTF_8_portuguese.c +0 -1055
  182. data/ext/stem_UTF_8_russian.c +0 -709
  183. data/ext/stem_UTF_8_spanish.c +0 -1137
  184. data/ext/stem_UTF_8_swedish.c +0 -313
  185. data/lib/ferret_version.rb +0 -3
@@ -0,0 +1,104 @@
1
+
2
+ /*-------------------------------------------------------------*/
3
+ /*--- Table for doing CRCs ---*/
4
+ /*--- crctable.c ---*/
5
+ /*-------------------------------------------------------------*/
6
+
7
+ /* ------------------------------------------------------------------
8
+ This file is part of bzip2/libbzip2, a program and library for
9
+ lossless, block-sorting data compression.
10
+
11
+ bzip2/libbzip2 version 1.0.4 of 20 December 2006
12
+ Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
13
+
14
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
15
+ README file.
16
+
17
+ This program is released under the terms of the license contained
18
+ in the file LICENSE.
19
+ ------------------------------------------------------------------ */
20
+
21
+
22
+ #include "bzlib_private.h"
23
+
24
+ /*--
25
+ I think this is an implementation of the AUTODIN-II,
26
+ Ethernet & FDDI 32-bit CRC standard. Vaguely derived
27
+ from code by Rob Warnock, in Section 51 of the
28
+ comp.compression FAQ.
29
+ --*/
30
+
31
+ UInt32 BZ2_crc32Table[256] = {
32
+ /* Precomputed 32-bit CRC lookup table with one entry per possible
   byte value (256 entries).  Entry 0 is 0 and entry 1 is 0x04c11db7.
   NOTE(review): presumably indexed by the CRC update macro declared in
   bzlib_private.h -- confirm against that header.  The table is not
   declared const, so it is writable at run time; nothing in this file
   writes to it. */
33
+ /*-- Ugly, innit? --*/
34
+
35
+ 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
36
+ 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
37
+ 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
38
+ 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
39
+ 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
40
+ 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
41
+ 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
42
+ 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
43
+ 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
44
+ 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
45
+ 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
46
+ 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
47
+ 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
48
+ 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
49
+ 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
50
+ 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
51
+ 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
52
+ 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
53
+ 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
54
+ 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
55
+ 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
56
+ 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
57
+ 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
58
+ 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
59
+ 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
60
+ 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
61
+ 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
62
+ 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
63
+ 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
64
+ 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
65
+ 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
66
+ 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
67
+ 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
68
+ 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
69
+ 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
70
+ 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
71
+ 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
72
+ 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
73
+ 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
74
+ 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
75
+ 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
76
+ 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
77
+ 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
78
+ 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
79
+ 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
80
+ 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
81
+ 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
82
+ 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
83
+ 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
84
+ 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
85
+ 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
86
+ 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
87
+ 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
88
+ 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
89
+ 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
90
+ 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
91
+ 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
92
+ 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
93
+ 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
94
+ 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
95
+ 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
96
+ 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
97
+ 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
98
+ 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
99
+ };
100
+
101
+
102
+ /*-------------------------------------------------------------*/
103
+ /*--- end crctable.c ---*/
104
+ /*-------------------------------------------------------------*/
@@ -0,0 +1,626 @@
1
+
2
+ /*-------------------------------------------------------------*/
3
+ /*--- Decompression machinery ---*/
4
+ /*--- decompress.c ---*/
5
+ /*-------------------------------------------------------------*/
6
+
7
+ /* ------------------------------------------------------------------
8
+ This file is part of bzip2/libbzip2, a program and library for
9
+ lossless, block-sorting data compression.
10
+
11
+ bzip2/libbzip2 version 1.0.4 of 20 December 2006
12
+ Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
13
+
14
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
15
+ README file.
16
+
17
+ This program is released under the terms of the license contained
18
+ in the file LICENSE.
19
+ ------------------------------------------------------------------ */
20
+
21
+
22
+ #include "bzlib_private.h"
23
+
24
+
25
+ /*---------------------------------------------------*/
26
+ static
27
+ void makeMaps_d ( DState* s )
28
+ {
29
+ Int32 i;
30
+ s->nInUse = 0;
31
+ for (i = 0; i < 256; i++)
32
+ if (s->inUse[i]) {
33
+ s->seqToUnseq[s->nInUse] = i;
34
+ s->nInUse++;
35
+ }
36
+ }
37
+
38
+
39
+ /*---------------------------------------------------
   RETURN(rrr): store rrr in the local variable retVal and jump to
   the save_state_and_return label.  It can therefore only be
   expanded inside a function that declares both, which in this
   file is BZ2_decompress.
  ---------------------------------------------------*/
40
+ #define RETURN(rrr) \
41
+ { retVal = rrr; goto save_state_and_return; };
42
+ /* GET_BITS(lll,vvv,nnn): read nnn bits from the input into vvv.
   The expansion begins with `case lll:` and records lll in
   s->state, so it must sit inside a switch on s->state; that is
   what lets a later call resume at this exact read.
   The loop ends once s->bsLive holds at least nnn bits: the top nnn
   unread bits of s->bsBuff are extracted and s->bsLive is reduced.
   Otherwise, if s->strm->avail_in is 0 the macro leaves through
   RETURN(BZ_OK); else it shifts one more input byte into s->bsBuff
   and advances next_in, avail_in and the total_in counter (a wrap of
   total_in_lo32 to 0 carries into total_in_hi32).
   nnn is substituted unparenthesised; the callers in this file pass
   a literal or a single variable. */
43
+ #define GET_BITS(lll,vvv,nnn) \
44
+ case lll: s->state = lll; \
45
+ while (True) { \
46
+ if (s->bsLive >= nnn) { \
47
+ UInt32 v; \
48
+ v = (s->bsBuff >> \
49
+ (s->bsLive-nnn)) & ((1 << nnn)-1); \
50
+ s->bsLive -= nnn; \
51
+ vvv = v; \
52
+ break; \
53
+ } \
54
+ if (s->strm->avail_in == 0) RETURN(BZ_OK); \
55
+ s->bsBuff \
56
+ = (s->bsBuff << 8) | \
57
+ ((UInt32) \
58
+ (*((UChar*)(s->strm->next_in)))); \
59
+ s->bsLive += 8; \
60
+ s->strm->next_in++; \
61
+ s->strm->avail_in--; \
62
+ s->strm->total_in_lo32++; \
63
+ if (s->strm->total_in_lo32 == 0) \
64
+ s->strm->total_in_hi32++; \
65
+ }
66
+ /* GET_UCHAR(lll,uuu): read 8 bits into uuu via GET_BITS, using lll as
   the resume state. */
67
+ #define GET_UCHAR(lll,uuu) \
68
+ GET_BITS(lll,uuu,8)
69
+ /* GET_BIT(lll,uuu): read a single bit into uuu via GET_BITS, using lll
   as the resume state. */
70
+ #define GET_BIT(lll,uuu) \
71
+ GET_BITS(lll,uuu,1)
72
+
73
+ /*---------------------------------------------------
   GET_MTF_VAL(label1,label2,lval): decode one Huffman-coded symbol
   into lval, using the locals of BZ2_decompress.
   When groupPos is 0 a new group of BZ_G_SIZE symbols starts:
   groupNo is advanced, BZ_DATA_ERROR is returned if it reaches
   nSelectors, and the limit/perm/base tables and minimum code
   length chosen by s->selector[groupNo] are loaded.
   The code is read as gMinlen bits and then extended one bit at a
   time until zvec <= gLimit[zn].  A length above 20, or a table
   index outside [0, BZ_MAX_ALPHA_SIZE), yields BZ_DATA_ERROR.
   label1 and label2 are the resume states handed to the two
   GET_BITS / GET_BIT expansions.
  ---------------------------------------------------*/
74
+ #define GET_MTF_VAL(label1,label2,lval) \
75
+ { \
76
+ if (groupPos == 0) { \
77
+ groupNo++; \
78
+ if (groupNo >= nSelectors) \
79
+ RETURN(BZ_DATA_ERROR); \
80
+ groupPos = BZ_G_SIZE; \
81
+ gSel = s->selector[groupNo]; \
82
+ gMinlen = s->minLens[gSel]; \
83
+ gLimit = &(s->limit[gSel][0]); \
84
+ gPerm = &(s->perm[gSel][0]); \
85
+ gBase = &(s->base[gSel][0]); \
86
+ } \
87
+ groupPos--; \
88
+ zn = gMinlen; \
89
+ GET_BITS(label1, zvec, zn); \
90
+ while (1) { \
91
+ if (zn > 20 /* the longest code */) \
92
+ RETURN(BZ_DATA_ERROR); \
93
+ if (zvec <= gLimit[zn]) break; \
94
+ zn++; \
95
+ GET_BIT(label2, zj); \
96
+ zvec = (zvec << 1) | zj; \
97
+ }; \
98
+ if (zvec - gBase[zn] < 0 \
99
+ || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
100
+ RETURN(BZ_DATA_ERROR); \
101
+ lval = gPerm[zvec - gBase[zn]]; \
102
+ }
103
+
104
+
105
+ /*---------------------------------------------------*/
106
+ Int32 BZ2_decompress ( DState* s )
107
+ {
108
+ UChar uc;
109
+ Int32 retVal;
110
+ Int32 minLen, maxLen;
111
+ bz_stream* strm = s->strm;
112
+
113
+ /* stuff that needs to be saved/restored */
114
+ Int32 i;
115
+ Int32 j;
116
+ Int32 t;
117
+ Int32 alphaSize;
118
+ Int32 nGroups;
119
+ Int32 nSelectors;
120
+ Int32 EOB;
121
+ Int32 groupNo;
122
+ Int32 groupPos;
123
+ Int32 nextSym;
124
+ Int32 nblockMAX;
125
+ Int32 nblock;
126
+ Int32 es;
127
+ Int32 N;
128
+ Int32 curr;
129
+ Int32 zt;
130
+ Int32 zn;
131
+ Int32 zvec;
132
+ Int32 zj;
133
+ Int32 gSel;
134
+ Int32 gMinlen;
135
+ Int32* gLimit;
136
+ Int32* gBase;
137
+ Int32* gPerm;
138
+
139
+ if (s->state == BZ_X_MAGIC_1) {
140
+ /*initialise the save area*/
141
+ s->save_i = 0;
142
+ s->save_j = 0;
143
+ s->save_t = 0;
144
+ s->save_alphaSize = 0;
145
+ s->save_nGroups = 0;
146
+ s->save_nSelectors = 0;
147
+ s->save_EOB = 0;
148
+ s->save_groupNo = 0;
149
+ s->save_groupPos = 0;
150
+ s->save_nextSym = 0;
151
+ s->save_nblockMAX = 0;
152
+ s->save_nblock = 0;
153
+ s->save_es = 0;
154
+ s->save_N = 0;
155
+ s->save_curr = 0;
156
+ s->save_zt = 0;
157
+ s->save_zn = 0;
158
+ s->save_zvec = 0;
159
+ s->save_zj = 0;
160
+ s->save_gSel = 0;
161
+ s->save_gMinlen = 0;
162
+ s->save_gLimit = NULL;
163
+ s->save_gBase = NULL;
164
+ s->save_gPerm = NULL;
165
+ }
166
+
167
+ /*restore from the save area*/
168
+ i = s->save_i;
169
+ j = s->save_j;
170
+ t = s->save_t;
171
+ alphaSize = s->save_alphaSize;
172
+ nGroups = s->save_nGroups;
173
+ nSelectors = s->save_nSelectors;
174
+ EOB = s->save_EOB;
175
+ groupNo = s->save_groupNo;
176
+ groupPos = s->save_groupPos;
177
+ nextSym = s->save_nextSym;
178
+ nblockMAX = s->save_nblockMAX;
179
+ nblock = s->save_nblock;
180
+ es = s->save_es;
181
+ N = s->save_N;
182
+ curr = s->save_curr;
183
+ zt = s->save_zt;
184
+ zn = s->save_zn;
185
+ zvec = s->save_zvec;
186
+ zj = s->save_zj;
187
+ gSel = s->save_gSel;
188
+ gMinlen = s->save_gMinlen;
189
+ gLimit = s->save_gLimit;
190
+ gBase = s->save_gBase;
191
+ gPerm = s->save_gPerm;
192
+
193
+ retVal = BZ_OK;
194
+
195
+ switch (s->state) {
196
+
197
+ GET_UCHAR(BZ_X_MAGIC_1, uc);
198
+ if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
199
+
200
+ GET_UCHAR(BZ_X_MAGIC_2, uc);
201
+ if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
202
+
203
+ GET_UCHAR(BZ_X_MAGIC_3, uc)
204
+ if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
205
+
206
+ GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8)
207
+ if (s->blockSize100k < (BZ_HDR_0 + 1) ||
208
+ s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC);
209
+ s->blockSize100k -= BZ_HDR_0;
210
+
211
+ if (s->smallDecompress) {
212
+ s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
213
+ s->ll4 = BZALLOC(
214
+ ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar)
215
+ );
216
+ if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
217
+ } else {
218
+ s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
219
+ if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
220
+ }
221
+
222
+ GET_UCHAR(BZ_X_BLKHDR_1, uc);
223
+
224
+ if (uc == 0x17) goto endhdr_2;
225
+ if (uc != 0x31) RETURN(BZ_DATA_ERROR);
226
+ GET_UCHAR(BZ_X_BLKHDR_2, uc);
227
+ if (uc != 0x41) RETURN(BZ_DATA_ERROR);
228
+ GET_UCHAR(BZ_X_BLKHDR_3, uc);
229
+ if (uc != 0x59) RETURN(BZ_DATA_ERROR);
230
+ GET_UCHAR(BZ_X_BLKHDR_4, uc);
231
+ if (uc != 0x26) RETURN(BZ_DATA_ERROR);
232
+ GET_UCHAR(BZ_X_BLKHDR_5, uc);
233
+ if (uc != 0x53) RETURN(BZ_DATA_ERROR);
234
+ GET_UCHAR(BZ_X_BLKHDR_6, uc);
235
+ if (uc != 0x59) RETURN(BZ_DATA_ERROR);
236
+
237
+ s->currBlockNo++;
238
+ if (s->verbosity >= 2)
239
+ VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo );
240
+
241
+ s->storedBlockCRC = 0;
242
+ GET_UCHAR(BZ_X_BCRC_1, uc);
243
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
244
+ GET_UCHAR(BZ_X_BCRC_2, uc);
245
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
246
+ GET_UCHAR(BZ_X_BCRC_3, uc);
247
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
248
+ GET_UCHAR(BZ_X_BCRC_4, uc);
249
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
250
+
251
+ GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
252
+
253
+ s->origPtr = 0;
254
+ GET_UCHAR(BZ_X_ORIGPTR_1, uc);
255
+ s->origPtr = (s->origPtr << 8) | ((Int32)uc);
256
+ GET_UCHAR(BZ_X_ORIGPTR_2, uc);
257
+ s->origPtr = (s->origPtr << 8) | ((Int32)uc);
258
+ GET_UCHAR(BZ_X_ORIGPTR_3, uc);
259
+ s->origPtr = (s->origPtr << 8) | ((Int32)uc);
260
+
261
+ if (s->origPtr < 0)
262
+ RETURN(BZ_DATA_ERROR);
263
+ if (s->origPtr > 10 + 100000*s->blockSize100k)
264
+ RETURN(BZ_DATA_ERROR);
265
+
266
+ /*--- Receive the mapping table ---*/
267
+ for (i = 0; i < 16; i++) {
268
+ GET_BIT(BZ_X_MAPPING_1, uc);
269
+ if (uc == 1)
270
+ s->inUse16[i] = True; else
271
+ s->inUse16[i] = False;
272
+ }
273
+
274
+ for (i = 0; i < 256; i++) s->inUse[i] = False;
275
+
276
+ for (i = 0; i < 16; i++)
277
+ if (s->inUse16[i])
278
+ for (j = 0; j < 16; j++) {
279
+ GET_BIT(BZ_X_MAPPING_2, uc);
280
+ if (uc == 1) s->inUse[i * 16 + j] = True;
281
+ }
282
+ makeMaps_d ( s );
283
+ if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
284
+ alphaSize = s->nInUse+2;
285
+
286
+ /*--- Now the selectors ---*/
287
+ GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
288
+ if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR);
289
+ GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
290
+ if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
291
+ for (i = 0; i < nSelectors; i++) {
292
+ j = 0;
293
+ while (True) {
294
+ GET_BIT(BZ_X_SELECTOR_3, uc);
295
+ if (uc == 0) break;
296
+ j++;
297
+ if (j >= nGroups) RETURN(BZ_DATA_ERROR);
298
+ }
299
+ s->selectorMtf[i] = j;
300
+ }
301
+
302
+ /*--- Undo the MTF values for the selectors. ---*/
303
+ {
304
+ UChar pos[BZ_N_GROUPS], tmp, v;
305
+ for (v = 0; v < nGroups; v++) pos[v] = v;
306
+
307
+ for (i = 0; i < nSelectors; i++) {
308
+ v = s->selectorMtf[i];
309
+ tmp = pos[v];
310
+ while (v > 0) { pos[v] = pos[v-1]; v--; }
311
+ pos[0] = tmp;
312
+ s->selector[i] = tmp;
313
+ }
314
+ }
315
+
316
+ /*--- Now the coding tables ---*/
317
+ for (t = 0; t < nGroups; t++) {
318
+ GET_BITS(BZ_X_CODING_1, curr, 5);
319
+ for (i = 0; i < alphaSize; i++) {
320
+ while (True) {
321
+ if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
322
+ GET_BIT(BZ_X_CODING_2, uc);
323
+ if (uc == 0) break;
324
+ GET_BIT(BZ_X_CODING_3, uc);
325
+ if (uc == 0) curr++; else curr--;
326
+ }
327
+ s->len[t][i] = curr;
328
+ }
329
+ }
330
+
331
+ /*--- Create the Huffman decoding tables ---*/
332
+ for (t = 0; t < nGroups; t++) {
333
+ minLen = 32;
334
+ maxLen = 0;
335
+ for (i = 0; i < alphaSize; i++) {
336
+ if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
337
+ if (s->len[t][i] < minLen) minLen = s->len[t][i];
338
+ }
339
+ BZ2_hbCreateDecodeTables (
340
+ &(s->limit[t][0]),
341
+ &(s->base[t][0]),
342
+ &(s->perm[t][0]),
343
+ &(s->len[t][0]),
344
+ minLen, maxLen, alphaSize
345
+ );
346
+ s->minLens[t] = minLen;
347
+ }
348
+
349
+ /*--- Now the MTF values ---*/
350
+
351
+ EOB = s->nInUse+1;
352
+ nblockMAX = 100000 * s->blockSize100k;
353
+ groupNo = -1;
354
+ groupPos = 0;
355
+
356
+ for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
357
+
358
+ /*-- MTF init --*/
359
+ {
360
+ Int32 ii, jj, kk;
361
+ kk = MTFA_SIZE-1;
362
+ for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
363
+ for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
364
+ s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
365
+ kk--;
366
+ }
367
+ s->mtfbase[ii] = kk + 1;
368
+ }
369
+ }
370
+ /*-- end MTF init --*/
371
+
372
+ nblock = 0;
373
+ GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
374
+
375
+ while (True) {
376
+
377
+ if (nextSym == EOB) break;
378
+
379
+ if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
380
+
381
+ es = -1;
382
+ N = 1;
383
+ do {
384
+ if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
385
+ if (nextSym == BZ_RUNB) es = es + (1+1) * N;
386
+ N = N * 2;
387
+ GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
388
+ }
389
+ while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
390
+
391
+ es++;
392
+ uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
393
+ s->unzftab[uc] += es;
394
+
395
+ if (s->smallDecompress)
396
+ while (es > 0) {
397
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
398
+ s->ll16[nblock] = (UInt16)uc;
399
+ nblock++;
400
+ es--;
401
+ }
402
+ else
403
+ while (es > 0) {
404
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
405
+ s->tt[nblock] = (UInt32)uc;
406
+ nblock++;
407
+ es--;
408
+ };
409
+
410
+ continue;
411
+
412
+ } else {
413
+
414
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
415
+
416
+ /*-- uc = MTF ( nextSym-1 ) --*/
417
+ {
418
+ Int32 ii, jj, kk, pp, lno, off;
419
+ UInt32 nn;
420
+ nn = (UInt32)(nextSym - 1);
421
+
422
+ if (nn < MTFL_SIZE) {
423
+ /* avoid general-case expense */
424
+ pp = s->mtfbase[0];
425
+ uc = s->mtfa[pp+nn];
426
+ while (nn > 3) {
427
+ Int32 z = pp+nn;
428
+ s->mtfa[(z) ] = s->mtfa[(z)-1];
429
+ s->mtfa[(z)-1] = s->mtfa[(z)-2];
430
+ s->mtfa[(z)-2] = s->mtfa[(z)-3];
431
+ s->mtfa[(z)-3] = s->mtfa[(z)-4];
432
+ nn -= 4;
433
+ }
434
+ while (nn > 0) {
435
+ s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--;
436
+ };
437
+ s->mtfa[pp] = uc;
438
+ } else {
439
+ /* general case */
440
+ lno = nn / MTFL_SIZE;
441
+ off = nn % MTFL_SIZE;
442
+ pp = s->mtfbase[lno] + off;
443
+ uc = s->mtfa[pp];
444
+ while (pp > s->mtfbase[lno]) {
445
+ s->mtfa[pp] = s->mtfa[pp-1]; pp--;
446
+ };
447
+ s->mtfbase[lno]++;
448
+ while (lno > 0) {
449
+ s->mtfbase[lno]--;
450
+ s->mtfa[s->mtfbase[lno]]
451
+ = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
452
+ lno--;
453
+ }
454
+ s->mtfbase[0]--;
455
+ s->mtfa[s->mtfbase[0]] = uc;
456
+ if (s->mtfbase[0] == 0) {
457
+ kk = MTFA_SIZE-1;
458
+ for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
459
+ for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
460
+ s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
461
+ kk--;
462
+ }
463
+ s->mtfbase[ii] = kk + 1;
464
+ }
465
+ }
466
+ }
467
+ }
468
+ /*-- end uc = MTF ( nextSym-1 ) --*/
469
+
470
+ s->unzftab[s->seqToUnseq[uc]]++;
471
+ if (s->smallDecompress)
472
+ s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
473
+ s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]);
474
+ nblock++;
475
+
476
+ GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
477
+ continue;
478
+ }
479
+ }
480
+
481
+ /* Now we know what nblock is, we can do a better sanity
482
+ check on s->origPtr.
483
+ */
484
+ if (s->origPtr < 0 || s->origPtr >= nblock)
485
+ RETURN(BZ_DATA_ERROR);
486
+
487
+ /*-- Set up cftab to facilitate generation of T^(-1) --*/
488
+ s->cftab[0] = 0;
489
+ for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
490
+ for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];
491
+ for (i = 0; i <= 256; i++) {
492
+ if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
493
+ /* s->cftab[i] can legitimately be == nblock */
494
+ RETURN(BZ_DATA_ERROR);
495
+ }
496
+ }
497
+
498
+ s->state_out_len = 0;
499
+ s->state_out_ch = 0;
500
+ BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
501
+ s->state = BZ_X_OUTPUT;
502
+ if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
503
+
504
+ if (s->smallDecompress) {
505
+
506
+ /*-- Make a copy of cftab, used in generation of T --*/
507
+ for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
508
+
509
+ /*-- compute the T vector --*/
510
+ for (i = 0; i < nblock; i++) {
511
+ uc = (UChar)(s->ll16[i]);
512
+ SET_LL(i, s->cftabCopy[uc]);
513
+ s->cftabCopy[uc]++;
514
+ }
515
+
516
+ /*-- Compute T^(-1) by pointer reversal on T --*/
517
+ i = s->origPtr;
518
+ j = GET_LL(i);
519
+ do {
520
+ Int32 tmp = GET_LL(j);
521
+ SET_LL(j, i);
522
+ i = j;
523
+ j = tmp;
524
+ }
525
+ while (i != s->origPtr);
526
+
527
+ s->tPos = s->origPtr;
528
+ s->nblock_used = 0;
529
+ if (s->blockRandomised) {
530
+ BZ_RAND_INIT_MASK;
531
+ BZ_GET_SMALL(s->k0); s->nblock_used++;
532
+ BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
533
+ } else {
534
+ BZ_GET_SMALL(s->k0); s->nblock_used++;
535
+ }
536
+
537
+ } else {
538
+
539
+ /*-- compute the T^(-1) vector --*/
540
+ for (i = 0; i < nblock; i++) {
541
+ uc = (UChar)(s->tt[i] & 0xff);
542
+ s->tt[s->cftab[uc]] |= (i << 8);
543
+ s->cftab[uc]++;
544
+ }
545
+
546
+ s->tPos = s->tt[s->origPtr] >> 8;
547
+ s->nblock_used = 0;
548
+ if (s->blockRandomised) {
549
+ BZ_RAND_INIT_MASK;
550
+ BZ_GET_FAST(s->k0); s->nblock_used++;
551
+ BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
552
+ } else {
553
+ BZ_GET_FAST(s->k0); s->nblock_used++;
554
+ }
555
+
556
+ }
557
+
558
+ RETURN(BZ_OK);
559
+
560
+
561
+
562
+ endhdr_2:
563
+
564
+ GET_UCHAR(BZ_X_ENDHDR_2, uc);
565
+ if (uc != 0x72) RETURN(BZ_DATA_ERROR);
566
+ GET_UCHAR(BZ_X_ENDHDR_3, uc);
567
+ if (uc != 0x45) RETURN(BZ_DATA_ERROR);
568
+ GET_UCHAR(BZ_X_ENDHDR_4, uc);
569
+ if (uc != 0x38) RETURN(BZ_DATA_ERROR);
570
+ GET_UCHAR(BZ_X_ENDHDR_5, uc);
571
+ if (uc != 0x50) RETURN(BZ_DATA_ERROR);
572
+ GET_UCHAR(BZ_X_ENDHDR_6, uc);
573
+ if (uc != 0x90) RETURN(BZ_DATA_ERROR);
574
+
575
+ s->storedCombinedCRC = 0;
576
+ GET_UCHAR(BZ_X_CCRC_1, uc);
577
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
578
+ GET_UCHAR(BZ_X_CCRC_2, uc);
579
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
580
+ GET_UCHAR(BZ_X_CCRC_3, uc);
581
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
582
+ GET_UCHAR(BZ_X_CCRC_4, uc);
583
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
584
+
585
+ s->state = BZ_X_IDLE;
586
+ RETURN(BZ_STREAM_END);
587
+
588
+ default: AssertH ( False, 4001 );
589
+ }
590
+
591
+ AssertH ( False, 4002 );
592
+
593
+ save_state_and_return:
594
+
595
+ s->save_i = i;
596
+ s->save_j = j;
597
+ s->save_t = t;
598
+ s->save_alphaSize = alphaSize;
599
+ s->save_nGroups = nGroups;
600
+ s->save_nSelectors = nSelectors;
601
+ s->save_EOB = EOB;
602
+ s->save_groupNo = groupNo;
603
+ s->save_groupPos = groupPos;
604
+ s->save_nextSym = nextSym;
605
+ s->save_nblockMAX = nblockMAX;
606
+ s->save_nblock = nblock;
607
+ s->save_es = es;
608
+ s->save_N = N;
609
+ s->save_curr = curr;
610
+ s->save_zt = zt;
611
+ s->save_zn = zn;
612
+ s->save_zvec = zvec;
613
+ s->save_zj = zj;
614
+ s->save_gSel = gSel;
615
+ s->save_gMinlen = gMinlen;
616
+ s->save_gLimit = gLimit;
617
+ s->save_gBase = gBase;
618
+ s->save_gPerm = gPerm;
619
+
620
+ return retVal;
621
+ }
622
+
623
+
624
+ /*-------------------------------------------------------------*/
625
+ /*--- end decompress.c ---*/
626
+ /*-------------------------------------------------------------*/