ferret 0.11.6 → 0.11.8.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README +10 -22
- data/RELEASE_CHANGES +137 -0
- data/RELEASE_NOTES +60 -0
- data/Rakefile +379 -274
- data/TODO +100 -8
- data/bin/ferret-browser +0 -0
- data/ext/BZLIB_blocksort.c +1094 -0
- data/ext/BZLIB_bzlib.c +1578 -0
- data/ext/BZLIB_compress.c +672 -0
- data/ext/BZLIB_crctable.c +104 -0
- data/ext/BZLIB_decompress.c +626 -0
- data/ext/BZLIB_huffman.c +205 -0
- data/ext/BZLIB_randtable.c +84 -0
- data/ext/{api.c → STEMMER_api.c} +7 -10
- data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
- data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
- data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
- data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
- data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
- data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
- data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
- data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
- data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
- data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
- data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
- data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
- data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
- data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
- data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
- data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
- data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
- data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
- data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
- data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
- data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
- data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
- data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
- data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
- data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
- data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
- data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
- data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
- data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
- data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
- data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
- data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
- data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
- data/ext/analysis.c +276 -121
- data/ext/analysis.h +190 -143
- data/ext/api.h +3 -4
- data/ext/array.c +5 -3
- data/ext/array.h +52 -43
- data/ext/bitvector.c +38 -482
- data/ext/bitvector.h +446 -124
- data/ext/bzlib.h +282 -0
- data/ext/bzlib_private.h +503 -0
- data/ext/compound_io.c +23 -22
- data/ext/config.h +21 -11
- data/ext/document.c +43 -40
- data/ext/document.h +31 -21
- data/ext/except.c +20 -38
- data/ext/except.h +89 -76
- data/ext/extconf.rb +3 -2
- data/ext/ferret.c +49 -35
- data/ext/ferret.h +14 -11
- data/ext/field_index.c +262 -0
- data/ext/field_index.h +52 -0
- data/ext/filter.c +11 -10
- data/ext/fs_store.c +65 -47
- data/ext/global.c +245 -165
- data/ext/global.h +252 -54
- data/ext/hash.c +200 -243
- data/ext/hash.h +205 -163
- data/ext/hashset.c +118 -96
- data/ext/hashset.h +110 -82
- data/ext/header.h +19 -19
- data/ext/helper.c +11 -10
- data/ext/helper.h +14 -6
- data/ext/index.c +745 -366
- data/ext/index.h +503 -529
- data/ext/internal.h +1020 -0
- data/ext/lang.c +10 -0
- data/ext/lang.h +35 -15
- data/ext/mempool.c +5 -4
- data/ext/mempool.h +30 -22
- data/ext/modules.h +35 -7
- data/ext/multimapper.c +43 -2
- data/ext/multimapper.h +32 -23
- data/ext/posh.c +0 -0
- data/ext/posh.h +4 -38
- data/ext/priorityqueue.c +10 -12
- data/ext/priorityqueue.h +33 -21
- data/ext/q_boolean.c +22 -9
- data/ext/q_const_score.c +3 -2
- data/ext/q_filtered_query.c +15 -12
- data/ext/q_fuzzy.c +147 -135
- data/ext/q_match_all.c +3 -2
- data/ext/q_multi_term.c +28 -32
- data/ext/q_parser.c +451 -173
- data/ext/q_phrase.c +158 -79
- data/ext/q_prefix.c +16 -18
- data/ext/q_range.c +363 -31
- data/ext/q_span.c +130 -141
- data/ext/q_term.c +21 -21
- data/ext/q_wildcard.c +19 -23
- data/ext/r_analysis.c +369 -242
- data/ext/r_index.c +421 -434
- data/ext/r_qparser.c +142 -92
- data/ext/r_search.c +790 -407
- data/ext/r_store.c +44 -44
- data/ext/r_utils.c +264 -96
- data/ext/ram_store.c +29 -23
- data/ext/scanner.c +895 -0
- data/ext/scanner.h +36 -0
- data/ext/scanner_mb.c +6701 -0
- data/ext/scanner_utf8.c +4415 -0
- data/ext/search.c +210 -87
- data/ext/search.h +556 -488
- data/ext/similarity.c +17 -16
- data/ext/similarity.h +51 -44
- data/ext/sort.c +157 -354
- data/ext/stem_ISO_8859_1_hungarian.h +16 -0
- data/ext/stem_ISO_8859_2_romanian.h +16 -0
- data/ext/stem_UTF_8_hungarian.h +16 -0
- data/ext/stem_UTF_8_romanian.h +16 -0
- data/ext/stem_UTF_8_turkish.h +16 -0
- data/ext/stopwords.c +287 -278
- data/ext/store.c +57 -51
- data/ext/store.h +308 -286
- data/ext/symbol.c +10 -0
- data/ext/symbol.h +23 -0
- data/ext/term_vectors.c +14 -293
- data/ext/threading.h +22 -22
- data/ext/win32.h +12 -4
- data/lib/ferret.rb +2 -1
- data/lib/ferret/browser.rb +1 -1
- data/lib/ferret/field_symbol.rb +94 -0
- data/lib/ferret/index.rb +221 -34
- data/lib/ferret/number_tools.rb +6 -6
- data/lib/ferret/version.rb +3 -0
- data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
- data/test/test_helper.rb +7 -2
- data/test/test_installed.rb +1 -0
- data/test/threading/thread_safety_index_test.rb +10 -1
- data/test/threading/thread_safety_read_write_test.rb +4 -7
- data/test/threading/thread_safety_test.rb +0 -0
- data/test/unit/analysis/tc_analyzer.rb +29 -27
- data/test/unit/analysis/tc_token_stream.rb +23 -16
- data/test/unit/index/tc_index.rb +116 -11
- data/test/unit/index/tc_index_reader.rb +27 -27
- data/test/unit/index/tc_index_writer.rb +10 -0
- data/test/unit/index/th_doc.rb +38 -21
- data/test/unit/search/tc_filter.rb +31 -10
- data/test/unit/search/tc_index_searcher.rb +6 -0
- data/test/unit/search/tm_searcher.rb +53 -1
- data/test/unit/store/tc_fs_store.rb +40 -2
- data/test/unit/store/tc_ram_store.rb +0 -0
- data/test/unit/store/tm_store.rb +0 -0
- data/test/unit/store/tm_store_lock.rb +7 -6
- data/test/unit/tc_field_symbol.rb +26 -0
- data/test/unit/ts_analysis.rb +0 -0
- data/test/unit/ts_index.rb +0 -0
- data/test/unit/ts_store.rb +0 -0
- data/test/unit/ts_utils.rb +0 -0
- data/test/unit/utils/tc_number_tools.rb +0 -0
- data/test/utils/content_generator.rb +226 -0
- metadata +262 -221
- data/ext/inc/lang.h +0 -48
- data/ext/inc/threading.h +0 -31
- data/ext/stem_ISO_8859_1_english.c +0 -1156
- data/ext/stem_ISO_8859_1_french.c +0 -1276
- data/ext/stem_ISO_8859_1_italian.c +0 -1091
- data/ext/stem_ISO_8859_1_norwegian.c +0 -296
- data/ext/stem_ISO_8859_1_spanish.c +0 -1119
- data/ext/stem_ISO_8859_1_swedish.c +0 -307
- data/ext/stem_UTF_8_danish.c +0 -344
- data/ext/stem_UTF_8_english.c +0 -1176
- data/ext/stem_UTF_8_french.c +0 -1296
- data/ext/stem_UTF_8_italian.c +0 -1113
- data/ext/stem_UTF_8_norwegian.c +0 -302
- data/ext/stem_UTF_8_portuguese.c +0 -1055
- data/ext/stem_UTF_8_russian.c +0 -709
- data/ext/stem_UTF_8_spanish.c +0 -1137
- data/ext/stem_UTF_8_swedish.c +0 -313
- data/lib/ferret_version.rb +0 -3
data/ext/bzlib.h
ADDED
@@ -0,0 +1,282 @@
|
|
1
|
+
|
2
|
+
/*-------------------------------------------------------------*/
|
3
|
+
/*--- Public header file for the library. ---*/
|
4
|
+
/*--- bzlib.h ---*/
|
5
|
+
/*-------------------------------------------------------------*/
|
6
|
+
|
7
|
+
/* ------------------------------------------------------------------
|
8
|
+
This file is part of bzip2/libbzip2, a program and library for
|
9
|
+
lossless, block-sorting data compression.
|
10
|
+
|
11
|
+
bzip2/libbzip2 version 1.0.4 of 20 December 2006
|
12
|
+
Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
|
13
|
+
|
14
|
+
Please read the WARNING, DISCLAIMER and PATENTS sections in the
|
15
|
+
README file.
|
16
|
+
|
17
|
+
This program is released under the terms of the license contained
|
18
|
+
in the file LICENSE.
|
19
|
+
------------------------------------------------------------------ */
|
20
|
+
|
21
|
+
|
22
|
+
#ifndef _BZLIB_H
|
23
|
+
#define _BZLIB_H
|
24
|
+
|
25
|
+
#ifdef __cplusplus
|
26
|
+
extern "C" {
|
27
|
+
#endif
|
28
|
+
|
29
|
+
#define BZ_RUN 0
|
30
|
+
#define BZ_FLUSH 1
|
31
|
+
#define BZ_FINISH 2
|
32
|
+
|
33
|
+
#define BZ_OK 0
|
34
|
+
#define BZ_RUN_OK 1
|
35
|
+
#define BZ_FLUSH_OK 2
|
36
|
+
#define BZ_FINISH_OK 3
|
37
|
+
#define BZ_STREAM_END 4
|
38
|
+
#define BZ_SEQUENCE_ERROR (-1)
|
39
|
+
#define BZ_PARAM_ERROR (-2)
|
40
|
+
#define BZ_MEM_ERROR (-3)
|
41
|
+
#define BZ_DATA_ERROR (-4)
|
42
|
+
#define BZ_DATA_ERROR_MAGIC (-5)
|
43
|
+
#define BZ_IO_ERROR (-6)
|
44
|
+
#define BZ_UNEXPECTED_EOF (-7)
|
45
|
+
#define BZ_OUTBUFF_FULL (-8)
|
46
|
+
#define BZ_CONFIG_ERROR (-9)
|
47
|
+
|
48
|
+
typedef
|
49
|
+
struct {
|
50
|
+
char *next_in;
|
51
|
+
unsigned int avail_in;
|
52
|
+
unsigned int total_in_lo32;
|
53
|
+
unsigned int total_in_hi32;
|
54
|
+
|
55
|
+
char *next_out;
|
56
|
+
unsigned int avail_out;
|
57
|
+
unsigned int total_out_lo32;
|
58
|
+
unsigned int total_out_hi32;
|
59
|
+
|
60
|
+
void *state;
|
61
|
+
|
62
|
+
void *(*bzalloc)(void *,int,int);
|
63
|
+
void (*bzfree)(void *,void *);
|
64
|
+
void *opaque;
|
65
|
+
}
|
66
|
+
bz_stream;
|
67
|
+
|
68
|
+
|
69
|
+
#ifndef BZ_IMPORT
|
70
|
+
#define BZ_EXPORT
|
71
|
+
#endif
|
72
|
+
|
73
|
+
#ifndef BZ_NO_STDIO
|
74
|
+
/* Need a definition for FILE */
|
75
|
+
#include <stdio.h>
|
76
|
+
#endif
|
77
|
+
|
78
|
+
#ifdef _WIN32
|
79
|
+
# include <windows.h>
|
80
|
+
# ifdef small
|
81
|
+
/* windows.h defines small as char */
|
82
|
+
# undef small
|
83
|
+
# endif
|
84
|
+
# ifdef BZ_EXPORT
|
85
|
+
# define BZ_API(func) WINAPI func
|
86
|
+
# define BZ_EXTERN extern
|
87
|
+
# else
|
88
|
+
/* import windows dll dynamically */
|
89
|
+
# define BZ_API(func) (WINAPI * func)
|
90
|
+
# define BZ_EXTERN
|
91
|
+
# endif
|
92
|
+
#else
|
93
|
+
# define BZ_API(func) func
|
94
|
+
# define BZ_EXTERN extern
|
95
|
+
#endif
|
96
|
+
|
97
|
+
|
98
|
+
/*-- Core (low-level) library functions --*/
|
99
|
+
|
100
|
+
BZ_EXTERN int BZ_API(BZ2_bzCompressInit) (
|
101
|
+
bz_stream* strm,
|
102
|
+
int blockSize100k,
|
103
|
+
int verbosity,
|
104
|
+
int workFactor
|
105
|
+
);
|
106
|
+
|
107
|
+
BZ_EXTERN int BZ_API(BZ2_bzCompress) (
|
108
|
+
bz_stream* strm,
|
109
|
+
int action
|
110
|
+
);
|
111
|
+
|
112
|
+
BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) (
|
113
|
+
bz_stream* strm
|
114
|
+
);
|
115
|
+
|
116
|
+
BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) (
|
117
|
+
bz_stream *strm,
|
118
|
+
int verbosity,
|
119
|
+
int small
|
120
|
+
);
|
121
|
+
|
122
|
+
BZ_EXTERN int BZ_API(BZ2_bzDecompress) (
|
123
|
+
bz_stream* strm
|
124
|
+
);
|
125
|
+
|
126
|
+
BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) (
|
127
|
+
bz_stream *strm
|
128
|
+
);
|
129
|
+
|
130
|
+
|
131
|
+
|
132
|
+
/*-- High(er) level library functions --*/
|
133
|
+
|
134
|
+
#ifndef BZ_NO_STDIO
|
135
|
+
#define BZ_MAX_UNUSED 5000
|
136
|
+
|
137
|
+
typedef void BZFILE;
|
138
|
+
|
139
|
+
BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) (
|
140
|
+
int* bzerror,
|
141
|
+
FILE* f,
|
142
|
+
int verbosity,
|
143
|
+
int small,
|
144
|
+
void* unused,
|
145
|
+
int nUnused
|
146
|
+
);
|
147
|
+
|
148
|
+
BZ_EXTERN void BZ_API(BZ2_bzReadClose) (
|
149
|
+
int* bzerror,
|
150
|
+
BZFILE* b
|
151
|
+
);
|
152
|
+
|
153
|
+
BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) (
|
154
|
+
int* bzerror,
|
155
|
+
BZFILE* b,
|
156
|
+
void** unused,
|
157
|
+
int* nUnused
|
158
|
+
);
|
159
|
+
|
160
|
+
BZ_EXTERN int BZ_API(BZ2_bzRead) (
|
161
|
+
int* bzerror,
|
162
|
+
BZFILE* b,
|
163
|
+
void* buf,
|
164
|
+
int len
|
165
|
+
);
|
166
|
+
|
167
|
+
BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
|
168
|
+
int* bzerror,
|
169
|
+
FILE* f,
|
170
|
+
int blockSize100k,
|
171
|
+
int verbosity,
|
172
|
+
int workFactor
|
173
|
+
);
|
174
|
+
|
175
|
+
BZ_EXTERN void BZ_API(BZ2_bzWrite) (
|
176
|
+
int* bzerror,
|
177
|
+
BZFILE* b,
|
178
|
+
void* buf,
|
179
|
+
int len
|
180
|
+
);
|
181
|
+
|
182
|
+
BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
|
183
|
+
int* bzerror,
|
184
|
+
BZFILE* b,
|
185
|
+
int abandon,
|
186
|
+
unsigned int* nbytes_in,
|
187
|
+
unsigned int* nbytes_out
|
188
|
+
);
|
189
|
+
|
190
|
+
BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
|
191
|
+
int* bzerror,
|
192
|
+
BZFILE* b,
|
193
|
+
int abandon,
|
194
|
+
unsigned int* nbytes_in_lo32,
|
195
|
+
unsigned int* nbytes_in_hi32,
|
196
|
+
unsigned int* nbytes_out_lo32,
|
197
|
+
unsigned int* nbytes_out_hi32
|
198
|
+
);
|
199
|
+
#endif
|
200
|
+
|
201
|
+
|
202
|
+
/*-- Utility functions --*/
|
203
|
+
|
204
|
+
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
|
205
|
+
char* dest,
|
206
|
+
unsigned int* destLen,
|
207
|
+
char* source,
|
208
|
+
unsigned int sourceLen,
|
209
|
+
int blockSize100k,
|
210
|
+
int verbosity,
|
211
|
+
int workFactor
|
212
|
+
);
|
213
|
+
|
214
|
+
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
|
215
|
+
char* dest,
|
216
|
+
unsigned int* destLen,
|
217
|
+
char* source,
|
218
|
+
unsigned int sourceLen,
|
219
|
+
int small,
|
220
|
+
int verbosity
|
221
|
+
);
|
222
|
+
|
223
|
+
|
224
|
+
/*--
|
225
|
+
Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp)
|
226
|
+
to support better zlib compatibility.
|
227
|
+
This code is not _officially_ part of libbzip2 (yet);
|
228
|
+
I haven't tested it, documented it, or considered the
|
229
|
+
thread safety of it.
|
230
|
+
If this code breaks, please contact both Yoshioka and me.
|
231
|
+
--*/
|
232
|
+
|
233
|
+
BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) (
|
234
|
+
void
|
235
|
+
);
|
236
|
+
|
237
|
+
#ifndef BZ_NO_STDIO
|
238
|
+
BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) (
|
239
|
+
const char *path,
|
240
|
+
const char *mode
|
241
|
+
);
|
242
|
+
|
243
|
+
BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) (
|
244
|
+
int fd,
|
245
|
+
const char *mode
|
246
|
+
);
|
247
|
+
|
248
|
+
BZ_EXTERN int BZ_API(BZ2_bzread) (
|
249
|
+
BZFILE* b,
|
250
|
+
void* buf,
|
251
|
+
int len
|
252
|
+
);
|
253
|
+
|
254
|
+
BZ_EXTERN int BZ_API(BZ2_bzwrite) (
|
255
|
+
BZFILE* b,
|
256
|
+
void* buf,
|
257
|
+
int len
|
258
|
+
);
|
259
|
+
|
260
|
+
BZ_EXTERN int BZ_API(BZ2_bzflush) (
|
261
|
+
BZFILE* b
|
262
|
+
);
|
263
|
+
|
264
|
+
BZ_EXTERN void BZ_API(BZ2_bzclose) (
|
265
|
+
BZFILE* b
|
266
|
+
);
|
267
|
+
|
268
|
+
BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
|
269
|
+
BZFILE *b,
|
270
|
+
int *errnum
|
271
|
+
);
|
272
|
+
#endif
|
273
|
+
|
274
|
+
#ifdef __cplusplus
|
275
|
+
}
|
276
|
+
#endif
|
277
|
+
|
278
|
+
#endif
|
279
|
+
|
280
|
+
/*-------------------------------------------------------------*/
|
281
|
+
/*--- end bzlib.h ---*/
|
282
|
+
/*-------------------------------------------------------------*/
|
data/ext/bzlib_private.h
ADDED
@@ -0,0 +1,503 @@
|
|
1
|
+
|
2
|
+
/*-------------------------------------------------------------*/
|
3
|
+
/*--- Private header file for the library. ---*/
|
4
|
+
/*--- bzlib_private.h ---*/
|
5
|
+
/*-------------------------------------------------------------*/
|
6
|
+
|
7
|
+
/* ------------------------------------------------------------------
|
8
|
+
This file is part of bzip2/libbzip2, a program and library for
|
9
|
+
lossless, block-sorting data compression.
|
10
|
+
|
11
|
+
bzip2/libbzip2 version 1.0.4 of 20 December 2006
|
12
|
+
Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
|
13
|
+
|
14
|
+
Please read the WARNING, DISCLAIMER and PATENTS sections in the
|
15
|
+
README file.
|
16
|
+
|
17
|
+
This program is released under the terms of the license contained
|
18
|
+
in the file LICENSE.
|
19
|
+
------------------------------------------------------------------ */
|
20
|
+
|
21
|
+
|
22
|
+
#ifndef _BZLIB_PRIVATE_H
|
23
|
+
#define _BZLIB_PRIVATE_H
|
24
|
+
|
25
|
+
#include <stdlib.h>
|
26
|
+
|
27
|
+
#ifndef BZ_NO_STDIO
|
28
|
+
#include <stdio.h>
|
29
|
+
#include <ctype.h>
|
30
|
+
#include <string.h>
|
31
|
+
#endif
|
32
|
+
|
33
|
+
#include "bzlib.h"
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
/*-- General stuff. --*/
|
38
|
+
|
39
|
+
#define BZ_VERSION "1.0.4, 20-Dec-2006"
|
40
|
+
|
41
|
+
typedef char Char;
|
42
|
+
typedef unsigned char Bool;
|
43
|
+
typedef unsigned char UChar;
|
44
|
+
typedef int Int32;
|
45
|
+
typedef unsigned int UInt32;
|
46
|
+
typedef short Int16;
|
47
|
+
typedef unsigned short UInt16;
|
48
|
+
|
49
|
+
#define True ((Bool)1)
|
50
|
+
#define False ((Bool)0)
|
51
|
+
|
52
|
+
#ifndef __GNUC__
|
53
|
+
#define __inline__ /* */
|
54
|
+
#endif
|
55
|
+
|
56
|
+
#ifndef BZ_NO_STDIO
|
57
|
+
|
58
|
+
extern void BZ2_bz__AssertH__fail ( int errcode );
|
59
|
+
#define AssertH(cond,errcode) \
|
60
|
+
{ if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); }
|
61
|
+
|
62
|
+
#if BZ_DEBUG
|
63
|
+
#define AssertD(cond,msg) \
|
64
|
+
{ if (!(cond)) { \
|
65
|
+
fprintf ( stderr, \
|
66
|
+
"\n\nlibbzip2(debug build): internal error\n\t%s\n", msg );\
|
67
|
+
exit(1); \
|
68
|
+
}}
|
69
|
+
#else
|
70
|
+
#define AssertD(cond,msg) /* */
|
71
|
+
#endif
|
72
|
+
|
73
|
+
#define VPrintf0(zf) \
|
74
|
+
fprintf(stderr,zf)
|
75
|
+
#define VPrintf1(zf,za1) \
|
76
|
+
fprintf(stderr,zf,za1)
|
77
|
+
#define VPrintf2(zf,za1,za2) \
|
78
|
+
fprintf(stderr,zf,za1,za2)
|
79
|
+
#define VPrintf3(zf,za1,za2,za3) \
|
80
|
+
fprintf(stderr,zf,za1,za2,za3)
|
81
|
+
#define VPrintf4(zf,za1,za2,za3,za4) \
|
82
|
+
fprintf(stderr,zf,za1,za2,za3,za4)
|
83
|
+
#define VPrintf5(zf,za1,za2,za3,za4,za5) \
|
84
|
+
fprintf(stderr,zf,za1,za2,za3,za4,za5)
|
85
|
+
|
86
|
+
#else
|
87
|
+
|
88
|
+
extern void bz_internal_error ( int errcode );
|
89
|
+
#define AssertH(cond,errcode) \
|
90
|
+
{ if (!(cond)) bz_internal_error ( errcode ); }
|
91
|
+
#define AssertD(cond,msg) do { } while (0)
|
92
|
+
#define VPrintf0(zf) do { } while (0)
|
93
|
+
#define VPrintf1(zf,za1) do { } while (0)
|
94
|
+
#define VPrintf2(zf,za1,za2) do { } while (0)
|
95
|
+
#define VPrintf3(zf,za1,za2,za3) do { } while (0)
|
96
|
+
#define VPrintf4(zf,za1,za2,za3,za4) do { } while (0)
|
97
|
+
#define VPrintf5(zf,za1,za2,za3,za4,za5) do { } while (0)
|
98
|
+
|
99
|
+
#endif
|
100
|
+
|
101
|
+
|
102
|
+
#define BZALLOC(nnn) (strm->bzalloc)(strm->opaque,(nnn),1)
|
103
|
+
#define BZFREE(ppp) (strm->bzfree)(strm->opaque,(ppp))
|
104
|
+
|
105
|
+
|
106
|
+
/*-- Header bytes. --*/
|
107
|
+
|
108
|
+
#define BZ_HDR_B 0x42 /* 'B' */
|
109
|
+
#define BZ_HDR_Z 0x5a /* 'Z' */
|
110
|
+
#define BZ_HDR_h 0x68 /* 'h' */
|
111
|
+
#define BZ_HDR_0 0x30 /* '0' */
|
112
|
+
|
113
|
+
/*-- Constants for the back end. --*/
|
114
|
+
|
115
|
+
#define BZ_MAX_ALPHA_SIZE 258
|
116
|
+
#define BZ_MAX_CODE_LEN 23
|
117
|
+
|
118
|
+
#define BZ_RUNA 0
|
119
|
+
#define BZ_RUNB 1
|
120
|
+
|
121
|
+
#define BZ_N_GROUPS 6
|
122
|
+
#define BZ_G_SIZE 50
|
123
|
+
#define BZ_N_ITERS 4
|
124
|
+
|
125
|
+
#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE))
|
126
|
+
|
127
|
+
|
128
|
+
|
129
|
+
/*-- Stuff for randomising repetitive blocks. --*/
|
130
|
+
|
131
|
+
extern Int32 BZ2_rNums[512];
|
132
|
+
|
133
|
+
#define BZ_RAND_DECLS \
|
134
|
+
Int32 rNToGo; \
|
135
|
+
Int32 rTPos \
|
136
|
+
|
137
|
+
#define BZ_RAND_INIT_MASK \
|
138
|
+
s->rNToGo = 0; \
|
139
|
+
s->rTPos = 0 \
|
140
|
+
|
141
|
+
#define BZ_RAND_MASK ((s->rNToGo == 1) ? 1 : 0)
|
142
|
+
|
143
|
+
#define BZ_RAND_UPD_MASK \
|
144
|
+
if (s->rNToGo == 0) { \
|
145
|
+
s->rNToGo = BZ2_rNums[s->rTPos]; \
|
146
|
+
s->rTPos++; \
|
147
|
+
if (s->rTPos == 512) s->rTPos = 0; \
|
148
|
+
} \
|
149
|
+
s->rNToGo--;
|
150
|
+
|
151
|
+
|
152
|
+
|
153
|
+
/*-- Stuff for doing CRCs. --*/
|
154
|
+
|
155
|
+
extern UInt32 BZ2_crc32Table[256];
|
156
|
+
|
157
|
+
#define BZ_INITIALISE_CRC(crcVar) \
|
158
|
+
{ \
|
159
|
+
crcVar = 0xffffffffL; \
|
160
|
+
}
|
161
|
+
|
162
|
+
#define BZ_FINALISE_CRC(crcVar) \
|
163
|
+
{ \
|
164
|
+
crcVar = ~(crcVar); \
|
165
|
+
}
|
166
|
+
|
167
|
+
#define BZ_UPDATE_CRC(crcVar,cha) \
|
168
|
+
{ \
|
169
|
+
crcVar = (crcVar << 8) ^ \
|
170
|
+
BZ2_crc32Table[(crcVar >> 24) ^ \
|
171
|
+
((UChar)cha)]; \
|
172
|
+
}
|
173
|
+
|
174
|
+
|
175
|
+
|
176
|
+
/*-- States and modes for compression. --*/
|
177
|
+
|
178
|
+
#define BZ_M_IDLE 1
|
179
|
+
#define BZ_M_RUNNING 2
|
180
|
+
#define BZ_M_FLUSHING 3
|
181
|
+
#define BZ_M_FINISHING 4
|
182
|
+
|
183
|
+
#define BZ_S_OUTPUT 1
|
184
|
+
#define BZ_S_INPUT 2
|
185
|
+
|
186
|
+
#define BZ_N_RADIX 2
|
187
|
+
#define BZ_N_QSORT 12
|
188
|
+
#define BZ_N_SHELL 18
|
189
|
+
#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2)
|
190
|
+
|
191
|
+
|
192
|
+
|
193
|
+
|
194
|
+
/*-- Structure holding all the compression-side stuff. --*/
|
195
|
+
|
196
|
+
typedef
|
197
|
+
struct {
|
198
|
+
/* pointer back to the struct bz_stream */
|
199
|
+
bz_stream* strm;
|
200
|
+
|
201
|
+
/* mode this stream is in, and whether inputting */
|
202
|
+
/* or outputting data */
|
203
|
+
Int32 mode;
|
204
|
+
Int32 state;
|
205
|
+
|
206
|
+
/* remembers avail_in when flush/finish requested */
|
207
|
+
UInt32 avail_in_expect;
|
208
|
+
|
209
|
+
/* for doing the block sorting */
|
210
|
+
UInt32* arr1;
|
211
|
+
UInt32* arr2;
|
212
|
+
UInt32* ftab;
|
213
|
+
Int32 origPtr;
|
214
|
+
|
215
|
+
/* aliases for arr1 and arr2 */
|
216
|
+
UInt32* ptr;
|
217
|
+
UChar* block;
|
218
|
+
UInt16* mtfv;
|
219
|
+
UChar* zbits;
|
220
|
+
|
221
|
+
/* for deciding when to use the fallback sorting algorithm */
|
222
|
+
Int32 workFactor;
|
223
|
+
|
224
|
+
/* run-length-encoding of the input */
|
225
|
+
UInt32 state_in_ch;
|
226
|
+
Int32 state_in_len;
|
227
|
+
BZ_RAND_DECLS;
|
228
|
+
|
229
|
+
/* input and output limits and current posns */
|
230
|
+
Int32 nblock;
|
231
|
+
Int32 nblockMAX;
|
232
|
+
Int32 numZ;
|
233
|
+
Int32 state_out_pos;
|
234
|
+
|
235
|
+
/* map of bytes used in block */
|
236
|
+
Int32 nInUse;
|
237
|
+
Bool inUse[256];
|
238
|
+
UChar unseqToSeq[256];
|
239
|
+
|
240
|
+
/* the buffer for bit stream creation */
|
241
|
+
UInt32 bsBuff;
|
242
|
+
Int32 bsLive;
|
243
|
+
|
244
|
+
/* block and combined CRCs */
|
245
|
+
UInt32 blockCRC;
|
246
|
+
UInt32 combinedCRC;
|
247
|
+
|
248
|
+
/* misc administratium */
|
249
|
+
Int32 verbosity;
|
250
|
+
Int32 blockNo;
|
251
|
+
Int32 blockSize100k;
|
252
|
+
|
253
|
+
/* stuff for coding the MTF values */
|
254
|
+
Int32 nMTF;
|
255
|
+
Int32 mtfFreq [BZ_MAX_ALPHA_SIZE];
|
256
|
+
UChar selector [BZ_MAX_SELECTORS];
|
257
|
+
UChar selectorMtf[BZ_MAX_SELECTORS];
|
258
|
+
|
259
|
+
UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
260
|
+
Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
261
|
+
Int32 rfreq [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
262
|
+
/* second dimension: only 3 needed; 4 makes index calculations faster */
|
263
|
+
UInt32 len_pack[BZ_MAX_ALPHA_SIZE][4];
|
264
|
+
|
265
|
+
}
|
266
|
+
EState;
|
267
|
+
|
268
|
+
|
269
|
+
|
270
|
+
/*-- externs for compression. --*/
|
271
|
+
|
272
|
+
extern void
|
273
|
+
BZ2_blockSort ( EState* );
|
274
|
+
|
275
|
+
extern void
|
276
|
+
BZ2_compressBlock ( EState*, Bool );
|
277
|
+
|
278
|
+
extern void
|
279
|
+
BZ2_bsInitWrite ( EState* );
|
280
|
+
|
281
|
+
extern void
|
282
|
+
BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 );
|
283
|
+
|
284
|
+
extern void
|
285
|
+
BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 );
|
286
|
+
|
287
|
+
|
288
|
+
|
289
|
+
/*-- states for decompression. --*/
|
290
|
+
|
291
|
+
#define BZ_X_IDLE 1
|
292
|
+
#define BZ_X_OUTPUT 2
|
293
|
+
|
294
|
+
#define BZ_X_MAGIC_1 10
|
295
|
+
#define BZ_X_MAGIC_2 11
|
296
|
+
#define BZ_X_MAGIC_3 12
|
297
|
+
#define BZ_X_MAGIC_4 13
|
298
|
+
#define BZ_X_BLKHDR_1 14
|
299
|
+
#define BZ_X_BLKHDR_2 15
|
300
|
+
#define BZ_X_BLKHDR_3 16
|
301
|
+
#define BZ_X_BLKHDR_4 17
|
302
|
+
#define BZ_X_BLKHDR_5 18
|
303
|
+
#define BZ_X_BLKHDR_6 19
|
304
|
+
#define BZ_X_BCRC_1 20
|
305
|
+
#define BZ_X_BCRC_2 21
|
306
|
+
#define BZ_X_BCRC_3 22
|
307
|
+
#define BZ_X_BCRC_4 23
|
308
|
+
#define BZ_X_RANDBIT 24
|
309
|
+
#define BZ_X_ORIGPTR_1 25
|
310
|
+
#define BZ_X_ORIGPTR_2 26
|
311
|
+
#define BZ_X_ORIGPTR_3 27
|
312
|
+
#define BZ_X_MAPPING_1 28
|
313
|
+
#define BZ_X_MAPPING_2 29
|
314
|
+
#define BZ_X_SELECTOR_1 30
|
315
|
+
#define BZ_X_SELECTOR_2 31
|
316
|
+
#define BZ_X_SELECTOR_3 32
|
317
|
+
#define BZ_X_CODING_1 33
|
318
|
+
#define BZ_X_CODING_2 34
|
319
|
+
#define BZ_X_CODING_3 35
|
320
|
+
#define BZ_X_MTF_1 36
|
321
|
+
#define BZ_X_MTF_2 37
|
322
|
+
#define BZ_X_MTF_3 38
|
323
|
+
#define BZ_X_MTF_4 39
|
324
|
+
#define BZ_X_MTF_5 40
|
325
|
+
#define BZ_X_MTF_6 41
|
326
|
+
#define BZ_X_ENDHDR_2 42
|
327
|
+
#define BZ_X_ENDHDR_3 43
|
328
|
+
#define BZ_X_ENDHDR_4 44
|
329
|
+
#define BZ_X_ENDHDR_5 45
|
330
|
+
#define BZ_X_ENDHDR_6 46
|
331
|
+
#define BZ_X_CCRC_1 47
|
332
|
+
#define BZ_X_CCRC_2 48
|
333
|
+
#define BZ_X_CCRC_3 49
|
334
|
+
#define BZ_X_CCRC_4 50
|
335
|
+
|
336
|
+
|
337
|
+
|
338
|
+
/*-- Constants for the fast MTF decoder. --*/
|
339
|
+
|
340
|
+
#define MTFA_SIZE 4096
|
341
|
+
#define MTFL_SIZE 16
|
342
|
+
|
343
|
+
|
344
|
+
|
345
|
+
/*-- Structure holding all the decompression-side stuff. --*/
|
346
|
+
|
347
|
+
typedef
|
348
|
+
struct {
|
349
|
+
/* pointer back to the struct bz_stream */
|
350
|
+
bz_stream* strm;
|
351
|
+
|
352
|
+
/* state indicator for this stream */
|
353
|
+
Int32 state;
|
354
|
+
|
355
|
+
/* for doing the final run-length decoding */
|
356
|
+
UChar state_out_ch;
|
357
|
+
Int32 state_out_len;
|
358
|
+
Bool blockRandomised;
|
359
|
+
BZ_RAND_DECLS;
|
360
|
+
|
361
|
+
/* the buffer for bit stream reading */
|
362
|
+
UInt32 bsBuff;
|
363
|
+
Int32 bsLive;
|
364
|
+
|
365
|
+
/* misc administratium */
|
366
|
+
Int32 blockSize100k;
|
367
|
+
Bool smallDecompress;
|
368
|
+
Int32 currBlockNo;
|
369
|
+
Int32 verbosity;
|
370
|
+
|
371
|
+
/* for undoing the Burrows-Wheeler transform */
|
372
|
+
Int32 origPtr;
|
373
|
+
UInt32 tPos;
|
374
|
+
Int32 k0;
|
375
|
+
Int32 unzftab[256];
|
376
|
+
Int32 nblock_used;
|
377
|
+
Int32 cftab[257];
|
378
|
+
Int32 cftabCopy[257];
|
379
|
+
|
380
|
+
/* for undoing the Burrows-Wheeler transform (FAST) */
|
381
|
+
UInt32 *tt;
|
382
|
+
|
383
|
+
/* for undoing the Burrows-Wheeler transform (SMALL) */
|
384
|
+
UInt16 *ll16;
|
385
|
+
UChar *ll4;
|
386
|
+
|
387
|
+
/* stored and calculated CRCs */
|
388
|
+
UInt32 storedBlockCRC;
|
389
|
+
UInt32 storedCombinedCRC;
|
390
|
+
UInt32 calculatedBlockCRC;
|
391
|
+
UInt32 calculatedCombinedCRC;
|
392
|
+
|
393
|
+
/* map of bytes used in block */
|
394
|
+
Int32 nInUse;
|
395
|
+
Bool inUse[256];
|
396
|
+
Bool inUse16[16];
|
397
|
+
UChar seqToUnseq[256];
|
398
|
+
|
399
|
+
/* for decoding the MTF values */
|
400
|
+
UChar mtfa [MTFA_SIZE];
|
401
|
+
Int32 mtfbase[256 / MTFL_SIZE];
|
402
|
+
UChar selector [BZ_MAX_SELECTORS];
|
403
|
+
UChar selectorMtf[BZ_MAX_SELECTORS];
|
404
|
+
UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
405
|
+
|
406
|
+
Int32 limit [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
407
|
+
Int32 base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
408
|
+
Int32 perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
409
|
+
Int32 minLens[BZ_N_GROUPS];
|
410
|
+
|
411
|
+
/* save area for scalars in the main decompress code */
|
412
|
+
Int32 save_i;
|
413
|
+
Int32 save_j;
|
414
|
+
Int32 save_t;
|
415
|
+
Int32 save_alphaSize;
|
416
|
+
Int32 save_nGroups;
|
417
|
+
Int32 save_nSelectors;
|
418
|
+
Int32 save_EOB;
|
419
|
+
Int32 save_groupNo;
|
420
|
+
Int32 save_groupPos;
|
421
|
+
Int32 save_nextSym;
|
422
|
+
Int32 save_nblockMAX;
|
423
|
+
Int32 save_nblock;
|
424
|
+
Int32 save_es;
|
425
|
+
Int32 save_N;
|
426
|
+
Int32 save_curr;
|
427
|
+
Int32 save_zt;
|
428
|
+
Int32 save_zn;
|
429
|
+
Int32 save_zvec;
|
430
|
+
Int32 save_zj;
|
431
|
+
Int32 save_gSel;
|
432
|
+
Int32 save_gMinlen;
|
433
|
+
Int32* save_gLimit;
|
434
|
+
Int32* save_gBase;
|
435
|
+
Int32* save_gPerm;
|
436
|
+
|
437
|
+
}
|
438
|
+
DState;
|
439
|
+
|
440
|
+
|
441
|
+
|
442
|
+
/*-- Macros for decompression. --*/
|
443
|
+
|
444
|
+
#define BZ_GET_FAST(cccc) \
|
445
|
+
s->tPos = s->tt[s->tPos]; \
|
446
|
+
cccc = (UChar)(s->tPos & 0xff); \
|
447
|
+
s->tPos >>= 8;
|
448
|
+
|
449
|
+
#define BZ_GET_FAST_C(cccc) \
|
450
|
+
c_tPos = c_tt[c_tPos]; \
|
451
|
+
cccc = (UChar)(c_tPos & 0xff); \
|
452
|
+
c_tPos >>= 8;
|
453
|
+
|
454
|
+
#define SET_LL4(i,n) \
|
455
|
+
{ if (((i) & 0x1) == 0) \
|
456
|
+
s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0xf0) | (n); else \
|
457
|
+
s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0x0f) | ((n) << 4); \
|
458
|
+
}
|
459
|
+
|
460
|
+
#define GET_LL4(i) \
|
461
|
+
((((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4)) & 0xF)
|
462
|
+
|
463
|
+
#define SET_LL(i,n) \
|
464
|
+
{ s->ll16[i] = (UInt16)(n & 0x0000ffff); \
|
465
|
+
SET_LL4(i, n >> 16); \
|
466
|
+
}
|
467
|
+
|
468
|
+
#define GET_LL(i) \
|
469
|
+
(((UInt32)s->ll16[i]) | (GET_LL4(i) << 16))
|
470
|
+
|
471
|
+
#define BZ_GET_SMALL(cccc) \
|
472
|
+
cccc = BZ2_indexIntoF ( s->tPos, s->cftab ); \
|
473
|
+
s->tPos = GET_LL(s->tPos);
|
474
|
+
|
475
|
+
|
476
|
+
/*-- externs for decompression. --*/
|
477
|
+
|
478
|
+
extern Int32
|
479
|
+
BZ2_indexIntoF ( Int32, Int32* );
|
480
|
+
|
481
|
+
extern Int32
|
482
|
+
BZ2_decompress ( DState* );
|
483
|
+
|
484
|
+
extern void
|
485
|
+
BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*,
|
486
|
+
Int32, Int32, Int32 );
|
487
|
+
|
488
|
+
|
489
|
+
#endif
|
490
|
+
|
491
|
+
|
492
|
+
/*-- BZ_NO_STDIO seems to make NULL disappear on some platforms. --*/
|
493
|
+
|
494
|
+
#ifdef BZ_NO_STDIO
|
495
|
+
#ifndef NULL
|
496
|
+
#define NULL 0
|
497
|
+
#endif
|
498
|
+
#endif
|
499
|
+
|
500
|
+
|
501
|
+
/*-------------------------------------------------------------*/
|
502
|
+
/*--- end bzlib_private.h ---*/
|
503
|
+
/*-------------------------------------------------------------*/
|