isomorfeus-ferret 0.17.1 → 0.17.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (130) hide show
  1. checksums.yaml +4 -4
  2. data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
  3. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
  4. data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
  5. data/ext/isomorfeus_ferret_ext/bm_store.c +2 -0
  6. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
  7. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
  8. data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
  9. data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
  10. data/ext/isomorfeus_ferret_ext/frb_index.c +118 -160
  11. data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
  12. data/ext/isomorfeus_ferret_ext/frb_search.c +31 -23
  13. data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
  14. data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
  15. data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
  16. data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
  17. data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
  18. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
  19. data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
  20. data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
  21. data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
  22. data/ext/isomorfeus_ferret_ext/frt_document.h +2 -27
  23. data/ext/isomorfeus_ferret_ext/frt_except.c +50 -6
  24. data/ext/isomorfeus_ferret_ext/frt_except.h +3 -2
  25. data/ext/isomorfeus_ferret_ext/frt_field_index.c +13 -32
  26. data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
  27. data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
  28. data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
  29. data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
  30. data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
  31. data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
  32. data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
  33. data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
  34. data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
  35. data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
  36. data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
  37. data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
  38. data/ext/isomorfeus_ferret_ext/frt_in_stream.c +482 -0
  39. data/ext/isomorfeus_ferret_ext/frt_in_stream.h +241 -0
  40. data/ext/isomorfeus_ferret_ext/frt_ind.c +20 -49
  41. data/ext/isomorfeus_ferret_ext/frt_ind.h +0 -1
  42. data/ext/isomorfeus_ferret_ext/frt_index.c +296 -1857
  43. data/ext/isomorfeus_ferret_ext/frt_index.h +2 -145
  44. data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
  45. data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +29 -0
  46. data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +19 -0
  47. data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +93 -0
  48. data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +33 -0
  49. data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
  50. data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
  51. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
  52. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
  53. data/ext/isomorfeus_ferret_ext/frt_out_stream.c +334 -0
  54. data/ext/isomorfeus_ferret_ext/frt_out_stream.h +198 -0
  55. data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
  56. data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
  57. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
  58. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
  59. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +85 -171
  60. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +8 -16
  61. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +1 -2
  62. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
  63. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +52 -104
  64. data/ext/isomorfeus_ferret_ext/frt_q_range.c +6 -12
  65. data/ext/isomorfeus_ferret_ext/frt_q_span.c +113 -226
  66. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
  67. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
  68. data/ext/isomorfeus_ferret_ext/frt_ram_store.h +12 -0
  69. data/ext/isomorfeus_ferret_ext/frt_search.c +82 -164
  70. data/ext/isomorfeus_ferret_ext/frt_similarity.c +11 -22
  71. data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
  72. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -536
  73. data/ext/isomorfeus_ferret_ext/frt_store.h +90 -495
  74. data/ext/isomorfeus_ferret_ext/frt_stream.h +18 -0
  75. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
  76. data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
  77. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
  78. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
  79. data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
  80. data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
  81. data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
  82. data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
  83. data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
  84. data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
  85. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
  86. data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
  87. data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
  88. data/ext/isomorfeus_ferret_ext/test.c +40 -87
  89. data/ext/isomorfeus_ferret_ext/test.h +3 -6
  90. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
  91. data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
  92. data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
  93. data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
  94. data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
  95. data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
  96. data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
  97. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +15 -24
  98. data/ext/isomorfeus_ferret_ext/test_filter.c +17 -27
  99. data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
  100. data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
  101. data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
  102. data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
  103. data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
  104. data/ext/isomorfeus_ferret_ext/test_index.c +277 -495
  105. data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
  106. data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
  107. data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
  108. data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
  109. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
  110. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
  111. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
  112. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
  113. data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
  114. data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
  115. data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
  116. data/ext/isomorfeus_ferret_ext/test_search.c +60 -109
  117. data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
  118. data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
  119. data/ext/isomorfeus_ferret_ext/test_sort.c +14 -24
  120. data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
  121. data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
  122. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
  123. data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
  124. data/ext/isomorfeus_ferret_ext/test_threading.c +15 -30
  125. data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
  126. data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
  127. data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
  128. data/lib/isomorfeus/ferret/index/index.rb +1 -12
  129. data/lib/isomorfeus/ferret/version.rb +1 -1
  130. metadata +43 -4
@@ -0,0 +1,482 @@
1
+ /* prevent warning: #warning Please include winsock2.h before windows.h [-Wcpp] */
2
+ #ifdef _WIN32
3
+ # include <winsock2.h>
4
+ #endif
5
+
6
+ #include "brotli_decode.h"
7
+ #include "brotli_encode.h"
8
+ #include "bzlib.h"
9
+ #include "lz4frame.h"
10
+ #include "frt_except.h"
11
+ #include "frt_in_stream.h"
12
+
13
+ /**
14
+ * Create a newly allocated and initialized InStream
15
+ *
16
+ * @return a newly allocated and initialized InStream
17
+ */
18
+ FrtInStream *frt_is_new(void) {
19
+ FrtInStream *is = FRT_ALLOC(FrtInStream);
20
+ is->folder_name = NULL;
21
+ is->f = FRT_ALLOC_AND_ZERO(FrtInStreamFile);
22
+ is->f->ref_cnt = 1;
23
+ is->buf.start = 0;
24
+ is->buf.pos = 0;
25
+ is->buf.len = 0;
26
+ is->ref_cnt = 1;
27
+ return is;
28
+ }
29
+
30
+ /**
31
+ * Refill the InStream's buffer from the store source (filesystem or memory).
32
+ *
33
+ * @param is the InStream to refill
34
+ * @raise FRT_IO_ERROR if there is a error reading from the filesystem
35
+ * @raise FRT_EOF_ERROR if there is an attempt to read past the end of the file
36
+ */
37
+ static void frt_is_refill(FrtInStream *is) {
38
+ frt_off_t start = is->buf.start + is->buf.pos;
39
+ frt_off_t last = start + FRT_BUFFER_SIZE;
40
+ frt_off_t flen = is->m->length_i(is);
41
+
42
+ if (last > flen) { /* don't read past EOF */
43
+ last = flen;
44
+ }
45
+
46
+ is->buf.len = last - start;
47
+ if (is->buf.len <= 0) {
48
+ FRT_RAISE(FRT_EOF_ERROR, "current pos = %"FRT_OFF_T_PFX"d, "
49
+ "file length = %"FRT_OFF_T_PFX"d", start, flen);
50
+ }
51
+
52
+ is->m->read_i(is, is->buf.buf, is->buf.len);
53
+
54
+ is->buf.start = start;
55
+ is->buf.pos = 0;
56
+ }
57
+
58
/**
 * Unsafe alternative to frt_is_read_byte: fetch the byte under the buffer
 * cursor and advance the cursor, without checking whether the buffer has been
 * exhausted. Only use this macro when you know there is no chance that you
 * will read past the end of the InStream's buffer.
 *
 * The argument and the whole expansion are parenthesized so that the macro
 * can be used with any pointer expression and inside larger expressions.
 * Note that +is+ is evaluated twice, so it must not have side effects.
 */
#define read_byte(is) ((is)->buf.buf[(is)->buf.pos++])
64
+
65
+ /**
66
+ * Read a singly byte (unsigned char) from the InStream +is+.
67
+ *
68
+ * @param is the Instream to read from
69
+ * @return a single unsigned char read from the InStream +is+
70
+ * @raise FRT_IO_ERROR if there is a error reading from the filesystem
71
+ * @raise FRT_EOF_ERROR if there is an attempt to read past the end of the file
72
+ */
73
+ frt_uchar frt_is_read_byte(FrtInStream *is) {
74
+ if (is->buf.pos >= is->buf.len) {
75
+ frt_is_refill(is);
76
+ }
77
+ return read_byte(is);
78
+ }
79
+
80
+ off_t frt_is_pos(FrtInStream *is) {
81
+ return is->buf.start + is->buf.pos;
82
+ }
83
+
84
+ frt_uchar *frt_is_read_bytes(FrtInStream *is, frt_uchar *buf, int len) {
85
+ int i;
86
+ frt_off_t start;
87
+
88
+ if ((is->buf.pos + len) < is->buf.len) {
89
+ for (i = 0; i < len; i++) {
90
+ buf[i] = read_byte(is);
91
+ }
92
+ } else { /* read all-at-once */
93
+ start = frt_is_pos(is);
94
+ is->m->seek_i(is, start);
95
+ is->m->read_i(is, buf, len);
96
+
97
+ is->buf.start = start + len; /* adjust stream variables */
98
+ is->buf.pos = 0;
99
+ is->buf.len = 0; /* trigger refill on read */
100
+ }
101
+ return buf;
102
+ }
103
+
104
+ void frt_is_seek(FrtInStream *is, frt_off_t pos) {
105
+ if (pos >= is->buf.start && pos < (is->buf.start + is->buf.len)) {
106
+ is->buf.pos = pos - is->buf.start; /* seek within buffer */
107
+ } else {
108
+ is->buf.start = pos;
109
+ is->buf.pos = 0;
110
+ is->buf.len = 0; /* trigger refill() on read() */
111
+ is->m->seek_i(is, pos);
112
+ }
113
+ }
114
+
115
+ void frt_is_close(FrtInStream *is) {
116
+ if (is->ref_cnt == 0) {
117
+ FRT_RAISE(FRT_STATE_ERROR, "is ref_cnt to low\n");
118
+ }
119
+
120
+ if (FRT_DEREF(is) == 0) {
121
+ if (FRT_DEREF(is->f) == 0) {
122
+ is->m->close_i(is);
123
+ free(is->f);
124
+ }
125
+ if (is->folder_name) {
126
+ free((void *)is->folder_name);
127
+ }
128
+ free(is);
129
+ }
130
+ }
131
+
132
+ FrtInStream *frt_is_clone(FrtInStream *is) {
133
+ if (!(is->f))
134
+ return NULL;
135
+ FrtInStream *new_is = FRT_ALLOC(FrtInStream);
136
+ memcpy(new_is, is, sizeof(FrtInStream));
137
+ if (is->folder_name) {
138
+ new_is->folder_name = frt_estrdup(is->folder_name);
139
+ }
140
+ new_is->ref_cnt = 1;
141
+ FRT_REF(new_is->f);
142
+ return new_is;
143
+ }
144
+
145
+ frt_i32 frt_is_read_i32(FrtInStream *is) {
146
+ return ((frt_i32)frt_is_read_byte(is) << 24) |
147
+ ((frt_i32)frt_is_read_byte(is) << 16) |
148
+ ((frt_i32)frt_is_read_byte(is) << 8) |
149
+ ((frt_i32)frt_is_read_byte(is));
150
+ }
151
+
152
+
153
+ frt_i64 frt_is_read_i64(FrtInStream *is) {
154
+ return ((frt_i64)frt_is_read_byte(is) << 56) |
155
+ ((frt_i64)frt_is_read_byte(is) << 48) |
156
+ ((frt_i64)frt_is_read_byte(is) << 40) |
157
+ ((frt_i64)frt_is_read_byte(is) << 32) |
158
+ ((frt_i64)frt_is_read_byte(is) << 24) |
159
+ ((frt_i64)frt_is_read_byte(is) << 16) |
160
+ ((frt_i64)frt_is_read_byte(is) << 8) |
161
+ ((frt_i64)frt_is_read_byte(is));
162
+ }
163
+
164
+
165
+ frt_u32 frt_is_read_u32(FrtInStream *is) {
166
+ return ((frt_u32)frt_is_read_byte(is) << 24) |
167
+ ((frt_u32)frt_is_read_byte(is) << 16) |
168
+ ((frt_u32)frt_is_read_byte(is) << 8) |
169
+ ((frt_u32)frt_is_read_byte(is));
170
+ }
171
+
172
+ frt_u64 frt_is_read_u64(FrtInStream *is) {
173
+ return ((frt_u64)frt_is_read_byte(is) << 56) |
174
+ ((frt_u64)frt_is_read_byte(is) << 48) |
175
+ ((frt_u64)frt_is_read_byte(is) << 40) |
176
+ ((frt_u64)frt_is_read_byte(is) << 32) |
177
+ ((frt_u64)frt_is_read_byte(is) << 24) |
178
+ ((frt_u64)frt_is_read_byte(is) << 16) |
179
+ ((frt_u64)frt_is_read_byte(is) << 8) |
180
+ ((frt_u64)frt_is_read_byte(is));
181
+ }
182
+
183
+
184
+ /* optimized to use unchecked read_byte if there is definitely space */
185
+ unsigned int frt_is_read_vint(FrtInStream *is) {
186
+ register unsigned int res, b;
187
+ register int shift = 7;
188
+
189
+ if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
190
+ b = frt_is_read_byte(is);
191
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
192
+
193
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
194
+ b = frt_is_read_byte(is);
195
+ res |= (b & 0x7F) << shift;
196
+ shift += 7;
197
+ }
198
+ } else { /* unchecked optimization */
199
+ b = read_byte(is);
200
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
201
+
202
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
203
+ b = read_byte(is);
204
+ res |= (b & 0x7F) << shift;
205
+ shift += 7;
206
+ }
207
+ }
208
+
209
+ return res;
210
+ }
211
+
212
+
213
+ /* optimized to use unchecked read_byte if there is definitely space */
214
+ off_t frt_is_read_voff_t(FrtInStream *is) {
215
+ register frt_off_t res, b;
216
+ register int shift = 7;
217
+
218
+ if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
219
+ b = frt_is_read_byte(is);
220
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
221
+
222
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
223
+ b = frt_is_read_byte(is);
224
+ res |= (b & 0x7F) << shift;
225
+ shift += 7;
226
+ }
227
+ } else { /* unchecked optimization */
228
+ b = read_byte(is);
229
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
230
+
231
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
232
+ b = read_byte(is);
233
+ res |= (b & 0x7F) << shift;
234
+ shift += 7;
235
+ }
236
+ }
237
+
238
+ return res;
239
+ }
240
+
241
+ /* optimized to use unchecked read_byte if there is definitely space */
242
+ frt_u64 frt_is_read_vll(FrtInStream *is) {
243
+ register frt_u64 res, b;
244
+ register int shift = 7;
245
+
246
+ if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
247
+ b = frt_is_read_byte(is);
248
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
249
+
250
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
251
+ b = frt_is_read_byte(is);
252
+ res |= (b & 0x7F) << shift;
253
+ shift += 7;
254
+ }
255
+ } else { /* unchecked optimization */
256
+ b = read_byte(is);
257
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
258
+
259
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
260
+ b = read_byte(is);
261
+ res |= (b & 0x7F) << shift;
262
+ shift += 7;
263
+ }
264
+ }
265
+
266
+ return res;
267
+ }
268
+
269
+ void frt_is_skip_vints(FrtInStream *is, register int cnt) {
270
+ for (; cnt > 0; cnt--) {
271
+ while ((frt_is_read_byte(is) & 0x80) != 0) {
272
+ }
273
+ }
274
+ }
275
+
276
+ char *frt_is_read_string(FrtInStream *is) {
277
+ register int length = (int) frt_is_read_vint(is);
278
+ char *str = FRT_ALLOC_N(char, length + 1);
279
+ str[length] = '\0';
280
+
281
+ if (is->buf.pos > (is->buf.len - length)) {
282
+ register int i;
283
+ for (i = 0; i < length; i++) {
284
+ str[i] = frt_is_read_byte(is);
285
+ }
286
+ } else { /* unchecked optimization */
287
+ memcpy(str, is->buf.buf + is->buf.pos, length);
288
+ is->buf.pos += length;
289
+ }
290
+
291
+ return str;
292
+ }
293
+
294
+ char *frt_is_read_string_safe(FrtInStream *is) {
295
+ register int length = (int) frt_is_read_vint(is);
296
+ char *str = FRT_ALLOC_N(char, length + 1);
297
+ str[length] = '\0';
298
+
299
+ FRT_TRY
300
+ if (is->buf.pos > (is->buf.len - length)) {
301
+ register int i;
302
+ for (i = 0; i < length; i++) {
303
+ str[i] = frt_is_read_byte(is);
304
+ }
305
+ } else { /* unchecked optimization */
306
+ memcpy(str, is->buf.buf + is->buf.pos, length);
307
+ is->buf.pos += length;
308
+ }
309
+ FRT_XCATCHALL
310
+ free(str);
311
+ FRT_XENDTRY
312
+
313
+ return str;
314
+ }
315
+
316
+ static char *frt_is_read_brotli_compressed_bytes(FrtInStream *is, int compressed_len, int *len) {
317
+ int buf_out_idx = 0;
318
+ int read_len;
319
+ frt_uchar buf_in[FRT_COMPRESSION_BUFFER_SIZE];
320
+ const frt_uchar *next_in;
321
+ size_t available_in;
322
+ frt_uchar *buf_out = NULL;
323
+ frt_uchar *next_out;
324
+ size_t available_out;
325
+
326
+ BrotliDecoderState *b_state = BrotliDecoderCreateInstance(NULL, NULL, NULL);
327
+ BrotliDecoderResult b_result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
328
+ if (!b_state) { frt_comp_raise(); return NULL; }
329
+
330
+ do {
331
+ read_len = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
332
+ frt_is_read_bytes(is, buf_in, read_len);
333
+ compressed_len -= read_len;
334
+ available_in = read_len;
335
+ next_in = buf_in;
336
+ available_out = FRT_COMPRESSION_BUFFER_SIZE;
337
+ do {
338
+ FRT_REALLOC_N(buf_out, frt_uchar, buf_out_idx + FRT_COMPRESSION_BUFFER_SIZE);
339
+ next_out = buf_out + buf_out_idx;
340
+ b_result = BrotliDecoderDecompressStream(b_state,
341
+ &available_in, &next_in,
342
+ &available_out, &next_out, NULL);
343
+ if (b_result == BROTLI_DECODER_RESULT_ERROR) { frt_comp_raise(); return NULL; }
344
+ buf_out_idx += FRT_COMPRESSION_BUFFER_SIZE - available_out;
345
+ } while (b_result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT);
346
+ } while (b_result != BROTLI_DECODER_RESULT_SUCCESS && compressed_len > 0);
347
+
348
+ BrotliDecoderDestroyInstance(b_state);
349
+
350
+ FRT_REALLOC_N(buf_out, frt_uchar, buf_out_idx + 1);
351
+ buf_out[buf_out_idx] = '\0';
352
+ *len = buf_out_idx;
353
+ return (char *)buf_out;
354
+ }
355
+
356
+ static char *frt_is_read_bz2_compressed_bytes(FrtInStream *is, int compressed_len, int *len) {
357
+ int buf_out_idx = 0, ret, read_len;
358
+ char *buf_out = NULL;
359
+ char buf_in[FRT_COMPRESSION_BUFFER_SIZE];
360
+ bz_stream zstrm;
361
+ zstrm.bzalloc = NULL;
362
+ zstrm.bzfree = NULL;
363
+ zstrm.opaque = NULL;
364
+ zstrm.next_in = NULL;
365
+ zstrm.avail_in = 0;
366
+ if ((ret = BZ2_bzDecompressInit(&zstrm, 0, 0)) != BZ_OK) frt_zraise(ret);
367
+
368
+ do {
369
+ read_len = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
370
+ frt_is_read_bytes(is, (frt_uchar *)buf_in, read_len);
371
+ compressed_len -= read_len;
372
+ zstrm.avail_in = read_len;
373
+ zstrm.next_in = buf_in;
374
+ zstrm.avail_out = FRT_COMPRESSION_BUFFER_SIZE;
375
+
376
+ do {
377
+ REALLOC_N(buf_out, char, buf_out_idx + FRT_COMPRESSION_BUFFER_SIZE);
378
+ zstrm.next_out = buf_out + buf_out_idx;
379
+ ret = BZ2_bzDecompress(&zstrm);
380
+ assert(ret != BZ_SEQUENCE_ERROR); /* state not clobbered */
381
+ if (ret != BZ_OK && ret != BZ_STREAM_END) {
382
+ (void)BZ2_bzDecompressEnd(&zstrm);
383
+ frt_zraise(ret);
384
+ }
385
+ buf_out_idx += FRT_COMPRESSION_BUFFER_SIZE - zstrm.avail_out;
386
+ } while (zstrm.avail_out == 0);
387
+ } while (ret != BZ_STREAM_END && compressed_len != 0);
388
+
389
+ (void)BZ2_bzDecompressEnd(&zstrm);
390
+
391
+ FRT_REALLOC_N(buf_out, char, buf_out_idx + 1);
392
+ buf_out[buf_out_idx] = '\0';
393
+
394
+ *len = buf_out_idx;
395
+ return (char *)buf_out;
396
+ }
397
+
398
+ static char *frt_is_read_lz4_compressed_bytes(FrtInStream *is, int compressed_len, int *length) {
399
+ frt_uchar buf_in[FRT_COMPRESSION_BUFFER_SIZE];
400
+ char *buf_out = NULL;
401
+ int dc_length = 0;
402
+ LZ4F_dctx *dctx;
403
+ LZ4F_frameInfo_t frame_info;
404
+ LZ4F_errorCode_t dctx_status = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION);
405
+ if (LZ4F_isError(dctx_status)) { *length = -1; return NULL; }
406
+
407
+ /* header and buffer */
408
+ int read_length = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
409
+ frt_is_read_bytes(is, buf_in, read_length);
410
+ compressed_len -= read_length;
411
+
412
+ size_t consumed_size = read_length;
413
+ size_t res = LZ4F_getFrameInfo(dctx, &frame_info, buf_in, &consumed_size);
414
+ if (LZ4F_isError(res)) { *length = -1; return NULL; }
415
+ size_t buf_out_length;
416
+ switch(frame_info.blockSizeID) {
417
+ case LZ4F_default:
418
+ case LZ4F_max64KB:
419
+ buf_out_length = 1 << 16;
420
+ break;
421
+ case LZ4F_max256KB:
422
+ buf_out_length = 1 << 18;
423
+ break;
424
+ case LZ4F_max1MB:
425
+ buf_out_length = 1 << 20;
426
+ break;
427
+ case LZ4F_max4MB:
428
+ buf_out_length = 1 << 22;
429
+ break;
430
+ default:
431
+ buf_out_length = 0;
432
+ }
433
+
434
+ res = 1;
435
+ int first_chunk = 1;
436
+
437
+ /* decompress data */
438
+ while (res != 0) {
439
+ if (!first_chunk) {
440
+ read_length = (compressed_len > FRT_COMPRESSION_BUFFER_SIZE) ? FRT_COMPRESSION_BUFFER_SIZE : compressed_len;
441
+ frt_is_read_bytes(is, buf_in, read_length);
442
+ compressed_len -= read_length;
443
+ consumed_size = 0;
444
+ }
445
+ first_chunk = 0;
446
+
447
+ char *src = (char *)(buf_in + consumed_size);
448
+ char *src_end = (char *)buf_in + read_length;
449
+
450
+ while (src < src_end && res != 0){
451
+ size_t dest_length = buf_out_length;
452
+ size_t consumed_size = read_length;
453
+ FRT_REALLOC_N(buf_out, char, dc_length + buf_out_length);
454
+ res = LZ4F_decompress(dctx, buf_out + dc_length, &dest_length, src, &consumed_size, NULL);
455
+ if (LZ4F_isError(res)) { *length = -1; return NULL; }
456
+ dc_length += dest_length;
457
+ src = src + consumed_size;
458
+ }
459
+ }
460
+
461
+ /* finish up */
462
+ LZ4F_freeDecompressionContext(dctx);
463
+
464
+ FRT_REALLOC_N(buf_out, char, dc_length + 1);
465
+ buf_out[dc_length] = '\0';
466
+
467
+ *length = dc_length;
468
+ return buf_out;
469
+ }
470
+
471
+ char *frt_is_read_compressed_bytes(FrtInStream *is, int compressed_len, int *len, FrtCompressionType compression_type) {
472
+ switch (compression_type) {
473
+ case FRT_COMPRESSION_BROTLI:
474
+ return frt_is_read_brotli_compressed_bytes(is, compressed_len, len);
475
+ case FRT_COMPRESSION_BZ2:
476
+ return frt_is_read_bz2_compressed_bytes(is, compressed_len, len);
477
+ case FRT_COMPRESSION_LZ4:
478
+ return frt_is_read_lz4_compressed_bytes(is, compressed_len, len);
479
+ default:
480
+ return NULL;
481
+ }
482
+ }