ngs_server 0.4 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/tabix/bgzf.c ADDED
@@ -0,0 +1,714 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
22
+ */
23
+
24
+ /*
25
+ 2009-06-29 by lh3: cache recent uncompressed blocks.
26
+ 2009-06-25 by lh3: optionally use my knetfile library to access file on a FTP.
27
+ 2009-06-12 by lh3: support a mode string like "wu" where 'u' for uncompressed output */
28
+
29
+ #include <stdio.h>
30
+ #include <stdlib.h>
31
+ #include <string.h>
32
+ #include <unistd.h>
33
+ #include <fcntl.h>
34
+ #include <sys/types.h>
35
+ #include <sys/stat.h>
36
+ #include "bgzf.h"
37
+
38
+ #include "khash.h"
39
+ typedef struct {
40
+ int size;
41
+ uint8_t *block;
42
+ int64_t end_offset;
43
+ } cache_t;
44
+ KHASH_MAP_INIT_INT64(cache, cache_t)
45
+
46
+ #if defined(_WIN32) || defined(_MSC_VER)
47
+ #define ftello(fp) ftell(fp)
48
+ #define fseeko(fp, offset, whence) fseek(fp, offset, whence)
49
+ #else
50
+ extern off_t ftello(FILE *stream);
51
+ extern int fseeko(FILE *stream, off_t offset, int whence);
52
+ #endif
53
+
54
+ typedef int8_t bgzf_byte_t;
55
+
56
+ static const int DEFAULT_BLOCK_SIZE = 64 * 1024;
57
+ static const int MAX_BLOCK_SIZE = 64 * 1024;
58
+
59
+ static const int BLOCK_HEADER_LENGTH = 18;
60
+ static const int BLOCK_FOOTER_LENGTH = 8;
61
+
62
+ static const int GZIP_ID1 = 31;
63
+ static const int GZIP_ID2 = 139;
64
+ static const int CM_DEFLATE = 8;
65
+ static const int FLG_FEXTRA = 4;
66
+ static const int OS_UNKNOWN = 255;
67
+ static const int BGZF_ID1 = 66; // 'B'
68
+ static const int BGZF_ID2 = 67; // 'C'
69
+ static const int BGZF_LEN = 2;
70
+ static const int BGZF_XLEN = 6; // BGZF_LEN+4
71
+
72
+ static const int GZIP_WINDOW_BITS = -15; // no zlib header
73
+ static const int Z_DEFAULT_MEM_LEVEL = 8;
74
+
75
+
76
+ inline
77
+ void
78
+ packInt16(uint8_t* buffer, uint16_t value)
79
+ {
80
+ buffer[0] = value;
81
+ buffer[1] = value >> 8;
82
+ }
83
+
84
+ inline
85
+ int
86
+ unpackInt16(const uint8_t* buffer)
87
+ {
88
+ return (buffer[0] | (buffer[1] << 8));
89
+ }
90
+
91
+ inline
92
+ void
93
+ packInt32(uint8_t* buffer, uint32_t value)
94
+ {
95
+ buffer[0] = value;
96
+ buffer[1] = value >> 8;
97
+ buffer[2] = value >> 16;
98
+ buffer[3] = value >> 24;
99
+ }
100
+
101
+ static inline
102
+ int
103
+ bgzf_min(int x, int y)
104
+ {
105
+ return (x < y) ? x : y;
106
+ }
107
+
108
+ static
109
+ void
110
+ report_error(BGZF* fp, const char* message) {
111
+ fp->error = message;
112
+ }
113
+
114
+ int bgzf_check_bgzf(const char *fn)
115
+ {
116
+ BGZF *fp;
117
+ uint8_t buf[10],magic[10]="\037\213\010\4\0\0\0\0\0\377";
118
+ int n;
119
+
120
+ if ((fp = bgzf_open(fn, "r")) == 0)
121
+ {
122
+ fprintf(stderr, "[bgzf_check_bgzf] failed to open the file: %s\n",fn);
123
+ return -1;
124
+ }
125
+
126
+ #ifdef _USE_KNETFILE
127
+ n = knet_read(fp->x.fpr, buf, 10);
128
+ #else
129
+ n = fread(buf, 1, 10, fp->file);
130
+ #endif
131
+ bgzf_close(fp);
132
+
133
+ if ( n!=10 )
134
+ return -1;
135
+
136
+ if ( !memcmp(magic, buf, 10) ) return 1;
137
+ return 0;
138
+ }
139
+
140
+ static BGZF *bgzf_read_init()
141
+ {
142
+ BGZF *fp;
143
+ fp = calloc(1, sizeof(BGZF));
144
+ fp->uncompressed_block_size = MAX_BLOCK_SIZE;
145
+ fp->uncompressed_block = malloc(MAX_BLOCK_SIZE);
146
+ fp->compressed_block_size = MAX_BLOCK_SIZE;
147
+ fp->compressed_block = malloc(MAX_BLOCK_SIZE);
148
+ fp->cache_size = 0;
149
+ fp->cache = kh_init(cache);
150
+ return fp;
151
+ }
152
+
153
+ static
154
+ BGZF*
155
+ open_read(int fd)
156
+ {
157
+ #ifdef _USE_KNETFILE
158
+ knetFile *file = knet_dopen(fd, "r");
159
+ #else
160
+ FILE* file = fdopen(fd, "r");
161
+ #endif
162
+ BGZF* fp;
163
+ if (file == 0) return 0;
164
+ fp = bgzf_read_init();
165
+ fp->file_descriptor = fd;
166
+ fp->open_mode = 'r';
167
+ #ifdef _USE_KNETFILE
168
+ fp->x.fpr = file;
169
+ #else
170
+ fp->file = file;
171
+ #endif
172
+ return fp;
173
+ }
174
+
175
+ static
176
+ BGZF*
177
+ open_write(int fd, int compress_level) // compress_level==-1 for the default level
178
+ {
179
+ FILE* file = fdopen(fd, "w");
180
+ BGZF* fp;
181
+ if (file == 0) return 0;
182
+ fp = malloc(sizeof(BGZF));
183
+ fp->file_descriptor = fd;
184
+ fp->open_mode = 'w';
185
+ fp->owned_file = 0;
186
+ fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1
187
+ if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION;
188
+ #ifdef _USE_KNETFILE
189
+ fp->x.fpw = file;
190
+ #else
191
+ fp->file = file;
192
+ #endif
193
+ fp->uncompressed_block_size = DEFAULT_BLOCK_SIZE;
194
+ fp->uncompressed_block = NULL;
195
+ fp->compressed_block_size = MAX_BLOCK_SIZE;
196
+ fp->compressed_block = malloc(MAX_BLOCK_SIZE);
197
+ fp->block_address = 0;
198
+ fp->block_offset = 0;
199
+ fp->block_length = 0;
200
+ fp->error = NULL;
201
+ return fp;
202
+ }
203
+
204
+ BGZF*
205
+ bgzf_open(const char* __restrict path, const char* __restrict mode)
206
+ {
207
+ BGZF* fp = NULL;
208
+ if (strchr(mode, 'r') || strchr(mode, 'R')) { /* The reading mode is preferred. */
209
+ #ifdef _USE_KNETFILE
210
+ knetFile *file = knet_open(path, mode);
211
+ if (file == 0) return 0;
212
+ fp = bgzf_read_init();
213
+ fp->file_descriptor = -1;
214
+ fp->open_mode = 'r';
215
+ fp->x.fpr = file;
216
+ #else
217
+ int fd, oflag = O_RDONLY;
218
+ #ifdef _WIN32
219
+ oflag |= O_BINARY;
220
+ #endif
221
+ fd = open(path, oflag);
222
+ if (fd == -1) return 0;
223
+ fp = open_read(fd);
224
+ #endif
225
+ } else if (strchr(mode, 'w') || strchr(mode, 'W')) {
226
+ int fd, compress_level = -1, oflag = O_WRONLY | O_CREAT | O_TRUNC;
227
+ #ifdef _WIN32
228
+ oflag |= O_BINARY;
229
+ #endif
230
+ fd = open(path, oflag, 0666);
231
+ if (fd == -1) return 0;
232
+ { // set compress_level
233
+ int i;
234
+ for (i = 0; mode[i]; ++i)
235
+ if (mode[i] >= '0' && mode[i] <= '9') break;
236
+ if (mode[i]) compress_level = (int)mode[i] - '0';
237
+ if (strchr(mode, 'u')) compress_level = 0;
238
+ }
239
+ fp = open_write(fd, compress_level);
240
+ }
241
+ if (fp != NULL) fp->owned_file = 1;
242
+ return fp;
243
+ }
244
+
245
+ BGZF*
246
+ bgzf_fdopen(int fd, const char * __restrict mode)
247
+ {
248
+ if (fd == -1) return 0;
249
+ if (mode[0] == 'r' || mode[0] == 'R') {
250
+ return open_read(fd);
251
+ } else if (mode[0] == 'w' || mode[0] == 'W') {
252
+ int i, compress_level = -1;
253
+ for (i = 0; mode[i]; ++i)
254
+ if (mode[i] >= '0' && mode[i] <= '9') break;
255
+ if (mode[i]) compress_level = (int)mode[i] - '0';
256
+ if (strchr(mode, 'u')) compress_level = 0;
257
+ return open_write(fd, compress_level);
258
+ } else {
259
+ return NULL;
260
+ }
261
+ }
262
+
263
+ static
264
+ int
265
+ deflate_block(BGZF* fp, int block_length)
266
+ {
267
+ // Deflate the block in fp->uncompressed_block into fp->compressed_block.
268
+ // Also adds an extra field that stores the compressed block length.
269
+
270
+ bgzf_byte_t* buffer = fp->compressed_block;
271
+ int buffer_size = fp->compressed_block_size;
272
+
273
+ // Init gzip header
274
+ buffer[0] = GZIP_ID1;
275
+ buffer[1] = GZIP_ID2;
276
+ buffer[2] = CM_DEFLATE;
277
+ buffer[3] = FLG_FEXTRA;
278
+ buffer[4] = 0; // mtime
279
+ buffer[5] = 0;
280
+ buffer[6] = 0;
281
+ buffer[7] = 0;
282
+ buffer[8] = 0;
283
+ buffer[9] = OS_UNKNOWN;
284
+ buffer[10] = BGZF_XLEN;
285
+ buffer[11] = 0;
286
+ buffer[12] = BGZF_ID1;
287
+ buffer[13] = BGZF_ID2;
288
+ buffer[14] = BGZF_LEN;
289
+ buffer[15] = 0;
290
+ buffer[16] = 0; // placeholder for block length
291
+ buffer[17] = 0;
292
+
293
+ // loop to retry for blocks that do not compress enough
294
+ int input_length = block_length;
295
+ int compressed_length = 0;
296
+ while (1) {
297
+ z_stream zs;
298
+ zs.zalloc = NULL;
299
+ zs.zfree = NULL;
300
+ zs.next_in = fp->uncompressed_block;
301
+ zs.avail_in = input_length;
302
+ zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH];
303
+ zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
304
+
305
+ int status = deflateInit2(&zs, fp->compress_level, Z_DEFLATED,
306
+ GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
307
+ if (status != Z_OK) {
308
+ report_error(fp, "deflate init failed");
309
+ return -1;
310
+ }
311
+ status = deflate(&zs, Z_FINISH);
312
+ if (status != Z_STREAM_END) {
313
+ deflateEnd(&zs);
314
+ if (status == Z_OK) {
315
+ // Not enough space in buffer.
316
+ // Can happen in the rare case the input doesn't compress enough.
317
+ // Reduce the amount of input until it fits.
318
+ input_length -= 1024;
319
+ if (input_length <= 0) {
320
+ // should never happen
321
+ report_error(fp, "input reduction failed");
322
+ return -1;
323
+ }
324
+ continue;
325
+ }
326
+ report_error(fp, "deflate failed");
327
+ return -1;
328
+ }
329
+ status = deflateEnd(&zs);
330
+ if (status != Z_OK) {
331
+ report_error(fp, "deflate end failed");
332
+ return -1;
333
+ }
334
+ compressed_length = zs.total_out;
335
+ compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
336
+ if (compressed_length > MAX_BLOCK_SIZE) {
337
+ // should never happen
338
+ report_error(fp, "deflate overflow");
339
+ return -1;
340
+ }
341
+ break;
342
+ }
343
+
344
+ packInt16((uint8_t*)&buffer[16], compressed_length-1);
345
+ uint32_t crc = crc32(0L, NULL, 0L);
346
+ crc = crc32(crc, fp->uncompressed_block, input_length);
347
+ packInt32((uint8_t*)&buffer[compressed_length-8], crc);
348
+ packInt32((uint8_t*)&buffer[compressed_length-4], input_length);
349
+
350
+ int remaining = block_length - input_length;
351
+ if (remaining > 0) {
352
+ if (remaining > input_length) {
353
+ // should never happen (check so we can use memcpy)
354
+ report_error(fp, "remainder too large");
355
+ return -1;
356
+ }
357
+ memcpy(fp->uncompressed_block,
358
+ fp->uncompressed_block + input_length,
359
+ remaining);
360
+ }
361
+ fp->block_offset = remaining;
362
+ return compressed_length;
363
+ }
364
+
365
+ static
366
+ int
367
+ inflate_block(BGZF* fp, int block_length)
368
+ {
369
+ // Inflate the block in fp->compressed_block into fp->uncompressed_block
370
+
371
+ z_stream zs;
372
+ int status;
373
+ zs.zalloc = NULL;
374
+ zs.zfree = NULL;
375
+ zs.next_in = fp->compressed_block + 18;
376
+ zs.avail_in = block_length - 16;
377
+ zs.next_out = fp->uncompressed_block;
378
+ zs.avail_out = fp->uncompressed_block_size;
379
+
380
+ status = inflateInit2(&zs, GZIP_WINDOW_BITS);
381
+ if (status != Z_OK) {
382
+ report_error(fp, "inflate init failed");
383
+ return -1;
384
+ }
385
+ status = inflate(&zs, Z_FINISH);
386
+ if (status != Z_STREAM_END) {
387
+ inflateEnd(&zs);
388
+ report_error(fp, "inflate failed");
389
+ return -1;
390
+ }
391
+ status = inflateEnd(&zs);
392
+ if (status != Z_OK) {
393
+ report_error(fp, "inflate failed");
394
+ return -1;
395
+ }
396
+ return zs.total_out;
397
+ }
398
+
399
+ static
400
+ int
401
+ check_header(const bgzf_byte_t* header)
402
+ {
403
+ return (header[0] == GZIP_ID1 &&
404
+ header[1] == (bgzf_byte_t) GZIP_ID2 &&
405
+ header[2] == Z_DEFLATED &&
406
+ (header[3] & FLG_FEXTRA) != 0 &&
407
+ unpackInt16((uint8_t*)&header[10]) == BGZF_XLEN &&
408
+ header[12] == BGZF_ID1 &&
409
+ header[13] == BGZF_ID2 &&
410
+ unpackInt16((uint8_t*)&header[14]) == BGZF_LEN);
411
+ }
412
+
413
+ static void free_cache(BGZF *fp)
414
+ {
415
+ khint_t k;
416
+ khash_t(cache) *h = (khash_t(cache)*)fp->cache;
417
+ if (fp->open_mode != 'r') return;
418
+ for (k = kh_begin(h); k < kh_end(h); ++k)
419
+ if (kh_exist(h, k)) free(kh_val(h, k).block);
420
+ kh_destroy(cache, h);
421
+ }
422
+
423
+ static int load_block_from_cache(BGZF *fp, int64_t block_address)
424
+ {
425
+ khint_t k;
426
+ cache_t *p;
427
+ khash_t(cache) *h = (khash_t(cache)*)fp->cache;
428
+ k = kh_get(cache, h, block_address);
429
+ if (k == kh_end(h)) return 0;
430
+ p = &kh_val(h, k);
431
+ if (fp->block_length != 0) fp->block_offset = 0;
432
+ fp->block_address = block_address;
433
+ fp->block_length = p->size;
434
+ memcpy(fp->uncompressed_block, p->block, MAX_BLOCK_SIZE);
435
+ #ifdef _USE_KNETFILE
436
+ knet_seek(fp->x.fpr, p->end_offset, SEEK_SET);
437
+ #else
438
+ fseeko(fp->file, p->end_offset, SEEK_SET);
439
+ #endif
440
+ return p->size;
441
+ }
442
+
443
+ static void cache_block(BGZF *fp, int size)
444
+ {
445
+ int ret;
446
+ khint_t k;
447
+ cache_t *p;
448
+ khash_t(cache) *h = (khash_t(cache)*)fp->cache;
449
+ if (MAX_BLOCK_SIZE >= fp->cache_size) return;
450
+ if ((kh_size(h) + 1) * MAX_BLOCK_SIZE > fp->cache_size) {
451
+ /* A better way would be to remove the oldest block in the
452
+ * cache, but here we remove a random one for simplicity. This
453
+ * should not have a big impact on performance. */
454
+ for (k = kh_begin(h); k < kh_end(h); ++k)
455
+ if (kh_exist(h, k)) break;
456
+ if (k < kh_end(h)) {
457
+ free(kh_val(h, k).block);
458
+ kh_del(cache, h, k);
459
+ }
460
+ }
461
+ k = kh_put(cache, h, fp->block_address, &ret);
462
+ if (ret == 0) return; // if this happens, a bug!
463
+ p = &kh_val(h, k);
464
+ p->size = fp->block_length;
465
+ p->end_offset = fp->block_address + size;
466
+ p->block = malloc(MAX_BLOCK_SIZE);
467
+ memcpy(kh_val(h, k).block, fp->uncompressed_block, MAX_BLOCK_SIZE);
468
+ }
469
+
470
+ int
471
+ bgzf_read_block(BGZF* fp)
472
+ {
473
+ bgzf_byte_t header[BLOCK_HEADER_LENGTH];
474
+ int count, size = 0, block_length, remaining;
475
+ #ifdef _USE_KNETFILE
476
+ int64_t block_address = knet_tell(fp->x.fpr);
477
+ if (load_block_from_cache(fp, block_address)) return 0;
478
+ count = knet_read(fp->x.fpr, header, sizeof(header));
479
+ #else
480
+ int64_t block_address = ftello(fp->file);
481
+ if (load_block_from_cache(fp, block_address)) return 0;
482
+ count = fread(header, 1, sizeof(header), fp->file);
483
+ #endif
484
+ if (count == 0) {
485
+ fp->block_length = 0;
486
+ return 0;
487
+ }
488
+ size = count;
489
+ if (count != sizeof(header)) {
490
+ report_error(fp, "read failed");
491
+ return -1;
492
+ }
493
+ if (!check_header(header)) {
494
+ report_error(fp, "invalid block header");
495
+ return -1;
496
+ }
497
+ block_length = unpackInt16((uint8_t*)&header[16]) + 1;
498
+ bgzf_byte_t* compressed_block = (bgzf_byte_t*) fp->compressed_block;
499
+ memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
500
+ remaining = block_length - BLOCK_HEADER_LENGTH;
501
+ #ifdef _USE_KNETFILE
502
+ count = knet_read(fp->x.fpr, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
503
+ #else
504
+ count = fread(&compressed_block[BLOCK_HEADER_LENGTH], 1, remaining, fp->file);
505
+ #endif
506
+ if (count != remaining) {
507
+ report_error(fp, "read failed");
508
+ return -1;
509
+ }
510
+ size += count;
511
+ count = inflate_block(fp, block_length);
512
+ if (count < 0) return -1;
513
+ if (fp->block_length != 0) {
514
+ // Do not reset offset if this read follows a seek.
515
+ fp->block_offset = 0;
516
+ }
517
+ fp->block_address = block_address;
518
+ fp->block_length = count;
519
+ cache_block(fp, size);
520
+ return 0;
521
+ }
522
+
523
+ int
524
+ bgzf_read(BGZF* fp, void* data, int length)
525
+ {
526
+ if (length <= 0) {
527
+ return 0;
528
+ }
529
+ if (fp->open_mode != 'r') {
530
+ report_error(fp, "file not open for reading");
531
+ return -1;
532
+ }
533
+
534
+ int bytes_read = 0;
535
+ bgzf_byte_t* output = data;
536
+ while (bytes_read < length) {
537
+ int copy_length, available = fp->block_length - fp->block_offset;
538
+ bgzf_byte_t *buffer;
539
+ if (available <= 0) {
540
+ if (bgzf_read_block(fp) != 0) {
541
+ return -1;
542
+ }
543
+ available = fp->block_length - fp->block_offset;
544
+ if (available <= 0) {
545
+ break;
546
+ }
547
+ }
548
+ copy_length = bgzf_min(length-bytes_read, available);
549
+ buffer = fp->uncompressed_block;
550
+ memcpy(output, buffer + fp->block_offset, copy_length);
551
+ fp->block_offset += copy_length;
552
+ output += copy_length;
553
+ bytes_read += copy_length;
554
+ }
555
+ if (fp->block_offset == fp->block_length) {
556
+ #ifdef _USE_KNETFILE
557
+ fp->block_address = knet_tell(fp->x.fpr);
558
+ #else
559
+ fp->block_address = ftello(fp->file);
560
+ #endif
561
+ fp->block_offset = 0;
562
+ fp->block_length = 0;
563
+ }
564
+ return bytes_read;
565
+ }
566
+
567
+ int bgzf_flush(BGZF* fp)
568
+ {
569
+ while (fp->block_offset > 0) {
570
+ int count, block_length;
571
+ block_length = deflate_block(fp, fp->block_offset);
572
+ if (block_length < 0) return -1;
573
+ #ifdef _USE_KNETFILE
574
+ count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
575
+ #else
576
+ count = fwrite(fp->compressed_block, 1, block_length, fp->file);
577
+ #endif
578
+ if (count != block_length) {
579
+ report_error(fp, "write failed");
580
+ return -1;
581
+ }
582
+ fp->block_address += block_length;
583
+ }
584
+ return 0;
585
+ }
586
+
587
+ int bgzf_flush_try(BGZF *fp, int size)
588
+ {
589
+ if (fp->block_offset + size > fp->uncompressed_block_size)
590
+ return bgzf_flush(fp);
591
+ return -1;
592
+ }
593
+
594
+ int bgzf_write(BGZF* fp, const void* data, int length)
595
+ {
596
+ const bgzf_byte_t *input = data;
597
+ int block_length, bytes_written;
598
+ if (fp->open_mode != 'w') {
599
+ report_error(fp, "file not open for writing");
600
+ return -1;
601
+ }
602
+
603
+ if (fp->uncompressed_block == NULL)
604
+ fp->uncompressed_block = malloc(fp->uncompressed_block_size);
605
+
606
+ input = data;
607
+ block_length = fp->uncompressed_block_size;
608
+ bytes_written = 0;
609
+ while (bytes_written < length) {
610
+ int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written);
611
+ bgzf_byte_t* buffer = fp->uncompressed_block;
612
+ memcpy(buffer + fp->block_offset, input, copy_length);
613
+ fp->block_offset += copy_length;
614
+ input += copy_length;
615
+ bytes_written += copy_length;
616
+ if (fp->block_offset == block_length) {
617
+ if (bgzf_flush(fp) != 0) {
618
+ break;
619
+ }
620
+ }
621
+ }
622
+ return bytes_written;
623
+ }
624
+
625
+ int bgzf_close(BGZF* fp)
626
+ {
627
+ if (fp->open_mode == 'w') {
628
+ if (bgzf_flush(fp) != 0) return -1;
629
+ { // add an empty block
630
+ int count, block_length = deflate_block(fp, 0);
631
+ #ifdef _USE_KNETFILE
632
+ count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
633
+ #else
634
+ count = fwrite(fp->compressed_block, 1, block_length, fp->file);
635
+ #endif
636
+ }
637
+ #ifdef _USE_KNETFILE
638
+ if (fflush(fp->x.fpw) != 0) {
639
+ #else
640
+ if (fflush(fp->file) != 0) {
641
+ #endif
642
+ report_error(fp, "flush failed");
643
+ return -1;
644
+ }
645
+ }
646
+ if (fp->owned_file) {
647
+ #ifdef _USE_KNETFILE
648
+ int ret;
649
+ if (fp->open_mode == 'w') ret = fclose(fp->x.fpw);
650
+ else ret = knet_close(fp->x.fpr);
651
+ if (ret != 0) return -1;
652
+ #else
653
+ if (fclose(fp->file) != 0) return -1;
654
+ #endif
655
+ }
656
+ free(fp->uncompressed_block);
657
+ free(fp->compressed_block);
658
+ free_cache(fp);
659
+ free(fp);
660
+ return 0;
661
+ }
662
+
663
+ void bgzf_set_cache_size(BGZF *fp, int cache_size)
664
+ {
665
+ if (fp) fp->cache_size = cache_size;
666
+ }
667
+
668
+ int bgzf_check_EOF(BGZF *fp)
669
+ {
670
+ static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0";
671
+ uint8_t buf[28];
672
+ off_t offset;
673
+ #ifdef _USE_KNETFILE
674
+ offset = knet_tell(fp->x.fpr);
675
+ if (knet_seek(fp->x.fpr, -28, SEEK_END) != 0) return -1;
676
+ knet_read(fp->x.fpr, buf, 28);
677
+ knet_seek(fp->x.fpr, offset, SEEK_SET);
678
+ #else
679
+ offset = ftello(fp->file);
680
+ if (fseeko(fp->file, -28, SEEK_END) != 0) return -1;
681
+ fread(buf, 1, 28, fp->file);
682
+ fseeko(fp->file, offset, SEEK_SET);
683
+ #endif
684
+ return (memcmp(magic, buf, 28) == 0)? 1 : 0;
685
+ }
686
+
687
+ int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)
688
+ {
689
+ int block_offset;
690
+ int64_t block_address;
691
+
692
+ if (fp->open_mode != 'r') {
693
+ report_error(fp, "file not open for read");
694
+ return -1;
695
+ }
696
+ if (where != SEEK_SET) {
697
+ report_error(fp, "unimplemented seek option");
698
+ return -1;
699
+ }
700
+ block_offset = pos & 0xFFFF;
701
+ block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;
702
+ #ifdef _USE_KNETFILE
703
+ if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) {
704
+ #else
705
+ if (fseeko(fp->file, block_address, SEEK_SET) != 0) {
706
+ #endif
707
+ report_error(fp, "seek failed");
708
+ return -1;
709
+ }
710
+ fp->block_length = 0; // indicates current block is not loaded
711
+ fp->block_address = block_address;
712
+ fp->block_offset = block_offset;
713
+ return 0;
714
+ }