ngs_server 0.4 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
data/ext/tabix/kseq.h ADDED
@@ -0,0 +1,227 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2008 Genome Research Ltd (GRL).
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+
26
+ /* Contact: Heng Li <lh3@sanger.ac.uk> */
27
+
28
+ /*
29
+ 2009-07-16 (lh3): in kstream_t, change "char*" to "unsigned char*"
30
+ */
31
+
32
+ /* Last Modified: 12APR2009 */
33
+
34
+ #ifndef AC_KSEQ_H
35
+ #define AC_KSEQ_H
36
+
37
+ #include <ctype.h>
38
+ #include <string.h>
39
+ #include <stdlib.h>
40
+
41
/* Delimiter selectors understood by ks_getuntil(); any value above
   KS_SEP_MAX is compared against the input as a literal byte. */
#define KS_SEP_SPACE 0 /* stop at any isspace() byte: ' ', \t, \n, \v, \f, \r */
#define KS_SEP_TAB   1 /* stop at any isspace() byte other than ' ' */
#define KS_SEP_MAX   1 /* largest selector value */
44
+
45
/* Declares kstream_t: a byte buffer layered over an input handle of type
   type_t. buf[begin, end) is the data read but not yet consumed. */
#define __KS_TYPE(type_t) \
	typedef struct __kstream_t { \
		unsigned char *buf; /* read buffer */ \
		int begin;          /* index of the next unconsumed byte */ \
		int end;            /* number of valid bytes in buf */ \
		int is_eof;         /* set once a read came back short */ \
		type_t f;           /* underlying input handle */ \
	} kstream_t;
51
+
52
/* Non-zero once the EOF flag is set and every buffered byte is consumed. */
#define ks_eof(ks) ((ks)->begin >= (ks)->end && (ks)->is_eof)
/* Drops buffered data and clears the EOF flag; evaluates to 0. The
   underlying handle is not touched. */
#define ks_rewind(ks) ((ks)->end = 0, (ks)->begin = 0, (ks)->is_eof = 0)
54
+
55
/* Defines the constructor/destructor pair for kstream_t.
 *
 * ks_init(f)     allocates a zeroed stream over handle f together with a
 *                __bufsize-byte buffer. Returns NULL if either allocation
 *                fails (nothing is leaked).
 * ks_destroy(ks) frees the buffer and the stream; NULL is accepted. The
 *                handle f itself is not closed.
 */
#define __KS_BASIC(type_t, __bufsize) \
	static inline kstream_t *ks_init(type_t f) \
	{ \
		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
		if (ks == 0) return 0; \
		ks->buf = (unsigned char*)malloc(__bufsize); \
		if (ks->buf == 0) { \
			free(ks); \
			return 0; \
		} \
		ks->f = f; \
		return ks; \
	} \
	static inline void ks_destroy(kstream_t *ks) \
	{ \
		if (ks) { \
			free(ks->buf); \
			free(ks); \
		} \
	}
70
+
71
/* Defines ks_getc(ks): returns the next byte of the stream as a value in
 * [0, 255], or -1 when no more data is available.
 *
 * The buffer is refilled with __read(f, buf, __bufsize) when empty. A read
 * shorter than __bufsize marks the stream as EOF. A zero or negative read
 * result (end of input or read error) yields -1 instead of handing back a
 * stale byte from the buffer.
 */
#define __KS_GETC(__read, __bufsize) \
	static inline int ks_getc(kstream_t *ks) \
	{ \
		if (ks->is_eof && ks->begin >= ks->end) return -1; \
		if (ks->begin >= ks->end) { \
			ks->begin = 0; \
			ks->end = __read(ks->f, ks->buf, __bufsize); \
			if (ks->end < __bufsize) ks->is_eof = 1; \
			if (ks->end <= 0) { /* EOF or read error: nothing is buffered */ \
				ks->end = 0; \
				return -1; \
			} \
		} \
		return (int)ks->buf[ks->begin++]; \
	}
83
+
84
#ifndef KSTRING_T
#define KSTRING_T kstring_t
/* Growable character buffer. */
typedef struct __kstring_t {
	size_t l; /* bytes currently in use */
	size_t m; /* bytes allocated for s */
	char *s;  /* heap buffer; NULL until first allocated */
} kstring_t;
#endif
91
+
92
#ifndef kroundup32
/* Rounds the integer lvalue x up, in place, to the next power of two; a
   value that already is a power of two is left unchanged. Bits are smeared
   with shifts of up to 16, so the result is only meaningful for values that
   fit in 32 bits. x is evaluated several times. */
#define kroundup32(x) \
	(--(x), \
	 (x) |= (x) >> 1, (x) |= (x) >> 2, (x) |= (x) >> 4, \
	 (x) |= (x) >> 8, (x) |= (x) >> 16, \
	 ++(x))
#endif
95
+
96
/* Defines ks_getuntil(ks, delimiter, str, dret): reads one token.
 *
 * Bytes are appended to str (which is reset to length 0 first) up to, but
 * not including, the next delimiter; the delimiter itself is consumed.
 *
 *   delimiter  KS_SEP_SPACE, KS_SEP_TAB, or a literal byte value > KS_SEP_MAX
 *   str        output; str->s is grown with realloc() and is always
 *              NUL-terminated on a non-negative return
 *   dret       if non-NULL, receives the delimiter byte that ended the
 *              token, or 0 when the token was ended by end of input
 *
 * Returns the token length (possibly 0), or -1 if the stream was already
 * exhausted on entry.
 *
 * The existing str->s buffer is reused for empty tokens rather than being
 * replaced by a fresh allocation, so repeated empty fields no longer leak.
 * Allocation failure is not detected here.
 */
#define __KS_GETUNTIL(__read, __bufsize) \
	static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
	{ \
		if (dret) *dret = 0; \
		str->l = 0; \
		if (ks->begin >= ks->end && ks->is_eof) return -1; \
		for (;;) { \
			int i; \
			size_t chunk; \
			if (ks->begin >= ks->end) { \
				if (ks->is_eof) break; \
				ks->begin = 0; \
				ks->end = __read(ks->f, ks->buf, __bufsize); \
				if (ks->end < __bufsize) ks->is_eof = 1; \
				if (ks->end <= 0) { /* EOF or read error: nothing is buffered */ \
					ks->end = 0; \
					break; \
				} \
			} \
			if (delimiter > KS_SEP_MAX) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (ks->buf[i] == delimiter) break; \
			} else if (delimiter == KS_SEP_SPACE) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (isspace(ks->buf[i])) break; \
			} else if (delimiter == KS_SEP_TAB) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
			} else i = ks->end; /* unsupported selector: no byte matches */ \
			chunk = (size_t)(i - ks->begin); \
			if (str->m - str->l < chunk + 1) { /* +1 keeps room for the NUL */ \
				str->m = str->l + chunk + 1; \
				kroundup32(str->m); \
				str->s = (char*)realloc(str->s, str->m); \
			} \
			memcpy(str->s + str->l, ks->buf + ks->begin, chunk); \
			str->l += chunk; \
			ks->begin = i + 1; /* step over the delimiter (or past the buffer) */ \
			if (i < ks->end) { \
				if (dret) *dret = ks->buf[i]; \
				break; \
			} \
		} \
		if (str->s == 0) { /* nothing was ever stored: hand back "" */ \
			str->m = 1; \
			str->s = (char*)calloc(1, 1); \
		} \
		str->s[str->l] = '\0'; \
		return (int)str->l; \
	}
142
+
143
/* Instantiates the buffered-stream layer for one handle type: the kstream_t
   type plus ks_init(), ks_destroy(), ks_getc() and ks_getuntil(). __read is
   invoked as __read(handle, buffer, __bufsize). */
#define KSTREAM_INIT(type_t, __read, __bufsize) \
	__KS_TYPE(type_t) \
	__KS_BASIC(type_t, __bufsize) \
	__KS_GETC(__read, __bufsize) \
	__KS_GETUNTIL(__read, __bufsize)
148
+
149
/* Defines the lifecycle helpers for kseq_t.
 *
 * kseq_init(fd)     allocates a zeroed parser over handle fd. Returns NULL
 *                   if the parser or its stream cannot be allocated.
 * kseq_rewind(ks)   forgets the pending header character and discards the
 *                   buffered input; the handle is not repositioned.
 * kseq_destroy(ks)  frees the four string buffers, the stream and the
 *                   parser; NULL is accepted. The handle is not closed.
 */
#define __KSEQ_BASIC(type_t) \
	static inline kseq_t *kseq_init(type_t fd) \
	{ \
		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
		if (s == 0) return 0; \
		s->f = ks_init(fd); \
		if (s->f == 0) { \
			free(s); \
			return 0; \
		} \
		return s; \
	} \
	static inline void kseq_rewind(kseq_t *ks) \
	{ \
		ks->last_char = 0; \
		ks->f->is_eof = ks->f->begin = ks->f->end = 0; \
	} \
	static inline void kseq_destroy(kseq_t *ks) \
	{ \
		if (!ks) return; \
		free(ks->name.s); \
		free(ks->comment.s); \
		free(ks->seq.s); \
		free(ks->qual.s); \
		ks_destroy(ks->f); \
		free(ks); \
	}
168
+
169
/* Defines kseq_read(seq): parses the next FASTA ('>') or FASTQ ('@') record
   into seq->name, seq->comment, seq->seq and, for FASTQ, seq->qual.

   Return value:
     >=0  length of the sequence (normal)
     -1   end-of-file
     -2   truncated quality string

   seq->seq.s is always allocated and NUL-terminated on a non-negative
   return, including for a record that has a header but no sequence bytes.
 */
#define __KSEQ_READ \
	static int kseq_read(kseq_t *seq) \
	{ \
		int c; \
		kstream_t *ks = seq->f; \
		if (seq->last_char == 0) { /* then jump to the next header line */ \
			while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
			if (c == -1) return -1; /* end of file */ \
			seq->last_char = c; \
		} /* the first header char has been read */ \
		seq->comment.l = seq->seq.l = seq->qual.l = 0; \
		if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; \
		if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); \
		while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
			if (isgraph(c)) { /* printable non-space character */ \
				if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
					seq->seq.m = seq->seq.l + 2; \
					kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
					seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
				} \
				seq->seq.s[seq->seq.l++] = (char)c; \
			} \
		} \
		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
		if (seq->seq.l + 1 >= seq->seq.m) { /* empty record: seq.s may still be NULL */ \
			seq->seq.m = seq->seq.l + 2; \
			kroundup32(seq->seq.m); \
			seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
		} \
		seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
		if (c != '+') return seq->seq.l; /* FASTA */ \
		if (seq->qual.m < seq->seq.m) { /* allocate enough memory */ \
			seq->qual.m = seq->seq.m; \
			seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
		} \
		while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
		if (c == -1) return -2; /* we should not stop here */ \
		while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l) \
			if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
		seq->qual.s[seq->qual.l] = 0; /* null terminated string */ \
		seq->last_char = 0; /* we have not come to the next header line */ \
		if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
		return seq->seq.l; \
	}
213
+
214
/* Declares kseq_t, the parser state for one input. The type_t argument is
   not used by the expansion. */
#define __KSEQ_TYPE(type_t) \
	typedef struct { \
		kstring_t name;    /* record name */ \
		kstring_t comment; /* rest of the header line after the name */ \
		kstring_t seq;     /* sequence characters */ \
		kstring_t qual;    /* quality characters (FASTQ only) */ \
		int last_char;     /* header character already consumed, or 0 */ \
		kstream_t *f;      /* buffered input stream */ \
	} kseq_t;
220
+
221
/* Instantiates the whole reader for one handle type: the stream layer with a
   4096-byte buffer, the kseq_t type, and kseq_init(), kseq_rewind(),
   kseq_destroy() and kseq_read(). */
#define KSEQ_INIT(type_t, __read) \
	KSTREAM_INIT(type_t, __read, 4096) \
	__KSEQ_TYPE(type_t) \
	__KSEQ_BASIC(type_t) \
	__KSEQ_READ
226
+
227
+ #endif
data/ext/tabix/ksort.h ADDED
@@ -0,0 +1,271 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2008 Genome Research Ltd (GRL).
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+
26
+ /* Contact: Heng Li <lh3@sanger.ac.uk> */
27
+
28
+ /*
29
+ 2008-11-16 (0.1.4):
30
+
31
+ * Fixed a bug in introsort() that happens in rare cases.
32
+
33
+ 2008-11-05 (0.1.3):
34
+
35
+ * Fixed a bug in introsort() for complex comparisons.
36
+
37
+ * Fixed a bug in mergesort(). The previous version is not stable.
38
+
39
+ 2008-09-15 (0.1.2):
40
+
41
+ * Accelerated introsort. On my Mac (not on another Linux machine),
42
+ my implementation is as fast as std::sort on random input.
43
+
44
+ * Added combsort and in introsort, switch to combsort if the
45
+ recursion is too deep.
46
+
47
+ 2008-09-13 (0.1.1):
48
+
49
+ * Added k-small algorithm
50
+
51
+ 2008-09-05 (0.1.0):
52
+
53
+ * Initial version
54
+
55
+ */
56
+
57
+ #ifndef AC_KSORT_H
58
+ #define AC_KSORT_H
59
+
60
+ #include <stdlib.h>
61
+ #include <string.h>
62
+
63
/* One deferred sub-range on the explicit stack used by ks_introsort_*(). */
typedef struct {
	void *left;  /* first element of the sub-range */
	void *right; /* last element of the sub-range (inclusive) */
	int depth;   /* depth budget that was left when the range was pushed */
} ks_isort_stack_t;
67
+
68
/* Swaps two lvalues of type type_t. Expands to a brace block, so it must be
   used where a statement is allowed. The temporary has a prefixed name so it
   cannot shadow an argument that happens to be called `t`, and it is not
   declared `register`, which C++17 no longer accepts. */
#define KSORT_SWAP(type_t, a, b) { type_t ksort_swap_tmp_ = (a); (a) = (b); (b) = ksort_swap_tmp_; }
69
+
70
/* Generates a family of sorting routines for element type type_t, ordered by
 * the strict "less than" predicate __sort_lt(a, b). Every function name
 * carries the suffix _<name>.
 *
 *   ks_mergesort_<name>(n, array, temp)  stable merge sort; temp is scratch
 *       space for n elements, or NULL to have it allocated internally. If
 *       that allocation fails the array is sorted by insertion sort instead.
 *   ks_heapadjust_<name>(i, n, l)        sift l[i] down in a max-heap of size n
 *   ks_heapmake_<name>(n, l)             turn l[0..n) into a max-heap
 *   ks_heapsort_<name>(n, l)             sort l[0..n); assumes l is already a
 *       max-heap (see ks_heapmake); n < 2 is a no-op
 *   ks_combsort_<name>(n, a)             comb sort finished by insertion sort
 *   ks_introsort_<name>(n, a)            quicksort with a depth budget that
 *       switches to comb sort when exhausted; falls back to comb sort
 *       entirely if its work stack cannot be allocated
 *   ks_ksmall_<name>(n, arr, kk)         returns the kk-th smallest element
 *       (0 <= kk < n); arr is partially reordered
 */
#define KSORT_INIT(name, type_t, __sort_lt) \
	/* Insertion sort of [s, t); static so each translation unit gets a */ \
	/* usable definition under C99 inline rules. */ \
	static inline void __ks_insertsort_##name(type_t *s, type_t *t) \
	{ \
		type_t *i, *j, swap_tmp; \
		for (i = s + 1; i < t; ++i) \
			for (j = i; j > s && __sort_lt(*j, *(j-1)); --j) { \
				swap_tmp = *j; *j = *(j-1); *(j-1) = swap_tmp; \
			} \
	} \
	void ks_mergesort_##name(size_t n, type_t array[], type_t temp[]) \
	{ \
		type_t *a2[2], *a, *b; \
		int curr, shift; \
		\
		a2[0] = array; \
		a2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n); \
		if (a2[1] == 0) { /* no scratch space: stable in-place fallback */ \
			__ks_insertsort_##name(array, array + n); \
			return; \
		} \
		for (curr = 0, shift = 0; ((size_t)1<<shift) < n; ++shift) { \
			a = a2[curr]; b = a2[1-curr]; \
			if (shift == 0) { /* first pass: order adjacent pairs */ \
				type_t *p = b, *i, *eb = a + n; \
				for (i = a; i < eb; i += 2) { \
					if (i == eb - 1) { /* odd element at the tail */ \
						*p++ = *i; \
						break; \
					} \
					if (__sort_lt(*(i+1), *i)) { \
						*p++ = *(i+1); *p++ = *i; \
					} else { \
						*p++ = *i; *p++ = *(i+1); \
					} \
				} \
			} else { /* merge neighbouring runs of length step */ \
				size_t i, step = (size_t)1<<shift; \
				for (i = 0; i < n; i += step<<1) { \
					type_t *p, *j, *k, *ea, *eb; \
					if (n < i + step) { \
						ea = a + n; eb = a; \
					} else { \
						ea = a + i + step; \
						eb = a + (n < i + (step<<1)? n : i + (step<<1)); \
					} \
					j = a + i; k = a + i + step; p = b + i; \
					while (j < ea && k < eb) { \
						if (__sort_lt(*k, *j)) *p++ = *k++; \
						else *p++ = *j++; \
					} \
					while (j < ea) *p++ = *j++; \
					while (k < eb) *p++ = *k++; \
				} \
			} \
			curr = 1 - curr; \
		} \
		if (curr == 1) { /* result ended up in the scratch buffer */ \
			type_t *p = a2[0], *i = a2[1], *eb = array + n; \
			for (; p < eb; ++i) *p++ = *i; \
		} \
		if (temp == 0) free(a2[1]); \
	} \
	void ks_heapadjust_##name(size_t i, size_t n, type_t l[]) \
	{ \
		size_t k = i; \
		type_t tmp = l[i]; \
		while ((k = (k << 1) + 1) < n) { \
			if (k != n - 1 && __sort_lt(l[k], l[k+1])) ++k; \
			if (__sort_lt(l[k], tmp)) break; \
			l[i] = l[k]; i = k; \
		} \
		l[i] = tmp; \
	} \
	void ks_heapmake_##name(size_t lsize, type_t l[]) \
	{ \
		size_t i; \
		for (i = (lsize >> 1) - 1; i != (size_t)(-1); --i) \
			ks_heapadjust_##name(i, lsize, l); \
	} \
	void ks_heapsort_##name(size_t lsize, type_t l[]) \
	{ \
		size_t i; \
		if (lsize < 2) return; /* also keeps lsize - 1 from wrapping at 0 */ \
		for (i = lsize - 1; i > 0; --i) { \
			type_t tmp; \
			tmp = *l; *l = l[i]; l[i] = tmp; ks_heapadjust_##name(0, i, l); \
		} \
	} \
	void ks_combsort_##name(size_t n, type_t a[]) \
	{ \
		const double shrink_factor = 1.2473309501039786540366528676643; \
		int do_swap; \
		size_t gap = n; \
		type_t tmp, *i, *j; \
		do { \
			if (gap > 2) { \
				gap = (size_t)(gap / shrink_factor); \
				if (gap == 9 || gap == 10) gap = 11; \
			} \
			do_swap = 0; \
			for (i = a; i < a + n - gap; ++i) { \
				j = i + gap; \
				if (__sort_lt(*j, *i)) { \
					tmp = *i; *i = *j; *j = tmp; \
					do_swap = 1; \
				} \
			} \
		} while (do_swap || gap > 2); \
		if (gap != 1) __ks_insertsort_##name(a, a + n); \
	} \
	void ks_introsort_##name(size_t n, type_t a[]) \
	{ \
		int d; \
		ks_isort_stack_t *top, *stack; \
		type_t rp, swap_tmp; \
		type_t *s, *t, *i, *j, *k; \
		\
		if (n < 1) return; \
		else if (n == 2) { \
			if (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \
			return; \
		} \
		for (d = 2; ((size_t)1<<d) < n; ++d); \
		stack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); \
		if (stack == 0) { /* no work stack: comb sort needs no allocation */ \
			ks_combsort_##name(n, a); \
			return; \
		} \
		top = stack; s = a; t = a + (n-1); d <<= 1; \
		while (1) { \
			if (s < t) { \
				if (--d == 0) { /* depth budget spent */ \
					ks_combsort_##name(t - s + 1, s); \
					t = s; \
					continue; \
				} \
				i = s; j = t; k = i + ((j-i)>>1) + 1; \
				if (__sort_lt(*k, *i)) { \
					if (__sort_lt(*k, *j)) k = j; \
				} else k = __sort_lt(*j, *i)? i : j; \
				rp = *k; \
				if (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; } \
				for (;;) { \
					do ++i; while (__sort_lt(*i, rp)); \
					do --j; while (i <= j && __sort_lt(rp, *j)); \
					if (j <= i) break; \
					swap_tmp = *i; *i = *j; *j = swap_tmp; \
				} \
				swap_tmp = *i; *i = *t; *t = swap_tmp; \
				/* ranges of 16 or fewer elements are left for the final insertion sort */ \
				if (i-s > t-i) { \
					if (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \
					s = t-i > 16? i+1 : t; \
				} else { \
					if (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \
					t = i-s > 16? i-1 : s; \
				} \
			} else { \
				if (top == stack) { \
					free(stack); \
					__ks_insertsort_##name(a, a+n); \
					return; \
				} else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \
			} \
		} \
	} \
	/* This function is adapted from: http://ndevilla.free.fr/median/ */ \
	/* 0 <= kk < n */ \
	type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk) \
	{ \
		type_t *low, *high, *k, *ll, *hh, *mid; \
		low = arr; high = arr + n - 1; k = arr + kk; \
		for (;;) { \
			if (high <= low) return *k; \
			if (high == low + 1) { \
				if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
				return *k; \
			} \
			mid = low + (high - low) / 2; \
			if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \
			if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
			if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low); \
			KSORT_SWAP(type_t, *mid, *(low+1)); \
			ll = low + 1; hh = high; \
			for (;;) { \
				do ++ll; while (__sort_lt(*ll, *low)); \
				do --hh; while (__sort_lt(*low, *hh)); \
				if (hh < ll) break; \
				KSORT_SWAP(type_t, *ll, *hh); \
			} \
			KSORT_SWAP(type_t, *low, *hh); \
			if (hh <= k) low = ll; \
			if (hh >= k) high = hh - 1; \
		} \
	}
254
+
255
/* Dispatch helpers: ks_xxx(name, ...) expands to a call of ks_xxx_<name>(...),
   the function generated by KSORT_INIT(name, ...). */
#define ks_mergesort(name, n, a, t)   ks_mergesort_##name((n), (a), (t))
#define ks_introsort(name, n, a)      ks_introsort_##name((n), (a))
#define ks_combsort(name, n, a)       ks_combsort_##name((n), (a))
#define ks_heapsort(name, n, a)       ks_heapsort_##name((n), (a))
#define ks_heapmake(name, n, a)       ks_heapmake_##name((n), (a))
#define ks_heapadjust(name, i, n, a)  ks_heapadjust_##name((i), (n), (a))
#define ks_ksmall(name, n, a, k)      ks_ksmall_##name((n), (a), (k))
262
+
263
/* Stock "less than" predicates for KSORT_INIT: the built-in < operator, and
   strcmp() ordering for C strings. */
#define ks_lt_generic(a, b) ((a) < (b))
#define ks_lt_str(a, b)     (0 > strcmp((a), (b)))

/* Element type used by the string instantiation below. */
typedef const char *ksstr_t;

/* Ready-made instantiations: one named after a scalar type and ordered by <,
   and one named `str` over ksstr_t ordered by strcmp(). */
#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)
#define KSORT_INIT_STR             KSORT_INIT(str, ksstr_t, ks_lt_str)
270
+
271
+ #endif
@@ -0,0 +1,165 @@
1
+ #include <stdarg.h>
2
+ #include <stdio.h>
3
+ #include <ctype.h>
4
+ #include <string.h>
5
+ #include <stdint.h>
6
+ #include "kstring.h"
7
+
8
+ int ksprintf(kstring_t *s, const char *fmt, ...)
9
+ {
10
+ va_list ap;
11
+ int l;
12
+ va_start(ap, fmt);
13
+ l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); // This line does not work with glibc 2.0. See `man snprintf'.
14
+ va_end(ap);
15
+ if (l + 1 > s->m - s->l) {
16
+ s->m = s->l + l + 2;
17
+ kroundup32(s->m);
18
+ s->s = (char*)realloc(s->s, s->m);
19
+ va_start(ap, fmt);
20
+ l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap);
21
+ }
22
+ va_end(ap);
23
+ s->l += l;
24
+ return l;
25
+ }
26
+
27
/*
 * Splits the NUL-terminated string s into fields.
 *
 *   delimiter  0 splits on runs of isspace() bytes; any other value splits
 *              on that byte. Empty fields are skipped in both modes.
 *   _offsets   in/out array of field start offsets, grown with realloc().
 *              When non-NULL, s is modified in place: the separator after
 *              each field is overwritten with NUL. When NULL, fields are
 *              only counted and s is left untouched.
 *   _max       in/out capacity of *_offsets; may be NULL when _offsets is.
 *
 * Returns the number of fields, or -1 if the offsets array could not be
 * grown (the offsets stored so far remain valid in *_offsets).
 *
 * In whitespace mode a field ends at whitespace after any non-whitespace
 * byte, so fields whose last byte is not isgraph() (e.g. UTF-8 text) are
 * reported as well.
 */
int ksplit_core(char *s, int delimiter, int *_max, int **_offsets)
{
	int i, l, n = 0, max = 0, last_char = 0, last_start = 0, *offsets = NULL;

	if (_offsets) {
		offsets = *_offsets;
		if (_max) max = *_max;
	}
	l = (int)strlen(s);

	for (i = 0; i <= l; ++i) { /* i == l visits the terminating NUL */
		int cur_is_sep, prev_is_sep;
		if (delimiter == 0) {
			cur_is_sep = s[i] == 0 || isspace((unsigned char)s[i]);
			prev_is_sep = last_char == 0 || isspace((unsigned char)last_char);
		} else {
			cur_is_sep = s[i] == 0 || s[i] == delimiter;
			prev_is_sep = last_char == 0 || last_char == delimiter;
		}
		if (cur_is_sep) {
			if (!prev_is_sep) { /* the end of a field */
				if (_offsets) {
					if (n == max) {
						int new_max = max ? max << 1 : 2;
						int *tmp = (int*)realloc(offsets, sizeof(int) * new_max);
						if (tmp == NULL) {
							n = -1;
							break;
						}
						offsets = tmp;
						max = new_max;
					}
					s[i] = 0;
					offsets[n] = last_start;
				}
				++n;
			}
		} else if (prev_is_sep) {
			last_start = i; /* the start of a field */
		}
		last_char = s[i];
	}
	if (_offsets) {
		*_offsets = offsets;
		if (_max) *_max = max;
	}
	return n;
}
64
+
65
+ /**********************
66
+ * Boyer-Moore search *
67
+ **********************/
68
+
69
// reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html
/*
 * Builds the Boyer-Moore shift tables for the pattern pat[0..m-1].
 *
 * Returns a heap array of m + 256 ints that the caller must free():
 * entries [0, m) hold the good-suffix shifts and entries [m, m + 256) hold
 * the bad-character shifts indexed by byte value. Returns NULL when m <= 0
 * or when memory cannot be allocated.
 */
int *ksBM_prep(const uint8_t *pat, int m)
{
	int i, *suff, *prep, *bmGs, *bmBc;

	if (m <= 0) return NULL;
	/* the tables hold ints, so size the allocation in ints, not bytes */
	prep = (int*)calloc((size_t)m + 256, sizeof(int));
	suff = (int*)calloc((size_t)m, sizeof(int));
	if (prep == NULL || suff == NULL) {
		free(prep);
		free(suff);
		return NULL;
	}
	bmGs = prep; bmBc = prep + m;
	{ // preBmBc()
		for (i = 0; i < 256; ++i) bmBc[i] = m;
		for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1;
	}
	{ // suffixes()
		int f = 0, g;
		suff[m - 1] = m;
		g = m - 1;
		for (i = m - 2; i >= 0; --i) {
			if (i > g && suff[i + m - 1 - f] < i - g)
				suff[i] = suff[i + m - 1 - f];
			else {
				if (i < g) g = i;
				f = i;
				while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g;
				suff[i] = f - g;
			}
		}
	}
	{ // preBmGs()
		int j = 0;
		for (i = 0; i < m; ++i) bmGs[i] = m;
		for (i = m - 1; i >= 0; --i)
			if (suff[i] == i + 1)
				for (; j < m - 1 - i; ++j)
					if (bmGs[j] == m)
						bmGs[j] = m - 1 - i;
		for (i = 0; i <= m - 2; ++i)
			bmGs[m - 1 - suff[i]] = m - 1 - i;
	}
	free(suff);
	return prep;
}
109
+
110
/*
 * Finds every occurrence of pat[0..m-1] in str[0..n-1] with Boyer-Moore.
 *
 *   _prep      tables from ksBM_prep(pat, m), or NULL to build them here
 *              (they are then freed before returning)
 *   n_matches  receives the number of matches, or -1 if memory ran out
 *
 * Returns a heap array of match start offsets that the caller must free(),
 * or NULL when there are no matches or on failure. An empty pattern
 * (m <= 0) or a pattern longer than the text yields zero matches.
 */
int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches)
{
	int i, j, *prep, *bmGs, *bmBc;
	int *matches = NULL, mm = 0, nm = 0;

	*n_matches = 0;
	if (m <= 0 || n < m) return NULL; /* m == 0 would never advance j */
	prep = _prep? _prep : ksBM_prep(pat, m);
	if (prep == NULL) {
		*n_matches = -1;
		return NULL;
	}
	bmGs = prep; bmBc = prep + m;
	j = 0;
	while (j <= n - m) {
		for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i);
		if (i < 0) { /* whole pattern matched at offset j */
			if (nm == mm) {
				int new_mm = mm? mm<<1 : 1;
				int *tmp = (int*)realloc(matches, new_mm * sizeof(int));
				if (tmp == NULL) {
					free(matches);
					matches = NULL;
					nm = -1;
					break;
				}
				matches = tmp;
				mm = new_mm;
			}
			matches[nm++] = j;
			j += bmGs[0];
		} else { /* mismatch at pat[i]: take the larger of the two shifts */
			int max = bmBc[str[i+j]] - m + 1 + i;
			if (max < bmGs[i]) max = bmGs[i];
			j += max;
		}
	}
	*n_matches = nm;
	if (_prep == 0) free(prep);
	return matches;
}
136
+
137
#ifdef KSTRING_MAIN
#include <stdio.h>
/* Self-test driver, built only when KSTRING_MAIN is defined: exercises
   ksprintf(), ksplit() and ksBM_search() and prints the results. */
int main(void)
{
	kstring_t *s;
	int *fields, n, i;
	s = (kstring_t*)calloc(1, sizeof(kstring_t));
	if (s == NULL) return 1;
	// test ksprintf()
	if (ksprintf(s, " abcdefg: %d ", 100) < 0) {
		free(s->s);
		free(s);
		return 1;
	}
	printf("'%s'\n", s->s);
	// test ksplit()
	fields = ksplit(s, 0, &n);
	for (i = 0; i < n; ++i)
		printf("field[%d] = '%s'\n", i, s->s + fields[i]);
	free(fields);
	free(s->s); /* the string buffer is owned separately from the struct */
	free(s);

	{
		static const char *str = "abcdefgcdg";
		static const char *pat = "cd";
		int n, *matches;
		matches = ksBM_search((const uint8_t*)str, (int)strlen(str),
							  (const uint8_t*)pat, (int)strlen(pat), 0, &n);
		printf("%d: \n", n);
		for (i = 0; i < n; ++i)
			printf("- %d\n", matches[i]);
		free(matches);
	}
	return 0;
}
#endif