divsufsort 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.txt +77 -0
- data/ext/Makefile +149 -0
- data/ext/divsufsort.c +398 -0
- data/ext/divsufsort.h +191 -0
- data/ext/divsufsort.o +0 -0
- data/ext/divsufsort.so +0 -0
- data/ext/divsufsort_private.h +207 -0
- data/ext/divsufsort_ruby.c +227 -0
- data/ext/divsufsort_ruby.o +0 -0
- data/ext/extconf.rb +18 -0
- data/ext/lfs.h +56 -0
- data/ext/mkmf.log +266 -0
- data/ext/sssort.c +815 -0
- data/ext/sssort.o +0 -0
- data/ext/trsort.c +586 -0
- data/ext/trsort.o +0 -0
- data/ext/utils.c +381 -0
- data/ext/utils.o +0 -0
- data/libdivsufsort/COPYING +27 -0
- data/libdivsufsort/divsufsort.c +398 -0
- data/libdivsufsort/divsufsort.h +191 -0
- data/libdivsufsort/divsufsort_private.h +207 -0
- data/libdivsufsort/lfs.h +56 -0
- data/libdivsufsort/sssort.c +815 -0
- data/libdivsufsort/trsort.c +586 -0
- data/libdivsufsort/utils.c +381 -0
- metadata +80 -0
data/ext/divsufsort.h
ADDED
@@ -0,0 +1,191 @@
|
|
1
|
+
/*
|
2
|
+
* divsufsort.h for libdivsufsort
|
3
|
+
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
|
4
|
+
*
|
5
|
+
* Permission is hereby granted, free of charge, to any person
|
6
|
+
* obtaining a copy of this software and associated documentation
|
7
|
+
* files (the "Software"), to deal in the Software without
|
8
|
+
* restriction, including without limitation the rights to use,
|
9
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
* copies of the Software, and to permit persons to whom the
|
11
|
+
* Software is furnished to do so, subject to the following
|
12
|
+
* conditions:
|
13
|
+
*
|
14
|
+
* The above copyright notice and this permission notice shall be
|
15
|
+
* included in all copies or substantial portions of the Software.
|
16
|
+
*
|
17
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
19
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
21
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
22
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
23
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
24
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
25
|
+
*/
|
26
|
+
|
27
|
+
#ifndef _DIVSUFSORT_H
|
28
|
+
#define _DIVSUFSORT_H 1
|
29
|
+
|
30
|
+
#ifdef __cplusplus
|
31
|
+
extern "C" {
|
32
|
+
#endif /* __cplusplus */
|
33
|
+
|
34
|
+
// modified by SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
|
35
|
+
#define PROJECT_VERSION_FULL "2.0.0"
|
36
|
+
|
37
|
+
/*
 * Platform compatibility: selects the inline keyword and pulls in the
 * fixed-width integer types (and, where available, their printf macros).
 */
#ifdef _WIN32
#include <stdlib.h>
#include <stdint.h>
#define INLINE _inline
/*
 * int32_t is a plain int with the Windows toolchains, so the matching
 * printf conversion is "d". The guard avoids redefining the macro when
 * <inttypes.h> has already supplied it.
 */
#ifndef PRId32
#define PRId32 "d"
#endif
#else
#define INLINE inline
#include <inttypes.h>
#endif
|
46
|
+
|
47
|
+
#ifndef DIVSUFSORT_API
|
48
|
+
# ifdef DIVSUFSORT_BUILD_DLL
|
49
|
+
# define DIVSUFSORT_API
|
50
|
+
# else
|
51
|
+
# define DIVSUFSORT_API
|
52
|
+
# endif
|
53
|
+
#endif
|
54
|
+
|
55
|
+
/*- Datatypes -*/
|
56
|
+
#ifndef SAUCHAR_T
|
57
|
+
#define SAUCHAR_T
|
58
|
+
typedef uint8_t sauchar_t;
|
59
|
+
#endif /* SAUCHAR_T */
|
60
|
+
#ifndef SAINT_T
|
61
|
+
#define SAINT_T
|
62
|
+
typedef int32_t saint_t;
|
63
|
+
#endif /* SAINT_T */
|
64
|
+
#ifndef SAIDX_T
|
65
|
+
#define SAIDX_T
|
66
|
+
typedef int32_t saidx_t;
|
67
|
+
#endif /* SAIDX_T */
|
68
|
+
#ifndef PRIdSAINT_T
|
69
|
+
#define PRIdSAINT_T PRId32
|
70
|
+
#endif /* PRIdSAINT_T */
|
71
|
+
#ifndef PRIdSAIDX_T
|
72
|
+
#define PRIdSAIDX_T PRId32
|
73
|
+
#endif /* PRIdSAIDX_T */
|
74
|
+
|
75
|
+
|
76
|
+
/*- Prototypes -*/
|
77
|
+
|
78
|
+
/**
|
79
|
+
* Constructs the suffix array of a given string.
|
80
|
+
* @param T[0..n-1] The input string.
|
81
|
+
* @param SA[0..n-1] The output array of suffixes.
|
82
|
+
* @param n The length of the given string.
|
83
|
+
* @return 0 if no error occurred, -1 or -2 otherwise.
|
84
|
+
*/
|
85
|
+
DIVSUFSORT_API
|
86
|
+
saint_t
|
87
|
+
divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n);
|
88
|
+
|
89
|
+
/**
|
90
|
+
* Constructs the burrows-wheeler transformed string of a given string.
|
91
|
+
* @param T[0..n-1] The input string.
|
92
|
+
* @param U[0..n-1] The output string. (can be T)
|
93
|
+
* @param A[0..n-1] The temporary array. (can be NULL)
|
94
|
+
* @param n The length of the given string.
|
95
|
+
* @return The primary index if no error occurred, -1 or -2 otherwise.
|
96
|
+
*/
|
97
|
+
DIVSUFSORT_API
|
98
|
+
saidx_t
|
99
|
+
divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
|
100
|
+
|
101
|
+
/**
|
102
|
+
* Returns the version of the divsufsort library.
|
103
|
+
* @return The version number string.
|
104
|
+
*/
|
105
|
+
DIVSUFSORT_API
|
106
|
+
const char *
|
107
|
+
divsufsort_version(void);
|
108
|
+
|
109
|
+
|
110
|
+
/**
|
111
|
+
* Constructs the burrows-wheeler transformed string of a given string and suffix array.
|
112
|
+
* @param T[0..n-1] The input string.
|
113
|
+
* @param U[0..n-1] The output string. (can be T)
|
114
|
+
* @param SA[0..n-1] The suffix array. (can be NULL)
|
115
|
+
* @param n The length of the given string.
|
116
|
+
* @param idx The output primary index.
|
117
|
+
* @return 0 if no error occurred, -1 or -2 otherwise.
|
118
|
+
*/
|
119
|
+
DIVSUFSORT_API
|
120
|
+
saint_t
|
121
|
+
bw_transform(const sauchar_t *T, sauchar_t *U,
|
122
|
+
saidx_t *SA /* can NULL */,
|
123
|
+
saidx_t n, saidx_t *idx);
|
124
|
+
|
125
|
+
/**
|
126
|
+
* Inverse BW-transforms a given BWTed string.
|
127
|
+
* @param T[0..n-1] The input string.
|
128
|
+
* @param U[0..n-1] The output string. (can be T)
|
129
|
+
* @param A[0..n-1] The temporary array. (can be NULL)
|
130
|
+
* @param n The length of the given string.
|
131
|
+
* @param idx The primary index.
|
132
|
+
* @return 0 if no error occurred, -1 or -2 otherwise.
|
133
|
+
*/
|
134
|
+
DIVSUFSORT_API
|
135
|
+
saint_t
|
136
|
+
inverse_bw_transform(const sauchar_t *T, sauchar_t *U,
|
137
|
+
saidx_t *A /* can NULL */,
|
138
|
+
saidx_t n, saidx_t idx);
|
139
|
+
|
140
|
+
/**
|
141
|
+
* Checks the correctness of a given suffix array.
|
142
|
+
* @param T[0..n-1] The input string.
|
143
|
+
* @param SA[0..n-1] The input suffix array.
|
144
|
+
* @param n The length of the given string.
|
145
|
+
* @param verbose The verbose mode.
|
146
|
+
* @return 0 if no error occurred.
|
147
|
+
*/
|
148
|
+
DIVSUFSORT_API
|
149
|
+
saint_t
|
150
|
+
sufcheck(const sauchar_t *T, const saidx_t *SA, saidx_t n, saint_t verbose);
|
151
|
+
|
152
|
+
/**
|
153
|
+
* Search for the pattern P in the string T.
|
154
|
+
* @param T[0..Tsize-1] The input string.
|
155
|
+
* @param Tsize The length of the given string.
|
156
|
+
* @param P[0..Psize-1] The input pattern string.
|
157
|
+
* @param Psize The length of the given pattern string.
|
158
|
+
* @param SA[0..SAsize-1] The input suffix array.
|
159
|
+
* @param SAsize The length of the given suffix array.
|
160
|
+
* @param left The output index.
|
161
|
+
* @return The count of matches if no error occurred, -1 otherwise.
|
162
|
+
*/
|
163
|
+
DIVSUFSORT_API
|
164
|
+
saidx_t
|
165
|
+
sa_search(const sauchar_t *T, saidx_t Tsize,
|
166
|
+
const sauchar_t *P, saidx_t Psize,
|
167
|
+
const saidx_t *SA, saidx_t SAsize,
|
168
|
+
saidx_t *left);
|
169
|
+
|
170
|
+
/**
|
171
|
+
* Search for the character c in the string T.
|
172
|
+
* @param T[0..Tsize-1] The input string.
|
173
|
+
* @param Tsize The length of the given string.
|
174
|
+
* @param SA[0..SAsize-1] The input suffix array.
|
175
|
+
* @param SAsize The length of the given suffix array.
|
176
|
+
* @param c The input character.
|
177
|
+
* @param left The output index.
|
178
|
+
* @return The count of matches if no error occurred, -1 otherwise.
|
179
|
+
*/
|
180
|
+
DIVSUFSORT_API
|
181
|
+
saidx_t
|
182
|
+
sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
|
183
|
+
const saidx_t *SA, saidx_t SAsize,
|
184
|
+
saint_t c, saidx_t *left);
|
185
|
+
|
186
|
+
|
187
|
+
#ifdef __cplusplus
|
188
|
+
} /* extern "C" */
|
189
|
+
#endif /* __cplusplus */
|
190
|
+
|
191
|
+
#endif /* _DIVSUFSORT_H */
|
data/ext/divsufsort.o
ADDED
Binary file
|
data/ext/divsufsort.so
ADDED
Binary file
|
@@ -0,0 +1,207 @@
|
|
1
|
+
/*
|
2
|
+
* divsufsort_private.h for libdivsufsort
|
3
|
+
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
|
4
|
+
*
|
5
|
+
* Permission is hereby granted, free of charge, to any person
|
6
|
+
* obtaining a copy of this software and associated documentation
|
7
|
+
* files (the "Software"), to deal in the Software without
|
8
|
+
* restriction, including without limitation the rights to use,
|
9
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
* copies of the Software, and to permit persons to whom the
|
11
|
+
* Software is furnished to do so, subject to the following
|
12
|
+
* conditions:
|
13
|
+
*
|
14
|
+
* The above copyright notice and this permission notice shall be
|
15
|
+
* included in all copies or substantial portions of the Software.
|
16
|
+
*
|
17
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
19
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
21
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
22
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
23
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
24
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
25
|
+
*/
|
26
|
+
|
27
|
+
#ifndef _DIVSUFSORT_PRIVATE_H
|
28
|
+
#define _DIVSUFSORT_PRIVATE_H 1
|
29
|
+
|
30
|
+
#ifdef __cplusplus
|
31
|
+
extern "C" {
|
32
|
+
#endif /* __cplusplus */
|
33
|
+
|
34
|
+
#if HAVE_CONFIG_H
|
35
|
+
# include "config.h"
|
36
|
+
#endif
|
37
|
+
#include <assert.h>
|
38
|
+
#include <stdio.h>
|
39
|
+
#if HAVE_STRING_H
|
40
|
+
# include <string.h>
|
41
|
+
#endif
|
42
|
+
#if HAVE_STDLIB_H
|
43
|
+
# include <stdlib.h>
|
44
|
+
#endif
|
45
|
+
#if HAVE_MEMORY_H
|
46
|
+
# include <memory.h>
|
47
|
+
#endif
|
48
|
+
#if HAVE_STDDEF_H
|
49
|
+
# include <stddef.h>
|
50
|
+
#endif
|
51
|
+
#if HAVE_STRINGS_H
|
52
|
+
# include <strings.h>
|
53
|
+
#endif
|
54
|
+
#if HAVE_INTTYPES_H
|
55
|
+
# include <inttypes.h>
|
56
|
+
#else
|
57
|
+
# if HAVE_STDINT_H
|
58
|
+
# include <stdint.h>
|
59
|
+
# endif
|
60
|
+
#endif
|
61
|
+
#if defined(BUILD_DIVSUFSORT64)
|
62
|
+
# include "divsufsort64.h"
|
63
|
+
# ifndef SAIDX_T
|
64
|
+
# define SAIDX_T
|
65
|
+
# define saidx_t saidx64_t
|
66
|
+
# endif /* SAIDX_T */
|
67
|
+
# ifndef PRIdSAIDX_T
|
68
|
+
# define PRIdSAIDX_T PRIdSAIDX64_T
|
69
|
+
# endif /* PRIdSAIDX_T */
|
70
|
+
# define divsufsort divsufsort64
|
71
|
+
# define divbwt divbwt64
|
72
|
+
# define divsufsort_version divsufsort64_version
|
73
|
+
# define bw_transform bw_transform64
|
74
|
+
# define inverse_bw_transform inverse_bw_transform64
|
75
|
+
# define sufcheck sufcheck64
|
76
|
+
# define sa_search sa_search64
|
77
|
+
# define sa_simplesearch sa_simplesearch64
|
78
|
+
# define sssort sssort64
|
79
|
+
# define trsort trsort64
|
80
|
+
#else
|
81
|
+
# include "divsufsort.h"
|
82
|
+
#endif
|
83
|
+
|
84
|
+
|
85
|
+
/*- Constants -*/
|
86
|
+
#if !defined(UINT8_MAX)
|
87
|
+
# define UINT8_MAX (255)
|
88
|
+
#endif /* UINT8_MAX */
|
89
|
+
#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
|
90
|
+
# undef ALPHABET_SIZE
|
91
|
+
#endif
|
92
|
+
#if !defined(ALPHABET_SIZE)
|
93
|
+
# define ALPHABET_SIZE (UINT8_MAX + 1)
|
94
|
+
#endif
|
95
|
+
/* for divsufsort.c */
|
96
|
+
#define BUCKET_A_SIZE (ALPHABET_SIZE)
|
97
|
+
#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
|
98
|
+
/* for sssort.c */
|
99
|
+
#if defined(SS_INSERTIONSORT_THRESHOLD)
|
100
|
+
# if SS_INSERTIONSORT_THRESHOLD < 1
|
101
|
+
# undef SS_INSERTIONSORT_THRESHOLD
|
102
|
+
# define SS_INSERTIONSORT_THRESHOLD (1)
|
103
|
+
# endif
|
104
|
+
#else
|
105
|
+
# define SS_INSERTIONSORT_THRESHOLD (8)
|
106
|
+
#endif
|
107
|
+
#if defined(SS_BLOCKSIZE)
|
108
|
+
# if SS_BLOCKSIZE < 0
|
109
|
+
# undef SS_BLOCKSIZE
|
110
|
+
# define SS_BLOCKSIZE (0)
|
111
|
+
# elif 32768 <= SS_BLOCKSIZE
|
112
|
+
# undef SS_BLOCKSIZE
|
113
|
+
# define SS_BLOCKSIZE (32767)
|
114
|
+
# endif
|
115
|
+
#else
|
116
|
+
# define SS_BLOCKSIZE (1024)
|
117
|
+
#endif
|
118
|
+
/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
|
119
|
+
#if SS_BLOCKSIZE == 0
|
120
|
+
# if defined(BUILD_DIVSUFSORT64)
|
121
|
+
# define SS_MISORT_STACKSIZE (96)
|
122
|
+
# else
|
123
|
+
# define SS_MISORT_STACKSIZE (64)
|
124
|
+
# endif
|
125
|
+
#elif SS_BLOCKSIZE <= 4096
|
126
|
+
# define SS_MISORT_STACKSIZE (16)
|
127
|
+
#else
|
128
|
+
# define SS_MISORT_STACKSIZE (24)
|
129
|
+
#endif
|
130
|
+
#if defined(BUILD_DIVSUFSORT64)
|
131
|
+
# define SS_SMERGE_STACKSIZE (64)
|
132
|
+
#else
|
133
|
+
# define SS_SMERGE_STACKSIZE (32)
|
134
|
+
#endif
|
135
|
+
/* for trsort.c */
|
136
|
+
#define TR_INSERTIONSORT_THRESHOLD (8)
|
137
|
+
#if defined(BUILD_DIVSUFSORT64)
|
138
|
+
# define TR_STACKSIZE (96)
|
139
|
+
#else
|
140
|
+
# define TR_STACKSIZE (64)
|
141
|
+
#endif
|
142
|
+
|
143
|
+
|
144
|
+
/*- Macros -*/
|
145
|
+
#ifndef SWAP
|
146
|
+
# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
|
147
|
+
#endif /* SWAP */
|
148
|
+
#ifndef MIN
|
149
|
+
# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
|
150
|
+
#endif /* MIN */
|
151
|
+
#ifndef MAX
|
152
|
+
# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
|
153
|
+
#endif /* MAX */
|
154
|
+
#define STACK_PUSH(_a, _b, _c, _d)\
|
155
|
+
do {\
|
156
|
+
assert(ssize < STACK_SIZE);\
|
157
|
+
stack[ssize].a = (_a), stack[ssize].b = (_b),\
|
158
|
+
stack[ssize].c = (_c), stack[ssize++].d = (_d);\
|
159
|
+
} while(0)
|
160
|
+
#define STACK_PUSH5(_a, _b, _c, _d, _e)\
|
161
|
+
do {\
|
162
|
+
assert(ssize < STACK_SIZE);\
|
163
|
+
stack[ssize].a = (_a), stack[ssize].b = (_b),\
|
164
|
+
stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
|
165
|
+
} while(0)
|
166
|
+
#define STACK_POP(_a, _b, _c, _d)\
|
167
|
+
do {\
|
168
|
+
assert(0 <= ssize);\
|
169
|
+
if(ssize == 0) { return; }\
|
170
|
+
(_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
|
171
|
+
(_c) = stack[ssize].c, (_d) = stack[ssize].d;\
|
172
|
+
} while(0)
|
173
|
+
#define STACK_POP5(_a, _b, _c, _d, _e)\
|
174
|
+
do {\
|
175
|
+
assert(0 <= ssize);\
|
176
|
+
if(ssize == 0) { return; }\
|
177
|
+
(_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
|
178
|
+
(_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
|
179
|
+
} while(0)
|
180
|
+
/* for divsufsort.c */
|
181
|
+
#define BUCKET_A(_c0) bucket_A[(_c0)]
|
182
|
+
#if ALPHABET_SIZE == 256
|
183
|
+
#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
|
184
|
+
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
|
185
|
+
#else
|
186
|
+
#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
|
187
|
+
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
|
188
|
+
#endif
|
189
|
+
|
190
|
+
|
191
|
+
/*- Private Prototypes -*/
|
192
|
+
/* sssort.c */
|
193
|
+
void
|
194
|
+
sssort(const sauchar_t *Td, const saidx_t *PA,
|
195
|
+
saidx_t *first, saidx_t *last,
|
196
|
+
saidx_t *buf, saidx_t bufsize,
|
197
|
+
saidx_t depth, saidx_t n, saint_t lastsuffix);
|
198
|
+
/* trsort.c */
|
199
|
+
void
|
200
|
+
trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth);
|
201
|
+
|
202
|
+
|
203
|
+
#ifdef __cplusplus
|
204
|
+
} /* extern "C" */
|
205
|
+
#endif /* __cplusplus */
|
206
|
+
|
207
|
+
#endif /* _DIVSUFSORT_PRIVATE_H */
|
@@ -0,0 +1,227 @@
|
|
1
|
+
#ifdef DIVSUFSORT_EXPORTS
|
2
|
+
#define DLLEXPORT __declspec(dllexport)
|
3
|
+
#else
|
4
|
+
#define DLLEXPORT
|
5
|
+
#endif
|
6
|
+
|
7
|
+
#define VERSION "0.1.0"
|
8
|
+
|
9
|
+
#include "divsufsort.h"
|
10
|
+
#include "ruby.h"
|
11
|
+
#include "rubysig.h"
|
12
|
+
|
13
|
+
#ifndef RSTRING_PTR
|
14
|
+
#define RSTRING_PTR(s) (RSTRING(s)->ptr)
|
15
|
+
#endif
|
16
|
+
#ifndef RSTRING_LEN
|
17
|
+
#define RSTRING_LEN(s) (RSTRING(s)->len)
|
18
|
+
#endif
|
19
|
+
|
20
|
+
/*
 * Appends the low 32 bits of n to the Ruby string o as four bytes,
 * least-significant byte first.
 *
 * n is evaluated exactly once and converted to an unsigned type before
 * shifting, so negative values never take part in a signed right shift.
 * The locals carry a macro-specific prefix so they cannot capture an
 * identifier used in the caller's arguments.
 */
#define RB_STR_CAT_INT(o, n) do { \
  const uint32_t rb_str_cat_int_v = (uint32_t) (n); \
  unsigned char rb_str_cat_int_b[4]; \
  rb_str_cat_int_b[0] = (unsigned char) ((rb_str_cat_int_v >>  0) & 0xff); \
  rb_str_cat_int_b[1] = (unsigned char) ((rb_str_cat_int_v >>  8) & 0xff); \
  rb_str_cat_int_b[2] = (unsigned char) ((rb_str_cat_int_v >> 16) & 0xff); \
  rb_str_cat_int_b[3] = (unsigned char) ((rb_str_cat_int_v >> 24) & 0xff); \
  rb_str_cat((o), (const char *) rb_str_cat_int_b, 4); \
} while(0)
|
28
|
+
|
29
|
+
/*
 * Reads a 32-bit little-endian integer from the byte pointer p into x,
 * then advances p by four bytes and decreases the remaining length n by
 * four. The caller must make sure at least four bytes are available.
 *
 * The bytes are assembled in an unsigned 32-bit value: shifting a promoted
 * int left by 24 would overflow into the sign bit for bytes >= 0x80. The
 * result is converted to int32_t so values with the top bit set still read
 * back as negative numbers.
 */
#define PTR_READ_INT(p, n, x) do { \
  const unsigned char *ptr_read_int_s = (const unsigned char *) (p); \
  const uint32_t ptr_read_int_v = \
      ((uint32_t) ptr_read_int_s[0] <<  0) | \
      ((uint32_t) ptr_read_int_s[1] <<  8) | \
      ((uint32_t) ptr_read_int_s[2] << 16) | \
      ((uint32_t) ptr_read_int_s[3] << 24); \
  (x) = (int32_t) ptr_read_int_v; \
  (p) += 4; \
  (n) -= 4; \
} while(0)
|
39
|
+
|
40
|
+
static VALUE Divsufsort;
|
41
|
+
|
42
|
+
/* */
|
43
|
+
/*
 * Divsufsort.divsufsort(src) -> Array
 *
 * Builds the suffix array of the String src and returns it as an Array of
 * Integers (one entry per byte of src).
 *
 * Raises RuntimeError when the input is too large for the 32-bit index
 * type, when divsufsort() reports a failure, or when the computed array
 * does not pass sufcheck().
 */
static VALUE divsufsort_divsufsort(VALUE self, VALUE src) {
  VALUE dst;
  sauchar_t *T;
  saidx_t *SA;
  long n, i;
  saint_t err;

  Check_Type(src, T_STRING);
  n = RSTRING_LEN(src);

  /* The second test keeps n * sizeof(saidx_t) from wrapping where size_t
     is only 32 bits wide. */
  if(n >= 0x7fffffff || (size_t) n > ((size_t) -1) / sizeof(saidx_t)) {
    rb_raise(rb_eRuntimeError, "Input data is too big.");
  }

  /* Create the Ruby object first: if this raises, no raw buffer exists yet
     that could be leaked. */
  dst = rb_ary_new2(n);

  T = (sauchar_t *) xmalloc((size_t) n * sizeof(sauchar_t));
  SA = (saidx_t *) xmalloc((size_t) n * sizeof(saidx_t));

  /* Work on a private copy of the bytes.
     NOTE(review): presumably because other Ruby threads may run between
     TRAP_BEG and TRAP_END and could modify src -- confirm. */
  memcpy(T, RSTRING_PTR(src), (size_t) n * sizeof(sauchar_t));

  TRAP_BEG;
  err = divsufsort(T, SA, (saidx_t) n);
  TRAP_END;

  if(err != 0) {
    xfree(SA);
    xfree(T);
    /* Same mapping as the other bindings in this file: -1 signals bad
       arguments, anything else an allocation failure. */
    rb_raise(rb_eRuntimeError, "%s",
             (err == -1) ? "Invalid arguments." : "Cannot allocate memory.");
  }

  TRAP_BEG;
  err = sufcheck(T, SA, (saidx_t) n, 0);
  TRAP_END;

  if(err != 0) {
    xfree(SA);
    xfree(T);
    rb_raise(rb_eRuntimeError, "Wrong suffix array.");
  }

  for(i = 0; i < n; i++) {
    rb_ary_push(dst, LONG2NUM((long) SA[i]));
  }

  xfree(SA);
  xfree(T);

  return dst;
}
|
98
|
+
|
99
|
+
/* */
|
100
|
+
/*
 * Divsufsort.divbwt(src, blocksize = 32) -> String
 *
 * Burrows-Wheeler transforms the String src block by block. blocksize is
 * given in MiB; 0 selects the largest block this binding uses. No block is
 * ever longer than 512 MiB or longer than src itself.
 *
 * The returned String starts with the block length actually used (4 bytes,
 * least-significant byte first) and continues, for every block, with its
 * primary index (4 bytes, same byte order) followed by the transformed
 * bytes of that block.
 *
 * Raises ArgumentError for a negative blocksize and RuntimeError when
 * divbwt() reports a failure.
 */
static VALUE divsufsort_divbwt(int argc, VALUE *argv, VALUE self) {
  /* Upper bound for one block: 512 MiB. */
  const long max_block_len = 0x20000000L;
  VALUE src, v_blocksize, transformed;
  sauchar_t *T;
  saidx_t *SA;
  const char *p;
  long n, block_len;
  int blocksize_mb = 32;
  /* Initialised so the failure check after the loop is well defined even
     when src is empty and the loop body never runs. */
  saidx_t pidx = 0;

  rb_scan_args(argc, argv, "11", &src, &v_blocksize);
  Check_Type(src, T_STRING);

  if (!NIL_P(v_blocksize)) {
    blocksize_mb = NUM2INT(v_blocksize);
  }

  if (blocksize_mb < 0) {
    rb_raise(rb_eArgError, "blocksize must not be negative.");
  }

  /* Requests above 512 MiB are equivalent to 512 MiB because of the block
     limit; clamping before scaling keeps the shift from overflowing. */
  if (blocksize_mb > 512) {
    blocksize_mb = 512;
  }

  block_len = (long) blocksize_mb << 20;
  p = RSTRING_PTR(src);
  n = RSTRING_LEN(src);

  /* Limit the block length only. n itself stays untouched, so inputs
     larger than one block are processed completely. */
  if (block_len == 0 || block_len > max_block_len) {
    block_len = max_block_len;
  }

  if (n < block_len) {
    block_len = n;
  }

  /* Ruby object first, raw buffers second: a failure while creating the
     String cannot leak the buffers. */
  transformed = rb_str_new("", 0);
  T = (sauchar_t *) xmalloc((size_t) block_len * sizeof(sauchar_t));
  SA = (saidx_t *) xmalloc((size_t) block_len * sizeof(saidx_t));

  RB_STR_CAT_INT(transformed, (saint_t) block_len);

  while (n > 0) {
    const long m = (n < block_len) ? n : block_len;

    memcpy(T, p, (size_t) m);
    p += m;
    n -= m;

    TRAP_BEG;
    pidx = divbwt(T, T, SA, (saidx_t) m);
    TRAP_END;

    if (pidx < 0) {
      break;
    }

    RB_STR_CAT_INT(transformed, pidx);
    rb_str_cat(transformed, (const char *) T, m);
  }

  xfree(SA);
  xfree(T);

  if (pidx < 0) {
    rb_raise(rb_eRuntimeError, "bw_transform: %s.", (pidx == -1) ? "Invalid arguments" : "Cannot allocate memory");
  }

  return transformed;
}
|
162
|
+
|
163
|
+
/* */
|
164
|
+
/*
 * Divsufsort.inverse_bw_transform(transformed) -> String
 *
 * Reverses the transformation produced by Divsufsort.divbwt. The input
 * starts with the block length (4 bytes, least-significant byte first);
 * every block that follows consists of its primary index (4 bytes, same
 * byte order) and up to "block length" transformed bytes.
 *
 * Raises RuntimeError when the data is malformed or when
 * inverse_bw_transform() reports a failure.
 */
static VALUE divsufsort_inverse_bw_transform(VALUE self, VALUE transformed) {
  VALUE dst;
  sauchar_t *T;
  saidx_t *A;
  const char *p;
  long n, buf_len;
  saint_t blocksize;
  int err = 0;

  Check_Type(transformed, T_STRING);
  p = RSTRING_PTR(transformed);
  n = RSTRING_LEN(transformed);

  if (n < 4) {
    rb_raise(rb_eRuntimeError, "reverseBWT: Invalid data.\n");
  }

  PTR_READ_INT(p, n, blocksize);

  /* The header comes from the caller's data and cannot be trusted: a
     negative length would turn into an enormous allocation and copy size. */
  if (blocksize < 0) {
    rb_raise(rb_eRuntimeError, "reverseBWT: Invalid data.\n");
  }

  /* No block can hold more bytes than remain in the input, so the working
     buffers never need to exceed that, whatever the header claims. */
  buf_len = (n < blocksize) ? n : blocksize;

  /* Guard the index buffer size against wrapping a narrow size_t. */
  if ((size_t) buf_len > ((size_t) -1) / sizeof(saidx_t)) {
    rb_raise(rb_eRuntimeError, "reverseBWT: %s.\n", "Cannot allocate memory");
  }

  /* Ruby object first, raw buffers second: a failure while creating the
     String cannot leak the buffers. */
  dst = rb_str_new("", 0);
  T = (sauchar_t *) xmalloc((size_t) buf_len * sizeof(sauchar_t));
  A = (saidx_t *) xmalloc((size_t) buf_len * sizeof(saidx_t));

  while (n > 0) {
    long m;
    saidx_t pidx;

    /* Every block must start with a complete primary index. */
    if (n < 4) {
      err = -1;
      break;
    }

    PTR_READ_INT(p, n, pidx);
    m = (n < blocksize) ? n : blocksize;
    memcpy(T, p, (size_t) m);
    p += m;
    n -= m;

    TRAP_BEG;
    err = inverse_bw_transform(T, T, A, (saidx_t) m, pidx);
    TRAP_END;

    if (err != 0) {
      break;
    }

    rb_str_cat(dst, (const char *) T, m);
  }

  xfree(A);
  xfree(T);

  if (err != 0) {
    rb_raise(rb_eRuntimeError, "reverseBWT: %s.\n", (err == -1) ? "Invalid data" : "Cannot allocate memory");
  }

  return dst;
}
|
220
|
+
|
221
|
+
/*
 * Extension entry point. Defines the Divsufsort module, its VERSION
 * constant and the three module functions:
 *   Divsufsort.divsufsort(src)
 *   Divsufsort.divbwt(src [, blocksize])
 *   Divsufsort.inverse_bw_transform(transformed)
 */
void DLLEXPORT Init_divsufsort(void) {
  Divsufsort = rb_define_module("Divsufsort");
  rb_define_const(Divsufsort, "VERSION", rb_str_new2(VERSION));

  /* Arity 1: exactly one argument; arity -1: C-style argc/argv, which
     divbwt needs for its optional blocksize. */
  rb_define_module_function(Divsufsort, "divsufsort", divsufsort_divsufsort, 1);
  rb_define_module_function(Divsufsort, "divbwt", divsufsort_divbwt, -1);
  rb_define_module_function(Divsufsort, "inverse_bw_transform", divsufsort_inverse_bw_transform, 1);
}
|