divsufsort 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.txt +77 -0
- data/ext/Makefile +149 -0
- data/ext/divsufsort.c +398 -0
- data/ext/divsufsort.h +191 -0
- data/ext/divsufsort.o +0 -0
- data/ext/divsufsort.so +0 -0
- data/ext/divsufsort_private.h +207 -0
- data/ext/divsufsort_ruby.c +227 -0
- data/ext/divsufsort_ruby.o +0 -0
- data/ext/extconf.rb +18 -0
- data/ext/lfs.h +56 -0
- data/ext/mkmf.log +266 -0
- data/ext/sssort.c +815 -0
- data/ext/sssort.o +0 -0
- data/ext/trsort.c +586 -0
- data/ext/trsort.o +0 -0
- data/ext/utils.c +381 -0
- data/ext/utils.o +0 -0
- data/libdivsufsort/COPYING +27 -0
- data/libdivsufsort/divsufsort.c +398 -0
- data/libdivsufsort/divsufsort.h +191 -0
- data/libdivsufsort/divsufsort_private.h +207 -0
- data/libdivsufsort/lfs.h +56 -0
- data/libdivsufsort/sssort.c +815 -0
- data/libdivsufsort/trsort.c +586 -0
- data/libdivsufsort/utils.c +381 -0
- metadata +80 -0
data/ext/divsufsort.h
ADDED
@@ -0,0 +1,191 @@
|
|
1
|
+
/*
|
2
|
+
* divsufsort.h for libdivsufsort
|
3
|
+
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
|
4
|
+
*
|
5
|
+
* Permission is hereby granted, free of charge, to any person
|
6
|
+
* obtaining a copy of this software and associated documentation
|
7
|
+
* files (the "Software"), to deal in the Software without
|
8
|
+
* restriction, including without limitation the rights to use,
|
9
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
* copies of the Software, and to permit persons to whom the
|
11
|
+
* Software is furnished to do so, subject to the following
|
12
|
+
* conditions:
|
13
|
+
*
|
14
|
+
* The above copyright notice and this permission notice shall be
|
15
|
+
* included in all copies or substantial portions of the Software.
|
16
|
+
*
|
17
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
19
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
21
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
22
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
23
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
24
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
25
|
+
*/
|
26
|
+
|
27
|
+
#ifndef _DIVSUFSORT_H
|
28
|
+
#define _DIVSUFSORT_H 1
|
29
|
+
|
30
|
+
#ifdef __cplusplus
|
31
|
+
extern "C" {
|
32
|
+
#endif /* __cplusplus */
|
33
|
+
|
34
|
+
// modified by SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
|
35
|
+
#define PROJECT_VERSION_FULL "2.0.0"
|
36
|
+
|
37
|
+
#ifdef _WIN32
|
38
|
+
#include <stdlib.h>
|
39
|
+
#include <stdint.h>
|
40
|
+
#define INLINE _inline
|
41
|
+
#define PRId32 "ld"
|
42
|
+
#else
|
43
|
+
#define INLINE inline
|
44
|
+
#include <inttypes.h>
|
45
|
+
#endif
|
46
|
+
|
47
|
+
#ifndef DIVSUFSORT_API
|
48
|
+
# ifdef DIVSUFSORT_BUILD_DLL
|
49
|
+
# define DIVSUFSORT_API
|
50
|
+
# else
|
51
|
+
# define DIVSUFSORT_API
|
52
|
+
# endif
|
53
|
+
#endif
|
54
|
+
|
55
|
+
/*- Datatypes -*/
|
56
|
+
#ifndef SAUCHAR_T
|
57
|
+
#define SAUCHAR_T
|
58
|
+
typedef uint8_t sauchar_t;
|
59
|
+
#endif /* SAUCHAR_T */
|
60
|
+
#ifndef SAINT_T
|
61
|
+
#define SAINT_T
|
62
|
+
typedef int32_t saint_t;
|
63
|
+
#endif /* SAINT_T */
|
64
|
+
#ifndef SAIDX_T
|
65
|
+
#define SAIDX_T
|
66
|
+
typedef int32_t saidx_t;
|
67
|
+
#endif /* SAIDX_T */
|
68
|
+
#ifndef PRIdSAINT_T
|
69
|
+
#define PRIdSAINT_T PRId32
|
70
|
+
#endif /* PRIdSAINT_T */
|
71
|
+
#ifndef PRIdSAIDX_T
|
72
|
+
#define PRIdSAIDX_T PRId32
|
73
|
+
#endif /* PRIdSAIDX_T */
|
74
|
+
|
75
|
+
|
76
|
+
/*- Prototypes -*/
|
77
|
+
|
78
|
+
/**
|
79
|
+
* Constructs the suffix array of a given string.
|
80
|
+
* @param T[0..n-1] The input string.
|
81
|
+
* @param SA[0..n-1] The output array of suffixes.
|
82
|
+
* @param n The length of the given string.
|
83
|
+
* @return 0 if no error occurred, -1 or -2 otherwise.
|
84
|
+
*/
|
85
|
+
DIVSUFSORT_API
|
86
|
+
saint_t
|
87
|
+
divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n);
|
88
|
+
|
89
|
+
/**
|
90
|
+
* Constructs the Burrows-Wheeler transformed string of a given string.
|
91
|
+
* @param T[0..n-1] The input string.
|
92
|
+
* @param U[0..n-1] The output string. (can be T)
|
93
|
+
* @param A[0..n-1] The temporary array. (can be NULL)
|
94
|
+
* @param n The length of the given string.
|
95
|
+
* @return The primary index if no error occurred, -1 or -2 otherwise.
|
96
|
+
*/
|
97
|
+
DIVSUFSORT_API
|
98
|
+
saidx_t
|
99
|
+
divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
|
100
|
+
|
101
|
+
/**
|
102
|
+
* Returns the version of the divsufsort library.
|
103
|
+
* @return The version number string.
|
104
|
+
*/
|
105
|
+
DIVSUFSORT_API
|
106
|
+
const char *
|
107
|
+
divsufsort_version(void);
|
108
|
+
|
109
|
+
|
110
|
+
/**
|
111
|
+
* Constructs the Burrows-Wheeler transformed string of a given string and suffix array.
|
112
|
+
* @param T[0..n-1] The input string.
|
113
|
+
* @param U[0..n-1] The output string. (can be T)
|
114
|
+
* @param SA[0..n-1] The suffix array. (can be NULL)
|
115
|
+
* @param n The length of the given string.
|
116
|
+
* @param idx The output primary index.
|
117
|
+
* @return 0 if no error occurred, -1 or -2 otherwise.
|
118
|
+
*/
|
119
|
+
DIVSUFSORT_API
|
120
|
+
saint_t
|
121
|
+
bw_transform(const sauchar_t *T, sauchar_t *U,
|
122
|
+
saidx_t *SA /* can NULL */,
|
123
|
+
saidx_t n, saidx_t *idx);
|
124
|
+
|
125
|
+
/**
|
126
|
+
* Inverse BW-transforms a given BWTed string.
|
127
|
+
* @param T[0..n-1] The input string.
|
128
|
+
* @param U[0..n-1] The output string. (can be T)
|
129
|
+
* @param A[0..n-1] The temporary array. (can be NULL)
|
130
|
+
* @param n The length of the given string.
|
131
|
+
* @param idx The primary index.
|
132
|
+
* @return 0 if no error occurred, -1 or -2 otherwise.
|
133
|
+
*/
|
134
|
+
DIVSUFSORT_API
|
135
|
+
saint_t
|
136
|
+
inverse_bw_transform(const sauchar_t *T, sauchar_t *U,
|
137
|
+
saidx_t *A /* can NULL */,
|
138
|
+
saidx_t n, saidx_t idx);
|
139
|
+
|
140
|
+
/**
|
141
|
+
* Checks the correctness of a given suffix array.
|
142
|
+
* @param T[0..n-1] The input string.
|
143
|
+
* @param SA[0..n-1] The input suffix array.
|
144
|
+
* @param n The length of the given string.
|
145
|
+
* @param verbose The verbose mode.
|
146
|
+
* @return 0 if no error occurred.
|
147
|
+
*/
|
148
|
+
DIVSUFSORT_API
|
149
|
+
saint_t
|
150
|
+
sufcheck(const sauchar_t *T, const saidx_t *SA, saidx_t n, saint_t verbose);
|
151
|
+
|
152
|
+
/**
|
153
|
+
* Search for the pattern P in the string T.
|
154
|
+
* @param T[0..Tsize-1] The input string.
|
155
|
+
* @param Tsize The length of the given string.
|
156
|
+
* @param P[0..Psize-1] The input pattern string.
|
157
|
+
* @param Psize The length of the given pattern string.
|
158
|
+
* @param SA[0..SAsize-1] The input suffix array.
|
159
|
+
* @param SAsize The length of the given suffix array.
|
160
|
+
* @param left The output index.
|
161
|
+
* @return The count of matches if no error occurred, -1 otherwise.
|
162
|
+
*/
|
163
|
+
DIVSUFSORT_API
|
164
|
+
saidx_t
|
165
|
+
sa_search(const sauchar_t *T, saidx_t Tsize,
|
166
|
+
const sauchar_t *P, saidx_t Psize,
|
167
|
+
const saidx_t *SA, saidx_t SAsize,
|
168
|
+
saidx_t *left);
|
169
|
+
|
170
|
+
/**
|
171
|
+
* Search for the character c in the string T.
|
172
|
+
* @param T[0..Tsize-1] The input string.
|
173
|
+
* @param Tsize The length of the given string.
|
174
|
+
* @param SA[0..SAsize-1] The input suffix array.
|
175
|
+
* @param SAsize The length of the given suffix array.
|
176
|
+
* @param c The input character.
|
177
|
+
* @param left The output index.
|
178
|
+
* @return The count of matches if no error occurred, -1 otherwise.
|
179
|
+
*/
|
180
|
+
DIVSUFSORT_API
|
181
|
+
saidx_t
|
182
|
+
sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
|
183
|
+
const saidx_t *SA, saidx_t SAsize,
|
184
|
+
saint_t c, saidx_t *left);
|
185
|
+
|
186
|
+
|
187
|
+
#ifdef __cplusplus
|
188
|
+
} /* extern "C" */
|
189
|
+
#endif /* __cplusplus */
|
190
|
+
|
191
|
+
#endif /* _DIVSUFSORT_H */
|
data/ext/divsufsort.o
ADDED
Binary file
|
data/ext/divsufsort.so
ADDED
Binary file
|
@@ -0,0 +1,207 @@
|
|
1
|
+
/*
|
2
|
+
* divsufsort_private.h for libdivsufsort
|
3
|
+
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
|
4
|
+
*
|
5
|
+
* Permission is hereby granted, free of charge, to any person
|
6
|
+
* obtaining a copy of this software and associated documentation
|
7
|
+
* files (the "Software"), to deal in the Software without
|
8
|
+
* restriction, including without limitation the rights to use,
|
9
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
* copies of the Software, and to permit persons to whom the
|
11
|
+
* Software is furnished to do so, subject to the following
|
12
|
+
* conditions:
|
13
|
+
*
|
14
|
+
* The above copyright notice and this permission notice shall be
|
15
|
+
* included in all copies or substantial portions of the Software.
|
16
|
+
*
|
17
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
19
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
21
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
22
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
23
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
24
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
25
|
+
*/
|
26
|
+
|
27
|
+
#ifndef _DIVSUFSORT_PRIVATE_H
|
28
|
+
#define _DIVSUFSORT_PRIVATE_H 1
|
29
|
+
|
30
|
+
#ifdef __cplusplus
|
31
|
+
extern "C" {
|
32
|
+
#endif /* __cplusplus */
|
33
|
+
|
34
|
+
#if HAVE_CONFIG_H
|
35
|
+
# include "config.h"
|
36
|
+
#endif
|
37
|
+
#include <assert.h>
|
38
|
+
#include <stdio.h>
|
39
|
+
#if HAVE_STRING_H
|
40
|
+
# include <string.h>
|
41
|
+
#endif
|
42
|
+
#if HAVE_STDLIB_H
|
43
|
+
# include <stdlib.h>
|
44
|
+
#endif
|
45
|
+
#if HAVE_MEMORY_H
|
46
|
+
# include <memory.h>
|
47
|
+
#endif
|
48
|
+
#if HAVE_STDDEF_H
|
49
|
+
# include <stddef.h>
|
50
|
+
#endif
|
51
|
+
#if HAVE_STRINGS_H
|
52
|
+
# include <strings.h>
|
53
|
+
#endif
|
54
|
+
#if HAVE_INTTYPES_H
|
55
|
+
# include <inttypes.h>
|
56
|
+
#else
|
57
|
+
# if HAVE_STDINT_H
|
58
|
+
# include <stdint.h>
|
59
|
+
# endif
|
60
|
+
#endif
|
61
|
+
#if defined(BUILD_DIVSUFSORT64)
|
62
|
+
# include "divsufsort64.h"
|
63
|
+
# ifndef SAIDX_T
|
64
|
+
# define SAIDX_T
|
65
|
+
# define saidx_t saidx64_t
|
66
|
+
# endif /* SAIDX_T */
|
67
|
+
# ifndef PRIdSAIDX_T
|
68
|
+
# define PRIdSAIDX_T PRIdSAIDX64_T
|
69
|
+
# endif /* PRIdSAIDX_T */
|
70
|
+
# define divsufsort divsufsort64
|
71
|
+
# define divbwt divbwt64
|
72
|
+
# define divsufsort_version divsufsort64_version
|
73
|
+
# define bw_transform bw_transform64
|
74
|
+
# define inverse_bw_transform inverse_bw_transform64
|
75
|
+
# define sufcheck sufcheck64
|
76
|
+
# define sa_search sa_search64
|
77
|
+
# define sa_simplesearch sa_simplesearch64
|
78
|
+
# define sssort sssort64
|
79
|
+
# define trsort trsort64
|
80
|
+
#else
|
81
|
+
# include "divsufsort.h"
|
82
|
+
#endif
|
83
|
+
|
84
|
+
|
85
|
+
/*- Constants -*/
|
86
|
+
#if !defined(UINT8_MAX)
|
87
|
+
# define UINT8_MAX (255)
|
88
|
+
#endif /* UINT8_MAX */
|
89
|
+
#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
|
90
|
+
# undef ALPHABET_SIZE
|
91
|
+
#endif
|
92
|
+
#if !defined(ALPHABET_SIZE)
|
93
|
+
# define ALPHABET_SIZE (UINT8_MAX + 1)
|
94
|
+
#endif
|
95
|
+
/* for divsufsort.c */
|
96
|
+
#define BUCKET_A_SIZE (ALPHABET_SIZE)
|
97
|
+
#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
|
98
|
+
/* for sssort.c */
|
99
|
+
#if defined(SS_INSERTIONSORT_THRESHOLD)
|
100
|
+
# if SS_INSERTIONSORT_THRESHOLD < 1
|
101
|
+
# undef SS_INSERTIONSORT_THRESHOLD
|
102
|
+
# define SS_INSERTIONSORT_THRESHOLD (1)
|
103
|
+
# endif
|
104
|
+
#else
|
105
|
+
# define SS_INSERTIONSORT_THRESHOLD (8)
|
106
|
+
#endif
|
107
|
+
#if defined(SS_BLOCKSIZE)
|
108
|
+
# if SS_BLOCKSIZE < 0
|
109
|
+
# undef SS_BLOCKSIZE
|
110
|
+
# define SS_BLOCKSIZE (0)
|
111
|
+
# elif 32768 <= SS_BLOCKSIZE
|
112
|
+
# undef SS_BLOCKSIZE
|
113
|
+
# define SS_BLOCKSIZE (32767)
|
114
|
+
# endif
|
115
|
+
#else
|
116
|
+
# define SS_BLOCKSIZE (1024)
|
117
|
+
#endif
|
118
|
+
/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
|
119
|
+
#if SS_BLOCKSIZE == 0
|
120
|
+
# if defined(BUILD_DIVSUFSORT64)
|
121
|
+
# define SS_MISORT_STACKSIZE (96)
|
122
|
+
# else
|
123
|
+
# define SS_MISORT_STACKSIZE (64)
|
124
|
+
# endif
|
125
|
+
#elif SS_BLOCKSIZE <= 4096
|
126
|
+
# define SS_MISORT_STACKSIZE (16)
|
127
|
+
#else
|
128
|
+
# define SS_MISORT_STACKSIZE (24)
|
129
|
+
#endif
|
130
|
+
#if defined(BUILD_DIVSUFSORT64)
|
131
|
+
# define SS_SMERGE_STACKSIZE (64)
|
132
|
+
#else
|
133
|
+
# define SS_SMERGE_STACKSIZE (32)
|
134
|
+
#endif
|
135
|
+
/* for trsort.c */
|
136
|
+
#define TR_INSERTIONSORT_THRESHOLD (8)
|
137
|
+
#if defined(BUILD_DIVSUFSORT64)
|
138
|
+
# define TR_STACKSIZE (96)
|
139
|
+
#else
|
140
|
+
# define TR_STACKSIZE (64)
|
141
|
+
#endif
|
142
|
+
|
143
|
+
|
144
|
+
/*- Macros -*/
|
145
|
+
#ifndef SWAP
|
146
|
+
# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
|
147
|
+
#endif /* SWAP */
|
148
|
+
#ifndef MIN
|
149
|
+
# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
|
150
|
+
#endif /* MIN */
|
151
|
+
#ifndef MAX
|
152
|
+
# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
|
153
|
+
#endif /* MAX */
|
154
|
+
#define STACK_PUSH(_a, _b, _c, _d)\
|
155
|
+
do {\
|
156
|
+
assert(ssize < STACK_SIZE);\
|
157
|
+
stack[ssize].a = (_a), stack[ssize].b = (_b),\
|
158
|
+
stack[ssize].c = (_c), stack[ssize++].d = (_d);\
|
159
|
+
} while(0)
|
160
|
+
#define STACK_PUSH5(_a, _b, _c, _d, _e)\
|
161
|
+
do {\
|
162
|
+
assert(ssize < STACK_SIZE);\
|
163
|
+
stack[ssize].a = (_a), stack[ssize].b = (_b),\
|
164
|
+
stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
|
165
|
+
} while(0)
|
166
|
+
#define STACK_POP(_a, _b, _c, _d)\
|
167
|
+
do {\
|
168
|
+
assert(0 <= ssize);\
|
169
|
+
if(ssize == 0) { return; }\
|
170
|
+
(_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
|
171
|
+
(_c) = stack[ssize].c, (_d) = stack[ssize].d;\
|
172
|
+
} while(0)
|
173
|
+
#define STACK_POP5(_a, _b, _c, _d, _e)\
|
174
|
+
do {\
|
175
|
+
assert(0 <= ssize);\
|
176
|
+
if(ssize == 0) { return; }\
|
177
|
+
(_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
|
178
|
+
(_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
|
179
|
+
} while(0)
|
180
|
+
/* for divsufsort.c */
|
181
|
+
#define BUCKET_A(_c0) bucket_A[(_c0)]
|
182
|
+
#if ALPHABET_SIZE == 256
|
183
|
+
#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
|
184
|
+
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
|
185
|
+
#else
|
186
|
+
#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
|
187
|
+
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
|
188
|
+
#endif
|
189
|
+
|
190
|
+
|
191
|
+
/*- Private Prototypes -*/
|
192
|
+
/* sssort.c */
|
193
|
+
void
|
194
|
+
sssort(const sauchar_t *Td, const saidx_t *PA,
|
195
|
+
saidx_t *first, saidx_t *last,
|
196
|
+
saidx_t *buf, saidx_t bufsize,
|
197
|
+
saidx_t depth, saidx_t n, saint_t lastsuffix);
|
198
|
+
/* trsort.c */
|
199
|
+
void
|
200
|
+
trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth);
|
201
|
+
|
202
|
+
|
203
|
+
#ifdef __cplusplus
|
204
|
+
} /* extern "C" */
|
205
|
+
#endif /* __cplusplus */
|
206
|
+
|
207
|
+
#endif /* _DIVSUFSORT_PRIVATE_H */
|
@@ -0,0 +1,227 @@
|
|
1
|
+
#ifdef DIVSUFSORT_EXPORTS
|
2
|
+
#define DLLEXPORT __declspec(dllexport)
|
3
|
+
#else
|
4
|
+
#define DLLEXPORT
|
5
|
+
#endif
|
6
|
+
|
7
|
+
#define VERSION "0.1.0"
|
8
|
+
|
9
|
+
#include "divsufsort.h"
|
10
|
+
#include "ruby.h"
|
11
|
+
#include "rubysig.h"
|
12
|
+
|
13
|
+
#ifndef RSTRING_PTR
|
14
|
+
#define RSTRING_PTR(s) (RSTRING(s)->ptr)
|
15
|
+
#endif
|
16
|
+
#ifndef RSTRING_LEN
|
17
|
+
#define RSTRING_LEN(s) (RSTRING(s)->len)
|
18
|
+
#endif
|
19
|
+
|
20
|
+
/*
 * Appends the low 32 bits of n to the Ruby string o as four bytes, least
 * significant byte first.
 *
 * n is converted to uint32_t exactly once, so an argument with side effects
 * is evaluated a single time and negative values are serialized as their
 * two's-complement bit pattern. The temporaries carry an rsci_ prefix so
 * that they cannot capture a caller variable used inside o or n.
 */
#define RB_STR_CAT_INT(o, n) do { \
  const uint32_t rsci_value = (uint32_t) (n); \
  unsigned char rsci_bytes[4]; \
  rsci_bytes[0] = (unsigned char) ((rsci_value >>  0) & 0xffu); \
  rsci_bytes[1] = (unsigned char) ((rsci_value >>  8) & 0xffu); \
  rsci_bytes[2] = (unsigned char) ((rsci_value >> 16) & 0xffu); \
  rsci_bytes[3] = (unsigned char) ((rsci_value >> 24) & 0xffu); \
  /* rb_str_cat takes a const char *, so convert the byte buffer explicitly. */ \
  rb_str_cat((o), (const char *) rsci_bytes, 4); \
} while(0)
|
28
|
+
|
29
|
+
/*
 * Reads a 32-bit integer stored least significant byte first from the byte
 * pointer p into x, then advances p by four bytes and decreases the
 * remaining-length counter n by four.
 *
 * The caller must guarantee that at least four bytes are readable at p.
 * The value is assembled in uint32_t: shifting a promoted int left by 24
 * is undefined behaviour as soon as the top byte is 0x80 or larger.
 */
#define PTR_READ_INT(p, n, x) do { \
  const unsigned char *pri_src = (const unsigned char *) (p); \
  const uint32_t pri_value = ((uint32_t) pri_src[0] <<  0) \
                           | ((uint32_t) pri_src[1] <<  8) \
                           | ((uint32_t) pri_src[2] << 16) \
                           | ((uint32_t) pri_src[3] << 24); \
  (x) = (int32_t) pri_value; \
  (p) += 4; \
  (n) -= 4; \
} while(0)
|
39
|
+
|
40
|
+
static VALUE Divsufsort;
|
41
|
+
|
42
|
+
/* */
|
43
|
+
static VALUE divsufsort_divsufsort(VALUE self, VALUE src) {
|
44
|
+
VALUE dst;
|
45
|
+
sauchar_t *T;
|
46
|
+
saidx_t *SA;
|
47
|
+
char *p;
|
48
|
+
long n, i;
|
49
|
+
saint_t err;
|
50
|
+
|
51
|
+
Check_Type(src, T_STRING);
|
52
|
+
p = RSTRING_PTR(src);
|
53
|
+
n = RSTRING_LEN(src);
|
54
|
+
|
55
|
+
if(n >= 0x7fffffff) {
|
56
|
+
rb_raise(rb_eRuntimeError, "Input data is too big.");
|
57
|
+
}
|
58
|
+
|
59
|
+
T = (sauchar_t *) xmalloc((size_t) n * sizeof(sauchar_t));
|
60
|
+
SA = (saidx_t *) xmalloc((size_t) n * sizeof(saidx_t));
|
61
|
+
dst = rb_ary_new();
|
62
|
+
memcpy(T, p, (size_t) n * sizeof(sauchar_t));
|
63
|
+
|
64
|
+
TRAP_BEG;
|
65
|
+
err = divsufsort(T, SA, (saidx_t) n);
|
66
|
+
TRAP_END;
|
67
|
+
|
68
|
+
if(err != 0) {
|
69
|
+
xfree(SA);
|
70
|
+
xfree(T);
|
71
|
+
rb_raise(rb_eRuntimeError, "Cannot allocate memory.");
|
72
|
+
}
|
73
|
+
|
74
|
+
TRAP_BEG;
|
75
|
+
err = sufcheck(T, SA, (saidx_t) n, 0);
|
76
|
+
TRAP_END;
|
77
|
+
|
78
|
+
if(err != 0) {
|
79
|
+
xfree(SA);
|
80
|
+
xfree(T);
|
81
|
+
rb_raise(rb_eRuntimeError, "Wrong suffix array.");
|
82
|
+
}
|
83
|
+
|
84
|
+
for(i = 0; i < n; i++) {
|
85
|
+
saidx_t SA_i = SA[i];
|
86
|
+
rb_ary_push(dst, LONG2NUM(SA_i));
|
87
|
+
}
|
88
|
+
|
89
|
+
xfree(SA);
|
90
|
+
xfree(T);
|
91
|
+
|
92
|
+
if(err != 0) {
|
93
|
+
rb_raise(rb_eRuntimeError, "Cannot allocate memory.");
|
94
|
+
}
|
95
|
+
|
96
|
+
return dst;
|
97
|
+
}
|
98
|
+
|
99
|
+
/* */
|
100
|
+
static VALUE divsufsort_divbwt(int argc, VALUE *argv, VALUE self) {
|
101
|
+
VALUE src, v_blocksize, transformed;
|
102
|
+
sauchar_t *T;
|
103
|
+
saidx_t *SA;
|
104
|
+
char *p;
|
105
|
+
long n, T_len;
|
106
|
+
saint_t blocksize = 32;
|
107
|
+
saidx_t pidx;
|
108
|
+
|
109
|
+
rb_scan_args(argc, argv, "11", &src, &v_blocksize);
|
110
|
+
Check_Type(src, T_STRING);
|
111
|
+
|
112
|
+
if (!NIL_P(v_blocksize)) {
|
113
|
+
blocksize = (saint_t) NUM2INT(v_blocksize);
|
114
|
+
}
|
115
|
+
|
116
|
+
blocksize <<= 20;
|
117
|
+
p = RSTRING_PTR(src);
|
118
|
+
n = RSTRING_LEN(src);
|
119
|
+
|
120
|
+
if(n > 0x20000000L) {
|
121
|
+
n = 0x20000000L;
|
122
|
+
}
|
123
|
+
|
124
|
+
if(blocksize == 0 || n < blocksize) {
|
125
|
+
blocksize = (saidx_t) n;
|
126
|
+
}
|
127
|
+
|
128
|
+
T_len = blocksize * sizeof(sauchar_t);
|
129
|
+
T = (sauchar_t *) xmalloc(T_len);
|
130
|
+
SA = (saidx_t *) xmalloc(blocksize * sizeof(saidx_t));
|
131
|
+
transformed = rb_str_new("", 0);
|
132
|
+
|
133
|
+
RB_STR_CAT_INT(transformed, blocksize);
|
134
|
+
|
135
|
+
while (n > 0) {
|
136
|
+
int m = (n < T_len) ? n : T_len;
|
137
|
+
|
138
|
+
memcpy(T, p, m);
|
139
|
+
p += m; n -= m;
|
140
|
+
|
141
|
+
TRAP_BEG;
|
142
|
+
pidx = divbwt(T, T, SA, m);
|
143
|
+
TRAP_END;
|
144
|
+
|
145
|
+
if(pidx < 0) {
|
146
|
+
break;
|
147
|
+
}
|
148
|
+
|
149
|
+
RB_STR_CAT_INT(transformed, pidx);
|
150
|
+
rb_str_cat(transformed, T, m);
|
151
|
+
}
|
152
|
+
|
153
|
+
xfree(SA);
|
154
|
+
xfree(T);
|
155
|
+
|
156
|
+
if(pidx < 0) {
|
157
|
+
rb_raise(rb_eRuntimeError, "bw_transform: %s.", (pidx == -1) ? "Invalid arguments" : "Cannot allocate memory");
|
158
|
+
}
|
159
|
+
|
160
|
+
return transformed;
|
161
|
+
}
|
162
|
+
|
163
|
+
/* */
|
164
|
+
static VALUE divsufsort_inverse_bw_transform(VALUE self, VALUE transformed) {
|
165
|
+
VALUE dst;
|
166
|
+
sauchar_t *T;
|
167
|
+
saidx_t *A;
|
168
|
+
char *p;
|
169
|
+
long n;
|
170
|
+
saint_t blocksize;
|
171
|
+
int err = 0;
|
172
|
+
|
173
|
+
Check_Type(transformed, T_STRING);
|
174
|
+
p = RSTRING_PTR(transformed);
|
175
|
+
n = RSTRING_LEN(transformed);
|
176
|
+
|
177
|
+
if (n < 4) {
|
178
|
+
rb_raise(rb_eRuntimeError, "reverseBWT: Invalid data.\n");
|
179
|
+
}
|
180
|
+
|
181
|
+
PTR_READ_INT(p, n, blocksize);
|
182
|
+
T = (sauchar_t *) xmalloc(blocksize * sizeof(sauchar_t));
|
183
|
+
A = (saidx_t *) xmalloc(blocksize * sizeof(saidx_t));
|
184
|
+
dst = rb_str_new("", 0);
|
185
|
+
|
186
|
+
while (n > 0) {
|
187
|
+
int m;
|
188
|
+
saidx_t pidx;
|
189
|
+
|
190
|
+
if (n < 4) {
|
191
|
+
err = -1;
|
192
|
+
break;
|
193
|
+
}
|
194
|
+
|
195
|
+
PTR_READ_INT(p, n, pidx);
|
196
|
+
m = (n < blocksize) ? n : blocksize;
|
197
|
+
memcpy(T, p, m);
|
198
|
+
p += m; n -= m;
|
199
|
+
|
200
|
+
TRAP_BEG;
|
201
|
+
err = inverse_bw_transform(T, T, A, m, pidx);
|
202
|
+
TRAP_END;
|
203
|
+
|
204
|
+
if(err != 0) {
|
205
|
+
break;
|
206
|
+
}
|
207
|
+
|
208
|
+
rb_str_cat(dst, T, m);
|
209
|
+
}
|
210
|
+
|
211
|
+
xfree(A);
|
212
|
+
xfree(T);
|
213
|
+
|
214
|
+
if (err != 0) {
|
215
|
+
rb_raise(rb_eRuntimeError, "reverseBWT: %s.\n", (err == -1) ? "Invalid data" : "Cannot allocate memory");
|
216
|
+
}
|
217
|
+
|
218
|
+
return dst;
|
219
|
+
}
|
220
|
+
|
221
|
+
/*
 * Extension initializer: defines the Divsufsort module, its VERSION
 * constant, and the module functions divsufsort (1 argument), divbwt
 * (variable arguments) and inverse_bw_transform (1 argument).
 *
 * Declared with (void) so the definition also serves as a prototype.
 */
void DLLEXPORT Init_divsufsort(void) {
  Divsufsort = rb_define_module("Divsufsort");
  rb_define_const(Divsufsort, "VERSION", rb_str_new2(VERSION));

  rb_define_module_function(Divsufsort, "divsufsort", divsufsort_divsufsort, 1);
  rb_define_module_function(Divsufsort, "divbwt", divsufsort_divbwt, -1);
  rb_define_module_function(Divsufsort, "inverse_bw_transform", divsufsort_inverse_bw_transform, 1);
}
|