ferret 0.10.11 → 0.10.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,16 @@
1
+
2
+ Fri Oct 13 09:18:31 JST 2006
3
+ * Changed documentation to state truthfully that FULL_ENGLISH_STOP_WORDS is
4
+ being used by default in StandardAnalyzer and StopwordFilter.
5
+ * Removed 'will', 's' and 't' from ENGLISH_STOP_WORDS so that all words in
6
+ ENGLISH_STOP_WORDS can be found in FULL_ENGLISH_STOP_WORDS, that is
7
+ ENGLISH_STOP_WORDS is a subset of FULL_ENGLISH_STOP_WORDS.
8
+
9
+ Thu Oct 12 23:04:19 JST 2006
10
+ * Fixed adding SortField to Sort object in Ruby. Garbage collection wasn't
11
+ working.
12
+ * Can now set :sort => SortField#new
13
+
1
14
  Tue Oct 10 14:42:17 JST 2006
2
15
  * Fixed MultiTermDocEnum bug introduced in version 0.10.10 during
3
16
  performance enhancements.
data/Rakefile CHANGED
@@ -41,7 +41,7 @@ SRC = (FileList["ext/*.[ch]"] + EXT_SRC_DEST).uniq
41
41
 
42
42
  CLEAN.include(FileList['**/*.o', '**/*.obj', 'InstalledFiles',
43
43
  '.config', 'ext/cferret.c'])
44
- CLOBBER.include(FileList['**/*.so'], 'ext/Makefile', EXT_SRC_DEST)
44
+ CLOBBER.include(FileList['**/*.so'], 'ext/Makefile', 'ext/mem_pool.*', 'ext/defines.h', EXT_SRC_DEST)
45
45
  POLISH = Rake::FileList.new.include(FileList['**/*.so'], 'ext/Makefile')
46
46
 
47
47
  desc "Clean specifically for the release."
data/ext/analysis.c CHANGED
@@ -1078,19 +1078,10 @@ static void sf_destroy_i(TokenStream *ts)
1078
1078
  filter_destroy_i(ts);
1079
1079
  }
1080
1080
 
1081
- static void sf_clone_i_i(void *key, void *value, void *arg)
1082
- {
1083
- HashTable *word_table = (HashTable *)arg;
1084
- char *word = estrdup(key);
1085
- (void)value;
1086
- h_set(word_table, word, word);
1087
- }
1088
-
1089
1081
  static TokenStream *sf_clone_i(TokenStream *orig_ts)
1090
1082
  {
1091
- TokenStream *new_ts = filter_clone_size(orig_ts, sizeof(StopFilter));
1092
- StopFilt(new_ts)->words = h_new_str(&free, NULL);
1093
- h_each(StopFilt(orig_ts)->words, &sf_clone_i_i, StopFilt(new_ts)->words);
1083
+ TokenStream *new_ts = filter_clone_size(orig_ts, sizeof(MappingFilter));
1084
+ REF(StopFilt(new_ts)->words);
1094
1085
  return new_ts;
1095
1086
  }
1096
1087
 
@@ -1157,6 +1148,66 @@ TokenStream *stop_filter_new(TokenStream *ts)
1157
1148
  return stop_filter_new_with_words(ts, FULL_ENGLISH_STOP_WORDS);
1158
1149
  }
1159
1150
 
1151
+ /****************************************************************************
1152
+ * MappingFilter
1153
+ ****************************************************************************/
1154
+
1155
+ #define MFilt(filter) ((MappingFilter *)(filter))
1156
+
1157
+ static void mf_destroy_i(TokenStream *ts)
1158
+ {
1159
+ mulmap_destroy(MFilt(ts)->mapper);
1160
+ filter_destroy_i(ts);
1161
+ }
1162
+
1163
+ static TokenStream *mf_clone_i(TokenStream *orig_ts)
1164
+ {
1165
+ TokenStream *new_ts = filter_clone_size(orig_ts, sizeof(MappingFilter));
1166
+ REF(MFilt(new_ts)->mapper);
1167
+ return new_ts;
1168
+ }
1169
+
1170
+ static Token *mf_next(TokenStream *ts)
1171
+ {
1172
+ char buf[MAX_WORD_SIZE];
1173
+ MultiMapper *mapper = MFilt(ts)->mapper;
1174
+ TokenFilter *tf = TkFilt(ts);
1175
+ Token *tk = tf->sub_ts->next(tf->sub_ts);
1176
+ if (tk != NULL) {
1177
+ tk->len = mulmap_map_len(mapper, buf, tk->text, MAX_WORD_SIZE);
1178
+ memcpy(tk->text, buf, tk->len + 1);
1179
+ }
1180
+ return tk;
1181
+ }
1182
+
1183
+ static TokenStream *mf_reset(TokenStream *ts, char *text)
1184
+ {
1185
+ MultiMapper *mm = MFilt(ts)->mapper;
1186
+ if (mm->d_size == 0) {
1187
+ mulmap_compile(MFilt(ts)->mapper);
1188
+ }
1189
+ filter_reset(ts, text);
1190
+ return ts;
1191
+ }
1192
+
1193
+ TokenStream *mapping_filter_new(TokenStream *sub_ts)
1194
+ {
1195
+ TokenStream *ts = tf_new(MappingFilter, sub_ts);
1196
+ MFilt(ts)->mapper = mulmap_new();
1197
+ ts->next = &mf_next;
1198
+ ts->destroy_i = &mf_destroy_i;
1199
+ ts->clone_i = &mf_clone_i;
1200
+ ts->reset = &mf_reset;
1201
+ return ts;
1202
+ }
1203
+
1204
+ TokenStream *mapping_filter_add(TokenStream *ts, const char *pattern,
1205
+ const char *replacement)
1206
+ {
1207
+ mulmap_add_mapping(MFilt(ts)->mapper, pattern, replacement);
1208
+ return ts;
1209
+ }
1210
+
1160
1211
  /****************************************************************************
1161
1212
  * HyphenFilter
1162
1213
  ****************************************************************************/
data/ext/analysis.h CHANGED
@@ -3,6 +3,7 @@
3
3
 
4
4
  #include "global.h"
5
5
  #include "hash.h"
6
+ #include "multimapper.h"
6
7
  #include <wchar.h>
7
8
 
8
9
  /****************************************************************************
@@ -89,6 +90,12 @@ typedef struct StopFilter
89
90
  HashTable *words;
90
91
  } StopFilter;
91
92
 
93
+ typedef struct MappingFilter
94
+ {
95
+ TokenFilter super;
96
+ MultiMapper *mapper;
97
+ } MappingFilter;
98
+
92
99
  typedef struct HyphenFilter
93
100
  {
94
101
  TokenFilter super;
@@ -150,6 +157,10 @@ extern TokenStream *stop_filter_new(TokenStream *ts);
150
157
  extern TokenStream *stem_filter_new(TokenStream *ts, const char *algorithm,
151
158
  const char *charenc);
152
159
 
160
+ extern TokenStream *mapping_filter_new(TokenStream *ts);
161
+ extern TokenStream *mapping_filter_add(TokenStream *ts, const char *pattern,
162
+ const char *replacement);
163
+
153
164
  /****************************************************************************
154
165
  *
155
166
  * Analyzer
data/ext/bitvector.c CHANGED
@@ -360,32 +360,53 @@ unsigned long bv_hash(BitVector *bv)
360
360
  return hash;
361
361
  }
362
362
 
363
+ static __inline void bv_recapa(BitVector *bv, int new_capa)
364
+ {
365
+ if (bv->capa < new_capa) {
366
+ REALLOC_N(bv->bits, f_u32, new_capa);
367
+ memset(bv->bits + bv->capa, (bv->extends_as_ones ? 0xFF : 0),
368
+ sizeof(f_u32) * (new_capa - bv->capa));
369
+ bv->capa = new_capa;
370
+ }
371
+ }
372
+
363
373
  static BitVector *bv_and_i(BitVector *bv, BitVector *bv1, BitVector *bv2)
364
374
  {
365
375
  int i;
366
- int min_size = min2(bv1->size, bv2->size);
367
- int word_size = (min_size >> 5) + 1;
376
+ int size;
377
+ int word_size;
368
378
  int capa = 4;
369
- while (capa < word_size) {
370
- capa <<= 1;
371
- }
372
- REALLOC_N(bv->bits, f_u32, capa);
373
- bv->capa = capa;
374
- bv->size = min_size;
375
379
 
376
380
  if (bv1->extends_as_ones && bv2->extends_as_ones) {
381
+ size = max2(bv1->size, bv2->size);
377
382
  bv->extends_as_ones = true;
378
383
  }
384
+ else if (bv1->extends_as_ones || bv2->extends_as_ones) {
385
+ size = max2(bv1->size, bv2->size);
386
+ bv->extends_as_ones = false;
387
+ }
379
388
  else {
389
+ size = min2(bv1->size, bv2->size);
380
390
  bv->extends_as_ones = false;
381
391
  }
382
392
 
393
+ word_size = (size >> 5) + 1;
394
+ while (capa < word_size) {
395
+ capa <<= 1;
396
+ }
397
+ bv_recapa(bv1, capa);
398
+ bv_recapa(bv2, capa);
399
+ REALLOC_N(bv->bits, f_u32, capa);
400
+ bv->capa = capa;
401
+ bv->size = size;
402
+
383
403
  memset(bv->bits + word_size, (bv->extends_as_ones ? 0xFF : 0),
384
404
  sizeof(f_u32) * (capa - word_size));
385
405
 
386
406
  for (i = 0; i < word_size; i++) {
387
407
  bv->bits[i] = bv1->bits[i] & bv2->bits[i];
388
408
  }
409
+
389
410
  bv_recount(bv);
390
411
  return bv;
391
412
  }
@@ -400,16 +421,6 @@ BitVector *bv_and_x(BitVector *bv1, BitVector *bv2)
400
421
  return bv_and_i(bv1, bv1, bv2);
401
422
  }
402
423
 
403
- static __inline void bv_recapa(BitVector *bv, int new_capa)
404
- {
405
- if (bv->capa < new_capa) {
406
- REALLOC_N(bv->bits, f_u32, new_capa);
407
- memset(bv->bits + bv->capa, (bv->extends_as_ones ? 0xFF : 0),
408
- sizeof(f_u32) * (new_capa - bv->capa));
409
- bv->capa = new_capa;
410
- }
411
- }
412
-
413
424
  static BitVector *bv_or_i(BitVector *bv, BitVector *bv1, BitVector *bv2)
414
425
  {
415
426
  int i;
File without changes
data/ext/except.h CHANGED
@@ -63,7 +63,7 @@
63
63
  #define FRT_EXCEPT_H
64
64
 
65
65
  #include <setjmp.h>
66
- #include "defines.h"
66
+ #include "config.h"
67
67
 
68
68
  #define BODY 0
69
69
  #define FINALLY 1
data/ext/extconf.rb CHANGED
@@ -5,6 +5,7 @@ if (/mswin/ =~ RUBY_PLATFORM) and ENV['make'].nil?
5
5
  create_makefile("ferret_ext")
6
6
  else
7
7
  require 'mkmf'
8
- $CFLAGS += " -fno-common"
8
+ #$CFLAGS += " -fno-common"
9
+ $CFLAGS += " -fno-common -D_FILE_OFFSET_BITS=64"
9
10
  create_makefile("ferret_ext")
10
11
  end
data/ext/fs_store.c CHANGED
@@ -223,7 +223,8 @@ static off_t fs_length(Store *store, char *filename)
223
223
  static void fso_flush_i(OutStream *os, uchar *src, int len)
224
224
  {
225
225
  if (len != write(os->file.fd, src, len)) {
226
- RAISE(IO_ERROR, "flushing src of length %d", len);
226
+ RAISE(IO_ERROR, "flushing src of length %d, <%s>", len,
227
+ strerror(errno));
227
228
  }
228
229
  }
229
230
 
@@ -268,7 +269,7 @@ static OutStream *fs_new_output(Store *store, const char *filename)
268
269
  static void fsi_read_i(InStream *is, uchar *path, int len)
269
270
  {
270
271
  int fd = is->file.fd;
271
- int pos = is_pos(is);
272
+ off_t pos = is_pos(is);
272
273
  if (pos != lseek(fd, 0, SEEK_CUR)) {
273
274
  lseek(fd, pos, SEEK_SET);
274
275
  }
@@ -409,6 +410,7 @@ static HashTable stores = {
409
410
  /* fill */ 0,
410
411
  /* used */ 0,
411
412
  /* mask */ HASH_MINSIZE - 1,
413
+ /* ref_cnt */ 1,
412
414
  /* table */ stores.smalltable,
413
415
  /* smalltable */ {{0, NULL, NULL}},
414
416
  /* lookup */ (h_lookup_ft)&h_lookup_str,
data/ext/global.h CHANGED
@@ -1,7 +1,7 @@
1
1
  #ifndef FRT_GLOBAL_H
2
2
  #define FRT_GLOBAL_H
3
3
 
4
- #include "defines.h"
4
+ #include "config.h"
5
5
  #include "except.h"
6
6
  #include "lang.h"
7
7
  #include <stdlib.h>
data/ext/hash.c CHANGED
@@ -238,6 +238,7 @@ HashTable *h_new_str(free_ft free_key, free_ft free_value)
238
238
 
239
239
  ht->free_key_i = free_key != NULL ? free_key : &dummy_free;
240
240
  ht->free_value_i = free_value != NULL ? free_value : &dummy_free;
241
+ ht->ref_cnt = 1;
241
242
  return ht;
242
243
  }
243
244
 
@@ -285,23 +286,25 @@ void h_clear(HashTable *ht)
285
286
 
286
287
  void h_destroy(HashTable *ht)
287
288
  {
288
- h_clear(ht);
289
+ if (--(ht->ref_cnt) <= 0) {
290
+ h_clear(ht);
289
291
 
290
- /* if a new table was created, be sure to free it */
291
- if (ht->table != ht->smalltable) {
292
- free(ht->table);
293
- }
292
+ /* if a new table was created, be sure to free it */
293
+ if (ht->table != ht->smalltable) {
294
+ free(ht->table);
295
+ }
294
296
 
295
297
  #ifdef DEBUG
296
- free(ht);
297
- #else
298
- if (num_free_hts < MAX_FREE_HASH_TABLES) {
299
- free_hts[num_free_hts++] = ht;
300
- }
301
- else {
302
298
  free(ht);
303
- }
299
+ #else
300
+ if (num_free_hts < MAX_FREE_HASH_TABLES) {
301
+ free_hts[num_free_hts++] = ht;
302
+ }
303
+ else {
304
+ free(ht);
305
+ }
304
306
  #endif
307
+ }
305
308
  }
306
309
 
307
310
  void *h_get(HashTable *ht, const void *key)
data/ext/hash.h CHANGED
@@ -46,6 +46,7 @@ typedef struct HashTable
46
46
  int fill; /* num Active + num Dummy */
47
47
  int size; /* num Active ie, num keys set */
48
48
  int mask; /* capacity_of_table - 1 */
49
+ int ref_cnt;
49
50
 
50
51
  /* table points to smalltable initially. If the table grows beyond 2/3 of
51
52
  * HASH_MINSIZE it will point to newly malloced memory as it grows. */
data/ext/helper.c CHANGED
@@ -14,13 +14,13 @@ f_i32 float2int(float f)
14
14
  {
15
15
  union { f_i32 i; float f; } tmp;
16
16
  tmp.f = f;
17
- return POSH_LittleU32(tmp.i);
17
+ return tmp.i;
18
18
  }
19
19
 
20
20
  float int2float(f_i32 i32)
21
21
  {
22
22
  union { f_i32 i; float f; } tmp;
23
- tmp.i = POSH_LittleU32(i32);
23
+ tmp.i = i32;
24
24
  return tmp.f;
25
25
  }
26
26
 
data/ext/helper.h CHANGED
@@ -1,7 +1,7 @@
1
1
  #ifndef FRT_HELPER_H
2
2
  #define FRT_HELPER_H
3
3
 
4
- #include "defines.h"
4
+ #include "config.h"
5
5
 
6
6
  extern __inline int hlp_string_diff(register const char *const s1,
7
7
  register const char *const s2);
data/ext/index.c CHANGED
@@ -5350,7 +5350,8 @@ void iw_close(IndexWriter *iw)
5350
5350
  free(iw);
5351
5351
  }
5352
5352
 
5353
- IndexWriter *iw_open(Store *store, Analyzer *analyzer, const Config *config)
5353
+ IndexWriter *iw_open(Store *store, volatile Analyzer *analyzer,
5354
+ const Config *config)
5354
5355
  {
5355
5356
  IndexWriter *iw = ALLOC_AND_ZERO(IndexWriter);
5356
5357
  mutex_init(&iw->mutex, NULL);
@@ -5381,7 +5382,8 @@ IndexWriter *iw_open(Store *store, Analyzer *analyzer, const Config *config)
5381
5382
  XENDTRY
5382
5383
 
5383
5384
  iw->similarity = sim_create_default();
5384
- iw->analyzer = analyzer ? analyzer : mb_standard_analyzer_new(true);
5385
+ iw->analyzer = analyzer ? (Analyzer *)analyzer
5386
+ : mb_standard_analyzer_new(true);
5385
5387
 
5386
5388
  REF(store);
5387
5389
  return iw;
data/ext/index.h CHANGED
@@ -7,7 +7,7 @@
7
7
  #include "hash.h"
8
8
  #include "hashset.h"
9
9
  #include "store.h"
10
- #include "mem_pool.h"
10
+ #include "mempool.h"
11
11
  #include "similarity.h"
12
12
  #include "bitvector.h"
13
13
  #include "priorityqueue.h"
@@ -874,7 +874,7 @@ struct IndexWriter
874
874
  };
875
875
 
876
876
  extern void index_create(Store *store, FieldInfos *fis);
877
- extern IndexWriter *iw_open(Store *store, Analyzer *analyzer,
877
+ extern IndexWriter *iw_open(Store *store, volatile Analyzer *analyzer,
878
878
  const Config *config);
879
879
  extern void iw_delete_term(IndexWriter *iw, const char *field,
880
880
  const char *term);
@@ -1,5 +1,5 @@
1
1
  #include "global.h"
2
- #include "mem_pool.h"
2
+ #include "mempool.h"
3
3
  #include <string.h>
4
4
 
5
5
  MemoryPool *mp_new_capa(int chuck_size, int init_buf_capa)
File without changes
data/ext/multimapper.c ADDED
@@ -0,0 +1,310 @@
1
+ #include "multimapper.h"
2
+ #include "array.h"
3
+ #include "bitvector.h"
4
+ #include <string.h>
5
+
6
+ #define St(state) ((State *)(state))
7
+ #define UCtoI(val) ((int)(unsigned char)(val))
8
+
9
+ static void state_destroy(State *state)
10
+ {
11
+ state->destroy_i(state);
12
+ }
13
+
14
+ typedef struct LetterState
15
+ {
16
+ State super;
17
+ int c;
18
+ int val;
19
+ char *mapping;
20
+ } LetterState;
21
+ #define LSt(state) ((LetterState *)(state))
22
+
23
+
24
+ static int lstate_next(LetterState *self, int c, int *states)
25
+ {
26
+ if (c == self->c) {
27
+ states[0] = self->val;
28
+ return 1;
29
+ }
30
+ else {
31
+ return 0;
32
+ }
33
+ }
34
+
35
+ static int lstate_is_match(LetterState *self, char **mapping)
36
+ {
37
+ if (self->val < 0) {
38
+ *mapping = self->mapping;
39
+ return self->val;
40
+ }
41
+ else {
42
+ return 0;
43
+ }
44
+ }
45
+
46
+ static LetterState *lstate_new(int c, int val)
47
+ {
48
+ LetterState *self = ALLOC(LetterState);
49
+ self->c = c;
50
+ self->val = val;
51
+ self->mapping = NULL;
52
+ St(self)->next = (int (*)(State *, int, int *))&lstate_next;
53
+ St(self)->destroy_i = (void (*)(State *))&free;
54
+ St(self)->is_match = (int (*)(State *, char **))&lstate_is_match;
55
+ return self;
56
+ }
57
+
58
+ typedef struct NonDeterministicState
59
+ {
60
+ State super;
61
+ int *states[256];
62
+ int size[256];
63
+ int capa[256];
64
+ } NonDeterministicState;
65
+
66
+ static int ndstate_next(NonDeterministicState *self, int c, int *states)
67
+ {
68
+ int size = self->size[c];
69
+ memcpy(states, self->states[c], size * sizeof(int));
70
+ return size;
71
+ }
72
+
73
+ static void ndstate_add(NonDeterministicState *self, int c, int state)
74
+ {
75
+ if (self->capa[c] <= self->size[c]) {
76
+ if (self->capa[c] == 0) {
77
+ self->capa[c] = 4;
78
+ }
79
+ else {
80
+ self->capa[c] <<= 1;
81
+ }
82
+ REALLOC_N(self->states[c], int, self->capa[c]);
83
+ }
84
+ self->states[c][self->size[c]++] = state;
85
+ }
86
+
87
+ static void ndstate_destroy_i(NonDeterministicState *self)
88
+ {
89
+ int i;
90
+ for (i = 0; i < 256; i++) {
91
+ free(self->states[i]);
92
+ }
93
+ free(self);
94
+ }
95
+
96
+ static int ndstate_is_match(State *self, char **mapping)
97
+ {
98
+ (void)self; (void)mapping;
99
+ return 0;
100
+ }
101
+
102
+ static NonDeterministicState *ndstate_new()
103
+ {
104
+ NonDeterministicState *self = ALLOC_AND_ZERO(NonDeterministicState);
105
+ St(self)->next = (int (*)(State *, int, int *))&ndstate_next;
106
+ St(self)->destroy_i = (void (*)(State *))&ndstate_destroy_i;
107
+ St(self)->is_match = &ndstate_is_match;
108
+ return self;
109
+ }
110
+
111
+ MultiMapper *mulmap_new()
112
+ {
113
+ MultiMapper *self = ALLOC_AND_ZERO(MultiMapper);
114
+ self->capa = 128;
115
+ self->mappings = ALLOC_N(Mapping *, 128);
116
+ self->d_capa = 128;
117
+ self->dstates = ALLOC_N(DeterministicState *, 128);
118
+ self->dstates_map = NULL;
119
+ self->nstates = NULL;
120
+ self->ref_cnt = 1;
121
+ return self;
122
+ }
123
+
124
+ static __inline void mulmap_free_dstates(MultiMapper *self)
125
+ {
126
+ if (self->d_size > 0) {
127
+ int i;
128
+ for (i = self->d_size - 1; i >= 0; i--) {
129
+ free(self->dstates[i]);
130
+ }
131
+ self->d_size = 0;
132
+ }
133
+ }
134
+
135
+ void mulmap_add_mapping(MultiMapper *self, const char *pattern, const char *rep)
136
+ {
137
+ if (pattern == NULL || pattern[0] == '\0') {
138
+ RAISE(ARG_ERROR, "Tried to add empty pattern to multi_mapper");
139
+ }
140
+ else {
141
+ Mapping *mapping = ALLOC(Mapping);
142
+ if (self->size >= self->capa) {
143
+ self->capa <<= 1;
144
+ REALLOC_N(self->mappings, Mapping *, self->capa);
145
+ }
146
+ mapping->pattern = estrdup(pattern);
147
+ mapping->replacement = estrdup(rep);
148
+ self->mappings[self->size++] = mapping;
149
+ mulmap_free_dstates(self);
150
+ }
151
+ }
152
+
153
+
154
+ static __inline void mulmap_bv_set_states(BitVector *bv, int *states, int cnt)
155
+ {
156
+ int i;
157
+ for (i = cnt - 1; i >= 0; i--) {
158
+ bv_set(bv, states[i]);
159
+ }
160
+ }
161
+
162
+ static DeterministicState *mulmap_process_state(MultiMapper *self, BitVector *bv)
163
+ {
164
+ DeterministicState *current_state = h_get(self->dstates_map, bv);
165
+ if (current_state == NULL) {
166
+ int bit, i;
167
+ int match_len = 0, max_match_len = 0;
168
+ State *start = self->nstates[0];
169
+ DeterministicState *start_ds;
170
+ current_state = ALLOC_AND_ZERO(DeterministicState);
171
+ h_set(self->dstates_map, bv, current_state);
172
+ if (self->d_size >= self->d_capa) {
173
+ self->d_capa <<= 1;
174
+ REALLOC_N(self->dstates, DeterministicState *, self->d_capa);
175
+ }
176
+ self->dstates[self->d_size++] = current_state;
177
+ start_ds = self->dstates[0];
178
+ for (i = 0; i <= 256; i++) {
179
+ current_state->next[i] = start_ds;
180
+ }
181
+ while ((bit = bv_scan_next(bv)) >= 0) {
182
+ char *mapping;
183
+ State *st = self->nstates[bit];
184
+ if ((match_len = -st->is_match(st, &mapping)) > max_match_len) {
185
+ current_state->longest_match = max_match_len = match_len;
186
+ current_state->mapping = mapping;
187
+ current_state->mapping_len = strlen(mapping);
188
+ }
189
+ }
190
+ for (i = self->a_size - 1; i >= 0; i--) {
191
+ unsigned char c = self->alphabet[i];
192
+ BitVector *nxt_bv = bv_new_capa(self->nsize);
193
+ mulmap_bv_set_states(nxt_bv, self->next_states,
194
+ start->next(start, (int)c, self->next_states));
195
+ bv_scan_reset(bv);
196
+ while ((bit = bv_scan_next(bv)) >= 0) {
197
+ State *state = self->nstates[bit];
198
+ mulmap_bv_set_states(nxt_bv, self->next_states,
199
+ state->next(state, (int)c, self->next_states));
200
+ }
201
+ current_state->next[(int)c] = mulmap_process_state(self, nxt_bv);
202
+ }
203
+ }
204
+ else {
205
+ bv_destroy(bv);
206
+ }
207
+ return current_state;
208
+ }
209
+
210
+ void mulmap_compile(MultiMapper *self)
211
+ {
212
+ NonDeterministicState *start = ndstate_new();
213
+ int i, j;
214
+ int size = 1;
215
+ int capa = 128;
216
+ LetterState *ls;
217
+ State **nstates = ALLOC_N(State *, capa);
218
+ Mapping **mappings = self->mappings;
219
+ unsigned char alphabet[256];
220
+ nstates[0] = (State *)start;
221
+ memset(alphabet, 0, 256);
222
+
223
+ for (i = self->size - 1; i >= 0; i--) {
224
+ const char *pattern = mappings[i]->pattern;
225
+ const int plen = (int)strlen(pattern);
226
+ ndstate_add(start, UCtoI(pattern[0]), size);
227
+ if (size + plen + 1 >= capa) {
228
+ capa <<= 2;
229
+ REALLOC_N(nstates, State *, capa);
230
+ }
231
+ for (j = 0; j < plen; j++) {
232
+ alphabet[UCtoI(pattern[j])] = 1;
233
+ size += 1;
234
+ nstates[size-1] = (State *)lstate_new(UCtoI(pattern[j+1]), size);
235
+ }
236
+ ls = LSt(nstates[size-1]);
237
+ ls->mapping = mappings[i]->replacement;
238
+ ls->val = -plen;
239
+ ls->c = -1;
240
+ }
241
+ for (i = j = 0; i < 256; i++) {
242
+ if (alphabet[i]) self->alphabet[j++] = i;
243
+ }
244
+ self->a_size = j;
245
+ mulmap_free_dstates(self);
246
+ self->nstates = nstates;
247
+ self->nsize = size;
248
+ self->next_states = ALLOC_N(int, size);
249
+ self->dstates_map = h_new((hash_ft)&bv_hash, (eq_ft)&bv_eq,
250
+ (free_ft)&bv_destroy, (free_ft)NULL);
251
+ mulmap_process_state(self, bv_new_capa(0));
252
+ h_destroy(self->dstates_map);
253
+ for (i = size - 1; i >= 0; i--) {
254
+ state_destroy(nstates[i]);
255
+ }
256
+ free(self->next_states);
257
+ free(nstates);
258
+ }
259
+
260
+ int mulmap_map_len(MultiMapper *self, char *to, char *from, int capa)
261
+ {
262
+ DeterministicState *start = self->dstates[0];
263
+ DeterministicState *state = start;
264
+ char *s = from, *d = to, *end = to + capa - 1;
265
+ if (self->d_size == 0) {
266
+ RAISE(STATE_ERROR, "You forgot to compile your MultiMapper");
267
+ }
268
+ while (*s && d < end) {
269
+ state = state->next[UCtoI(*s)];
270
+ if (state->mapping) {
271
+ int len = state->mapping_len;
272
+ d -= (state->longest_match - 1);
273
+ if ((d + len) > end) {
274
+ len = end - d;
275
+ }
276
+ memcpy(d, state->mapping, len);
277
+ d += len;
278
+ state = start;
279
+ }
280
+ else {
281
+ *(d++) = *s;
282
+ }
283
+ s++;
284
+ }
285
+ *d = '\0';
286
+ return d - to;
287
+ }
288
+
289
+ char *mulmap_map(MultiMapper *self, char *to, char *from, int capa)
290
+ {
291
+ mulmap_map_len(self, to, from, capa);
292
+ return to;
293
+ }
294
+
295
+ void mulmap_destroy(MultiMapper *self)
296
+ {
297
+ if (--(self->ref_cnt) <= 0) {
298
+ int i;
299
+ mulmap_free_dstates(self);
300
+ for (i = self->size - 1; i >= 0; i--) {
301
+ Mapping *mapping = self->mappings[i];
302
+ free(mapping->pattern);
303
+ free(mapping->replacement);
304
+ free(mapping);
305
+ }
306
+ free(self->mappings);
307
+ free(self->dstates);
308
+ free(self);
309
+ }
310
+ }