ferret 0.10.11 → 0.10.12

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,16 @@
1
+
2
+ Fri Oct 13 09:18:31 JST 2006
3
+ * Changed documentation to state truthfully that FULL_ENGLISH_STOP_WORDS is
4
+ being used by default in StandardAnalyzer and StopwordFilter.
5
+ * Removed 'will', 's' and 't' from ENGLISH_STOP_WORDS so that all words in
6
+ ENGLISH_STOP_WORDS can be found in FULL_ENGLISH_STOP_WORDS, that is
7
+ ENGLISH_STOP_WORDS is a subset of FULL_ENGLISH_STOP_WORDS.
8
+
9
+ Thu Oct 12 23:04:19 JST 2006
10
+ * Fixed adding SortField to Sort object in Ruby. Garbage collection wasn't
11
+ working.
12
+ * Can now set :sort => SortField#new
13
+
1
14
  Tue Oct 10 14:42:17 JST 2006
2
15
  * Fixed MultiTermDocEnum bug introduced in version 0.10.10 during
3
16
  performance enhancements.
data/Rakefile CHANGED
@@ -41,7 +41,7 @@ SRC = (FileList["ext/*.[ch]"] + EXT_SRC_DEST).uniq
41
41
 
42
42
  CLEAN.include(FileList['**/*.o', '**/*.obj', 'InstalledFiles',
43
43
  '.config', 'ext/cferret.c'])
44
- CLOBBER.include(FileList['**/*.so'], 'ext/Makefile', EXT_SRC_DEST)
44
+ CLOBBER.include(FileList['**/*.so'], 'ext/Makefile', 'ext/mem_pool.*', 'ext/defines.h', EXT_SRC_DEST)
45
45
  POLISH = Rake::FileList.new.include(FileList['**/*.so'], 'ext/Makefile')
46
46
 
47
47
  desc "Clean specifically for the release."
data/ext/analysis.c CHANGED
@@ -1078,19 +1078,10 @@ static void sf_destroy_i(TokenStream *ts)
1078
1078
  filter_destroy_i(ts);
1079
1079
  }
1080
1080
 
1081
- static void sf_clone_i_i(void *key, void *value, void *arg)
1082
- {
1083
- HashTable *word_table = (HashTable *)arg;
1084
- char *word = estrdup(key);
1085
- (void)value;
1086
- h_set(word_table, word, word);
1087
- }
1088
-
1089
1081
  static TokenStream *sf_clone_i(TokenStream *orig_ts)
1090
1082
  {
1091
- TokenStream *new_ts = filter_clone_size(orig_ts, sizeof(StopFilter));
1092
- StopFilt(new_ts)->words = h_new_str(&free, NULL);
1093
- h_each(StopFilt(orig_ts)->words, &sf_clone_i_i, StopFilt(new_ts)->words);
1083
+ TokenStream *new_ts = filter_clone_size(orig_ts, sizeof(MappingFilter));
1084
+ REF(StopFilt(new_ts)->words);
1094
1085
  return new_ts;
1095
1086
  }
1096
1087
 
@@ -1157,6 +1148,66 @@ TokenStream *stop_filter_new(TokenStream *ts)
1157
1148
  return stop_filter_new_with_words(ts, FULL_ENGLISH_STOP_WORDS);
1158
1149
  }
1159
1150
 
1151
+ /****************************************************************************
1152
+ * MappingFilter
1153
+ ****************************************************************************/
1154
+
1155
+ #define MFilt(filter) ((MappingFilter *)(filter))
1156
+
1157
+ static void mf_destroy_i(TokenStream *ts)
1158
+ {
1159
+ mulmap_destroy(MFilt(ts)->mapper);
1160
+ filter_destroy_i(ts);
1161
+ }
1162
+
1163
+ static TokenStream *mf_clone_i(TokenStream *orig_ts)
1164
+ {
1165
+ TokenStream *new_ts = filter_clone_size(orig_ts, sizeof(MappingFilter));
1166
+ REF(MFilt(new_ts)->mapper);
1167
+ return new_ts;
1168
+ }
1169
+
1170
+ static Token *mf_next(TokenStream *ts)
1171
+ {
1172
+ char buf[MAX_WORD_SIZE];
1173
+ MultiMapper *mapper = MFilt(ts)->mapper;
1174
+ TokenFilter *tf = TkFilt(ts);
1175
+ Token *tk = tf->sub_ts->next(tf->sub_ts);
1176
+ if (tk != NULL) {
1177
+ tk->len = mulmap_map_len(mapper, buf, tk->text, MAX_WORD_SIZE);
1178
+ memcpy(tk->text, buf, tk->len + 1);
1179
+ }
1180
+ return tk;
1181
+ }
1182
+
1183
+ static TokenStream *mf_reset(TokenStream *ts, char *text)
1184
+ {
1185
+ MultiMapper *mm = MFilt(ts)->mapper;
1186
+ if (mm->d_size == 0) {
1187
+ mulmap_compile(MFilt(ts)->mapper);
1188
+ }
1189
+ filter_reset(ts, text);
1190
+ return ts;
1191
+ }
1192
+
1193
+ TokenStream *mapping_filter_new(TokenStream *sub_ts)
1194
+ {
1195
+ TokenStream *ts = tf_new(MappingFilter, sub_ts);
1196
+ MFilt(ts)->mapper = mulmap_new();
1197
+ ts->next = &mf_next;
1198
+ ts->destroy_i = &mf_destroy_i;
1199
+ ts->clone_i = &mf_clone_i;
1200
+ ts->reset = &mf_reset;
1201
+ return ts;
1202
+ }
1203
+
1204
+ TokenStream *mapping_filter_add(TokenStream *ts, const char *pattern,
1205
+ const char *replacement)
1206
+ {
1207
+ mulmap_add_mapping(MFilt(ts)->mapper, pattern, replacement);
1208
+ return ts;
1209
+ }
1210
+
1160
1211
  /****************************************************************************
1161
1212
  * HyphenFilter
1162
1213
  ****************************************************************************/
data/ext/analysis.h CHANGED
@@ -3,6 +3,7 @@
3
3
 
4
4
  #include "global.h"
5
5
  #include "hash.h"
6
+ #include "multimapper.h"
6
7
  #include <wchar.h>
7
8
 
8
9
  /****************************************************************************
@@ -89,6 +90,12 @@ typedef struct StopFilter
89
90
  HashTable *words;
90
91
  } StopFilter;
91
92
 
93
+ typedef struct MappingFilter
94
+ {
95
+ TokenFilter super;
96
+ MultiMapper *mapper;
97
+ } MappingFilter;
98
+
92
99
  typedef struct HyphenFilter
93
100
  {
94
101
  TokenFilter super;
@@ -150,6 +157,10 @@ extern TokenStream *stop_filter_new(TokenStream *ts);
150
157
  extern TokenStream *stem_filter_new(TokenStream *ts, const char *algorithm,
151
158
  const char *charenc);
152
159
 
160
+ extern TokenStream *mapping_filter_new(TokenStream *ts);
161
+ extern TokenStream *mapping_filter_add(TokenStream *ts, const char *pattern,
162
+ const char *replacement);
163
+
153
164
  /****************************************************************************
154
165
  *
155
166
  * Analyzer
data/ext/bitvector.c CHANGED
@@ -360,32 +360,53 @@ unsigned long bv_hash(BitVector *bv)
360
360
  return hash;
361
361
  }
362
362
 
363
+ static __inline void bv_recapa(BitVector *bv, int new_capa)
364
+ {
365
+ if (bv->capa < new_capa) {
366
+ REALLOC_N(bv->bits, f_u32, new_capa);
367
+ memset(bv->bits + bv->capa, (bv->extends_as_ones ? 0xFF : 0),
368
+ sizeof(f_u32) * (new_capa - bv->capa));
369
+ bv->capa = new_capa;
370
+ }
371
+ }
372
+
363
373
  static BitVector *bv_and_i(BitVector *bv, BitVector *bv1, BitVector *bv2)
364
374
  {
365
375
  int i;
366
- int min_size = min2(bv1->size, bv2->size);
367
- int word_size = (min_size >> 5) + 1;
376
+ int size;
377
+ int word_size;
368
378
  int capa = 4;
369
- while (capa < word_size) {
370
- capa <<= 1;
371
- }
372
- REALLOC_N(bv->bits, f_u32, capa);
373
- bv->capa = capa;
374
- bv->size = min_size;
375
379
 
376
380
  if (bv1->extends_as_ones && bv2->extends_as_ones) {
381
+ size = max2(bv1->size, bv2->size);
377
382
  bv->extends_as_ones = true;
378
383
  }
384
+ else if (bv1->extends_as_ones || bv2->extends_as_ones) {
385
+ size = max2(bv1->size, bv2->size);
386
+ bv->extends_as_ones = false;
387
+ }
379
388
  else {
389
+ size = min2(bv1->size, bv2->size);
380
390
  bv->extends_as_ones = false;
381
391
  }
382
392
 
393
+ word_size = (size >> 5) + 1;
394
+ while (capa < word_size) {
395
+ capa <<= 1;
396
+ }
397
+ bv_recapa(bv1, capa);
398
+ bv_recapa(bv2, capa);
399
+ REALLOC_N(bv->bits, f_u32, capa);
400
+ bv->capa = capa;
401
+ bv->size = size;
402
+
383
403
  memset(bv->bits + word_size, (bv->extends_as_ones ? 0xFF : 0),
384
404
  sizeof(f_u32) * (capa - word_size));
385
405
 
386
406
  for (i = 0; i < word_size; i++) {
387
407
  bv->bits[i] = bv1->bits[i] & bv2->bits[i];
388
408
  }
409
+
389
410
  bv_recount(bv);
390
411
  return bv;
391
412
  }
@@ -400,16 +421,6 @@ BitVector *bv_and_x(BitVector *bv1, BitVector *bv2)
400
421
  return bv_and_i(bv1, bv1, bv2);
401
422
  }
402
423
 
403
- static __inline void bv_recapa(BitVector *bv, int new_capa)
404
- {
405
- if (bv->capa < new_capa) {
406
- REALLOC_N(bv->bits, f_u32, new_capa);
407
- memset(bv->bits + bv->capa, (bv->extends_as_ones ? 0xFF : 0),
408
- sizeof(f_u32) * (new_capa - bv->capa));
409
- bv->capa = new_capa;
410
- }
411
- }
412
-
413
424
  static BitVector *bv_or_i(BitVector *bv, BitVector *bv1, BitVector *bv2)
414
425
  {
415
426
  int i;
File without changes
data/ext/except.h CHANGED
@@ -63,7 +63,7 @@
63
63
  #define FRT_EXCEPT_H
64
64
 
65
65
  #include <setjmp.h>
66
- #include "defines.h"
66
+ #include "config.h"
67
67
 
68
68
  #define BODY 0
69
69
  #define FINALLY 1
data/ext/extconf.rb CHANGED
@@ -5,6 +5,7 @@ if (/mswin/ =~ RUBY_PLATFORM) and ENV['make'].nil?
5
5
  create_makefile("ferret_ext")
6
6
  else
7
7
  require 'mkmf'
8
- $CFLAGS += " -fno-common"
8
+ #$CFLAGS += " -fno-common"
9
+ $CFLAGS += " -fno-common -D_FILE_OFFSET_BITS=64"
9
10
  create_makefile("ferret_ext")
10
11
  end
data/ext/fs_store.c CHANGED
@@ -223,7 +223,8 @@ static off_t fs_length(Store *store, char *filename)
223
223
  static void fso_flush_i(OutStream *os, uchar *src, int len)
224
224
  {
225
225
  if (len != write(os->file.fd, src, len)) {
226
- RAISE(IO_ERROR, "flushing src of length %d", len);
226
+ RAISE(IO_ERROR, "flushing src of length %d, <%s>", len,
227
+ strerror(errno));
227
228
  }
228
229
  }
229
230
 
@@ -268,7 +269,7 @@ static OutStream *fs_new_output(Store *store, const char *filename)
268
269
  static void fsi_read_i(InStream *is, uchar *path, int len)
269
270
  {
270
271
  int fd = is->file.fd;
271
- int pos = is_pos(is);
272
+ off_t pos = is_pos(is);
272
273
  if (pos != lseek(fd, 0, SEEK_CUR)) {
273
274
  lseek(fd, pos, SEEK_SET);
274
275
  }
@@ -409,6 +410,7 @@ static HashTable stores = {
409
410
  /* fill */ 0,
410
411
  /* used */ 0,
411
412
  /* mask */ HASH_MINSIZE - 1,
413
+ /* ref_cnt */ 1,
412
414
  /* table */ stores.smalltable,
413
415
  /* smalltable */ {{0, NULL, NULL}},
414
416
  /* lookup */ (h_lookup_ft)&h_lookup_str,
data/ext/global.h CHANGED
@@ -1,7 +1,7 @@
1
1
  #ifndef FRT_GLOBAL_H
2
2
  #define FRT_GLOBAL_H
3
3
 
4
- #include "defines.h"
4
+ #include "config.h"
5
5
  #include "except.h"
6
6
  #include "lang.h"
7
7
  #include <stdlib.h>
data/ext/hash.c CHANGED
@@ -238,6 +238,7 @@ HashTable *h_new_str(free_ft free_key, free_ft free_value)
238
238
 
239
239
  ht->free_key_i = free_key != NULL ? free_key : &dummy_free;
240
240
  ht->free_value_i = free_value != NULL ? free_value : &dummy_free;
241
+ ht->ref_cnt = 1;
241
242
  return ht;
242
243
  }
243
244
 
@@ -285,23 +286,25 @@ void h_clear(HashTable *ht)
285
286
 
286
287
  void h_destroy(HashTable *ht)
287
288
  {
288
- h_clear(ht);
289
+ if (--(ht->ref_cnt) <= 0) {
290
+ h_clear(ht);
289
291
 
290
- /* if a new table was created, be sure to free it */
291
- if (ht->table != ht->smalltable) {
292
- free(ht->table);
293
- }
292
+ /* if a new table was created, be sure to free it */
293
+ if (ht->table != ht->smalltable) {
294
+ free(ht->table);
295
+ }
294
296
 
295
297
  #ifdef DEBUG
296
- free(ht);
297
- #else
298
- if (num_free_hts < MAX_FREE_HASH_TABLES) {
299
- free_hts[num_free_hts++] = ht;
300
- }
301
- else {
302
298
  free(ht);
303
- }
299
+ #else
300
+ if (num_free_hts < MAX_FREE_HASH_TABLES) {
301
+ free_hts[num_free_hts++] = ht;
302
+ }
303
+ else {
304
+ free(ht);
305
+ }
304
306
  #endif
307
+ }
305
308
  }
306
309
 
307
310
  void *h_get(HashTable *ht, const void *key)
data/ext/hash.h CHANGED
@@ -46,6 +46,7 @@ typedef struct HashTable
46
46
  int fill; /* num Active + num Dummy */
47
47
  int size; /* num Active ie, num keys set */
48
48
  int mask; /* capacity_of_table - 1 */
49
+ int ref_cnt;
49
50
 
50
51
  /* table points to smalltable initially. If the table grows beyond 2/3 of
51
52
  * HASH_MINSIZE it will point to newly malloced memory as it grows. */
data/ext/helper.c CHANGED
@@ -14,13 +14,13 @@ f_i32 float2int(float f)
14
14
  {
15
15
  union { f_i32 i; float f; } tmp;
16
16
  tmp.f = f;
17
- return POSH_LittleU32(tmp.i);
17
+ return tmp.i;
18
18
  }
19
19
 
20
20
  float int2float(f_i32 i32)
21
21
  {
22
22
  union { f_i32 i; float f; } tmp;
23
- tmp.i = POSH_LittleU32(i32);
23
+ tmp.i = i32;
24
24
  return tmp.f;
25
25
  }
26
26
 
data/ext/helper.h CHANGED
@@ -1,7 +1,7 @@
1
1
  #ifndef FRT_HELPER_H
2
2
  #define FRT_HELPER_H
3
3
 
4
- #include "defines.h"
4
+ #include "config.h"
5
5
 
6
6
  extern __inline int hlp_string_diff(register const char *const s1,
7
7
  register const char *const s2);
data/ext/index.c CHANGED
@@ -5350,7 +5350,8 @@ void iw_close(IndexWriter *iw)
5350
5350
  free(iw);
5351
5351
  }
5352
5352
 
5353
- IndexWriter *iw_open(Store *store, Analyzer *analyzer, const Config *config)
5353
+ IndexWriter *iw_open(Store *store, volatile Analyzer *analyzer,
5354
+ const Config *config)
5354
5355
  {
5355
5356
  IndexWriter *iw = ALLOC_AND_ZERO(IndexWriter);
5356
5357
  mutex_init(&iw->mutex, NULL);
@@ -5381,7 +5382,8 @@ IndexWriter *iw_open(Store *store, Analyzer *analyzer, const Config *config)
5381
5382
  XENDTRY
5382
5383
 
5383
5384
  iw->similarity = sim_create_default();
5384
- iw->analyzer = analyzer ? analyzer : mb_standard_analyzer_new(true);
5385
+ iw->analyzer = analyzer ? (Analyzer *)analyzer
5386
+ : mb_standard_analyzer_new(true);
5385
5387
 
5386
5388
  REF(store);
5387
5389
  return iw;
data/ext/index.h CHANGED
@@ -7,7 +7,7 @@
7
7
  #include "hash.h"
8
8
  #include "hashset.h"
9
9
  #include "store.h"
10
- #include "mem_pool.h"
10
+ #include "mempool.h"
11
11
  #include "similarity.h"
12
12
  #include "bitvector.h"
13
13
  #include "priorityqueue.h"
@@ -874,7 +874,7 @@ struct IndexWriter
874
874
  };
875
875
 
876
876
  extern void index_create(Store *store, FieldInfos *fis);
877
- extern IndexWriter *iw_open(Store *store, Analyzer *analyzer,
877
+ extern IndexWriter *iw_open(Store *store, volatile Analyzer *analyzer,
878
878
  const Config *config);
879
879
  extern void iw_delete_term(IndexWriter *iw, const char *field,
880
880
  const char *term);
@@ -1,5 +1,5 @@
1
1
  #include "global.h"
2
- #include "mem_pool.h"
2
+ #include "mempool.h"
3
3
  #include <string.h>
4
4
 
5
5
  MemoryPool *mp_new_capa(int chuck_size, int init_buf_capa)
File without changes
data/ext/multimapper.c ADDED
@@ -0,0 +1,310 @@
1
+ #include "multimapper.h"
2
+ #include "array.h"
3
+ #include "bitvector.h"
4
+ #include <string.h>
5
+
6
+ #define St(state) ((State *)(state))
7
+ #define UCtoI(val) ((int)(unsigned char)(val))
8
+
9
+ static void state_destroy(State *state)
10
+ {
11
+ state->destroy_i(state);
12
+ }
13
+
14
+ typedef struct LetterState
15
+ {
16
+ State super;
17
+ int c;
18
+ int val;
19
+ char *mapping;
20
+ } LetterState;
21
+ #define LSt(state) ((LetterState *)(state))
22
+
23
+
24
+ static int lstate_next(LetterState *self, int c, int *states)
25
+ {
26
+ if (c == self->c) {
27
+ states[0] = self->val;
28
+ return 1;
29
+ }
30
+ else {
31
+ return 0;
32
+ }
33
+ }
34
+
35
+ static int lstate_is_match(LetterState *self, char **mapping)
36
+ {
37
+ if (self->val < 0) {
38
+ *mapping = self->mapping;
39
+ return self->val;
40
+ }
41
+ else {
42
+ return 0;
43
+ }
44
+ }
45
+
46
+ static LetterState *lstate_new(int c, int val)
47
+ {
48
+ LetterState *self = ALLOC(LetterState);
49
+ self->c = c;
50
+ self->val = val;
51
+ self->mapping = NULL;
52
+ St(self)->next = (int (*)(State *, int, int *))&lstate_next;
53
+ St(self)->destroy_i = (void (*)(State *))&free;
54
+ St(self)->is_match = (int (*)(State *, char **))&lstate_is_match;
55
+ return self;
56
+ }
57
+
58
+ typedef struct NonDeterministicState
59
+ {
60
+ State super;
61
+ int *states[256];
62
+ int size[256];
63
+ int capa[256];
64
+ } NonDeterministicState;
65
+
66
+ static int ndstate_next(NonDeterministicState *self, int c, int *states)
67
+ {
68
+ int size = self->size[c];
69
+ memcpy(states, self->states[c], size * sizeof(int));
70
+ return size;
71
+ }
72
+
73
+ static void ndstate_add(NonDeterministicState *self, int c, int state)
74
+ {
75
+ if (self->capa[c] <= self->size[c]) {
76
+ if (self->capa[c] == 0) {
77
+ self->capa[c] = 4;
78
+ }
79
+ else {
80
+ self->capa[c] <<= 1;
81
+ }
82
+ REALLOC_N(self->states[c], int, self->capa[c]);
83
+ }
84
+ self->states[c][self->size[c]++] = state;
85
+ }
86
+
87
+ static void ndstate_destroy_i(NonDeterministicState *self)
88
+ {
89
+ int i;
90
+ for (i = 0; i < 256; i++) {
91
+ free(self->states[i]);
92
+ }
93
+ free(self);
94
+ }
95
+
96
+ static int ndstate_is_match(State *self, char **mapping)
97
+ {
98
+ (void)self; (void)mapping;
99
+ return 0;
100
+ }
101
+
102
+ static NonDeterministicState *ndstate_new()
103
+ {
104
+ NonDeterministicState *self = ALLOC_AND_ZERO(NonDeterministicState);
105
+ St(self)->next = (int (*)(State *, int, int *))&ndstate_next;
106
+ St(self)->destroy_i = (void (*)(State *))&ndstate_destroy_i;
107
+ St(self)->is_match = &ndstate_is_match;
108
+ return self;
109
+ }
110
+
111
+ MultiMapper *mulmap_new()
112
+ {
113
+ MultiMapper *self = ALLOC_AND_ZERO(MultiMapper);
114
+ self->capa = 128;
115
+ self->mappings = ALLOC_N(Mapping *, 128);
116
+ self->d_capa = 128;
117
+ self->dstates = ALLOC_N(DeterministicState *, 128);
118
+ self->dstates_map = NULL;
119
+ self->nstates = NULL;
120
+ self->ref_cnt = 1;
121
+ return self;
122
+ }
123
+
124
+ static __inline void mulmap_free_dstates(MultiMapper *self)
125
+ {
126
+ if (self->d_size > 0) {
127
+ int i;
128
+ for (i = self->d_size - 1; i >= 0; i--) {
129
+ free(self->dstates[i]);
130
+ }
131
+ self->d_size = 0;
132
+ }
133
+ }
134
+
135
+ void mulmap_add_mapping(MultiMapper *self, const char *pattern, const char *rep)
136
+ {
137
+ if (pattern == NULL || pattern[0] == '\0') {
138
+ RAISE(ARG_ERROR, "Tried to add empty pattern to multi_mapper");
139
+ }
140
+ else {
141
+ Mapping *mapping = ALLOC(Mapping);
142
+ if (self->size >= self->capa) {
143
+ self->capa <<= 1;
144
+ REALLOC_N(self->mappings, Mapping *, self->capa);
145
+ }
146
+ mapping->pattern = estrdup(pattern);
147
+ mapping->replacement = estrdup(rep);
148
+ self->mappings[self->size++] = mapping;
149
+ mulmap_free_dstates(self);
150
+ }
151
+ }
152
+
153
+
154
+ static __inline void mulmap_bv_set_states(BitVector *bv, int *states, int cnt)
155
+ {
156
+ int i;
157
+ for (i = cnt - 1; i >= 0; i--) {
158
+ bv_set(bv, states[i]);
159
+ }
160
+ }
161
+
162
+ static DeterministicState *mulmap_process_state(MultiMapper *self, BitVector *bv)
163
+ {
164
+ DeterministicState *current_state = h_get(self->dstates_map, bv);
165
+ if (current_state == NULL) {
166
+ int bit, i;
167
+ int match_len = 0, max_match_len = 0;
168
+ State *start = self->nstates[0];
169
+ DeterministicState *start_ds;
170
+ current_state = ALLOC_AND_ZERO(DeterministicState);
171
+ h_set(self->dstates_map, bv, current_state);
172
+ if (self->d_size >= self->d_capa) {
173
+ self->d_capa <<= 1;
174
+ REALLOC_N(self->dstates, DeterministicState *, self->d_capa);
175
+ }
176
+ self->dstates[self->d_size++] = current_state;
177
+ start_ds = self->dstates[0];
178
+ for (i = 0; i <= 256; i++) {
179
+ current_state->next[i] = start_ds;
180
+ }
181
+ while ((bit = bv_scan_next(bv)) >= 0) {
182
+ char *mapping;
183
+ State *st = self->nstates[bit];
184
+ if ((match_len = -st->is_match(st, &mapping)) > max_match_len) {
185
+ current_state->longest_match = max_match_len = match_len;
186
+ current_state->mapping = mapping;
187
+ current_state->mapping_len = strlen(mapping);
188
+ }
189
+ }
190
+ for (i = self->a_size - 1; i >= 0; i--) {
191
+ unsigned char c = self->alphabet[i];
192
+ BitVector *nxt_bv = bv_new_capa(self->nsize);
193
+ mulmap_bv_set_states(nxt_bv, self->next_states,
194
+ start->next(start, (int)c, self->next_states));
195
+ bv_scan_reset(bv);
196
+ while ((bit = bv_scan_next(bv)) >= 0) {
197
+ State *state = self->nstates[bit];
198
+ mulmap_bv_set_states(nxt_bv, self->next_states,
199
+ state->next(state, (int)c, self->next_states));
200
+ }
201
+ current_state->next[(int)c] = mulmap_process_state(self, nxt_bv);
202
+ }
203
+ }
204
+ else {
205
+ bv_destroy(bv);
206
+ }
207
+ return current_state;
208
+ }
209
+
210
+ void mulmap_compile(MultiMapper *self)
211
+ {
212
+ NonDeterministicState *start = ndstate_new();
213
+ int i, j;
214
+ int size = 1;
215
+ int capa = 128;
216
+ LetterState *ls;
217
+ State **nstates = ALLOC_N(State *, capa);
218
+ Mapping **mappings = self->mappings;
219
+ unsigned char alphabet[256];
220
+ nstates[0] = (State *)start;
221
+ memset(alphabet, 0, 256);
222
+
223
+ for (i = self->size - 1; i >= 0; i--) {
224
+ const char *pattern = mappings[i]->pattern;
225
+ const int plen = (int)strlen(pattern);
226
+ ndstate_add(start, UCtoI(pattern[0]), size);
227
+ if (size + plen + 1 >= capa) {
228
+ capa <<= 2;
229
+ REALLOC_N(nstates, State *, capa);
230
+ }
231
+ for (j = 0; j < plen; j++) {
232
+ alphabet[UCtoI(pattern[j])] = 1;
233
+ size += 1;
234
+ nstates[size-1] = (State *)lstate_new(UCtoI(pattern[j+1]), size);
235
+ }
236
+ ls = LSt(nstates[size-1]);
237
+ ls->mapping = mappings[i]->replacement;
238
+ ls->val = -plen;
239
+ ls->c = -1;
240
+ }
241
+ for (i = j = 0; i < 256; i++) {
242
+ if (alphabet[i]) self->alphabet[j++] = i;
243
+ }
244
+ self->a_size = j;
245
+ mulmap_free_dstates(self);
246
+ self->nstates = nstates;
247
+ self->nsize = size;
248
+ self->next_states = ALLOC_N(int, size);
249
+ self->dstates_map = h_new((hash_ft)&bv_hash, (eq_ft)&bv_eq,
250
+ (free_ft)&bv_destroy, (free_ft)NULL);
251
+ mulmap_process_state(self, bv_new_capa(0));
252
+ h_destroy(self->dstates_map);
253
+ for (i = size - 1; i >= 0; i--) {
254
+ state_destroy(nstates[i]);
255
+ }
256
+ free(self->next_states);
257
+ free(nstates);
258
+ }
259
+
260
+ int mulmap_map_len(MultiMapper *self, char *to, char *from, int capa)
261
+ {
262
+ DeterministicState *start = self->dstates[0];
263
+ DeterministicState *state = start;
264
+ char *s = from, *d = to, *end = to + capa - 1;
265
+ if (self->d_size == 0) {
266
+ RAISE(STATE_ERROR, "You forgot to compile your MultiMapper");
267
+ }
268
+ while (*s && d < end) {
269
+ state = state->next[UCtoI(*s)];
270
+ if (state->mapping) {
271
+ int len = state->mapping_len;
272
+ d -= (state->longest_match - 1);
273
+ if ((d + len) > end) {
274
+ len = end - d;
275
+ }
276
+ memcpy(d, state->mapping, len);
277
+ d += len;
278
+ state = start;
279
+ }
280
+ else {
281
+ *(d++) = *s;
282
+ }
283
+ s++;
284
+ }
285
+ *d = '\0';
286
+ return d - to;
287
+ }
288
+
289
+ char *mulmap_map(MultiMapper *self, char *to, char *from, int capa)
290
+ {
291
+ mulmap_map_len(self, to, from, capa);
292
+ return to;
293
+ }
294
+
295
+ void mulmap_destroy(MultiMapper *self)
296
+ {
297
+ if (--(self->ref_cnt) <= 0) {
298
+ int i;
299
+ mulmap_free_dstates(self);
300
+ for (i = self->size - 1; i >= 0; i--) {
301
+ Mapping *mapping = self->mappings[i];
302
+ free(mapping->pattern);
303
+ free(mapping->replacement);
304
+ free(mapping);
305
+ }
306
+ free(self->mappings);
307
+ free(self->dstates);
308
+ free(self);
309
+ }
310
+ }