ferret 0.10.1 → 0.10.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53)
  1. data/Rakefile +7 -1
  2. data/ext/analysis.c +21 -13
  3. data/ext/array.c +1 -1
  4. data/ext/bitvector.c +2 -2
  5. data/ext/defines.h +0 -6
  6. data/ext/except.c +6 -6
  7. data/ext/except.h +12 -8
  8. data/ext/extconf.rb +1 -0
  9. data/ext/ferret.c +4 -0
  10. data/ext/ferret.h +1 -0
  11. data/ext/fs_store.c +18 -4
  12. data/ext/global.c +18 -16
  13. data/ext/global.h +7 -2
  14. data/ext/hash.c +1 -1
  15. data/ext/helper.c +1 -1
  16. data/ext/helper.h +1 -1
  17. data/ext/inc/lang.h +7 -1
  18. data/ext/ind.c +4 -4
  19. data/ext/ind.h +3 -3
  20. data/ext/index.c +33 -26
  21. data/ext/index.h +1 -1
  22. data/ext/lang.h +7 -1
  23. data/ext/mem_pool.c +1 -1
  24. data/ext/mem_pool.h +1 -1
  25. data/ext/q_fuzzy.c +2 -2
  26. data/ext/q_match_all.c +2 -2
  27. data/ext/q_multi_term.c +1 -1
  28. data/ext/q_parser.c +60 -52
  29. data/ext/r_analysis.c +6 -4
  30. data/ext/r_index.c +57 -4
  31. data/ext/r_search.c +1 -1
  32. data/ext/r_utils.c +1 -1
  33. data/ext/ram_store.c +1 -1
  34. data/ext/search.c +4 -4
  35. data/ext/sort.c +3 -3
  36. data/ext/store.c +9 -9
  37. data/ext/store.h +4 -4
  38. data/ext/tags +7841 -0
  39. data/ext/term_vectors.c +3 -3
  40. data/lib/ferret/index.rb +69 -7
  41. data/test/test_helper.rb +3 -2
  42. data/test/unit/analysis/tc_token_stream.rb +1 -0
  43. data/test/unit/index/tc_index.rb +157 -2
  44. data/test/unit/index/tc_index_reader.rb +108 -5
  45. data/test/unit/query_parser/tc_query_parser.rb +2 -1
  46. data/test/unit/search/tc_index_searcher.rb +1 -1
  47. data/test/unit/search/tc_multi_searcher.rb +2 -1
  48. data/test/unit/search/tc_spans.rb +1 -1
  49. data/test/unit/store/tc_fs_store.rb +6 -3
  50. data/test/unit/ts_analysis.rb +1 -1
  51. data/test/unit/ts_utils.rb +1 -1
  52. data/test/unit/utils/tc_number_tools.rb +1 -1
  53. metadata +138 -137
data/Rakefile CHANGED
@@ -34,6 +34,7 @@ EXT_SRC = FileList["../c/src/*.[c]", "../c/include/*.h",
34
34
  "../c/lib/libstemmer_c/runtime/*.[ch]",
35
35
  "../c/lib/libstemmer_c/libstemmer/*.[ch]",
36
36
  "../c/lib/libstemmer_c/include/libstemmer.h"]
37
+ EXT_SRC.exclude('../**/ind.[ch]')
37
38
 
38
39
  EXT_SRC_DEST = EXT_SRC.map {|fn| File.join("ext", File.basename(fn))}
39
40
  SRC = (FileList["ext/*.[ch]"] + EXT_SRC_DEST).uniq
@@ -94,7 +95,12 @@ end
94
95
  EXT_SRC.each do |fn|
95
96
  dest_fn = File.join("ext", File.basename(fn))
96
97
  file dest_fn => fn do |t|
97
- ln_s File.join("..", fn), dest_fn
98
+ begin
99
+ ln_s File.join("..", fn), dest_fn
100
+ rescue Exception => e
101
+ cp File.expand_path(fn), dest_fn
102
+ end
103
+
98
104
  if fn =~ /stemmer/
99
105
  # flatten the directory structure for lib_stemmer
100
106
  open(dest_fn) do |in_f|
data/ext/analysis.c CHANGED
@@ -12,7 +12,7 @@
12
12
  *
13
13
  ****************************************************************************/
14
14
 
15
- inline Token *tk_set(Token *tk,
15
+ __inline Token *tk_set(Token *tk,
16
16
  char *text, int tlen, int start, int end, int pos_inc)
17
17
  {
18
18
  if (tlen >= MAX_WORD_SIZE) {
@@ -27,20 +27,20 @@ inline Token *tk_set(Token *tk,
27
27
  return tk;
28
28
  }
29
29
 
30
- inline Token *tk_set_ts(Token *tk,
30
+ __inline Token *tk_set_ts(Token *tk,
31
31
  char *start, char *end, char *text, int pos_inc)
32
32
  {
33
33
  return tk_set(tk, start, (int)(end - start),
34
34
  (int)(start - text), (int)(end - text), pos_inc);
35
35
  }
36
36
 
37
- inline Token *tk_set_no_len(Token *tk,
37
+ __inline Token *tk_set_no_len(Token *tk,
38
38
  char *text, int start, int end, int pos_inc)
39
39
  {
40
40
  return tk_set(tk, text, (int)strlen(text), start, end, pos_inc);
41
41
  }
42
42
 
43
- inline Token *w_tk_set(Token *tk, wchar_t *text, int start, int end,
43
+ __inline Token *w_tk_set(Token *tk, wchar_t *text, int start, int end,
44
44
  int pos_inc)
45
45
  {
46
46
  int len = wcstombs(tk->text, text, MAX_WORD_SIZE - 1);
@@ -151,7 +151,7 @@ static TokenStream *cts_new()
151
151
 
152
152
  #define MBTS(token_stream) ((MultiByteTokenStream *)(token_stream))
153
153
 
154
- inline int mb_next_char(wchar_t *wchr, const char *s, mbstate_t *state)
154
+ __inline int mb_next_char(wchar_t *wchr, const char *s, mbstate_t *state)
155
155
  {
156
156
  int num_bytes;
157
157
  if ((num_bytes = (int)mbrtowc(wchr, s, MB_CUR_MAX, state)) < 0) {
@@ -161,7 +161,7 @@ inline int mb_next_char(wchar_t *wchr, const char *s, mbstate_t *state)
161
161
  ZEROSET(state, mbstate_t);
162
162
  num_bytes = (int)mbrtowc(wchr, t, MB_CUR_MAX, state);
163
163
  } while ((num_bytes < 0) && (*wchr != 0) && (*t != 0));
164
- num_bytes += t - s;
164
+ num_bytes = t - s;
165
165
  }
166
166
  return num_bytes;
167
167
  }
@@ -774,6 +774,7 @@ static bool std_advance_to_start(TokenStream *ts)
774
774
  {
775
775
  char *t = ts->t;
776
776
  while (*t != '\0' && !isalnum(*t)) {
777
+ if (isnumpunc(*t) && isdigit(t[1])) break;
777
778
  t++;
778
779
  }
779
780
 
@@ -790,7 +791,8 @@ static bool mb_std_advance_to_start(TokenStream *ts)
790
791
 
791
792
  i = mb_next_char(&wchr, ts->t, &state);
792
793
 
793
- while (wchr != 0 && !iswalnum(wchr)) {
794
+ while (wchr != 0 && !iswalpha(wchr) && !isdigit(*(ts->t))) {
795
+ if (isnumpunc(*ts->t) && isdigit(ts->t[1])) break;
794
796
  ts->t += i;
795
797
  i = mb_next_char(&wchr, ts->t, &state);
796
798
  }
@@ -817,8 +819,9 @@ static Token *std_next(TokenStream *ts)
817
819
  }
818
820
 
819
821
  start = t = ts->t;
820
- if (isdigit(*t)) {
821
- t += std_get_number(t);
822
+ if ((isdigit(*t) || isnumpunc(*t))
823
+ && ((len = std_get_number(t)) > 0)) {
824
+ t += len;
822
825
  ts->t = t;
823
826
  tk_set_ts(&(CTS(ts)->token), start, t, ts->text, 1);
824
827
  }
@@ -850,8 +853,9 @@ static Token *std_next(TokenStream *ts)
850
853
  return tk_set_ts(&(CTS(ts)->token), start, t, ts->text, 1);
851
854
  }
852
855
 
853
- if (isdigit(*t) || isnumpunc(*t)) { /* possibly a number */
854
- num_end = start + std_get_number(start);
856
+ if ((isdigit(*t) || isnumpunc(*t)) /* possibly a number */
857
+ && (len = std_get_number(t) > 0)) {
858
+ num_end = start + len;
855
859
  if (!std_tz->is_tok_char(num_end)) { /* won't find a longer token */
856
860
  ts->t = num_end;
857
861
  return tk_set_ts(&(CTS(ts)->token), start, num_end, ts->text, 1);
@@ -908,7 +912,7 @@ static Token *std_next(TokenStream *ts)
908
912
  t--; /* strip trailing punctuation */
909
913
  }
910
914
 
911
- if (t > num_end) {
915
+ if (num_end == NULL || t > num_end) {
912
916
  ts->t = t;
913
917
 
914
918
  if (is_acronym) { /* check it is one letter followed by one '.' */
@@ -1132,13 +1136,17 @@ Token *mb_lcf_next(TokenStream *ts)
1132
1136
  return tk;
1133
1137
  }
1134
1138
 
1135
- mbstowcs(wbuf, tk->text, MAX_WORD_SIZE);
1139
+ if (mbstowcs(wbuf, tk->text, MAX_WORD_SIZE) <= 0) return tk;
1136
1140
  wchr = wbuf;
1137
1141
  while (*wchr != 0) {
1138
1142
  *wchr = towlower(*wchr);
1139
1143
  wchr++;
1140
1144
  }
1141
1145
  tk->len = wcstombs(tk->text, wbuf, MAX_WORD_SIZE);
1146
+ if (tk->len <= 0) {
1147
+ strcpy(tk->text, "BAD_DATA");
1148
+ tk->len = 8;
1149
+ }
1142
1150
  tk->text[tk->len] = '\0';
1143
1151
  return tk;
1144
1152
  }
data/ext/array.c CHANGED
@@ -16,7 +16,7 @@ void **ary_new_i(int type_size, int init_capa)
16
16
  return (void **)&ary[3];
17
17
  }
18
18
 
19
- inline void ary_resize_i(void ***ary, int size)
19
+ __inline void ary_resize_i(void ***ary, int size)
20
20
  {
21
21
  size++;
22
22
  if (size >= ary_sz(*ary)) {
data/ext/bitvector.c CHANGED
@@ -190,7 +190,7 @@ const int NUM_TRAILING_ZEROS[] = {
190
190
  /*
191
191
  * This method is highly optimized, hence the loop unrolling
192
192
  */
193
- static inline int bv_get_1_offset(f_u32 word)
193
+ static __inline int bv_get_1_offset(f_u32 word)
194
194
  {
195
195
  if (word & 0xff) {
196
196
  return NUM_TRAILING_ZEROS[word & 0xff];
@@ -397,7 +397,7 @@ BitVector *bv_and_x(BitVector *bv1, BitVector *bv2)
397
397
  return bv_and_i(bv1, bv1, bv2);
398
398
  }
399
399
 
400
- static inline void bv_recapa(BitVector *bv, int new_capa)
400
+ static __inline void bv_recapa(BitVector *bv, int new_capa)
401
401
  {
402
402
  if (bv->capa < new_capa) {
403
403
  REALLOC_N(bv->bits, f_u32, new_capa);
data/ext/defines.h CHANGED
@@ -30,20 +30,14 @@ typedef posh_i64_t f_i64;
30
30
  #endif
31
31
 
32
32
  #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
33
- #define FRT_HAS_INLINE
34
33
  #define FRT_IS_C99
35
34
  #define FRT_HAS_ISO_VARARGS
36
35
  #define FRT_HAS_VARARGS
37
36
  #endif
38
37
 
39
38
  #if defined(__GNUC__) && !defined(__STRICT_ANSI__)
40
- #define FRT_HAS_INLINE
41
39
  #define FRT_HAS_GNUC_VARARGS
42
40
  #define FRT_HAS_VARARGS
43
41
  #endif
44
42
 
45
- #ifndef FRT_HAS_INLINE
46
- # define inline
47
- #endif
48
-
49
43
  #endif
data/ext/except.c CHANGED
@@ -26,8 +26,8 @@ const char *const FRT_ERROR_TYPES[] = {
26
26
 
27
27
  char *const UNSUPPORTED_ERROR_MSG = "Unsupported operation";
28
28
  char *const EOF_ERROR_MSG = "Read past end of file";
29
- char xmsg_buffer[2048];
30
- char xmsg_buffer_final[2048];
29
+ char xmsg_buffer[XMSG_BUFFER_SIZE];
30
+ char xmsg_buffer_final[XMSG_BUFFER_SIZE];
31
31
 
32
32
  static thread_key_t exception_stack_key;
33
33
  static thread_once_t exception_stack_key_once = THREAD_ONCE_INIT;
@@ -48,9 +48,9 @@ void xpush_context(xcontext_t *context)
48
48
  context->in_finally = false;
49
49
  }
50
50
 
51
- static inline void xraise_context(xcontext_t *context,
52
- volatile int excode,
53
- const char *const msg)
51
+ static __inline void xraise_context(xcontext_t *context,
52
+ volatile int excode,
53
+ const char *const msg)
54
54
  {
55
55
  context->msg = msg;
56
56
  context->excode = excode;
@@ -63,7 +63,7 @@ void RAISE(int excode, const char *fmt, ...)
63
63
  {
64
64
  va_list args;
65
65
  va_start(args, fmt);
66
- vsprintf(xmsg_buffer, fmt, args);
66
+ vsnprintf(xmsg_buffer, XMSG_BUFFER_SIZE, fmt, args);
67
67
  xraise(excode, xmsg_buffer);
68
68
  va_end(args);
69
69
  }
data/ext/except.h CHANGED
@@ -68,7 +68,7 @@
68
68
  #define BODY 0
69
69
  #define FINALLY 1
70
70
  #define EXCEPTION 2
71
- #define ERROR 2
71
+ #define FERRET_ERROR 2
72
72
  #define IO_ERROR 3
73
73
  #define ARG_ERROR 4
74
74
  #define EOF_ERROR 5
@@ -121,17 +121,21 @@ typedef struct xcontext_t
121
121
 
122
122
  #define HANDLED() xcontext.handled = 1; /* true */
123
123
 
124
+ #define XMSG_BUFFER_SIZE 2048
125
+
124
126
  #ifdef FRT_HAS_ISO_VARARGS
125
127
  # define RAISE(excode, ...) do {\
126
- sprintf(xmsg_buffer, __VA_ARGS__);\
127
- sprintf(xmsg_buffer_final, "Error occured in %s:%d - %s\n\t%s\n",\
128
+ snprintf(xmsg_buffer, XMSG_BUFFER_SIZE, __VA_ARGS__);\
129
+ snprintf(xmsg_buffer_final, XMSG_BUFFER_SIZE,\
130
+ "Error occured in %s:%d - %s\n\t%s\n",\
128
131
  __FILE__, __LINE__, __func__, xmsg_buffer);\
129
132
  xraise(excode, xmsg_buffer_final);\
130
133
  } while (0)
131
134
  #elif defined(FRT_HAS_GNUC_VARARGS)
132
135
  # define RAISE(excode, args...) do {\
133
- sprintf(xmsg_buffer, ##args);\
134
- sprintf(xmsg_buffer_final, "Error occured in %s:%d - %s\n\t%s\n",\
136
+ snprintf(xmsg_buffer, XMSG_BUFFER_SIZE, ##args);\
137
+ snprintf(xmsg_buffer_final, XMSG_BUFFER_SIZE,\
138
+ "Error occured in %s:%d - %s\n\t%s\n",\
135
139
  __FILE__, __LINE__, __func__, xmsg_buffer);\
136
140
  xraise(excode, xmsg_buffer_final);\
137
141
  } while (0)
@@ -139,14 +143,14 @@ typedef struct xcontext_t
139
143
  #else
140
144
  extern void RAISE(int excode, const char *fmt, ...);
141
145
  #endif
142
- #define RAISE_HELL() RAISE(ERROR, "Hell")
146
+ #define RAISE_HELL() RAISE(FERRET_ERROR, "Hell")
143
147
 
144
148
 
145
149
  extern void xraise(int excode, const char *const msg);
146
150
  extern void xpush_context(xcontext_t *context);
147
151
  extern void xpop_context();
148
152
 
149
- extern char xmsg_buffer[2048];
150
- extern char xmsg_buffer_final[2048];
153
+ extern char xmsg_buffer[XMSG_BUFFER_SIZE];
154
+ extern char xmsg_buffer_final[XMSG_BUFFER_SIZE];
151
155
 
152
156
  #endif
data/ext/extconf.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # extconf.rb for Ferret extensions
2
2
  if (/mswin/ =~ RUBY_PLATFORM) and ENV['make'].nil?
3
3
  require 'mkmf'
4
+ $LIBS += " msvcprt.lib"
4
5
  create_makefile("ferret_ext")
5
6
  else
6
7
  require 'mkmf'
data/ext/ferret.c CHANGED
@@ -15,6 +15,7 @@ ID id_less_than;
15
15
  ID id_lt;
16
16
  ID id_call;
17
17
  ID id_is_directory;
18
+ ID id_close;
18
19
  ID id_data;
19
20
 
20
21
  static ID id_mkdir_p;
@@ -182,6 +183,8 @@ VALUE frt_get_error(const char *err_type)
182
183
  #ifdef FRT_HAS_VARARGS
183
184
  void vfrt_rb_raise(const char *file, int line_num, const char *func,
184
185
  const char *err_type, const char *fmt, va_list args)
186
+ #else
187
+ void V_FRT_EXIT(const char *err_type, const char *fmt, va_list args)
185
188
  #endif
186
189
  {
187
190
  char buf[FRT_BUF_SIZ];
@@ -250,6 +253,7 @@ void Init_ferret_ext(void)
250
253
 
251
254
  id_mkdir_p = rb_intern("mkdir_p");
252
255
  id_is_directory = rb_intern("directory?");
256
+ id_close = rb_intern("close");
253
257
 
254
258
  id_data = rb_intern("@data");
255
259
 
data/ext/ferret.h CHANGED
@@ -12,6 +12,7 @@ extern ID id_less_than;
12
12
  extern ID id_lt;
13
13
  extern ID id_call;
14
14
  extern ID id_is_directory;
15
+ extern ID id_close;
15
16
  extern ID id_data;
16
17
 
17
18
  /* Symbols */
data/ext/fs_store.c CHANGED
@@ -1,4 +1,5 @@
1
1
  #include "store.h"
2
+ #include <time.h>
2
3
  #include <sys/types.h>
3
4
  #include <fcntl.h>
4
5
  #include <sys/stat.h>
@@ -41,7 +42,7 @@ extern int file_is_lock(char *filename);
41
42
  */
42
43
  static char *join_path(char *buf, const char *base, const char *filename)
43
44
  {
44
- sprintf(buf, "%s"DIR_SEPARATOR"%s", base, filename);
45
+ snprintf(buf, MAX_FILE_PATH, "%s"DIR_SEPARATOR"%s", base, filename);
45
46
  return buf;
46
47
  }
47
48
 
@@ -330,6 +331,10 @@ static InStream *fs_open_input(Store *store, const char *filename)
330
331
 
331
332
  #define LOCK_OBTAIN_TIMEOUT 5
332
333
 
334
+ #ifdef RUBY_BINDINGS
335
+ struct timeval rb_time_interval _((VALUE));
336
+ #endif
337
+
333
338
  static int fs_lock_obtain(Lock *lock)
334
339
  {
335
340
  int f;
@@ -337,9 +342,18 @@ static int fs_lock_obtain(Lock *lock)
337
342
  while (((f =
338
343
  open(lock->name, O_CREAT | O_EXCL | O_RDWR,
339
344
  S_IRUSR | S_IWUSR)) < 0) && (trys > 0)) {
345
+
346
+ #ifdef RUBY_BINDINGS
347
+ rb_thread_wait_for(rb_time_interval(rb_float_new(0.01)));
348
+ #endif
340
349
  trys--;
341
- /* FIXME:: implement sleep properly to sleep in milliseconds
342
- * sleep(1); */
350
+ /* sleep for 10 milliseconds
351
+ clock_t start = clock();
352
+ trys--;
353
+
354
+ while (((double)(clock() - start) / CLOCKS_PER_SEC) < 0.01) {
355
+ }
356
+ */
343
357
  }
344
358
  if (f >= 0) {
345
359
  close(f);
@@ -375,7 +389,7 @@ static Lock *fs_open_lock(Store *store, char *lockname)
375
389
  Lock *lock = ALLOC(Lock);
376
390
  char lname[100];
377
391
  char path[MAX_FILE_PATH];
378
- sprintf(lname, "%s%s.lck", LOCK_PREFIX, lockname);
392
+ snprintf(lname, 100, "%s%s.lck", LOCK_PREFIX, lockname);
379
393
  lock->name = estrdup(join_path(path, store->dir.path, lname));
380
394
  lock->store = store;
381
395
  lock->obtain = &fs_lock_obtain;
data/ext/global.c CHANGED
@@ -11,22 +11,22 @@ const char *EMPTY_STRING = "";
11
11
 
12
12
  bool x_do_logging = false;
13
13
 
14
- inline int min3(int a, int b, int c)
14
+ __inline int min3(int a, int b, int c)
15
15
  {
16
16
  return MIN3(a, b, c);
17
17
  }
18
18
 
19
- inline int min2(int a, int b)
19
+ __inline int min2(int a, int b)
20
20
  {
21
21
  return MIN(a, b);
22
22
  }
23
23
 
24
- inline int max3(int a, int b, int c)
24
+ __inline int max3(int a, int b, int c)
25
25
  {
26
26
  return MAX3(a, b, c);
27
27
  }
28
28
 
29
- inline int max2(int a, int b)
29
+ __inline int max2(int a, int b)
30
30
  {
31
31
  return MAX(a, b);
32
32
  }
@@ -56,13 +56,14 @@ int icmp_risky(const void *p1, const void *p2)
56
56
  }
57
57
 
58
58
 
59
+ #ifndef RUBY_BINDINGS
59
60
  /* frt_exit: print error message and exit */
60
- #ifdef FRT_HAS_VARARGS
61
+ # ifdef FRT_HAS_VARARGS
61
62
  void vfrt_exit(const char *file, int line_num, const char *func,
62
63
  const char *err_type, const char *fmt, va_list args)
63
- #else
64
+ # else
64
65
  void V_FRT_EXIT(const char *err_type, const char *fmt, va_list args)
65
- #endif
66
+ # endif
66
67
  {
67
68
  fflush(stdout);
68
69
  fprintf(stderr, "\n");
@@ -70,12 +71,12 @@ void V_FRT_EXIT(const char *err_type, const char *fmt, va_list args)
70
71
  fprintf(stderr, "%s: ", progname());
71
72
  }
72
73
 
73
- #ifdef FRT_HAS_VARARGS
74
+ # ifdef FRT_HAS_VARARGS
74
75
  fprintf(stderr, "%s occured at <%s>:%d in %s\n",
75
76
  err_type, file, line_num, func);
76
- #else
77
+ # else
77
78
  fprintf(stderr, "%s occured:\n", err_type);
78
- #endif
79
+ # endif
79
80
  vfprintf(stderr, fmt, args);
80
81
 
81
82
  if (fmt[0] != '\0' && fmt[strlen(fmt) - 1] == ':') {
@@ -87,22 +88,23 @@ void V_FRT_EXIT(const char *err_type, const char *fmt, va_list args)
87
88
  }
88
89
 
89
90
 
90
- #ifdef FRT_HAS_VARARGS
91
+ # ifdef FRT_HAS_VARARGS
91
92
  void frt_exit(const char *file, int line_num, const char *func,
92
93
  const char *err_type, const char *fmt, ...)
93
- #else
94
+ # else
94
95
  void FRT_EXIT(const char *err_type, const char *fmt, ...)
95
- #endif
96
+ # endif
96
97
  {
97
98
  va_list args;
98
99
  va_start(args, fmt);
99
- #ifdef FRT_HAS_VARARGS
100
+ # ifdef FRT_HAS_VARARGS
100
101
  vfrt_exit(file, line_num, func, err_type, fmt, args);
101
- #else
102
+ # else
102
103
  V_FRT_EXIT(err_type, fmt, args);
103
- #endif
104
+ # endif
104
105
  va_end(args);
105
106
  }
107
+ #endif
106
108
 
107
109
 
108
110
  /* weprintf: print error message and don't exit */