ferret 0.9.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105) hide show
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/README CHANGED
@@ -12,17 +12,18 @@ search for things in them later.
12
12
 
13
13
  == Installation
14
14
 
15
- If you have gems installed you can simple do;
15
+ If you have gems installed you can simply do;
16
16
 
17
17
  gem install ferret
18
18
 
19
- Otherwise, de-compress the archive and enter its top directory.
19
+ Otherwise, you will need Rake installed. De-compress the archive and enter its top directory.
20
20
 
21
- tar zxpvf ferret-0.1.tar.gz
22
- cd ferret-0.1
21
+ tar zxpvf ferret-<version>.tar.gz
22
+ cd ferret-<version>
23
23
 
24
- Run the setup config;
24
+ Run the following;
25
25
 
26
+ $ rake ext
26
27
  $ ruby setup.rb config
27
28
  $ ruby setup.rb setup
28
29
  # ruby setup.rb install
data/Rakefile CHANGED
@@ -33,16 +33,25 @@ $VERBOSE = nil
33
33
 
34
34
  EXT = "ferret_ext.so"
35
35
  EXT_SRC = FileList["src/**/*.[ch]"]
36
+ if (/mswin/ =~ RUBY_PLATFORM)
37
+ EXT_SRC.delete('src/io/nix_io.c')
38
+ end
36
39
 
37
40
  EXT_SRC_DEST = EXT_SRC.map {|fn| File.join("ext", File.basename(fn))}
38
41
  SRC = (FileList["ext/*.[ch]"] + EXT_SRC_DEST).uniq
39
42
 
40
- CLEAN.include(FileList['**/*.o', 'InstalledFiles', '.config'])
43
+ CLEAN.include(FileList['**/*.o', '**/*.obj', 'InstalledFiles', '.config'])
41
44
  CLOBBER.include(FileList['**/*.so'], 'ext/Makefile', EXT_SRC_DEST)
45
+ POLISH = Rake::FileList.new.include(FileList['**/*.so'], 'ext/Makefile')
46
+
47
+ desc "Clean specifically for the release."
48
+ task :polish => [:clean] do
49
+ POLISH.each { |fn| rm_r fn rescue nil }
50
+ end
42
51
 
43
- task :default => :all_tests
52
+ task :default => :test_all
44
53
  desc "Run all tests"
45
- task :all_tests => [ :test_runits, :test_cunits, :test_functional ]
54
+ task :test_all => [ :test_runits, :test_cunits, :test_functional ]
46
55
 
47
56
  desc "Generate API documentation, and show coding stats"
48
57
  task :doc => [ :stats, :appdoc ]
@@ -121,7 +130,13 @@ task :ext => ["ext/#{EXT}"] + SRC
121
130
  file "ext/#{EXT}" => ["ext/Makefile"] do
122
131
  cp "ext/inc/lang.h", "ext/lang.h"
123
132
  cp "ext/inc/except.h", "ext/except.h"
124
- sh "cd ext; make"
133
+ cd "ext"
134
+ if (/mswin/ =~ RUBY_PLATFORM) and ENV['make'].nil?
135
+ sh "nmake"
136
+ else
137
+ sh "make"
138
+ end
139
+ cd ".."
125
140
  end
126
141
 
127
142
  file "ext/lang.h" => ["ext/inc/lang.h"] do
@@ -132,7 +147,9 @@ file "ext/except.h" => ["ext/inc/except.h"] do
132
147
  end
133
148
 
134
149
  file "ext/Makefile" => SRC do
135
- sh "cd ext; ruby extconf.rb"
150
+ cd "ext"
151
+ `ruby extconf.rb`
152
+ cd ".."
136
153
  end
137
154
 
138
155
  # Make Parsers ---------------------------------------------------------------
@@ -158,6 +175,9 @@ PKG_FILES = FileList[
158
175
  'Rakefile'
159
176
  ]
160
177
  PKG_FILES.exclude('**/*.o')
178
+ PKG_FILES.include('ext/termdocs.c')
179
+ PKG_FILES.exclude('**/Makefile')
180
+ PKG_FILES.exclude('ext/ferret_ext.so')
161
181
 
162
182
 
163
183
  if ! defined?(Gem)
@@ -233,12 +253,13 @@ end
233
253
  # Creating a release
234
254
 
235
255
  desc "Make a new release"
236
- task :prerelease => [:all_tests, :clobber]
237
- task :repackage => EXT_SRC_DEST
238
- task :package => EXT_SRC_DEST
239
- task :tag => [:prerelease]
240
- task :update_version => [:prerelease]
241
- task :release do #=> [:tag, :update_version, :package] do
256
+ task :release => [
257
+ :prerelease,
258
+ :polish,
259
+ :test_all,
260
+ :update_version,
261
+ :package,
262
+ :tag] do
242
263
  announce
243
264
  announce "**************************************************************"
244
265
  announce "* Release #{PKG_VERSION} Complete."
@@ -288,6 +309,7 @@ def reversion(fn)
288
309
  end
289
310
  end
290
311
  end
312
+ mv fn + ".new", fn
291
313
  end
292
314
 
293
315
  task :update_version => [:prerelease] do
@@ -300,9 +322,8 @@ task :update_version => [:prerelease] do
300
322
  if ENV['RELTEST']
301
323
  announce "Release Task Testing, skipping commiting of new version"
302
324
  else
303
- mv "lib/rferret.rb.new", "lib/rferret.rb"
325
+ sh %{svn ci -m "Updated to version #{PKG_VERSION}" lib/rferret.rb}
304
326
  end
305
- sh %{svn ci -m "Updated to version #{PKG_VERSION}" lib/rferret.rb}
306
327
  end
307
328
  end
308
329
 
data/TODO CHANGED
@@ -5,6 +5,7 @@ Send suggestions for this list to mailto:dbalmain@gmail.com
5
5
  === To Do
6
6
 
7
7
  * Make a dll for people on Windows
8
+ * fix rb_obj_as_string to StringValue()
8
9
  * pure ruby ConstantScoreQuery
9
10
 
10
11
  === Done
data/TUTORIAL CHANGED
@@ -22,7 +22,7 @@ search for later. If you'd like to use a different analyzer you can specify it
22
22
  here, eg;
23
23
 
24
24
  index = Index::Index.new(:path => '/path/to/index',
25
- :analyzer => WhiteSpaceAnalyzer.new)
25
+ :analyzer => Analysis::WhiteSpaceAnalyzer.new)
26
26
 
27
27
  For more options when creating an Index refer to Ferret::Index::Index.
28
28
 
data/ext/analysis.c CHANGED
@@ -1,10 +1,11 @@
1
- #include <analysis.h>
1
+ #include "analysis.h"
2
+ #include "hash.h"
3
+ #include "libstemmer.h"
2
4
  #include <string.h>
3
5
  #include <ctype.h>
4
6
  #include <wctype.h>
5
7
  #include <wchar.h>
6
- #include "hash.h"
7
- #include "libstemmer.h"
8
+
8
9
 
9
10
  /****************************************************************************
10
11
  *
@@ -22,9 +23,16 @@ void tk_destroy(void *p)
22
23
  free(p);
23
24
  }
24
25
 
25
- inline Token *tk_set(Token *tk, char *text, int tlen, int start, int end, int pos_inc)
26
+ inline Token *tk_set(Token *tk,
27
+ char *text,
28
+ int tlen,
29
+ int start,
30
+ int end,
31
+ int pos_inc)
26
32
  {
27
- if (tlen >= MAX_WORD_SIZE) tlen = MAX_WORD_SIZE - 1;
33
+ if (tlen >= MAX_WORD_SIZE) {
34
+ tlen = MAX_WORD_SIZE - 1;
35
+ }
28
36
  memcpy(tk->text, text, sizeof(char) * tlen);
29
37
  tk->text[tlen] = '\0';
30
38
  tk->start = start;
@@ -33,14 +41,23 @@ inline Token *tk_set(Token *tk, char *text, int tlen, int start, int end, int po
33
41
  return tk;
34
42
  }
35
43
 
36
- inline Token *tk_set_ts(Token *tk, char *start, char *end, char *text, int pos_inc)
44
+ inline Token *tk_set_ts(Token *tk,
45
+ char *start,
46
+ char *end,
47
+ char *text,
48
+ int pos_inc)
37
49
  {
38
- return tk_set(tk, start, end - start, start - text, end - text, pos_inc);
50
+ return tk_set(tk, start, (int)(end - start),
51
+ (int)(start - text), (int)(end - text), pos_inc);
39
52
  }
40
53
 
41
- inline Token *tk_set_no_len(Token *tk, char *text, int start, int end, int pos_inc)
54
+ inline Token *tk_set_no_len(Token *tk,
55
+ char *text,
56
+ int start,
57
+ int end,
58
+ int pos_inc)
42
59
  {
43
- return tk_set(tk, text, strlen(text), start, end, pos_inc);
60
+ return tk_set(tk, text, (int)strlen(text), start, end, pos_inc);
44
61
  }
45
62
 
46
63
  int tk_eq(Token *tk1, Token *tk2)
@@ -75,9 +92,14 @@ int tk_cmp(Token *tk1, Token *tk2)
75
92
  *
76
93
  ****************************************************************************/
77
94
 
78
- void ts_standard_destroy(void *p)
95
+ void ts_deref(void *p)
79
96
  {
80
97
  TokenStream *ts = (TokenStream *)p;
98
+ if (--ts->ref_cnt <= 0) ts->destroy(ts);
99
+ }
100
+
101
+ void ts_standard_destroy(TokenStream *ts)
102
+ {
81
103
  tk_destroy(ts->token);
82
104
  free(ts);
83
105
  }
@@ -89,13 +111,11 @@ void ts_reset(TokenStream *ts, char *text)
89
111
 
90
112
  TokenStream *ts_create()
91
113
  {
92
- TokenStream *ts = ALLOC(TokenStream);
93
- ts->text = NULL;
114
+ TokenStream *ts = ALLOC_AND_ZERO_N(TokenStream, 1);
94
115
  ts->token = tk_create();
95
116
  ts->destroy = &ts_standard_destroy;
96
117
  ts->reset = &ts_reset;
97
- ts->sub_ts = NULL;
98
- ts->clone_i = NULL;
118
+ ts->ref_cnt = 1;
99
119
  return ts;
100
120
  }
101
121
 
@@ -109,6 +129,7 @@ TokenStream *ts_clone(TokenStream *orig_ts)
109
129
  }
110
130
  if (orig_ts->sub_ts) ts->sub_ts = ts_clone(orig_ts->sub_ts);
111
131
  if (orig_ts->clone_i) orig_ts->clone_i(orig_ts, ts);
132
+ ts->ref_cnt = 1;
112
133
  return ts;
113
134
  }
114
135
 
@@ -116,7 +137,7 @@ TokenStream *ts_clone(TokenStream *orig_ts)
116
137
  static char * const ENC_ERR_MSG = "Error decoding input string. "
117
138
  "Check that you have the locale set correctly";
118
139
  #define MB_NEXT_CHAR \
119
- if ((i = mbrtowc(&wchr, t, MB_CUR_MAX, (mbstate_t *)ts->data)) < 0)\
140
+ if ((i = (int)mbrtowc(&wchr, t, MB_CUR_MAX, (mbstate_t *)ts->data)) < 0)\
120
141
  RAISE(IO_ERROR, ENC_ERR_MSG)
121
142
 
122
143
  inline Token *w_tk_set(Token *tk, wchar_t *text, int start, int end, int pos_inc)
@@ -128,9 +149,8 @@ inline Token *w_tk_set(Token *tk, wchar_t *text, int start, int end, int pos_inc
128
149
  return tk;
129
150
  }
130
151
 
131
- void mb_ts_standard_destroy(void *p)
152
+ void mb_ts_standard_destroy(TokenStream *ts)
132
153
  {
133
- TokenStream *ts = (TokenStream *)p;
134
154
  tk_destroy(ts->token);
135
155
  free(ts->data);
136
156
  free(ts);
@@ -150,14 +170,13 @@ void mb_ts_clone_i(TokenStream *orig_ts, TokenStream *new_ts)
150
170
 
151
171
  TokenStream *mb_ts_create()
152
172
  {
153
- TokenStream *ts = ALLOC(TokenStream);
173
+ TokenStream *ts = ALLOC_AND_ZERO_N(TokenStream, 1);
154
174
  ts->data = ALLOC(mbstate_t);
155
- ts->text = NULL;
156
175
  ts->token = tk_create();
157
176
  ts->destroy = &mb_ts_standard_destroy;
158
177
  ts->reset = &mb_ts_reset;
159
178
  ts->clone_i = &mb_ts_clone_i;
160
- ts->sub_ts = NULL;
179
+ ts->ref_cnt = 1;
161
180
  return ts;
162
181
  }
163
182
 
@@ -167,11 +186,16 @@ TokenStream *mb_ts_create()
167
186
  *
168
187
  ****************************************************************************/
169
188
 
170
- void a_standard_destroy(void *p)
189
+ void a_deref(void *p)
171
190
  {
172
191
  Analyzer *a = (Analyzer *)p;
173
- ts_destroy(a->current_ts);
174
- free(p);
192
+ if (--a->ref_cnt <= 0) a->destroy(a);
193
+ }
194
+
195
+ void a_standard_destroy(Analyzer *a)
196
+ {
197
+ if (a->current_ts) ts_deref(a->current_ts);
198
+ free(a);
175
199
  }
176
200
 
177
201
  TokenStream *a_standard_get_ts(Analyzer *a, char *field, char *text)
@@ -180,7 +204,8 @@ TokenStream *a_standard_get_ts(Analyzer *a, char *field, char *text)
180
204
  return a->current_ts;
181
205
  }
182
206
 
183
- Analyzer *analyzer_create(void *data, TokenStream *ts, void (*destroy)(void *),
207
+ Analyzer *analyzer_create(void *data, TokenStream *ts,
208
+ void (*destroy)(Analyzer *a),
184
209
  TokenStream *(*get_ts)(Analyzer *a, char *field, char *text))
185
210
  {
186
211
  Analyzer *a = ALLOC(Analyzer);
@@ -188,6 +213,7 @@ Analyzer *analyzer_create(void *data, TokenStream *ts, void (*destroy)(void *),
188
213
  a->current_ts = ts;
189
214
  a->destroy = (destroy ? destroy : &a_standard_destroy);
190
215
  a->get_ts = (get_ts ? get_ts : &a_standard_get_ts);
216
+ a->ref_cnt = 1;
191
217
  return a;
192
218
  }
193
219
 
@@ -284,7 +310,7 @@ Token *mb_wst_next_lc(TokenStream *ts)
284
310
  MB_NEXT_CHAR;
285
311
  }
286
312
  *w = 0;
287
- w_tk_set(ts->token, wbuf, start - ts->text, t - ts->text, 1);
313
+ w_tk_set(ts->token, wbuf, (int)(start - ts->text), (int)(t - ts->text), 1);
288
314
  ts->t = t;
289
315
  return ts->token;
290
316
  }
@@ -409,7 +435,7 @@ Token *mb_lt_next_lc(TokenStream *ts)
409
435
  MB_NEXT_CHAR;
410
436
  }
411
437
  *w = 0;
412
- w_tk_set(ts->token, wbuf, start - ts->text, t - ts->text, 1);
438
+ w_tk_set(ts->token, wbuf, (int)(start - ts->text), (int)(t - ts->text), 1);
413
439
  ts->t = t;
414
440
  return ts->token;
415
441
  }
@@ -472,7 +498,7 @@ int mb_std_get_alpha(TokenStream *ts, char *token)
472
498
  if ((i = mbtowc(&w, t, MB_CUR_MAX)) < 0) RAISE(IO_ERROR, ENC_ERR_MSG);
473
499
  }
474
500
 
475
- i = t - ts->t;
501
+ i = (int)(t - ts->t);
476
502
  if (i > MAX_WORD_SIZE) i = MAX_WORD_SIZE - 1;
477
503
  memcpy(token, ts->t, i);
478
504
  return i;
@@ -500,7 +526,7 @@ int mb_std_get_alnum(char *text, char *token, TokenStream *ts)
500
526
  if ((i = mbtowc(&w, t, MB_CUR_MAX)) < 0) RAISE(IO_ERROR, ENC_ERR_MSG);
501
527
  }
502
528
 
503
- i = t - ts->t;
529
+ i = (int)(t - ts->t);
504
530
  if (i > MAX_WORD_SIZE) i = MAX_WORD_SIZE - 1;
505
531
  memcpy(token, ts->t, i);
506
532
  return i;
@@ -599,7 +625,7 @@ int std_get_apostrophe(char *input)
599
625
  while (isalpha(*t) || *t == '\'')
600
626
  t++;
601
627
 
602
- return t - input;
628
+ return (int)(t - input);
603
629
  }
604
630
 
605
631
  int mb_std_get_apostrophe(char *input)
@@ -613,7 +639,7 @@ int mb_std_get_apostrophe(char *input)
613
639
  t += i;
614
640
  if ((i = mbtowc(&w, t, MB_CUR_MAX)) < 0) RAISE(IO_ERROR, ENC_ERR_MSG);
615
641
  }
616
- return t - input;
642
+ return (int)(t - input);
617
643
  }
618
644
 
619
645
  int std_get_url(char *input, char *token, int i)
@@ -654,7 +680,7 @@ int mb_std_get_company_name(char *input, TokenStream *ts)
654
680
  MB_NEXT_CHAR;
655
681
  }
656
682
 
657
- return t - input;
683
+ return (int)(t - input);
658
684
  }
659
685
 
660
686
  bool std_advance_to_start(TokenStream *ts)
@@ -723,7 +749,7 @@ Token *std_next(TokenStream *ts)
723
749
  if (*t == '\'') { // apostrophe case.
724
750
  t += std_tz->get_apostrophe(t);
725
751
  ts->t = t;
726
- len = t - start;
752
+ len = (int)(t - start);
727
753
  // strip possesive
728
754
  if ((t[-1] == 's' || t[-1] == 'S') && t[-2] == '\'') t -= 2;
729
755
 
@@ -760,13 +786,14 @@ Token *std_next(TokenStream *ts)
760
786
  memcmp(token, "file", 4) == 0)) {
761
787
  len = std_get_url(t, token, 0); // dispose of first part of the URL
762
788
  } else { //still treat as url but keep the first part
763
- token_i = t - start;
789
+ token_i = (int)(t - start);
764
790
  memcpy(token, start, token_i * sizeof(char));
765
791
  len = token_i + std_get_url(t, token, token_i); // keep start
766
792
  }
767
793
  ts->t = t + len;
768
794
  token[len] = 0;
769
- tk_set(ts->token, token, len, start - ts->text, ts->t - ts->text, 1);
795
+ tk_set(ts->token, token, len, (int)(start - ts->text),
796
+ (int)(ts->t - ts->text), 1);
770
797
  return ts->token;
771
798
  }
772
799
 
@@ -806,7 +833,8 @@ Token *std_next(TokenStream *ts)
806
833
  token_i++;
807
834
  }
808
835
  }
809
- tk_set(ts->token, token, token_i, start - ts->text, t - ts->text, 1);
836
+ tk_set(ts->token, token, token_i, (int)(start - ts->text),
837
+ (int)(t - ts->text), 1);
810
838
  } else { // just return the url as is
811
839
  tk_set_ts(ts->token, start, t, ts->text, 1);
812
840
  }
@@ -819,9 +847,8 @@ Token *std_next(TokenStream *ts)
819
847
  return ts->token;
820
848
  }
821
849
 
822
- void std_ts_destroy(void *p)
850
+ void std_ts_destroy(TokenStream *ts)
823
851
  {
824
- TokenStream *ts = (TokenStream *)p;
825
852
  free(ts->data);
826
853
  ts_standard_destroy(ts);
827
854
  }
@@ -871,19 +898,18 @@ void filter_reset(TokenStream *ts, char *text)
871
898
  ts->sub_ts->reset(ts->sub_ts, text);
872
899
  }
873
900
 
874
- void filter_destroy(void *p)
901
+ void filter_destroy(TokenStream *tf)
875
902
  {
876
- TokenStream *tf = (TokenStream *)p;
877
- if (tf->destroy_sub) tf->sub_ts->destroy(tf->sub_ts);
903
+ ts_deref(tf->sub_ts);
878
904
  if (tf->token != NULL) tk_destroy(tf->token);
879
905
  free(tf);
880
906
  }
881
907
 
882
- void sf_destroy(void *p)
908
+ void sf_destroy(TokenStream *tf)
883
909
  {
884
- HshTable *words = (HshTable *)((TokenStream *)p)->data;
910
+ HshTable *words = (HshTable *)tf->data;
885
911
  h_destroy(words);
886
- filter_destroy(p);
912
+ filter_destroy(tf);
887
913
  }
888
914
 
889
915
  void sf_clone_i_i(void *key, void *value, void *arg)
@@ -917,10 +943,10 @@ TokenStream *stop_filter_create_with_words_len(TokenStream *ts,
917
943
  {
918
944
  int i;
919
945
  char *w;
946
+ HshTable *wordtable = h_new_str(&free, (free_ft)NULL);
920
947
  TokenStream *tf = ALLOC(TokenStream);
921
948
  tf->sub_ts = ts;
922
- tf->destroy_sub = true;
923
- HshTable *wordtable = h_new_str(&free, NULL);
949
+
924
950
  for (i = 0; i < len; i++) {
925
951
  w = estrdup(words[i]);
926
952
  h_set(wordtable, w, w);
@@ -931,16 +957,16 @@ TokenStream *stop_filter_create_with_words_len(TokenStream *ts,
931
957
  tf->reset = &filter_reset;
932
958
  tf->destroy = &sf_destroy;
933
959
  tf->clone_i = &sf_clone_i;
960
+ tf->ref_cnt = 1;
934
961
  return tf;
935
962
  }
936
963
 
937
964
  TokenStream *stop_filter_create_with_words(TokenStream *ts, const char **words)
938
965
  {
939
966
  char *w;
967
+ HshTable *wordtable = h_new_str(&free, (free_ft)NULL);
940
968
  TokenStream *tf = ALLOC(TokenStream);
941
969
  tf->sub_ts = ts;
942
- tf->destroy_sub = true;
943
- HshTable *wordtable = h_new_str(&free, NULL);
944
970
  while (*words) {
945
971
  w = estrdup(*words);
946
972
  h_set(wordtable, w, w);
@@ -952,6 +978,7 @@ TokenStream *stop_filter_create_with_words(TokenStream *ts, const char **words)
952
978
  tf->reset = &filter_reset;
953
979
  tf->destroy = &sf_destroy;
954
980
  tf->clone_i = &sf_clone_i;
981
+ tf->ref_cnt = 1;
955
982
  return tf;
956
983
  }
957
984
 
@@ -968,7 +995,7 @@ Token *mb_lcf_next(TokenStream *ts)
968
995
  Token *tk = ts->sub_ts->next(ts->sub_ts);
969
996
  if (tk == NULL) return tk;
970
997
 
971
- i = mbstowcs(wbuf, tk->text, MAX_WORD_SIZE);
998
+ i = (int)mbstowcs(wbuf, tk->text, MAX_WORD_SIZE);
972
999
  w = wbuf;
973
1000
  while (*w != 0) {
974
1001
  *w = towlower(*w);
@@ -986,8 +1013,8 @@ TokenStream *mb_lowercase_filter_create(TokenStream *ts)
986
1013
  tf->reset = &filter_reset;
987
1014
  tf->destroy = &filter_destroy;
988
1015
  tf->sub_ts = ts;
989
- tf->destroy_sub = true;
990
1016
  tf->clone_i = NULL;
1017
+ tf->ref_cnt = 1;
991
1018
  return tf;
992
1019
  }
993
1020
 
@@ -1011,8 +1038,8 @@ TokenStream *lowercase_filter_create(TokenStream *ts)
1011
1038
  tf->reset = &filter_reset;
1012
1039
  tf->destroy = &filter_destroy;
1013
1040
  tf->sub_ts = ts;
1014
- tf->destroy_sub = true;
1015
1041
  tf->clone_i = NULL;
1042
+ tf->ref_cnt = 1;
1016
1043
  return tf;
1017
1044
  }
1018
1045
 
@@ -1022,15 +1049,14 @@ typedef struct StemFilter {
1022
1049
  char *charenc;
1023
1050
  } StemFilter;
1024
1051
 
1025
- void stemf_destroy(void *p)
1052
+ void stemf_destroy(TokenStream *tf)
1026
1053
  {
1027
- TokenStream *ts = (TokenStream *)p;
1028
- StemFilter *stemf = (StemFilter *)ts->data;
1054
+ StemFilter *stemf = (StemFilter *)tf->data;
1029
1055
  sb_stemmer_delete(stemf->stemmer);
1030
1056
  free(stemf->algorithm);
1031
1057
  free(stemf->charenc);
1032
1058
  free(stemf);
1033
- filter_destroy(ts);
1059
+ filter_destroy(tf);
1034
1060
  }
1035
1061
 
1036
1062
  Token *stemf_next(TokenStream *ts)
@@ -1040,7 +1066,7 @@ Token *stemf_next(TokenStream *ts)
1040
1066
  struct sb_stemmer *stemmer = ((StemFilter *)ts->data)->stemmer;
1041
1067
  Token *tk = ts->sub_ts->next(ts->sub_ts);
1042
1068
  if (tk == NULL) return tk;
1043
- stemmed = sb_stemmer_stem(stemmer, (sb_symbol *)tk->text, strlen(tk->text));
1069
+ stemmed = sb_stemmer_stem(stemmer, (sb_symbol *)tk->text, (int)strlen(tk->text));
1044
1070
  len = sb_stemmer_length(stemmer);
1045
1071
  if (len >= MAX_WORD_SIZE) len = MAX_WORD_SIZE - 1;
1046
1072
  memcpy(tk->text, stemmed, len);
@@ -1074,7 +1100,7 @@ TokenStream *stem_filter_create(TokenStream *ts, const char * algorithm,
1074
1100
  tf->destroy = &stemf_destroy;
1075
1101
  tf->clone_i = &stemf_clone_i;
1076
1102
  tf->sub_ts = ts;
1077
- tf->destroy_sub = true;
1103
+ tf->ref_cnt = 1;
1078
1104
  return tf;
1079
1105
  }
1080
1106
 
@@ -1148,19 +1174,12 @@ Analyzer *mb_standard_analyzer_create(bool lowercase)
1148
1174
  *
1149
1175
  ****************************************************************************/
1150
1176
 
1151
- typedef struct PerFieldAnalyzer {
1152
- HshTable *dict;
1153
- Analyzer *def;
1154
- bool destroy_subs : 1;
1155
- } PerFieldAnalyzer;
1156
-
1157
- void pfa_destroy(void *p)
1177
+ void pfa_destroy(Analyzer *self)
1158
1178
  {
1159
- Analyzer *self = (Analyzer *)p;
1160
1179
  PerFieldAnalyzer *pfa = (PerFieldAnalyzer *)self->data;
1161
1180
  h_destroy(pfa->dict);
1162
1181
 
1163
- if (pfa->destroy_subs) a_destroy(pfa->def);
1182
+ a_deref(pfa->def);
1164
1183
  free(pfa);
1165
1184
  free(self);
1166
1185
  }
@@ -1176,7 +1195,7 @@ TokenStream *pfa_get_ts(Analyzer *self, char *field, char *text)
1176
1195
  void pfa_sub_a_destroy(void *p)
1177
1196
  {
1178
1197
  Analyzer *a = (Analyzer *)p;
1179
- a->destroy(a);
1198
+ a_deref(a);
1180
1199
  }
1181
1200
 
1182
1201
  void pfa_add_field(Analyzer *self, char *field, Analyzer *analyzer)
@@ -1185,13 +1204,11 @@ void pfa_add_field(Analyzer *self, char *field, Analyzer *analyzer)
1185
1204
  h_set(pfa->dict, estrdup(field), analyzer);
1186
1205
  }
1187
1206
 
1188
- Analyzer *per_field_analyzer_create(Analyzer *def, bool destroy_subs)
1207
+ Analyzer *per_field_analyzer_create(Analyzer *def)
1189
1208
  {
1190
1209
  PerFieldAnalyzer *pfa = ALLOC(PerFieldAnalyzer);
1191
1210
  pfa->def = def;
1192
- pfa->destroy_subs = destroy_subs;
1193
- pfa->dict = destroy_subs ? h_new_str(&free, &pfa_sub_a_destroy)
1194
- : h_new_str(&free, NULL);
1211
+ pfa->dict = h_new_str(&free, &pfa_sub_a_destroy);
1195
1212
  return analyzer_create(pfa, NULL, &pfa_destroy, &pfa_get_ts);
1196
1213
  }
1197
1214