ferret 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/README CHANGED
@@ -12,17 +12,18 @@ search for things in them later.
12
12
 
13
13
  == Installation
14
14
 
15
- If you have gems installed you can simple do;
15
+ If you have gems installed you can simply do;
16
16
 
17
17
  gem install ferret
18
18
 
19
- Otherwise, de-compress the archive and enter its top directory.
19
+ Otherwise, you will need Rake installed. De-compress the archive and enter its top directory.
20
20
 
21
- tar zxpvf ferret-0.1.tar.gz
22
- cd ferret-0.1
21
+ tar zxpvf ferret-<version>.tar.gz
22
+ cd ferret-<version>
23
23
 
24
- Run the setup config;
24
+ Run the following;
25
25
 
26
+ $ rake ext
26
27
  $ ruby setup.rb config
27
28
  $ ruby setup.rb setup
28
29
  # ruby setup.rb install
data/Rakefile CHANGED
@@ -33,16 +33,25 @@ $VERBOSE = nil
33
33
 
34
34
  EXT = "ferret_ext.so"
35
35
  EXT_SRC = FileList["src/**/*.[ch]"]
36
+ if (/mswin/ =~ RUBY_PLATFORM)
37
+ EXT_SRC.delete('src/io/nix_io.c')
38
+ end
36
39
 
37
40
  EXT_SRC_DEST = EXT_SRC.map {|fn| File.join("ext", File.basename(fn))}
38
41
  SRC = (FileList["ext/*.[ch]"] + EXT_SRC_DEST).uniq
39
42
 
40
- CLEAN.include(FileList['**/*.o', 'InstalledFiles', '.config'])
43
+ CLEAN.include(FileList['**/*.o', '**/*.obj', 'InstalledFiles', '.config'])
41
44
  CLOBBER.include(FileList['**/*.so'], 'ext/Makefile', EXT_SRC_DEST)
45
+ POLISH = Rake::FileList.new.include(FileList['**/*.so'], 'ext/Makefile')
46
+
47
+ desc "Clean specifically for the release."
48
+ task :polish => [:clean] do
49
+ POLISH.each { |fn| rm_r fn rescue nil }
50
+ end
42
51
 
43
- task :default => :all_tests
52
+ task :default => :test_all
44
53
  desc "Run all tests"
45
- task :all_tests => [ :test_runits, :test_cunits, :test_functional ]
54
+ task :test_all => [ :test_runits, :test_cunits, :test_functional ]
46
55
 
47
56
  desc "Generate API documentation, and show coding stats"
48
57
  task :doc => [ :stats, :appdoc ]
@@ -121,7 +130,13 @@ task :ext => ["ext/#{EXT}"] + SRC
121
130
  file "ext/#{EXT}" => ["ext/Makefile"] do
122
131
  cp "ext/inc/lang.h", "ext/lang.h"
123
132
  cp "ext/inc/except.h", "ext/except.h"
124
- sh "cd ext; make"
133
+ cd "ext"
134
+ if (/mswin/ =~ RUBY_PLATFORM) and ENV['make'].nil?
135
+ sh "nmake"
136
+ else
137
+ sh "make"
138
+ end
139
+ cd ".."
125
140
  end
126
141
 
127
142
  file "ext/lang.h" => ["ext/inc/lang.h"] do
@@ -132,7 +147,9 @@ file "ext/except.h" => ["ext/inc/except.h"] do
132
147
  end
133
148
 
134
149
  file "ext/Makefile" => SRC do
135
- sh "cd ext; ruby extconf.rb"
150
+ cd "ext"
151
+ `ruby extconf.rb`
152
+ cd ".."
136
153
  end
137
154
 
138
155
  # Make Parsers ---------------------------------------------------------------
@@ -158,6 +175,9 @@ PKG_FILES = FileList[
158
175
  'Rakefile'
159
176
  ]
160
177
  PKG_FILES.exclude('**/*.o')
178
+ PKG_FILES.include('ext/termdocs.c')
179
+ PKG_FILES.exclude('**/Makefile')
180
+ PKG_FILES.exclude('ext/ferret_ext.so')
161
181
 
162
182
 
163
183
  if ! defined?(Gem)
@@ -233,12 +253,13 @@ end
233
253
  # Creating a release
234
254
 
235
255
  desc "Make a new release"
236
- task :prerelease => [:all_tests, :clobber]
237
- task :repackage => EXT_SRC_DEST
238
- task :package => EXT_SRC_DEST
239
- task :tag => [:prerelease]
240
- task :update_version => [:prerelease]
241
- task :release do #=> [:tag, :update_version, :package] do
256
+ task :release => [
257
+ :prerelease,
258
+ :polish,
259
+ :test_all,
260
+ :update_version,
261
+ :package,
262
+ :tag] do
242
263
  announce
243
264
  announce "**************************************************************"
244
265
  announce "* Release #{PKG_VERSION} Complete."
@@ -288,6 +309,7 @@ def reversion(fn)
288
309
  end
289
310
  end
290
311
  end
312
+ mv fn + ".new", fn
291
313
  end
292
314
 
293
315
  task :update_version => [:prerelease] do
@@ -300,9 +322,8 @@ task :update_version => [:prerelease] do
300
322
  if ENV['RELTEST']
301
323
  announce "Release Task Testing, skipping commiting of new version"
302
324
  else
303
- mv "lib/rferret.rb.new", "lib/rferret.rb"
325
+ sh %{svn ci -m "Updated to version #{PKG_VERSION}" lib/rferret.rb}
304
326
  end
305
- sh %{svn ci -m "Updated to version #{PKG_VERSION}" lib/rferret.rb}
306
327
  end
307
328
  end
308
329
 
data/TODO CHANGED
@@ -5,6 +5,7 @@ Send suggestions for this list to mailto:dbalmain@gmail.com
5
5
  === To Do
6
6
 
7
7
  * Make a dll for people on Windows
8
+ * fix rb_obj_as_string to StringValue()
8
9
  * pure ruby ConstantScoreQuery
9
10
 
10
11
  === Done
data/TUTORIAL CHANGED
@@ -22,7 +22,7 @@ search for later. If you'd like to use a different analyzer you can specify it
22
22
  here, eg;
23
23
 
24
24
  index = Index::Index.new(:path => '/path/to/index',
25
- :analyzer => WhiteSpaceAnalyzer.new)
25
+ :analyzer => Analysis::WhiteSpaceAnalyzer.new)
26
26
 
27
27
  For more options when creating an Index refer to Ferret::Index::Index.
28
28
 
data/ext/analysis.c CHANGED
@@ -1,10 +1,11 @@
1
- #include <analysis.h>
1
+ #include "analysis.h"
2
+ #include "hash.h"
3
+ #include "libstemmer.h"
2
4
  #include <string.h>
3
5
  #include <ctype.h>
4
6
  #include <wctype.h>
5
7
  #include <wchar.h>
6
- #include "hash.h"
7
- #include "libstemmer.h"
8
+
8
9
 
9
10
  /****************************************************************************
10
11
  *
@@ -22,9 +23,16 @@ void tk_destroy(void *p)
22
23
  free(p);
23
24
  }
24
25
 
25
- inline Token *tk_set(Token *tk, char *text, int tlen, int start, int end, int pos_inc)
26
+ inline Token *tk_set(Token *tk,
27
+ char *text,
28
+ int tlen,
29
+ int start,
30
+ int end,
31
+ int pos_inc)
26
32
  {
27
- if (tlen >= MAX_WORD_SIZE) tlen = MAX_WORD_SIZE - 1;
33
+ if (tlen >= MAX_WORD_SIZE) {
34
+ tlen = MAX_WORD_SIZE - 1;
35
+ }
28
36
  memcpy(tk->text, text, sizeof(char) * tlen);
29
37
  tk->text[tlen] = '\0';
30
38
  tk->start = start;
@@ -33,14 +41,23 @@ inline Token *tk_set(Token *tk, char *text, int tlen, int start, int end, int po
33
41
  return tk;
34
42
  }
35
43
 
36
- inline Token *tk_set_ts(Token *tk, char *start, char *end, char *text, int pos_inc)
44
+ inline Token *tk_set_ts(Token *tk,
45
+ char *start,
46
+ char *end,
47
+ char *text,
48
+ int pos_inc)
37
49
  {
38
- return tk_set(tk, start, end - start, start - text, end - text, pos_inc);
50
+ return tk_set(tk, start, (int)(end - start),
51
+ (int)(start - text), (int)(end - text), pos_inc);
39
52
  }
40
53
 
41
- inline Token *tk_set_no_len(Token *tk, char *text, int start, int end, int pos_inc)
54
+ inline Token *tk_set_no_len(Token *tk,
55
+ char *text,
56
+ int start,
57
+ int end,
58
+ int pos_inc)
42
59
  {
43
- return tk_set(tk, text, strlen(text), start, end, pos_inc);
60
+ return tk_set(tk, text, (int)strlen(text), start, end, pos_inc);
44
61
  }
45
62
 
46
63
  int tk_eq(Token *tk1, Token *tk2)
@@ -75,9 +92,14 @@ int tk_cmp(Token *tk1, Token *tk2)
75
92
  *
76
93
  ****************************************************************************/
77
94
 
78
- void ts_standard_destroy(void *p)
95
+ void ts_deref(void *p)
79
96
  {
80
97
  TokenStream *ts = (TokenStream *)p;
98
+ if (--ts->ref_cnt <= 0) ts->destroy(ts);
99
+ }
100
+
101
+ void ts_standard_destroy(TokenStream *ts)
102
+ {
81
103
  tk_destroy(ts->token);
82
104
  free(ts);
83
105
  }
@@ -89,13 +111,11 @@ void ts_reset(TokenStream *ts, char *text)
89
111
 
90
112
  TokenStream *ts_create()
91
113
  {
92
- TokenStream *ts = ALLOC(TokenStream);
93
- ts->text = NULL;
114
+ TokenStream *ts = ALLOC_AND_ZERO_N(TokenStream, 1);
94
115
  ts->token = tk_create();
95
116
  ts->destroy = &ts_standard_destroy;
96
117
  ts->reset = &ts_reset;
97
- ts->sub_ts = NULL;
98
- ts->clone_i = NULL;
118
+ ts->ref_cnt = 1;
99
119
  return ts;
100
120
  }
101
121
 
@@ -109,6 +129,7 @@ TokenStream *ts_clone(TokenStream *orig_ts)
109
129
  }
110
130
  if (orig_ts->sub_ts) ts->sub_ts = ts_clone(orig_ts->sub_ts);
111
131
  if (orig_ts->clone_i) orig_ts->clone_i(orig_ts, ts);
132
+ ts->ref_cnt = 1;
112
133
  return ts;
113
134
  }
114
135
 
@@ -116,7 +137,7 @@ TokenStream *ts_clone(TokenStream *orig_ts)
116
137
  static char * const ENC_ERR_MSG = "Error decoding input string. "
117
138
  "Check that you have the locale set correctly";
118
139
  #define MB_NEXT_CHAR \
119
- if ((i = mbrtowc(&wchr, t, MB_CUR_MAX, (mbstate_t *)ts->data)) < 0)\
140
+ if ((i = (int)mbrtowc(&wchr, t, MB_CUR_MAX, (mbstate_t *)ts->data)) < 0)\
120
141
  RAISE(IO_ERROR, ENC_ERR_MSG)
121
142
 
122
143
  inline Token *w_tk_set(Token *tk, wchar_t *text, int start, int end, int pos_inc)
@@ -128,9 +149,8 @@ inline Token *w_tk_set(Token *tk, wchar_t *text, int start, int end, int pos_inc
128
149
  return tk;
129
150
  }
130
151
 
131
- void mb_ts_standard_destroy(void *p)
152
+ void mb_ts_standard_destroy(TokenStream *ts)
132
153
  {
133
- TokenStream *ts = (TokenStream *)p;
134
154
  tk_destroy(ts->token);
135
155
  free(ts->data);
136
156
  free(ts);
@@ -150,14 +170,13 @@ void mb_ts_clone_i(TokenStream *orig_ts, TokenStream *new_ts)
150
170
 
151
171
  TokenStream *mb_ts_create()
152
172
  {
153
- TokenStream *ts = ALLOC(TokenStream);
173
+ TokenStream *ts = ALLOC_AND_ZERO_N(TokenStream, 1);
154
174
  ts->data = ALLOC(mbstate_t);
155
- ts->text = NULL;
156
175
  ts->token = tk_create();
157
176
  ts->destroy = &mb_ts_standard_destroy;
158
177
  ts->reset = &mb_ts_reset;
159
178
  ts->clone_i = &mb_ts_clone_i;
160
- ts->sub_ts = NULL;
179
+ ts->ref_cnt = 1;
161
180
  return ts;
162
181
  }
163
182
 
@@ -167,11 +186,16 @@ TokenStream *mb_ts_create()
167
186
  *
168
187
  ****************************************************************************/
169
188
 
170
- void a_standard_destroy(void *p)
189
+ void a_deref(void *p)
171
190
  {
172
191
  Analyzer *a = (Analyzer *)p;
173
- ts_destroy(a->current_ts);
174
- free(p);
192
+ if (--a->ref_cnt <= 0) a->destroy(a);
193
+ }
194
+
195
+ void a_standard_destroy(Analyzer *a)
196
+ {
197
+ if (a->current_ts) ts_deref(a->current_ts);
198
+ free(a);
175
199
  }
176
200
 
177
201
  TokenStream *a_standard_get_ts(Analyzer *a, char *field, char *text)
@@ -180,7 +204,8 @@ TokenStream *a_standard_get_ts(Analyzer *a, char *field, char *text)
180
204
  return a->current_ts;
181
205
  }
182
206
 
183
- Analyzer *analyzer_create(void *data, TokenStream *ts, void (*destroy)(void *),
207
+ Analyzer *analyzer_create(void *data, TokenStream *ts,
208
+ void (*destroy)(Analyzer *a),
184
209
  TokenStream *(*get_ts)(Analyzer *a, char *field, char *text))
185
210
  {
186
211
  Analyzer *a = ALLOC(Analyzer);
@@ -188,6 +213,7 @@ Analyzer *analyzer_create(void *data, TokenStream *ts, void (*destroy)(void *),
188
213
  a->current_ts = ts;
189
214
  a->destroy = (destroy ? destroy : &a_standard_destroy);
190
215
  a->get_ts = (get_ts ? get_ts : &a_standard_get_ts);
216
+ a->ref_cnt = 1;
191
217
  return a;
192
218
  }
193
219
 
@@ -284,7 +310,7 @@ Token *mb_wst_next_lc(TokenStream *ts)
284
310
  MB_NEXT_CHAR;
285
311
  }
286
312
  *w = 0;
287
- w_tk_set(ts->token, wbuf, start - ts->text, t - ts->text, 1);
313
+ w_tk_set(ts->token, wbuf, (int)(start - ts->text), (int)(t - ts->text), 1);
288
314
  ts->t = t;
289
315
  return ts->token;
290
316
  }
@@ -409,7 +435,7 @@ Token *mb_lt_next_lc(TokenStream *ts)
409
435
  MB_NEXT_CHAR;
410
436
  }
411
437
  *w = 0;
412
- w_tk_set(ts->token, wbuf, start - ts->text, t - ts->text, 1);
438
+ w_tk_set(ts->token, wbuf, (int)(start - ts->text), (int)(t - ts->text), 1);
413
439
  ts->t = t;
414
440
  return ts->token;
415
441
  }
@@ -472,7 +498,7 @@ int mb_std_get_alpha(TokenStream *ts, char *token)
472
498
  if ((i = mbtowc(&w, t, MB_CUR_MAX)) < 0) RAISE(IO_ERROR, ENC_ERR_MSG);
473
499
  }
474
500
 
475
- i = t - ts->t;
501
+ i = (int)(t - ts->t);
476
502
  if (i > MAX_WORD_SIZE) i = MAX_WORD_SIZE - 1;
477
503
  memcpy(token, ts->t, i);
478
504
  return i;
@@ -500,7 +526,7 @@ int mb_std_get_alnum(char *text, char *token, TokenStream *ts)
500
526
  if ((i = mbtowc(&w, t, MB_CUR_MAX)) < 0) RAISE(IO_ERROR, ENC_ERR_MSG);
501
527
  }
502
528
 
503
- i = t - ts->t;
529
+ i = (int)(t - ts->t);
504
530
  if (i > MAX_WORD_SIZE) i = MAX_WORD_SIZE - 1;
505
531
  memcpy(token, ts->t, i);
506
532
  return i;
@@ -599,7 +625,7 @@ int std_get_apostrophe(char *input)
599
625
  while (isalpha(*t) || *t == '\'')
600
626
  t++;
601
627
 
602
- return t - input;
628
+ return (int)(t - input);
603
629
  }
604
630
 
605
631
  int mb_std_get_apostrophe(char *input)
@@ -613,7 +639,7 @@ int mb_std_get_apostrophe(char *input)
613
639
  t += i;
614
640
  if ((i = mbtowc(&w, t, MB_CUR_MAX)) < 0) RAISE(IO_ERROR, ENC_ERR_MSG);
615
641
  }
616
- return t - input;
642
+ return (int)(t - input);
617
643
  }
618
644
 
619
645
  int std_get_url(char *input, char *token, int i)
@@ -654,7 +680,7 @@ int mb_std_get_company_name(char *input, TokenStream *ts)
654
680
  MB_NEXT_CHAR;
655
681
  }
656
682
 
657
- return t - input;
683
+ return (int)(t - input);
658
684
  }
659
685
 
660
686
  bool std_advance_to_start(TokenStream *ts)
@@ -723,7 +749,7 @@ Token *std_next(TokenStream *ts)
723
749
  if (*t == '\'') { // apostrophe case.
724
750
  t += std_tz->get_apostrophe(t);
725
751
  ts->t = t;
726
- len = t - start;
752
+ len = (int)(t - start);
727
753
  // strip possesive
728
754
  if ((t[-1] == 's' || t[-1] == 'S') && t[-2] == '\'') t -= 2;
729
755
 
@@ -760,13 +786,14 @@ Token *std_next(TokenStream *ts)
760
786
  memcmp(token, "file", 4) == 0)) {
761
787
  len = std_get_url(t, token, 0); // dispose of first part of the URL
762
788
  } else { //still treat as url but keep the first part
763
- token_i = t - start;
789
+ token_i = (int)(t - start);
764
790
  memcpy(token, start, token_i * sizeof(char));
765
791
  len = token_i + std_get_url(t, token, token_i); // keep start
766
792
  }
767
793
  ts->t = t + len;
768
794
  token[len] = 0;
769
- tk_set(ts->token, token, len, start - ts->text, ts->t - ts->text, 1);
795
+ tk_set(ts->token, token, len, (int)(start - ts->text),
796
+ (int)(ts->t - ts->text), 1);
770
797
  return ts->token;
771
798
  }
772
799
 
@@ -806,7 +833,8 @@ Token *std_next(TokenStream *ts)
806
833
  token_i++;
807
834
  }
808
835
  }
809
- tk_set(ts->token, token, token_i, start - ts->text, t - ts->text, 1);
836
+ tk_set(ts->token, token, token_i, (int)(start - ts->text),
837
+ (int)(t - ts->text), 1);
810
838
  } else { // just return the url as is
811
839
  tk_set_ts(ts->token, start, t, ts->text, 1);
812
840
  }
@@ -819,9 +847,8 @@ Token *std_next(TokenStream *ts)
819
847
  return ts->token;
820
848
  }
821
849
 
822
- void std_ts_destroy(void *p)
850
+ void std_ts_destroy(TokenStream *ts)
823
851
  {
824
- TokenStream *ts = (TokenStream *)p;
825
852
  free(ts->data);
826
853
  ts_standard_destroy(ts);
827
854
  }
@@ -871,19 +898,18 @@ void filter_reset(TokenStream *ts, char *text)
871
898
  ts->sub_ts->reset(ts->sub_ts, text);
872
899
  }
873
900
 
874
- void filter_destroy(void *p)
901
+ void filter_destroy(TokenStream *tf)
875
902
  {
876
- TokenStream *tf = (TokenStream *)p;
877
- if (tf->destroy_sub) tf->sub_ts->destroy(tf->sub_ts);
903
+ ts_deref(tf->sub_ts);
878
904
  if (tf->token != NULL) tk_destroy(tf->token);
879
905
  free(tf);
880
906
  }
881
907
 
882
- void sf_destroy(void *p)
908
+ void sf_destroy(TokenStream *tf)
883
909
  {
884
- HshTable *words = (HshTable *)((TokenStream *)p)->data;
910
+ HshTable *words = (HshTable *)tf->data;
885
911
  h_destroy(words);
886
- filter_destroy(p);
912
+ filter_destroy(tf);
887
913
  }
888
914
 
889
915
  void sf_clone_i_i(void *key, void *value, void *arg)
@@ -917,10 +943,10 @@ TokenStream *stop_filter_create_with_words_len(TokenStream *ts,
917
943
  {
918
944
  int i;
919
945
  char *w;
946
+ HshTable *wordtable = h_new_str(&free, (free_ft)NULL);
920
947
  TokenStream *tf = ALLOC(TokenStream);
921
948
  tf->sub_ts = ts;
922
- tf->destroy_sub = true;
923
- HshTable *wordtable = h_new_str(&free, NULL);
949
+
924
950
  for (i = 0; i < len; i++) {
925
951
  w = estrdup(words[i]);
926
952
  h_set(wordtable, w, w);
@@ -931,16 +957,16 @@ TokenStream *stop_filter_create_with_words_len(TokenStream *ts,
931
957
  tf->reset = &filter_reset;
932
958
  tf->destroy = &sf_destroy;
933
959
  tf->clone_i = &sf_clone_i;
960
+ tf->ref_cnt = 1;
934
961
  return tf;
935
962
  }
936
963
 
937
964
  TokenStream *stop_filter_create_with_words(TokenStream *ts, const char **words)
938
965
  {
939
966
  char *w;
967
+ HshTable *wordtable = h_new_str(&free, (free_ft)NULL);
940
968
  TokenStream *tf = ALLOC(TokenStream);
941
969
  tf->sub_ts = ts;
942
- tf->destroy_sub = true;
943
- HshTable *wordtable = h_new_str(&free, NULL);
944
970
  while (*words) {
945
971
  w = estrdup(*words);
946
972
  h_set(wordtable, w, w);
@@ -952,6 +978,7 @@ TokenStream *stop_filter_create_with_words(TokenStream *ts, const char **words)
952
978
  tf->reset = &filter_reset;
953
979
  tf->destroy = &sf_destroy;
954
980
  tf->clone_i = &sf_clone_i;
981
+ tf->ref_cnt = 1;
955
982
  return tf;
956
983
  }
957
984
 
@@ -968,7 +995,7 @@ Token *mb_lcf_next(TokenStream *ts)
968
995
  Token *tk = ts->sub_ts->next(ts->sub_ts);
969
996
  if (tk == NULL) return tk;
970
997
 
971
- i = mbstowcs(wbuf, tk->text, MAX_WORD_SIZE);
998
+ i = (int)mbstowcs(wbuf, tk->text, MAX_WORD_SIZE);
972
999
  w = wbuf;
973
1000
  while (*w != 0) {
974
1001
  *w = towlower(*w);
@@ -986,8 +1013,8 @@ TokenStream *mb_lowercase_filter_create(TokenStream *ts)
986
1013
  tf->reset = &filter_reset;
987
1014
  tf->destroy = &filter_destroy;
988
1015
  tf->sub_ts = ts;
989
- tf->destroy_sub = true;
990
1016
  tf->clone_i = NULL;
1017
+ tf->ref_cnt = 1;
991
1018
  return tf;
992
1019
  }
993
1020
 
@@ -1011,8 +1038,8 @@ TokenStream *lowercase_filter_create(TokenStream *ts)
1011
1038
  tf->reset = &filter_reset;
1012
1039
  tf->destroy = &filter_destroy;
1013
1040
  tf->sub_ts = ts;
1014
- tf->destroy_sub = true;
1015
1041
  tf->clone_i = NULL;
1042
+ tf->ref_cnt = 1;
1016
1043
  return tf;
1017
1044
  }
1018
1045
 
@@ -1022,15 +1049,14 @@ typedef struct StemFilter {
1022
1049
  char *charenc;
1023
1050
  } StemFilter;
1024
1051
 
1025
- void stemf_destroy(void *p)
1052
+ void stemf_destroy(TokenStream *tf)
1026
1053
  {
1027
- TokenStream *ts = (TokenStream *)p;
1028
- StemFilter *stemf = (StemFilter *)ts->data;
1054
+ StemFilter *stemf = (StemFilter *)tf->data;
1029
1055
  sb_stemmer_delete(stemf->stemmer);
1030
1056
  free(stemf->algorithm);
1031
1057
  free(stemf->charenc);
1032
1058
  free(stemf);
1033
- filter_destroy(ts);
1059
+ filter_destroy(tf);
1034
1060
  }
1035
1061
 
1036
1062
  Token *stemf_next(TokenStream *ts)
@@ -1040,7 +1066,7 @@ Token *stemf_next(TokenStream *ts)
1040
1066
  struct sb_stemmer *stemmer = ((StemFilter *)ts->data)->stemmer;
1041
1067
  Token *tk = ts->sub_ts->next(ts->sub_ts);
1042
1068
  if (tk == NULL) return tk;
1043
- stemmed = sb_stemmer_stem(stemmer, (sb_symbol *)tk->text, strlen(tk->text));
1069
+ stemmed = sb_stemmer_stem(stemmer, (sb_symbol *)tk->text, (int)strlen(tk->text));
1044
1070
  len = sb_stemmer_length(stemmer);
1045
1071
  if (len >= MAX_WORD_SIZE) len = MAX_WORD_SIZE - 1;
1046
1072
  memcpy(tk->text, stemmed, len);
@@ -1074,7 +1100,7 @@ TokenStream *stem_filter_create(TokenStream *ts, const char * algorithm,
1074
1100
  tf->destroy = &stemf_destroy;
1075
1101
  tf->clone_i = &stemf_clone_i;
1076
1102
  tf->sub_ts = ts;
1077
- tf->destroy_sub = true;
1103
+ tf->ref_cnt = 1;
1078
1104
  return tf;
1079
1105
  }
1080
1106
 
@@ -1148,19 +1174,12 @@ Analyzer *mb_standard_analyzer_create(bool lowercase)
1148
1174
  *
1149
1175
  ****************************************************************************/
1150
1176
 
1151
- typedef struct PerFieldAnalyzer {
1152
- HshTable *dict;
1153
- Analyzer *def;
1154
- bool destroy_subs : 1;
1155
- } PerFieldAnalyzer;
1156
-
1157
- void pfa_destroy(void *p)
1177
+ void pfa_destroy(Analyzer *self)
1158
1178
  {
1159
- Analyzer *self = (Analyzer *)p;
1160
1179
  PerFieldAnalyzer *pfa = (PerFieldAnalyzer *)self->data;
1161
1180
  h_destroy(pfa->dict);
1162
1181
 
1163
- if (pfa->destroy_subs) a_destroy(pfa->def);
1182
+ a_deref(pfa->def);
1164
1183
  free(pfa);
1165
1184
  free(self);
1166
1185
  }
@@ -1176,7 +1195,7 @@ TokenStream *pfa_get_ts(Analyzer *self, char *field, char *text)
1176
1195
  void pfa_sub_a_destroy(void *p)
1177
1196
  {
1178
1197
  Analyzer *a = (Analyzer *)p;
1179
- a->destroy(a);
1198
+ a_deref(a);
1180
1199
  }
1181
1200
 
1182
1201
  void pfa_add_field(Analyzer *self, char *field, Analyzer *analyzer)
@@ -1185,13 +1204,11 @@ void pfa_add_field(Analyzer *self, char *field, Analyzer *analyzer)
1185
1204
  h_set(pfa->dict, estrdup(field), analyzer);
1186
1205
  }
1187
1206
 
1188
- Analyzer *per_field_analyzer_create(Analyzer *def, bool destroy_subs)
1207
+ Analyzer *per_field_analyzer_create(Analyzer *def)
1189
1208
  {
1190
1209
  PerFieldAnalyzer *pfa = ALLOC(PerFieldAnalyzer);
1191
1210
  pfa->def = def;
1192
- pfa->destroy_subs = destroy_subs;
1193
- pfa->dict = destroy_subs ? h_new_str(&free, &pfa_sub_a_destroy)
1194
- : h_new_str(&free, NULL);
1211
+ pfa->dict = h_new_str(&free, &pfa_sub_a_destroy);
1195
1212
  return analyzer_create(pfa, NULL, &pfa_destroy, &pfa_get_ts);
1196
1213
  }
1197
1214