ferret 0.9.1 → 0.9.2
This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
- data/README +6 -5
 - data/Rakefile +34 -13
 - data/TODO +1 -0
 - data/TUTORIAL +1 -1
 - data/ext/analysis.c +87 -70
 - data/ext/analysis.h +18 -6
 - data/ext/array.c +1 -2
 - data/ext/array.h +1 -1
 - data/ext/bitvector.c +10 -6
 - data/ext/bitvector.h +2 -2
 - data/ext/compound_io.c +30 -27
 - data/ext/document.c +15 -15
 - data/ext/document.h +5 -5
 - data/ext/except.c +2 -0
 - data/ext/except.h +25 -23
 - data/ext/extconf.rb +1 -0
 - data/ext/ferret.c +10 -8
 - data/ext/ferret.h +9 -8
 - data/ext/field.c +29 -25
 - data/ext/filter.c +52 -14
 - data/ext/frtio.h +13 -0
 - data/ext/fs_store.c +115 -170
 - data/ext/global.c +9 -8
 - data/ext/global.h +17 -13
 - data/ext/hash.c +13 -19
 - data/ext/hash.h +11 -11
 - data/ext/hashset.c +5 -7
 - data/ext/hashset.h +9 -8
 - data/ext/helper.c +1 -1
 - data/ext/helper.h +2 -1
 - data/ext/inc/except.h +25 -23
 - data/ext/inc/lang.h +11 -1
 - data/ext/ind.c +33 -21
 - data/ext/index.h +44 -39
 - data/ext/index_io.c +61 -57
 - data/ext/index_rw.c +418 -361
 - data/ext/lang.c +10 -0
 - data/ext/lang.h +11 -1
 - data/ext/nix_io.c +135 -0
 - data/ext/priorityqueue.c +16 -16
 - data/ext/priorityqueue.h +9 -6
 - data/ext/q_boolean.c +128 -76
 - data/ext/q_const_score.c +20 -20
 - data/ext/q_filtered_query.c +20 -20
 - data/ext/q_fuzzy.c +37 -23
 - data/ext/q_match_all.c +15 -19
 - data/ext/q_multi_phrase.c +87 -46
 - data/ext/q_parser.c +247 -119
 - data/ext/q_phrase.c +86 -52
 - data/ext/q_prefix.c +25 -14
 - data/ext/q_range.c +59 -14
 - data/ext/q_span.c +263 -172
 - data/ext/q_term.c +62 -51
 - data/ext/q_wildcard.c +24 -13
 - data/ext/r_analysis.c +328 -80
 - data/ext/r_doc.c +11 -6
 - data/ext/r_index_io.c +40 -32
 - data/ext/r_qparser.c +15 -14
 - data/ext/r_search.c +270 -152
 - data/ext/r_store.c +32 -17
 - data/ext/ram_store.c +38 -22
 - data/ext/search.c +617 -87
 - data/ext/search.h +227 -163
 - data/ext/similarity.c +54 -45
 - data/ext/similarity.h +3 -3
 - data/ext/sort.c +132 -53
 - data/ext/store.c +21 -2
 - data/ext/store.h +14 -14
 - data/ext/tags +4322 -232
 - data/ext/term.c +140 -109
 - data/ext/termdocs.c +74 -60
 - data/ext/vector.c +181 -152
 - data/ext/w32_io.c +150 -0
 - data/lib/ferret.rb +1 -1
 - data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
 - data/lib/ferret/document/field.rb +1 -1
 - data/lib/ferret/index/field_infos.rb +1 -1
 - data/lib/ferret/index/term.rb +1 -1
 - data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
 - data/lib/ferret/search.rb +1 -0
 - data/lib/ferret/search/boolean_query.rb +0 -4
 - data/lib/ferret/search/index_searcher.rb +21 -8
 - data/lib/ferret/search/multi_phrase_query.rb +7 -0
 - data/lib/ferret/search/multi_searcher.rb +261 -0
 - data/lib/ferret/search/phrase_query.rb +1 -1
 - data/lib/ferret/search/query.rb +34 -5
 - data/lib/ferret/search/sort.rb +7 -3
 - data/lib/ferret/search/sort_field.rb +8 -4
 - data/lib/ferret/store/fs_store.rb +13 -6
 - data/lib/ferret/store/index_io.rb +0 -14
 - data/lib/ferret/store/ram_store.rb +3 -2
 - data/lib/rferret.rb +1 -1
 - data/test/unit/analysis/ctc_analyzer.rb +131 -0
 - data/test/unit/analysis/ctc_tokenstream.rb +98 -9
 - data/test/unit/index/tc_index.rb +40 -1
 - data/test/unit/index/tc_term.rb +7 -0
 - data/test/unit/index/th_doc.rb +8 -0
 - data/test/unit/query_parser/tc_query_parser.rb +6 -4
 - data/test/unit/search/rtc_sort_field.rb +6 -6
 - data/test/unit/search/tc_index_searcher.rb +8 -0
 - data/test/unit/search/tc_multi_searcher.rb +275 -0
 - data/test/unit/search/tc_multi_searcher2.rb +126 -0
 - data/test/unit/search/tc_search_and_sort.rb +66 -0
 - metadata +31 -26
 - data/test/unit/query_parser/rtc_query_parser.rb +0 -138
 
    
        data/README
    CHANGED
    
    | 
         @@ -12,17 +12,18 @@ search for things in them later. 
     | 
|
| 
       12 
12 
     | 
    
         | 
| 
       13 
13 
     | 
    
         
             
            == Installation
         
     | 
| 
       14 
14 
     | 
    
         | 
| 
       15 
     | 
    
         
            -
            If you have gems installed you can  
     | 
| 
      
 15 
     | 
    
         
            +
            If you have gems installed you can simply do;
         
     | 
| 
       16 
16 
     | 
    
         | 
| 
       17 
17 
     | 
    
         
             
              gem install ferret
         
     | 
| 
       18 
18 
     | 
    
         | 
| 
       19 
     | 
    
         
            -
            Otherwise,  
     | 
| 
      
 19 
     | 
    
         
            +
            Otherwise, you will need Rake installed. De-compress the archive and enter its top directory.
         
     | 
| 
       20 
20 
     | 
    
         | 
| 
       21 
     | 
    
         
            -
              tar zxpvf ferret 
     | 
| 
       22 
     | 
    
         
            -
              cd ferret 
     | 
| 
      
 21 
     | 
    
         
            +
              tar zxpvf ferret-<version>.tar.gz
         
     | 
| 
      
 22 
     | 
    
         
            +
              cd ferret-<version>
         
     | 
| 
       23 
23 
     | 
    
         | 
| 
       24 
     | 
    
         
            -
            Run the  
     | 
| 
      
 24 
     | 
    
         
            +
            Run the following;
         
     | 
| 
       25 
25 
     | 
    
         | 
| 
      
 26 
     | 
    
         
            +
              $ rake ext
         
     | 
| 
       26 
27 
     | 
    
         
             
              $ ruby setup.rb config
         
     | 
| 
       27 
28 
     | 
    
         
             
              $ ruby setup.rb setup
         
     | 
| 
       28 
29 
     | 
    
         
             
              # ruby setup.rb install
         
     | 
    
        data/Rakefile
    CHANGED
    
    | 
         @@ -33,16 +33,25 @@ $VERBOSE = nil 
     | 
|
| 
       33 
33 
     | 
    
         | 
| 
       34 
34 
     | 
    
         
             
            EXT = "ferret_ext.so"
         
     | 
| 
       35 
35 
     | 
    
         
             
            EXT_SRC = FileList["src/**/*.[ch]"]
         
     | 
| 
      
 36 
     | 
    
         
            +
            if (/mswin/ =~ RUBY_PLATFORM)
         
     | 
| 
      
 37 
     | 
    
         
            +
              EXT_SRC.delete('src/io/nix_io.c')
         
     | 
| 
      
 38 
     | 
    
         
            +
            end
         
     | 
| 
       36 
39 
     | 
    
         | 
| 
       37 
40 
     | 
    
         
             
            EXT_SRC_DEST = EXT_SRC.map {|fn| File.join("ext", File.basename(fn))}
         
     | 
| 
       38 
41 
     | 
    
         
             
            SRC = (FileList["ext/*.[ch]"] + EXT_SRC_DEST).uniq
         
     | 
| 
       39 
42 
     | 
    
         | 
| 
       40 
     | 
    
         
            -
            CLEAN.include(FileList['**/*.o', 'InstalledFiles', '.config'])
         
     | 
| 
      
 43 
     | 
    
         
            +
            CLEAN.include(FileList['**/*.o', '**/*.obj', 'InstalledFiles', '.config'])
         
     | 
| 
       41 
44 
     | 
    
         
             
            CLOBBER.include(FileList['**/*.so'], 'ext/Makefile', EXT_SRC_DEST)
         
     | 
| 
      
 45 
     | 
    
         
            +
            POLISH = Rake::FileList.new.include(FileList['**/*.so'], 'ext/Makefile')
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
            desc "Clean specifically for the release."
         
     | 
| 
      
 48 
     | 
    
         
            +
            task :polish => [:clean] do
         
     | 
| 
      
 49 
     | 
    
         
            +
              POLISH.each { |fn| rm_r fn rescue nil }
         
     | 
| 
      
 50 
     | 
    
         
            +
            end
         
     | 
| 
       42 
51 
     | 
    
         | 
| 
       43 
     | 
    
         
            -
            task :default => : 
     | 
| 
      
 52 
     | 
    
         
            +
            task :default => :test_all
         
     | 
| 
       44 
53 
     | 
    
         
             
            desc "Run all tests"
         
     | 
| 
       45 
     | 
    
         
            -
            task : 
     | 
| 
      
 54 
     | 
    
         
            +
            task :test_all => [ :test_runits, :test_cunits, :test_functional ]
         
     | 
| 
       46 
55 
     | 
    
         | 
| 
       47 
56 
     | 
    
         
             
            desc "Generate API documentation, and show coding stats"
         
     | 
| 
       48 
57 
     | 
    
         
             
            task :doc => [ :stats, :appdoc ]
         
     | 
| 
         @@ -121,7 +130,13 @@ task :ext => ["ext/#{EXT}"] + SRC 
     | 
|
| 
       121 
130 
     | 
    
         
             
            file "ext/#{EXT}" => ["ext/Makefile"] do
         
     | 
| 
       122 
131 
     | 
    
         
             
              cp "ext/inc/lang.h", "ext/lang.h"
         
     | 
| 
       123 
132 
     | 
    
         
             
              cp "ext/inc/except.h", "ext/except.h"
         
     | 
| 
       124 
     | 
    
         
            -
               
     | 
| 
      
 133 
     | 
    
         
            +
              cd "ext"
         
     | 
| 
      
 134 
     | 
    
         
            +
              if (/mswin/ =~ RUBY_PLATFORM) and ENV['make'].nil?
         
     | 
| 
      
 135 
     | 
    
         
            +
                sh "nmake"
         
     | 
| 
      
 136 
     | 
    
         
            +
              else
         
     | 
| 
      
 137 
     | 
    
         
            +
                sh "make"
         
     | 
| 
      
 138 
     | 
    
         
            +
              end
         
     | 
| 
      
 139 
     | 
    
         
            +
              cd ".."
         
     | 
| 
       125 
140 
     | 
    
         
             
            end
         
     | 
| 
       126 
141 
     | 
    
         | 
| 
       127 
142 
     | 
    
         
             
            file "ext/lang.h" => ["ext/inc/lang.h"] do
         
     | 
| 
         @@ -132,7 +147,9 @@ file "ext/except.h" => ["ext/inc/except.h"] do 
     | 
|
| 
       132 
147 
     | 
    
         
             
            end
         
     | 
| 
       133 
148 
     | 
    
         | 
| 
       134 
149 
     | 
    
         
             
            file "ext/Makefile" => SRC do
         
     | 
| 
       135 
     | 
    
         
            -
               
     | 
| 
      
 150 
     | 
    
         
            +
              cd "ext"
         
     | 
| 
      
 151 
     | 
    
         
            +
              `ruby extconf.rb`
         
     | 
| 
      
 152 
     | 
    
         
            +
              cd ".."
         
     | 
| 
       136 
153 
     | 
    
         
             
            end
         
     | 
| 
       137 
154 
     | 
    
         | 
| 
       138 
155 
     | 
    
         
             
            # Make Parsers ---------------------------------------------------------------
         
     | 
| 
         @@ -158,6 +175,9 @@ PKG_FILES = FileList[ 
     | 
|
| 
       158 
175 
     | 
    
         
             
              'Rakefile'
         
     | 
| 
       159 
176 
     | 
    
         
             
            ]
         
     | 
| 
       160 
177 
     | 
    
         
             
            PKG_FILES.exclude('**/*.o')
         
     | 
| 
      
 178 
     | 
    
         
            +
            PKG_FILES.include('ext/termdocs.c')
         
     | 
| 
      
 179 
     | 
    
         
            +
            PKG_FILES.exclude('**/Makefile')
         
     | 
| 
      
 180 
     | 
    
         
            +
            PKG_FILES.exclude('ext/ferret_ext.so')
         
     | 
| 
       161 
181 
     | 
    
         | 
| 
       162 
182 
     | 
    
         | 
| 
       163 
183 
     | 
    
         
             
            if ! defined?(Gem)
         
     | 
| 
         @@ -233,12 +253,13 @@ end 
     | 
|
| 
       233 
253 
     | 
    
         
             
            # Creating a release
         
     | 
| 
       234 
254 
     | 
    
         | 
| 
       235 
255 
     | 
    
         
             
            desc "Make a new release"
         
     | 
| 
       236 
     | 
    
         
            -
            task : 
     | 
| 
       237 
     | 
    
         
            -
             
     | 
| 
       238 
     | 
    
         
            -
             
     | 
| 
       239 
     | 
    
         
            -
             
     | 
| 
       240 
     | 
    
         
            -
             
     | 
| 
       241 
     | 
    
         
            -
             
     | 
| 
      
 256 
     | 
    
         
            +
            task :release => [
         
     | 
| 
      
 257 
     | 
    
         
            +
              :prerelease,
         
     | 
| 
      
 258 
     | 
    
         
            +
              :polish,
         
     | 
| 
      
 259 
     | 
    
         
            +
              :test_all,
         
     | 
| 
      
 260 
     | 
    
         
            +
              :update_version,
         
     | 
| 
      
 261 
     | 
    
         
            +
              :package,
         
     | 
| 
      
 262 
     | 
    
         
            +
              :tag] do
         
     | 
| 
       242 
263 
     | 
    
         
             
              announce 
         
     | 
| 
       243 
264 
     | 
    
         
             
              announce "**************************************************************"
         
     | 
| 
       244 
265 
     | 
    
         
             
              announce "* Release #{PKG_VERSION} Complete."
         
     | 
| 
         @@ -288,6 +309,7 @@ def reversion(fn) 
     | 
|
| 
       288 
309 
     | 
    
         
             
                  end
         
     | 
| 
       289 
310 
     | 
    
         
             
                end
         
     | 
| 
       290 
311 
     | 
    
         
             
              end
         
     | 
| 
      
 312 
     | 
    
         
            +
              mv fn + ".new", fn
         
     | 
| 
       291 
313 
     | 
    
         
             
            end
         
     | 
| 
       292 
314 
     | 
    
         | 
| 
       293 
315 
     | 
    
         
             
            task :update_version => [:prerelease] do
         
     | 
| 
         @@ -300,9 +322,8 @@ task :update_version => [:prerelease] do 
     | 
|
| 
       300 
322 
     | 
    
         
             
                if ENV['RELTEST']
         
     | 
| 
       301 
323 
     | 
    
         
             
                  announce "Release Task Testing, skipping commiting of new version"
         
     | 
| 
       302 
324 
     | 
    
         
             
                else
         
     | 
| 
       303 
     | 
    
         
            -
                   
     | 
| 
      
 325 
     | 
    
         
            +
                  sh %{svn ci -m "Updated to version #{PKG_VERSION}" lib/rferret.rb}
         
     | 
| 
       304 
326 
     | 
    
         
             
                end
         
     | 
| 
       305 
     | 
    
         
            -
                sh %{svn ci -m "Updated to version #{PKG_VERSION}" lib/rferret.rb}
         
     | 
| 
       306 
327 
     | 
    
         
             
              end
         
     | 
| 
       307 
328 
     | 
    
         
             
            end
         
     | 
| 
       308 
329 
     | 
    
         | 
    
        data/TODO
    CHANGED
    
    
    
        data/TUTORIAL
    CHANGED
    
    | 
         @@ -22,7 +22,7 @@ search for later. If you'd like to use a different analyzer you can specify it 
     | 
|
| 
       22 
22 
     | 
    
         
             
            here, eg;
         
     | 
| 
       23 
23 
     | 
    
         | 
| 
       24 
24 
     | 
    
         
             
              index = Index::Index.new(:path => '/path/to/index',
         
     | 
| 
       25 
     | 
    
         
            -
                                       :analyzer => WhiteSpaceAnalyzer.new)
         
     | 
| 
      
 25 
     | 
    
         
            +
                                       :analyzer => Analysis::WhiteSpaceAnalyzer.new)
         
     | 
| 
       26 
26 
     | 
    
         | 
| 
       27 
27 
     | 
    
         
             
            For more options when creating an Index refer to Ferret::Index::Index.
         
     | 
| 
       28 
28 
     | 
    
         | 
    
        data/ext/analysis.c
    CHANGED
    
    | 
         @@ -1,10 +1,11 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            #include  
     | 
| 
      
 1 
     | 
    
         
            +
            #include "analysis.h"
         
     | 
| 
      
 2 
     | 
    
         
            +
            #include "hash.h"
         
     | 
| 
      
 3 
     | 
    
         
            +
            #include "libstemmer.h"
         
     | 
| 
       2 
4 
     | 
    
         
             
            #include <string.h>
         
     | 
| 
       3 
5 
     | 
    
         
             
            #include <ctype.h>
         
     | 
| 
       4 
6 
     | 
    
         
             
            #include <wctype.h>
         
     | 
| 
       5 
7 
     | 
    
         
             
            #include <wchar.h>
         
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
            #include "libstemmer.h"
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
       8 
9 
     | 
    
         | 
| 
       9 
10 
     | 
    
         
             
            /****************************************************************************
         
     | 
| 
       10 
11 
     | 
    
         
             
             *
         
     | 
| 
         @@ -22,9 +23,16 @@ void tk_destroy(void *p) 
     | 
|
| 
       22 
23 
     | 
    
         
             
              free(p);
         
     | 
| 
       23 
24 
     | 
    
         
             
            }
         
     | 
| 
       24 
25 
     | 
    
         | 
| 
       25 
     | 
    
         
            -
            inline Token *tk_set(Token *tk,  
     | 
| 
      
 26 
     | 
    
         
            +
            inline Token *tk_set(Token *tk, 
         
     | 
| 
      
 27 
     | 
    
         
            +
            					 char *text, 
         
     | 
| 
      
 28 
     | 
    
         
            +
            					 int tlen, 
         
     | 
| 
      
 29 
     | 
    
         
            +
            					 int start, 
         
     | 
| 
      
 30 
     | 
    
         
            +
            					 int end, 
         
     | 
| 
      
 31 
     | 
    
         
            +
            					 int pos_inc)
         
     | 
| 
       26 
32 
     | 
    
         
             
            {
         
     | 
| 
       27 
     | 
    
         
            -
              if (tlen >= MAX_WORD_SIZE)  
     | 
| 
      
 33 
     | 
    
         
            +
              if (tlen >= MAX_WORD_SIZE) {
         
     | 
| 
      
 34 
     | 
    
         
            +
                tlen = MAX_WORD_SIZE - 1;
         
     | 
| 
      
 35 
     | 
    
         
            +
              }
         
     | 
| 
       28 
36 
     | 
    
         
             
              memcpy(tk->text, text, sizeof(char) * tlen);
         
     | 
| 
       29 
37 
     | 
    
         
             
              tk->text[tlen] = '\0';
         
     | 
| 
       30 
38 
     | 
    
         
             
              tk->start = start;
         
     | 
| 
         @@ -33,14 +41,23 @@ inline Token *tk_set(Token *tk, char *text, int tlen, int start, int end, int po 
     | 
|
| 
       33 
41 
     | 
    
         
             
              return tk;
         
     | 
| 
       34 
42 
     | 
    
         
             
            }
         
     | 
| 
       35 
43 
     | 
    
         | 
| 
       36 
     | 
    
         
            -
            inline Token *tk_set_ts(Token *tk,  
     | 
| 
      
 44 
     | 
    
         
            +
            inline Token *tk_set_ts(Token *tk, 
         
     | 
| 
      
 45 
     | 
    
         
            +
            						char *start, 
         
     | 
| 
      
 46 
     | 
    
         
            +
            						char *end, 
         
     | 
| 
      
 47 
     | 
    
         
            +
            						char *text, 
         
     | 
| 
      
 48 
     | 
    
         
            +
            						int pos_inc)
         
     | 
| 
       37 
49 
     | 
    
         
             
            {
         
     | 
| 
       38 
     | 
    
         
            -
              return tk_set(tk, start, end - start,  
     | 
| 
      
 50 
     | 
    
         
            +
              return tk_set(tk, start, (int)(end - start), 
         
     | 
| 
      
 51 
     | 
    
         
            +
            	  (int)(start - text), (int)(end - text), pos_inc);
         
     | 
| 
       39 
52 
     | 
    
         
             
            }
         
     | 
| 
       40 
53 
     | 
    
         | 
| 
       41 
     | 
    
         
            -
            inline Token *tk_set_no_len(Token *tk,  
     | 
| 
      
 54 
     | 
    
         
            +
            inline Token *tk_set_no_len(Token *tk, 
         
     | 
| 
      
 55 
     | 
    
         
            +
            							char *text, 
         
     | 
| 
      
 56 
     | 
    
         
            +
            							int start, 
         
     | 
| 
      
 57 
     | 
    
         
            +
            							int end, 
         
     | 
| 
      
 58 
     | 
    
         
            +
            							int pos_inc)
         
     | 
| 
       42 
59 
     | 
    
         
             
            {
         
     | 
| 
       43 
     | 
    
         
            -
              return tk_set(tk, text, strlen(text), start, end, pos_inc);
         
     | 
| 
      
 60 
     | 
    
         
            +
              return tk_set(tk, text, (int)strlen(text), start, end, pos_inc);
         
     | 
| 
       44 
61 
     | 
    
         
             
            }
         
     | 
| 
       45 
62 
     | 
    
         | 
| 
       46 
63 
     | 
    
         
             
            int tk_eq(Token *tk1, Token *tk2)
         
     | 
| 
         @@ -75,9 +92,14 @@ int tk_cmp(Token *tk1, Token *tk2) 
     | 
|
| 
       75 
92 
     | 
    
         
             
             *
         
     | 
| 
       76 
93 
     | 
    
         
             
             ****************************************************************************/
         
     | 
| 
       77 
94 
     | 
    
         | 
| 
       78 
     | 
    
         
            -
            void  
     | 
| 
      
 95 
     | 
    
         
            +
            void ts_deref(void *p)
         
     | 
| 
       79 
96 
     | 
    
         
             
            {
         
     | 
| 
       80 
97 
     | 
    
         
             
              TokenStream *ts = (TokenStream *)p;
         
     | 
| 
      
 98 
     | 
    
         
            +
              if (--ts->ref_cnt <= 0) ts->destroy(ts);
         
     | 
| 
      
 99 
     | 
    
         
            +
            }
         
     | 
| 
      
 100 
     | 
    
         
            +
             
     | 
| 
      
 101 
     | 
    
         
            +
            void ts_standard_destroy(TokenStream *ts)
         
     | 
| 
      
 102 
     | 
    
         
            +
            {
         
     | 
| 
       81 
103 
     | 
    
         
             
              tk_destroy(ts->token);
         
     | 
| 
       82 
104 
     | 
    
         
             
              free(ts);
         
     | 
| 
       83 
105 
     | 
    
         
             
            }
         
     | 
| 
         @@ -89,13 +111,11 @@ void ts_reset(TokenStream *ts, char *text) 
     | 
|
| 
       89 
111 
     | 
    
         | 
| 
       90 
112 
     | 
    
         
             
            TokenStream *ts_create()
         
     | 
| 
       91 
113 
     | 
    
         
             
            {
         
     | 
| 
       92 
     | 
    
         
            -
              TokenStream *ts =  
     | 
| 
       93 
     | 
    
         
            -
              ts->text = NULL;
         
     | 
| 
      
 114 
     | 
    
         
            +
              TokenStream *ts = ALLOC_AND_ZERO_N(TokenStream, 1);
         
     | 
| 
       94 
115 
     | 
    
         
             
              ts->token = tk_create();
         
     | 
| 
       95 
116 
     | 
    
         
             
              ts->destroy = &ts_standard_destroy;
         
     | 
| 
       96 
117 
     | 
    
         
             
              ts->reset = &ts_reset;
         
     | 
| 
       97 
     | 
    
         
            -
              ts-> 
     | 
| 
       98 
     | 
    
         
            -
              ts->clone_i = NULL;
         
     | 
| 
      
 118 
     | 
    
         
            +
              ts->ref_cnt = 1;
         
     | 
| 
       99 
119 
     | 
    
         
             
              return ts;
         
     | 
| 
       100 
120 
     | 
    
         
             
            }
         
     | 
| 
       101 
121 
     | 
    
         | 
| 
         @@ -109,6 +129,7 @@ TokenStream *ts_clone(TokenStream *orig_ts) 
     | 
|
| 
       109 
129 
     | 
    
         
             
              }
         
     | 
| 
       110 
130 
     | 
    
         
             
              if (orig_ts->sub_ts) ts->sub_ts = ts_clone(orig_ts->sub_ts);
         
     | 
| 
       111 
131 
     | 
    
         
             
              if (orig_ts->clone_i) orig_ts->clone_i(orig_ts, ts);
         
     | 
| 
      
 132 
     | 
    
         
            +
              ts->ref_cnt = 1;
         
     | 
| 
       112 
133 
     | 
    
         
             
              return ts;
         
     | 
| 
       113 
134 
     | 
    
         
             
            }
         
     | 
| 
       114 
135 
     | 
    
         | 
| 
         @@ -116,7 +137,7 @@ TokenStream *ts_clone(TokenStream *orig_ts) 
     | 
|
| 
       116 
137 
     | 
    
         
             
            static char * const ENC_ERR_MSG = "Error decoding input string. "
         
     | 
| 
       117 
138 
     | 
    
         
             
                                      "Check that you have the locale set correctly";
         
     | 
| 
       118 
139 
     | 
    
         
             
            #define MB_NEXT_CHAR \
         
     | 
| 
       119 
     | 
    
         
            -
              if ((i = mbrtowc(&wchr, t, MB_CUR_MAX, (mbstate_t *)ts->data)) < 0)\
         
     | 
| 
      
 140 
     | 
    
         
            +
              if ((i = (int)mbrtowc(&wchr, t, MB_CUR_MAX, (mbstate_t *)ts->data)) < 0)\
         
     | 
| 
       120 
141 
     | 
    
         
             
                RAISE(IO_ERROR, ENC_ERR_MSG)
         
     | 
| 
       121 
142 
     | 
    
         | 
| 
       122 
143 
     | 
    
         
             
            inline Token *w_tk_set(Token *tk, wchar_t *text, int start, int end, int pos_inc)
         
     | 
| 
         @@ -128,9 +149,8 @@ inline Token *w_tk_set(Token *tk, wchar_t *text, int start, int end, int pos_inc 
     | 
|
| 
       128 
149 
     | 
    
         
             
              return tk;
         
     | 
| 
       129 
150 
     | 
    
         
             
            }
         
     | 
| 
       130 
151 
     | 
    
         | 
| 
       131 
     | 
    
         
            -
            void mb_ts_standard_destroy( 
     | 
| 
      
 152 
     | 
    
         
            +
            void mb_ts_standard_destroy(TokenStream *ts)
         
     | 
| 
       132 
153 
     | 
    
         
             
            {
         
     | 
| 
       133 
     | 
    
         
            -
              TokenStream *ts = (TokenStream *)p;
         
     | 
| 
       134 
154 
     | 
    
         
             
              tk_destroy(ts->token);
         
     | 
| 
       135 
155 
     | 
    
         
             
              free(ts->data);
         
     | 
| 
       136 
156 
     | 
    
         
             
              free(ts);
         
     | 
| 
         @@ -150,14 +170,13 @@ void mb_ts_clone_i(TokenStream *orig_ts, TokenStream *new_ts) 
     | 
|
| 
       150 
170 
     | 
    
         | 
| 
       151 
171 
     | 
    
         
             
            TokenStream *mb_ts_create()
         
     | 
| 
       152 
172 
     | 
    
         
             
            {
         
     | 
| 
       153 
     | 
    
         
            -
              TokenStream *ts =  
     | 
| 
      
 173 
     | 
    
         
            +
              TokenStream *ts = ALLOC_AND_ZERO_N(TokenStream, 1);
         
     | 
| 
       154 
174 
     | 
    
         
             
              ts->data = ALLOC(mbstate_t);
         
     | 
| 
       155 
     | 
    
         
            -
              ts->text = NULL;
         
     | 
| 
       156 
175 
     | 
    
         
             
              ts->token = tk_create();
         
     | 
| 
       157 
176 
     | 
    
         
             
              ts->destroy = &mb_ts_standard_destroy;
         
     | 
| 
       158 
177 
     | 
    
         
             
              ts->reset = &mb_ts_reset;
         
     | 
| 
       159 
178 
     | 
    
         
             
              ts->clone_i = &mb_ts_clone_i;
         
     | 
| 
       160 
     | 
    
         
            -
              ts-> 
     | 
| 
      
 179 
     | 
    
         
            +
              ts->ref_cnt = 1;
         
     | 
| 
       161 
180 
     | 
    
         
             
              return ts;
         
     | 
| 
       162 
181 
     | 
    
         
             
            }
         
     | 
| 
       163 
182 
     | 
    
         | 
| 
         @@ -167,11 +186,16 @@ TokenStream *mb_ts_create() 
     | 
|
| 
       167 
186 
     | 
    
         
             
             *
         
     | 
| 
       168 
187 
     | 
    
         
             
             ****************************************************************************/
         
     | 
| 
       169 
188 
     | 
    
         | 
| 
       170 
     | 
    
         
            -
            void  
     | 
| 
      
 189 
     | 
    
         
            +
            void a_deref(void *p)
         
     | 
| 
       171 
190 
     | 
    
         
             
            {
         
     | 
| 
       172 
191 
     | 
    
         
             
              Analyzer *a = (Analyzer *)p;
         
     | 
| 
       173 
     | 
    
         
            -
               
     | 
| 
       174 
     | 
    
         
            -
             
     | 
| 
      
 192 
     | 
    
         
            +
              if (--a->ref_cnt <= 0) a->destroy(a);
         
     | 
| 
      
 193 
     | 
    
         
            +
            }
         
     | 
| 
      
 194 
     | 
    
         
            +
             
     | 
| 
      
 195 
     | 
    
         
            +
            void a_standard_destroy(Analyzer *a)
         
     | 
| 
      
 196 
     | 
    
         
            +
            {
         
     | 
| 
      
 197 
     | 
    
         
            +
              if (a->current_ts) ts_deref(a->current_ts);
         
     | 
| 
      
 198 
     | 
    
         
            +
              free(a);
         
     | 
| 
       175 
199 
     | 
    
         
             
            }
         
     | 
| 
       176 
200 
     | 
    
         | 
| 
       177 
201 
     | 
    
         
             
            TokenStream *a_standard_get_ts(Analyzer *a, char *field, char *text)
         
     | 
| 
         @@ -180,7 +204,8 @@ TokenStream *a_standard_get_ts(Analyzer *a, char *field, char *text) 
     | 
|
| 
       180 
204 
     | 
    
         
             
              return a->current_ts; 
         
     | 
| 
       181 
205 
     | 
    
         
             
            }
         
     | 
| 
       182 
206 
     | 
    
         | 
| 
       183 
     | 
    
         
            -
            Analyzer *analyzer_create(void *data, TokenStream *ts, 
     | 
| 
      
 207 
     | 
    
         
            +
            Analyzer *analyzer_create(void *data, TokenStream *ts,
         
     | 
| 
      
 208 
     | 
    
         
            +
                void (*destroy)(Analyzer *a),
         
     | 
| 
       184 
209 
     | 
    
         
             
                TokenStream *(*get_ts)(Analyzer *a, char *field, char *text))
         
     | 
| 
       185 
210 
     | 
    
         
             
            {
         
     | 
| 
       186 
211 
     | 
    
         
             
              Analyzer *a = ALLOC(Analyzer);
         
     | 
| 
         @@ -188,6 +213,7 @@ Analyzer *analyzer_create(void *data, TokenStream *ts, void (*destroy)(void *), 
     | 
|
| 
       188 
213 
     | 
    
         
             
              a->current_ts = ts;
         
     | 
| 
       189 
214 
     | 
    
         
             
              a->destroy = (destroy ? destroy : &a_standard_destroy);
         
     | 
| 
       190 
215 
     | 
    
         
             
              a->get_ts = (get_ts ? get_ts : &a_standard_get_ts);
         
     | 
| 
      
 216 
     | 
    
         
            +
              a->ref_cnt = 1;
         
     | 
| 
       191 
217 
     | 
    
         
             
              return a;
         
     | 
| 
       192 
218 
     | 
    
         
             
            }
         
     | 
| 
       193 
219 
     | 
    
         | 
| 
         @@ -284,7 +310,7 @@ Token *mb_wst_next_lc(TokenStream *ts) 
     | 
|
| 
       284 
310 
     | 
    
         
             
                MB_NEXT_CHAR;
         
     | 
| 
       285 
311 
     | 
    
         
             
              }
         
     | 
| 
       286 
312 
     | 
    
         
             
              *w = 0;
         
     | 
| 
       287 
     | 
    
         
            -
              w_tk_set(ts->token, wbuf, start - ts->text, t - ts->text, 1);
         
     | 
| 
      
 313 
     | 
    
         
            +
              w_tk_set(ts->token, wbuf, (int)(start - ts->text), (int)(t - ts->text), 1);
         
     | 
| 
       288 
314 
     | 
    
         
             
              ts->t = t;
         
     | 
| 
       289 
315 
     | 
    
         
             
              return ts->token;
         
     | 
| 
       290 
316 
     | 
    
         
             
            }
         
     | 
| 
         @@ -409,7 +435,7 @@ Token *mb_lt_next_lc(TokenStream *ts) 
     | 
|
| 
       409 
435 
     | 
    
         
             
                MB_NEXT_CHAR;
         
     | 
| 
       410 
436 
     | 
    
         
             
              }
         
     | 
| 
       411 
437 
     | 
    
         
             
              *w = 0;
         
     | 
| 
       412 
     | 
    
         
            -
              w_tk_set(ts->token, wbuf, start - ts->text, t - ts->text, 1);
         
     | 
| 
      
 438 
     | 
    
         
            +
              w_tk_set(ts->token, wbuf, (int)(start - ts->text), (int)(t - ts->text), 1);
         
     | 
| 
       413 
439 
     | 
    
         
             
              ts->t = t;
         
     | 
| 
       414 
440 
     | 
    
         
             
              return ts->token;
         
     | 
| 
       415 
441 
     | 
    
         
             
            }
         
     | 
| 
         @@ -472,7 +498,7 @@ int mb_std_get_alpha(TokenStream *ts, char *token) 
     | 
|
| 
       472 
498 
     | 
    
         
             
                if ((i = mbtowc(&w, t, MB_CUR_MAX)) < 0) RAISE(IO_ERROR, ENC_ERR_MSG);
         
     | 
| 
       473 
499 
     | 
    
         
             
              }
         
     | 
| 
       474 
500 
     | 
    
         | 
| 
       475 
     | 
    
         
            -
              i = t - ts->t;
         
     | 
| 
      
 501 
     | 
    
         
            +
              i = (int)(t - ts->t);
         
     | 
| 
       476 
502 
     | 
    
         
             
              if (i > MAX_WORD_SIZE) i = MAX_WORD_SIZE - 1;
         
     | 
| 
       477 
503 
     | 
    
         
             
              memcpy(token, ts->t, i);
         
     | 
| 
       478 
504 
     | 
    
         
             
              return i;
         
     | 
| 
         @@ -500,7 +526,7 @@ int mb_std_get_alnum(char *text, char *token, TokenStream *ts) 
     | 
|
| 
       500 
526 
     | 
    
         
             
                if ((i = mbtowc(&w, t, MB_CUR_MAX)) < 0) RAISE(IO_ERROR, ENC_ERR_MSG);
         
     | 
| 
       501 
527 
     | 
    
         
             
              }
         
     | 
| 
       502 
528 
     | 
    
         | 
| 
       503 
     | 
    
         
            -
              i = t - ts->t;
         
     | 
| 
      
 529 
     | 
    
         
            +
              i = (int)(t - ts->t);
         
     | 
| 
       504 
530 
     | 
    
         
             
              if (i > MAX_WORD_SIZE) i = MAX_WORD_SIZE - 1;
         
     | 
| 
       505 
531 
     | 
    
         
             
              memcpy(token, ts->t, i);
         
     | 
| 
       506 
532 
     | 
    
         
             
              return i;
         
     | 
| 
         @@ -599,7 +625,7 @@ int std_get_apostrophe(char *input) 
     | 
|
| 
       599 
625 
     | 
    
         
             
              while (isalpha(*t) || *t == '\'')
         
     | 
| 
       600 
626 
     | 
    
         
             
                t++;
         
     | 
| 
       601 
627 
     | 
    
         | 
| 
       602 
     | 
    
         
            -
              return t - input;
         
     | 
| 
      
 628 
     | 
    
         
            +
              return (int)(t - input);
         
     | 
| 
       603 
629 
     | 
    
         
             
            }
         
     | 
| 
       604 
630 
     | 
    
         | 
| 
       605 
631 
     | 
    
         
             
            int mb_std_get_apostrophe(char *input)
         
     | 
| 
         @@ -613,7 +639,7 @@ int mb_std_get_apostrophe(char *input) 
     | 
|
| 
       613 
639 
     | 
    
         
             
                t += i;
         
     | 
| 
       614 
640 
     | 
    
         
             
                if ((i = mbtowc(&w, t, MB_CUR_MAX)) < 0) RAISE(IO_ERROR, ENC_ERR_MSG);
         
     | 
| 
       615 
641 
     | 
    
         
             
              }
         
     | 
| 
       616 
     | 
    
         
            -
              return t - input;
         
     | 
| 
      
 642 
     | 
    
         
            +
              return (int)(t - input);
         
     | 
| 
       617 
643 
     | 
    
         
             
            }
         
     | 
| 
       618 
644 
     | 
    
         | 
| 
       619 
645 
     | 
    
         
             
            int std_get_url(char *input, char *token, int i)
         
     | 
| 
         @@ -654,7 +680,7 @@ int mb_std_get_company_name(char *input, TokenStream *ts) 
     | 
|
| 
       654 
680 
     | 
    
         
             
                MB_NEXT_CHAR;
         
     | 
| 
       655 
681 
     | 
    
         
             
              }
         
     | 
| 
       656 
682 
     | 
    
         | 
| 
       657 
     | 
    
         
            -
              return t - input;
         
     | 
| 
      
 683 
     | 
    
         
            +
              return (int)(t - input);
         
     | 
| 
       658 
684 
     | 
    
         
             
            }
         
     | 
| 
       659 
685 
     | 
    
         | 
| 
       660 
686 
     | 
    
         
             
            bool std_advance_to_start(TokenStream *ts)
         
     | 
| 
         @@ -723,7 +749,7 @@ Token *std_next(TokenStream *ts) 
     | 
|
| 
       723 
749 
     | 
    
         
             
                if (*t == '\'') { // apostrophe case. 
         
     | 
| 
       724 
750 
     | 
    
         
             
                  t += std_tz->get_apostrophe(t);
         
     | 
| 
       725 
751 
     | 
    
         
             
                  ts->t = t;
         
     | 
| 
       726 
     | 
    
         
            -
                  len = t - start;
         
     | 
| 
      
 752 
     | 
    
         
            +
                  len = (int)(t - start);
         
     | 
| 
       727 
753 
     | 
    
         
             
                  // strip possesive
         
     | 
| 
       728 
754 
     | 
    
         
             
                  if ((t[-1] == 's' || t[-1] == 'S') && t[-2] == '\'') t -= 2;
         
     | 
| 
       729 
755 
     | 
    
         | 
| 
         @@ -760,13 +786,14 @@ Token *std_next(TokenStream *ts) 
     | 
|
| 
       760 
786 
     | 
    
         
             
                       memcmp(token, "file", 4) == 0)) {
         
     | 
| 
       761 
787 
     | 
    
         
             
                    len = std_get_url(t, token, 0); // dispose of first part of the URL
         
     | 
| 
       762 
788 
     | 
    
         
             
                  } else { //still treat as url but keep the first part
         
     | 
| 
       763 
     | 
    
         
            -
                    token_i = t - start;
         
     | 
| 
      
 789 
     | 
    
         
            +
                    token_i = (int)(t - start);
         
     | 
| 
       764 
790 
     | 
    
         
             
                    memcpy(token, start, token_i * sizeof(char));
         
     | 
| 
       765 
791 
     | 
    
         
             
                    len = token_i + std_get_url(t, token, token_i); // keep start
         
     | 
| 
       766 
792 
     | 
    
         
             
                  }
         
     | 
| 
       767 
793 
     | 
    
         
             
                  ts->t = t + len;
         
     | 
| 
       768 
794 
     | 
    
         
             
                  token[len] = 0;
         
     | 
| 
       769 
     | 
    
         
            -
                  tk_set(ts->token, token, len, start - ts->text,  
     | 
| 
      
 795 
     | 
    
         
            +
                  tk_set(ts->token, token, len, (int)(start - ts->text), 
         
     | 
| 
      
 796 
     | 
    
         
            +
            		  (int)(ts->t - ts->text), 1);
         
     | 
| 
       770 
797 
     | 
    
         
             
                  return ts->token;
         
     | 
| 
       771 
798 
     | 
    
         
             
                }
         
     | 
| 
       772 
799 
     | 
    
         | 
| 
         @@ -806,7 +833,8 @@ Token *std_next(TokenStream *ts) 
     | 
|
| 
       806 
833 
     | 
    
         
             
                        token_i++;
         
     | 
| 
       807 
834 
     | 
    
         
             
                      }
         
     | 
| 
       808 
835 
     | 
    
         
             
                    }
         
     | 
| 
       809 
     | 
    
         
            -
                    tk_set(ts->token, token, token_i, start - ts->text,  
     | 
| 
      
 836 
     | 
    
         
            +
                    tk_set(ts->token, token, token_i, (int)(start - ts->text), 
         
     | 
| 
      
 837 
     | 
    
         
            +
            			(int)(t - ts->text), 1);
         
     | 
| 
       810 
838 
     | 
    
         
             
                  } else { // just return the url as is
         
     | 
| 
       811 
839 
     | 
    
         
             
                    tk_set_ts(ts->token, start, t, ts->text, 1);
         
     | 
| 
       812 
840 
     | 
    
         
             
                  }
         
     | 
| 
         @@ -819,9 +847,8 @@ Token *std_next(TokenStream *ts) 
     | 
|
| 
       819 
847 
     | 
    
         
             
              return ts->token;
         
     | 
| 
       820 
848 
     | 
    
         
             
            }
         
     | 
| 
       821 
849 
     | 
    
         | 
| 
       822 
     | 
    
         
            -
            void std_ts_destroy( 
     | 
| 
      
 850 
     | 
    
         
            +
            void std_ts_destroy(TokenStream *ts)
         
     | 
| 
       823 
851 
     | 
    
         
             
            {
         
     | 
| 
       824 
     | 
    
         
            -
              TokenStream *ts = (TokenStream *)p;
         
     | 
| 
       825 
852 
     | 
    
         
             
              free(ts->data);
         
     | 
| 
       826 
853 
     | 
    
         
             
              ts_standard_destroy(ts);
         
     | 
| 
       827 
854 
     | 
    
         
             
            }
         
     | 
| 
         @@ -871,19 +898,18 @@ void filter_reset(TokenStream *ts, char *text) 
     | 
|
| 
       871 
898 
     | 
    
         
             
              ts->sub_ts->reset(ts->sub_ts, text);
         
     | 
| 
       872 
899 
     | 
    
         
             
            }
         
     | 
| 
       873 
900 
     | 
    
         | 
| 
       874 
     | 
    
         
            -
            void filter_destroy( 
     | 
| 
      
 901 
     | 
    
         
            +
            void filter_destroy(TokenStream *tf)
         
     | 
| 
       875 
902 
     | 
    
         
             
            {
         
     | 
| 
       876 
     | 
    
         
            -
               
     | 
| 
       877 
     | 
    
         
            -
              if (tf->destroy_sub) tf->sub_ts->destroy(tf->sub_ts);
         
     | 
| 
      
 903 
     | 
    
         
            +
              ts_deref(tf->sub_ts);
         
     | 
| 
       878 
904 
     | 
    
         
             
              if (tf->token != NULL) tk_destroy(tf->token);
         
     | 
| 
       879 
905 
     | 
    
         
             
              free(tf);
         
     | 
| 
       880 
906 
     | 
    
         
             
            }
         
     | 
| 
       881 
907 
     | 
    
         | 
| 
       882 
     | 
    
         
            -
            void sf_destroy( 
     | 
| 
      
 908 
     | 
    
         
            +
            void sf_destroy(TokenStream *tf)
         
     | 
| 
       883 
909 
     | 
    
         
             
            {
         
     | 
| 
       884 
     | 
    
         
            -
              HshTable *words = (HshTable *) 
     | 
| 
      
 910 
     | 
    
         
            +
              HshTable *words = (HshTable *)tf->data;
         
     | 
| 
       885 
911 
     | 
    
         
             
              h_destroy(words);
         
     | 
| 
       886 
     | 
    
         
            -
              filter_destroy( 
     | 
| 
      
 912 
     | 
    
         
            +
              filter_destroy(tf);
         
     | 
| 
       887 
913 
     | 
    
         
             
            }
         
     | 
| 
       888 
914 
     | 
    
         | 
| 
       889 
915 
     | 
    
         
             
            void sf_clone_i_i(void *key, void *value, void *arg)
         
     | 
| 
         @@ -917,10 +943,10 @@ TokenStream *stop_filter_create_with_words_len(TokenStream *ts, 
     | 
|
| 
       917 
943 
     | 
    
         
             
            {
         
     | 
| 
       918 
944 
     | 
    
         
             
              int i;
         
     | 
| 
       919 
945 
     | 
    
         
             
              char *w;
         
     | 
| 
      
 946 
     | 
    
         
            +
              HshTable *wordtable = h_new_str(&free, (free_ft)NULL);
         
     | 
| 
       920 
947 
     | 
    
         
             
              TokenStream *tf = ALLOC(TokenStream);
         
     | 
| 
       921 
948 
     | 
    
         
             
              tf->sub_ts = ts;
         
     | 
| 
       922 
     | 
    
         
            -
             
     | 
| 
       923 
     | 
    
         
            -
              HshTable *wordtable = h_new_str(&free, NULL);
         
     | 
| 
      
 949 
     | 
    
         
            +
             
     | 
| 
       924 
950 
     | 
    
         
             
              for (i = 0; i < len; i++) {
         
     | 
| 
       925 
951 
     | 
    
         
             
                w = estrdup(words[i]);
         
     | 
| 
       926 
952 
     | 
    
         
             
                h_set(wordtable, w, w);
         
     | 
| 
         @@ -931,16 +957,16 @@ TokenStream *stop_filter_create_with_words_len(TokenStream *ts, 
     | 
|
| 
       931 
957 
     | 
    
         
             
              tf->reset = &filter_reset;
         
     | 
| 
       932 
958 
     | 
    
         
             
              tf->destroy = &sf_destroy;
         
     | 
| 
       933 
959 
     | 
    
         
             
              tf->clone_i = &sf_clone_i;
         
     | 
| 
      
 960 
     | 
    
         
            +
              tf->ref_cnt = 1;
         
     | 
| 
       934 
961 
     | 
    
         
             
              return tf;
         
     | 
| 
       935 
962 
     | 
    
         
             
            }
         
     | 
| 
       936 
963 
     | 
    
         | 
| 
       937 
964 
     | 
    
         
             
            TokenStream *stop_filter_create_with_words(TokenStream *ts, const char **words)
         
     | 
| 
       938 
965 
     | 
    
         
             
            {
         
     | 
| 
       939 
966 
     | 
    
         
             
              char *w;
         
     | 
| 
      
 967 
     | 
    
         
            +
              HshTable *wordtable = h_new_str(&free, (free_ft)NULL);
         
     | 
| 
       940 
968 
     | 
    
         
             
              TokenStream *tf = ALLOC(TokenStream);
         
     | 
| 
       941 
969 
     | 
    
         
             
              tf->sub_ts = ts;
         
     | 
| 
       942 
     | 
    
         
            -
              tf->destroy_sub = true;
         
     | 
| 
       943 
     | 
    
         
            -
              HshTable *wordtable = h_new_str(&free, NULL);
         
     | 
| 
       944 
970 
     | 
    
         
             
              while (*words) {
         
     | 
| 
       945 
971 
     | 
    
         
             
                w = estrdup(*words);
         
     | 
| 
       946 
972 
     | 
    
         
             
                h_set(wordtable, w, w);
         
     | 
| 
         @@ -952,6 +978,7 @@ TokenStream *stop_filter_create_with_words(TokenStream *ts, const char **words) 
     | 
|
| 
       952 
978 
     | 
    
         
             
              tf->reset = &filter_reset;
         
     | 
| 
       953 
979 
     | 
    
         
             
              tf->destroy = &sf_destroy;
         
     | 
| 
       954 
980 
     | 
    
         
             
              tf->clone_i = &sf_clone_i;
         
     | 
| 
      
 981 
     | 
    
         
            +
              tf->ref_cnt = 1;
         
     | 
| 
       955 
982 
     | 
    
         
             
              return tf;
         
     | 
| 
       956 
983 
     | 
    
         
             
            }
         
     | 
| 
       957 
984 
     | 
    
         | 
| 
         @@ -968,7 +995,7 @@ Token *mb_lcf_next(TokenStream *ts) 
     | 
|
| 
       968 
995 
     | 
    
         
             
              Token *tk = ts->sub_ts->next(ts->sub_ts);
         
     | 
| 
       969 
996 
     | 
    
         
             
              if (tk == NULL) return tk;
         
     | 
| 
       970 
997 
     | 
    
         | 
| 
       971 
     | 
    
         
            -
              i = mbstowcs(wbuf, tk->text, MAX_WORD_SIZE);
         
     | 
| 
      
 998 
     | 
    
         
            +
              i = (int)mbstowcs(wbuf, tk->text, MAX_WORD_SIZE);
         
     | 
| 
       972 
999 
     | 
    
         
             
              w = wbuf;
         
     | 
| 
       973 
1000 
     | 
    
         
             
              while (*w != 0) {
         
     | 
| 
       974 
1001 
     | 
    
         
             
                *w = towlower(*w);
         
     | 
| 
         @@ -986,8 +1013,8 @@ TokenStream *mb_lowercase_filter_create(TokenStream *ts) 
     | 
|
| 
       986 
1013 
     | 
    
         
             
              tf->reset = &filter_reset;
         
     | 
| 
       987 
1014 
     | 
    
         
             
              tf->destroy = &filter_destroy;
         
     | 
| 
       988 
1015 
     | 
    
         
             
              tf->sub_ts = ts;
         
     | 
| 
       989 
     | 
    
         
            -
              tf->destroy_sub = true;
         
     | 
| 
       990 
1016 
     | 
    
         
             
              tf->clone_i = NULL;
         
     | 
| 
      
 1017 
     | 
    
         
            +
              tf->ref_cnt = 1;
         
     | 
| 
       991 
1018 
     | 
    
         
             
              return tf;
         
     | 
| 
       992 
1019 
     | 
    
         
             
            }
         
     | 
| 
       993 
1020 
     | 
    
         | 
| 
         @@ -1011,8 +1038,8 @@ TokenStream *lowercase_filter_create(TokenStream *ts) 
     | 
|
| 
       1011 
1038 
     | 
    
         
             
              tf->reset = &filter_reset;
         
     | 
| 
       1012 
1039 
     | 
    
         
             
              tf->destroy = &filter_destroy;
         
     | 
| 
       1013 
1040 
     | 
    
         
             
              tf->sub_ts = ts;
         
     | 
| 
       1014 
     | 
    
         
            -
              tf->destroy_sub = true;
         
     | 
| 
       1015 
1041 
     | 
    
         
             
              tf->clone_i = NULL;
         
     | 
| 
      
 1042 
     | 
    
         
            +
              tf->ref_cnt = 1;
         
     | 
| 
       1016 
1043 
     | 
    
         
             
              return tf;
         
     | 
| 
       1017 
1044 
     | 
    
         
             
            }
         
     | 
| 
       1018 
1045 
     | 
    
         | 
| 
         @@ -1022,15 +1049,14 @@ typedef struct StemFilter { 
     | 
|
| 
       1022 
1049 
     | 
    
         
             
              char *charenc;
         
     | 
| 
       1023 
1050 
     | 
    
         
             
            } StemFilter;
         
     | 
| 
       1024 
1051 
     | 
    
         | 
| 
       1025 
     | 
    
         
            -
            void stemf_destroy( 
     | 
| 
      
 1052 
     | 
    
         
            +
            void stemf_destroy(TokenStream *tf)
         
     | 
| 
       1026 
1053 
     | 
    
         
             
            {
         
     | 
| 
       1027 
     | 
    
         
            -
               
     | 
| 
       1028 
     | 
    
         
            -
              StemFilter *stemf = (StemFilter *)ts->data;
         
     | 
| 
      
 1054 
     | 
    
         
            +
              StemFilter *stemf = (StemFilter *)tf->data;
         
     | 
| 
       1029 
1055 
     | 
    
         
             
              sb_stemmer_delete(stemf->stemmer);
         
     | 
| 
       1030 
1056 
     | 
    
         
             
              free(stemf->algorithm);
         
     | 
| 
       1031 
1057 
     | 
    
         
             
              free(stemf->charenc);
         
     | 
| 
       1032 
1058 
     | 
    
         
             
              free(stemf);
         
     | 
| 
       1033 
     | 
    
         
            -
              filter_destroy( 
     | 
| 
      
 1059 
     | 
    
         
            +
              filter_destroy(tf);
         
     | 
| 
       1034 
1060 
     | 
    
         
             
            }
         
     | 
| 
       1035 
1061 
     | 
    
         | 
| 
       1036 
1062 
     | 
    
         
             
            Token *stemf_next(TokenStream *ts)
         
     | 
| 
         @@ -1040,7 +1066,7 @@ Token *stemf_next(TokenStream *ts) 
     | 
|
| 
       1040 
1066 
     | 
    
         
             
              struct sb_stemmer *stemmer = ((StemFilter *)ts->data)->stemmer;
         
     | 
| 
       1041 
1067 
     | 
    
         
             
              Token *tk = ts->sub_ts->next(ts->sub_ts);
         
     | 
| 
       1042 
1068 
     | 
    
         
             
              if (tk == NULL) return tk;
         
     | 
| 
       1043 
     | 
    
         
            -
              stemmed = sb_stemmer_stem(stemmer, (sb_symbol *)tk->text, strlen(tk->text));
         
     | 
| 
      
 1069 
     | 
    
         
            +
              stemmed = sb_stemmer_stem(stemmer, (sb_symbol *)tk->text, (int)strlen(tk->text));
         
     | 
| 
       1044 
1070 
     | 
    
         
             
              len = sb_stemmer_length(stemmer);
         
     | 
| 
       1045 
1071 
     | 
    
         
             
              if (len >= MAX_WORD_SIZE) len = MAX_WORD_SIZE - 1;
         
     | 
| 
       1046 
1072 
     | 
    
         
             
              memcpy(tk->text, stemmed, len);
         
     | 
| 
         @@ -1074,7 +1100,7 @@ TokenStream *stem_filter_create(TokenStream *ts, const char * algorithm, 
     | 
|
| 
       1074 
1100 
     | 
    
         
             
              tf->destroy = &stemf_destroy;
         
     | 
| 
       1075 
1101 
     | 
    
         
             
              tf->clone_i = &stemf_clone_i;
         
     | 
| 
       1076 
1102 
     | 
    
         
             
              tf->sub_ts = ts;
         
     | 
| 
       1077 
     | 
    
         
            -
              tf-> 
     | 
| 
      
 1103 
     | 
    
         
            +
              tf->ref_cnt = 1;
         
     | 
| 
       1078 
1104 
     | 
    
         
             
              return tf;
         
     | 
| 
       1079 
1105 
     | 
    
         
             
            }
         
     | 
| 
       1080 
1106 
     | 
    
         | 
| 
         @@ -1148,19 +1174,12 @@ Analyzer *mb_standard_analyzer_create(bool lowercase) 
     | 
|
| 
       1148 
1174 
     | 
    
         
             
             *
         
     | 
| 
       1149 
1175 
     | 
    
         
             
             ****************************************************************************/
         
     | 
| 
       1150 
1176 
     | 
    
         | 
| 
       1151 
     | 
    
         
            -
             
     | 
| 
       1152 
     | 
    
         
            -
              HshTable *dict;
         
     | 
| 
       1153 
     | 
    
         
            -
              Analyzer *def;
         
     | 
| 
       1154 
     | 
    
         
            -
              bool destroy_subs : 1;
         
     | 
| 
       1155 
     | 
    
         
            -
            } PerFieldAnalyzer;
         
     | 
| 
       1156 
     | 
    
         
            -
             
     | 
| 
       1157 
     | 
    
         
            -
            void pfa_destroy(void *p)
         
     | 
| 
      
 1177 
     | 
    
         
            +
            void pfa_destroy(Analyzer *self)
         
     | 
| 
       1158 
1178 
     | 
    
         
             
            {
         
     | 
| 
       1159 
     | 
    
         
            -
              Analyzer *self = (Analyzer *)p;
         
     | 
| 
       1160 
1179 
     | 
    
         
             
              PerFieldAnalyzer *pfa = (PerFieldAnalyzer *)self->data;
         
     | 
| 
       1161 
1180 
     | 
    
         
             
              h_destroy(pfa->dict);
         
     | 
| 
       1162 
1181 
     | 
    
         | 
| 
       1163 
     | 
    
         
            -
               
     | 
| 
      
 1182 
     | 
    
         
            +
              a_deref(pfa->def);
         
     | 
| 
       1164 
1183 
     | 
    
         
             
              free(pfa);
         
     | 
| 
       1165 
1184 
     | 
    
         
             
              free(self);
         
     | 
| 
       1166 
1185 
     | 
    
         
             
            }
         
     | 
| 
         @@ -1176,7 +1195,7 @@ TokenStream *pfa_get_ts(Analyzer *self, char *field, char *text) 
     | 
|
| 
       1176 
1195 
     | 
    
         
             
            void pfa_sub_a_destroy(void *p)
         
     | 
| 
       1177 
1196 
     | 
    
         
             
            {
         
     | 
| 
       1178 
1197 
     | 
    
         
             
              Analyzer *a = (Analyzer *)p;
         
     | 
| 
       1179 
     | 
    
         
            -
               
     | 
| 
      
 1198 
     | 
    
         
            +
              a_deref(a);
         
     | 
| 
       1180 
1199 
     | 
    
         
             
            }
         
     | 
| 
       1181 
1200 
     | 
    
         | 
| 
       1182 
1201 
     | 
    
         
             
            void pfa_add_field(Analyzer *self, char *field, Analyzer *analyzer)
         
     | 
| 
         @@ -1185,13 +1204,11 @@ void pfa_add_field(Analyzer *self, char *field, Analyzer *analyzer) 
     | 
|
| 
       1185 
1204 
     | 
    
         
             
              h_set(pfa->dict, estrdup(field), analyzer);
         
     | 
| 
       1186 
1205 
     | 
    
         
             
            }
         
     | 
| 
       1187 
1206 
     | 
    
         | 
| 
       1188 
     | 
    
         
            -
            Analyzer *per_field_analyzer_create(Analyzer *def 
     | 
| 
      
 1207 
     | 
    
         
            +
            Analyzer *per_field_analyzer_create(Analyzer *def)
         
     | 
| 
       1189 
1208 
     | 
    
         
             
            {
         
     | 
| 
       1190 
1209 
     | 
    
         
             
              PerFieldAnalyzer *pfa = ALLOC(PerFieldAnalyzer);
         
     | 
| 
       1191 
1210 
     | 
    
         
             
              pfa->def = def;
         
     | 
| 
       1192 
     | 
    
         
            -
              pfa-> 
     | 
| 
       1193 
     | 
    
         
            -
              pfa->dict = destroy_subs ? h_new_str(&free, &pfa_sub_a_destroy)
         
     | 
| 
       1194 
     | 
    
         
            -
                                       : h_new_str(&free, NULL);
         
     | 
| 
      
 1211 
     | 
    
         
            +
              pfa->dict = h_new_str(&free, &pfa_sub_a_destroy);
         
     | 
| 
       1195 
1212 
     | 
    
         
             
              return analyzer_create(pfa, NULL, &pfa_destroy, &pfa_get_ts);
         
     | 
| 
       1196 
1213 
     | 
    
         
             
            }
         
     | 
| 
       1197 
1214 
     | 
    
         |