ferret 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TUTORIAL +5 -4
- data/ext/Makefile +140 -0
- data/ext/ferret_ext.so +0 -0
- data/lib/ferret.rb +1 -1
- data/lib/ferret/document/document.rb +6 -3
- data/lib/ferret/index/index.rb +102 -4
- data/lib/ferret/index/index_reader.rb +6 -0
- data/test/unit/index/tc_index.rb +102 -0
- data/test/unit/index/tc_index_reader.rb +28 -1
- metadata +4 -2
    
        data/TUTORIAL
    CHANGED
    
    | @@ -81,16 +81,17 @@ phrase "quick brown fox" in the content field. We'd write; | |
| 81 81 | 
             
              end
         | 
| 82 82 |  | 
| 83 83 | 
             
            But "fast" has a pretty similar meaning to "quick" and we don't mind if the
         | 
| 84 | 
            -
            fox is a little red.  | 
| 84 | 
            +
            fox is a little red. Also, the phrase could be in the title so we'll search
         | 
| 85 | 
            +
            there as well. So we could expand our search like this;
         | 
| 85 86 |  | 
| 86 | 
            -
              index.search_each('content:"quick|fast brown|red fox"') do |doc, score|
         | 
| 87 | 
            +
              index.search_each('title|content:"quick|fast brown|red fox"') do |doc, score|
         | 
| 87 88 | 
             
                puts "Document #{doc} found with a score of #{score}"
         | 
| 88 89 | 
             
              end
         | 
| 89 90 |  | 
| 90 91 | 
             
            What if we want to find all documents entered on or after 5th of September,
         | 
| 91 | 
            -
            2005 with the words "ruby" or "rails" in  | 
| 92 | 
            +
            2005 with the words "ruby" or "rails" in any field. We could type something like;
         | 
| 92 93 |  | 
| 93 | 
            -
              index.search_each('date:( >= 20050905)  | 
| 94 | 
            +
              index.search_each('date:( >= 20050905) *:(ruby OR rails)') do |doc, score|
         | 
| 94 95 | 
             
                puts "Document #{doc} found with a score of #{score}"
         | 
| 95 96 | 
             
              end
         | 
| 96 97 |  | 
    
        data/ext/Makefile
    ADDED
    
    | @@ -0,0 +1,140 @@ | |
| 1 | 
            +
             | 
| 2 | 
            +
            SHELL = /bin/sh
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            #### Start of system configuration section. ####
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            srcdir = .
         | 
| 7 | 
            +
            topdir = /usr/lib/ruby/1.8/i486-linux
         | 
| 8 | 
            +
            hdrdir = $(topdir)
         | 
| 9 | 
            +
            VPATH = $(srcdir):$(topdir):$(hdrdir)
         | 
| 10 | 
            +
            prefix = $(DESTDIR)/usr
         | 
| 11 | 
            +
            exec_prefix = $(prefix)
         | 
| 12 | 
            +
            sitedir = $(DESTDIR)/usr/local/lib/site_ruby
         | 
| 13 | 
            +
            rubylibdir = $(libdir)/ruby/$(ruby_version)
         | 
| 14 | 
            +
            archdir = $(rubylibdir)/$(arch)
         | 
| 15 | 
            +
            sbindir = $(exec_prefix)/sbin
         | 
| 16 | 
            +
            datadir = $(prefix)/share
         | 
| 17 | 
            +
            includedir = $(prefix)/include
         | 
| 18 | 
            +
            infodir = $(prefix)/info
         | 
| 19 | 
            +
            sysconfdir = $(DESTDIR)/etc
         | 
| 20 | 
            +
            mandir = $(datadir)/man
         | 
| 21 | 
            +
            libdir = $(exec_prefix)/lib
         | 
| 22 | 
            +
            sharedstatedir = $(prefix)/com
         | 
| 23 | 
            +
            oldincludedir = $(DESTDIR)/usr/include
         | 
| 24 | 
            +
            sitearchdir = $(sitelibdir)/$(sitearch)
         | 
| 25 | 
            +
            bindir = $(exec_prefix)/bin
         | 
| 26 | 
            +
            localstatedir = $(DESTDIR)/var
         | 
| 27 | 
            +
            sitelibdir = $(sitedir)/$(ruby_version)
         | 
| 28 | 
            +
            libexecdir = $(exec_prefix)/libexec
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            CC = gcc
         | 
| 31 | 
            +
            LIBRUBY = $(LIBRUBY_SO)
         | 
| 32 | 
            +
            LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
         | 
| 33 | 
            +
            LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
         | 
| 34 | 
            +
            LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            CFLAGS   =  -fPIC -Wall -g -O2  -fPIC 
         | 
| 37 | 
            +
            CPPFLAGS = -I. -I$(topdir) -I$(hdrdir) -I$(srcdir)  
         | 
| 38 | 
            +
            CXXFLAGS = $(CFLAGS) 
         | 
| 39 | 
            +
            DLDFLAGS =   
         | 
| 40 | 
            +
            LDSHARED = $(CC) -shared
         | 
| 41 | 
            +
            AR = ar
         | 
| 42 | 
            +
            EXEEXT = 
         | 
| 43 | 
            +
             | 
| 44 | 
            +
            RUBY_INSTALL_NAME = ruby1.8
         | 
| 45 | 
            +
            RUBY_SO_NAME = ruby1.8
         | 
| 46 | 
            +
            arch = i486-linux
         | 
| 47 | 
            +
            sitearch = i486-linux
         | 
| 48 | 
            +
            ruby_version = 1.8
         | 
| 49 | 
            +
            ruby = /usr/bin/ruby1.8
         | 
| 50 | 
            +
            RUBY = $(ruby)
         | 
| 51 | 
            +
            RM = rm -f
         | 
| 52 | 
            +
            MAKEDIRS = mkdir -p
         | 
| 53 | 
            +
            INSTALL = /usr/bin/install -c
         | 
| 54 | 
            +
            INSTALL_PROG = $(INSTALL) -m 0755
         | 
| 55 | 
            +
            INSTALL_DATA = $(INSTALL) -m 644
         | 
| 56 | 
            +
            COPY = cp
         | 
| 57 | 
            +
             | 
| 58 | 
            +
            #### End of system configuration section. ####
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            preload = 
         | 
| 61 | 
            +
             | 
| 62 | 
            +
            libpath = $(libdir)
         | 
| 63 | 
            +
            LIBPATH =  -L"$(libdir)"
         | 
| 64 | 
            +
            DEFFILE = 
         | 
| 65 | 
            +
             | 
| 66 | 
            +
            CLEANFILES = 
         | 
| 67 | 
            +
            DISTCLEANFILES = 
         | 
| 68 | 
            +
             | 
| 69 | 
            +
            extout = 
         | 
| 70 | 
            +
            extout_prefix = 
         | 
| 71 | 
            +
            target_prefix = 
         | 
| 72 | 
            +
            LOCAL_LIBS = 
         | 
| 73 | 
            +
            LIBS = $(LIBRUBYARG_SHARED)  -lpthread -ldl -lcrypt -lm   -lc
         | 
| 74 | 
            +
            SRCS = index_io.c term_buffer.c ram_directory.c priority_queue.c string_helper.c segment_merge_queue.c ferret.c term.c util.c
         | 
| 75 | 
            +
            OBJS = index_io.o term_buffer.o ram_directory.o priority_queue.o string_helper.o segment_merge_queue.o ferret.o term.o util.o
         | 
| 76 | 
            +
            TARGET = ferret_ext
         | 
| 77 | 
            +
            DLLIB = $(TARGET).so
         | 
| 78 | 
            +
            STATIC_LIB = 
         | 
| 79 | 
            +
             | 
| 80 | 
            +
            RUBYCOMMONDIR = $(sitedir)$(target_prefix)
         | 
| 81 | 
            +
            RUBYLIBDIR    = $(sitelibdir)$(target_prefix)
         | 
| 82 | 
            +
            RUBYARCHDIR   = $(sitearchdir)$(target_prefix)
         | 
| 83 | 
            +
             | 
| 84 | 
            +
            TARGET_SO     = $(DLLIB)
         | 
| 85 | 
            +
            CLEANLIBS     = $(TARGET).so $(TARGET).il? $(TARGET).tds $(TARGET).map
         | 
| 86 | 
            +
            CLEANOBJS     = *.o *.a *.s[ol] *.pdb *.exp *.bak
         | 
| 87 | 
            +
             | 
| 88 | 
            +
            all:		$(DLLIB)
         | 
| 89 | 
            +
            static:		$(STATIC_LIB)
         | 
| 90 | 
            +
             | 
| 91 | 
            +
            clean:
         | 
| 92 | 
            +
            		@-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
         | 
| 93 | 
            +
             | 
| 94 | 
            +
            distclean:	clean
         | 
| 95 | 
            +
            		@-$(RM) Makefile extconf.h conftest.* mkmf.log
         | 
| 96 | 
            +
            		@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
         | 
| 97 | 
            +
             | 
| 98 | 
            +
            realclean:	distclean
         | 
| 99 | 
            +
            install: install-so install-rb
         | 
| 100 | 
            +
             | 
| 101 | 
            +
            install-so: $(RUBYARCHDIR)
         | 
| 102 | 
            +
            install-so: $(RUBYARCHDIR)/$(DLLIB)
         | 
| 103 | 
            +
            $(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
         | 
| 104 | 
            +
            	$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
         | 
| 105 | 
            +
            install-rb: pre-install-rb install-rb-default
         | 
| 106 | 
            +
            install-rb-default: pre-install-rb-default
         | 
| 107 | 
            +
            pre-install-rb pre-install-rb-default: $(RUBYLIBDIR)
         | 
| 108 | 
            +
            $(RUBYARCHDIR):
         | 
| 109 | 
            +
            	$(MAKEDIRS) $@
         | 
| 110 | 
            +
            $(RUBYLIBDIR):
         | 
| 111 | 
            +
            	$(MAKEDIRS) $@
         | 
| 112 | 
            +
             | 
| 113 | 
            +
            site-install: site-install-so site-install-rb
         | 
| 114 | 
            +
            site-install-so: install-so
         | 
| 115 | 
            +
            site-install-rb: install-rb
         | 
| 116 | 
            +
             | 
| 117 | 
            +
            .SUFFIXES: .c .m .cc .cxx .cpp .C .o
         | 
| 118 | 
            +
             | 
| 119 | 
            +
            .cc.o:
         | 
| 120 | 
            +
            	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $<
         | 
| 121 | 
            +
             | 
| 122 | 
            +
            .cxx.o:
         | 
| 123 | 
            +
            	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $<
         | 
| 124 | 
            +
             | 
| 125 | 
            +
            .cpp.o:
         | 
| 126 | 
            +
            	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $<
         | 
| 127 | 
            +
             | 
| 128 | 
            +
            .C.o:
         | 
| 129 | 
            +
            	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $<
         | 
| 130 | 
            +
             | 
| 131 | 
            +
            .c.o:
         | 
| 132 | 
            +
            	$(CC) $(CFLAGS) $(CPPFLAGS) -c $<
         | 
| 133 | 
            +
             | 
| 134 | 
            +
            $(DLLIB): $(OBJS)
         | 
| 135 | 
            +
            	@-$(RM) $@
         | 
| 136 | 
            +
            	$(LDSHARED) $(DLDFLAGS) $(LIBPATH) -o $@ $(OBJS) $(LOCAL_LIBS) $(LIBS)
         | 
| 137 | 
            +
             | 
| 138 | 
            +
             | 
| 139 | 
            +
             | 
| 140 | 
            +
            $(OBJS): ruby.h defines.h
         | 
    
        data/ext/ferret_ext.so
    ADDED
    
    | Binary file | 
    
        data/lib/ferret.rb
    CHANGED
    
    
| @@ -123,11 +123,14 @@ module Ferret::Document | |
| 123 123 |  | 
| 124 124 | 
             
                # Sets the data in field +field_name+ to +data+. If there is more than one
         | 
| 125 125 | 
             
                # field of that name then it will set the data in the first field of that
         | 
| 126 | 
            -
                # name.
         | 
| 126 | 
            +
                # name. If there is no field of that name, then a new one will be created.
         | 
| 127 127 | 
             
                def []=(field_name, data)
         | 
| 128 128 | 
             
                  field = field(field_name.to_s)
         | 
| 129 | 
            -
                   | 
| 130 | 
            -
             | 
| 129 | 
            +
                  if field
         | 
| 130 | 
            +
                    field.data = data
         | 
| 131 | 
            +
                  else
         | 
| 132 | 
            +
                    add_field(Field.new(field_name.to_s, data))
         | 
| 133 | 
            +
                  end
         | 
| 131 134 | 
             
                end
         | 
| 132 135 |  | 
| 133 136 | 
             
                # Returns an array of binaries of the field specified as the method
         | 
    
        data/lib/ferret/index/index.rb
    CHANGED
    
    | @@ -82,7 +82,11 @@ module Ferret::Index | |
| 82 82 | 
             
                #   
         | 
| 83 83 | 
             
                def initialize(options = {})
         | 
| 84 84 | 
             
                  super()
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                  options[:default_search_field] &&= options[:default_search_field].to_s
         | 
| 87 | 
            +
                  options[:default_field] &&= options[:default_field].to_s
         | 
| 85 88 | 
             
                  options[:create_if_missing] = true if options[:create_if_missing].nil? 
         | 
| 89 | 
            +
             | 
| 86 90 | 
             
                  if options[:path]
         | 
| 87 91 | 
             
                    @dir = FSDirectory.new(options[:path], options[:create])
         | 
| 88 92 | 
             
                    options[:close_dir] = true
         | 
| @@ -291,8 +295,25 @@ module Ferret::Index | |
| 291 295 | 
             
                      return @reader.delete_docs_with_term(t)
         | 
| 292 296 | 
             
                    elsif id.is_a?(Term)
         | 
| 293 297 | 
             
                      return @reader.delete_docs_with_term(id)
         | 
| 294 | 
            -
                     | 
| 298 | 
            +
                    elsif id.is_a?(Integer)
         | 
| 295 299 | 
             
                      return @reader.delete(id)
         | 
| 300 | 
            +
                    else
         | 
| 301 | 
            +
                      raise ArgumentError, "Cannot delete for id of type #{id.class}"
         | 
| 302 | 
            +
                    end
         | 
| 303 | 
            +
                  end
         | 
| 304 | 
            +
                end
         | 
| 305 | 
            +
             | 
| 306 | 
            +
                # Delete all documents returned by the query.
         | 
| 307 | 
            +
                # 
         | 
| 308 | 
            +
                # query:: The query to find documents you wish to delete. Can either be a
         | 
| 309 | 
            +
                #         string (in which case it is parsed by the standard query parser)
         | 
| 310 | 
            +
                #         or an actual query object.
         | 
| 311 | 
            +
                def query_delete(query)
         | 
| 312 | 
            +
                  @dir.synchronize do
         | 
| 313 | 
            +
                    ensure_searcher_open()
         | 
| 314 | 
            +
                    query = process_query(query)
         | 
| 315 | 
            +
                    @searcher.search_each(query) do |doc, score|
         | 
| 316 | 
            +
                      @reader.delete(doc)
         | 
| 296 317 | 
             
                    end
         | 
| 297 318 | 
             
                  end
         | 
| 298 319 | 
             
                end
         | 
| @@ -305,6 +326,72 @@ module Ferret::Index | |
| 305 326 | 
             
                  end
         | 
| 306 327 | 
             
                end
         | 
| 307 328 |  | 
| 329 | 
            +
                # Update the document referenced by the document number +id+ if +id+ is an
         | 
| 330 | 
            +
                # integer or all of the documents which have the term +id+ if +id+ is a
         | 
| 331 | 
            +
                # term.
         | 
| 332 | 
            +
                #
         | 
| 333 | 
            +
                # id::      The number of the document to update. Can also be a string
         | 
| 334 | 
            +
                #           representing the value in the +id+ field or a term to match.
         | 
| 335 | 
            +
                # new_val:: The values we are updating. This can be a string in which case
         | 
| 336 | 
            +
                #           the default field is updated, or it can be a hash, in which
         | 
| 337 | 
            +
                #           case, all fields in the hash are updated. You can also pass a
         | 
| 338 | 
            +
                #           full Document object but you must pass the doc_num as the id.
         | 
| 339 | 
            +
                def update(id, new_val)
         | 
| 340 | 
            +
                  @dir.synchronize do
         | 
| 341 | 
            +
                    if id.is_a?(String)
         | 
| 342 | 
            +
                      query_update("id:#{id}", new_val)
         | 
| 343 | 
            +
                    elsif id.is_a?(Term)
         | 
| 344 | 
            +
                      query_update(TermQuery.new(id), new_val)
         | 
| 345 | 
            +
                    elsif id.is_a?(Integer)
         | 
| 346 | 
            +
                      ensure_reader_open()
         | 
| 347 | 
            +
                      document = doc(id)
         | 
| 348 | 
            +
                      if new_val.is_a?(Hash)
         | 
| 349 | 
            +
                        new_val.each_pair {|name, content| document[name] = content.to_s}
         | 
| 350 | 
            +
                      elsif new_val.is_a?(Document)
         | 
| 351 | 
            +
                        document = new_val
         | 
| 352 | 
            +
                      else
         | 
| 353 | 
            +
                        document[@options[:default_field]] = new_val.to_s
         | 
| 354 | 
            +
                      end          
         | 
| 355 | 
            +
                      @reader.delete(id)
         | 
| 356 | 
            +
                      ensure_writer_open()
         | 
| 357 | 
            +
                      @writer.add_document(document)
         | 
| 358 | 
            +
                    else
         | 
| 359 | 
            +
                      raise ArgumentError, "Cannot update for id of type #{id.class}"
         | 
| 360 | 
            +
                    end
         | 
| 361 | 
            +
                  end
         | 
| 362 | 
            +
                end
         | 
| 363 | 
            +
             | 
| 364 | 
            +
                # Update all the documents returned by the query.
         | 
| 365 | 
            +
                #
         | 
| 366 | 
            +
                # query::   The query to find documents you wish to update. Can either be
         | 
| 367 | 
            +
                #           a string (in which case it is parsed by the standard query
         | 
| 368 | 
            +
                #           parser) or an actual query object.
         | 
| 369 | 
            +
                # new_val:: The values we are updating. This can be a string in which case
         | 
| 370 | 
            +
                #           the default field is updated, or it can be a hash, in which
         | 
| 371 | 
            +
                #           case, all fields in the hash are updated. If you want to pass
         | 
| 372 | 
            +
                #           a full document see #update.
         | 
| 373 | 
            +
                def query_update(query, new_val)
         | 
| 374 | 
            +
                  @dir.synchronize do
         | 
| 375 | 
            +
                    ensure_searcher_open()
         | 
| 376 | 
            +
                    docs_to_add = []
         | 
| 377 | 
            +
                    query = process_query(query)
         | 
| 378 | 
            +
                    @searcher.search_each(query) do |id, score|
         | 
| 379 | 
            +
                      document = doc(id)
         | 
| 380 | 
            +
                      if new_val.is_a?(Hash)
         | 
| 381 | 
            +
                        new_val.each_pair {|name, content| document[name] = content.to_s}
         | 
| 382 | 
            +
                      else
         | 
| 383 | 
            +
                        document[@options[:default_field]] = new_val.to_s
         | 
| 384 | 
            +
                      end
         | 
| 385 | 
            +
                      docs_to_add << document
         | 
| 386 | 
            +
                      @reader.delete(id)
         | 
| 387 | 
            +
                    end
         | 
| 388 | 
            +
                    ensure_writer_open()
         | 
| 389 | 
            +
                    docs_to_add.each do |document|
         | 
| 390 | 
            +
                      @writer.add_document(document)
         | 
| 391 | 
            +
                    end
         | 
| 392 | 
            +
                  end
         | 
| 393 | 
            +
                end
         | 
| 394 | 
            +
             | 
| 308 395 | 
             
                # Returns true if any documents have been deleted since the index was last
         | 
| 309 396 | 
             
                # flushed.
         | 
| 310 397 | 
             
                def has_deletions?()
         | 
| @@ -432,7 +519,13 @@ module Ferret::Index | |
| 432 519 |  | 
| 433 520 | 
             
                  def ensure_reader_open()
         | 
| 434 521 | 
             
                    raise "tried to use a closed index" if not @open
         | 
| 435 | 
            -
                     | 
| 522 | 
            +
                    if @reader
         | 
| 523 | 
            +
                      if not @reader.latest?
         | 
| 524 | 
            +
                        @reader = IndexReader.open(@dir, false)
         | 
| 525 | 
            +
                      end
         | 
| 526 | 
            +
                      return
         | 
| 527 | 
            +
                    end
         | 
| 528 | 
            +
             | 
| 436 529 | 
             
                    if @writer
         | 
| 437 530 | 
             
                      @writer.close
         | 
| 438 531 | 
             
                      @writer = nil
         | 
| @@ -450,6 +543,12 @@ module Ferret::Index | |
| 450 543 | 
             
                private
         | 
| 451 544 | 
             
                  def do_search(query, options)
         | 
| 452 545 | 
             
                    ensure_searcher_open()
         | 
| 546 | 
            +
                    query = process_query(query)
         | 
| 547 | 
            +
             | 
| 548 | 
            +
                    return @searcher.search(query, options)
         | 
| 549 | 
            +
                  end
         | 
| 550 | 
            +
             | 
| 551 | 
            +
                  def process_query(query)
         | 
| 453 552 | 
             
                    if query.is_a?(String)
         | 
| 454 553 | 
             
                      if @qp.nil?
         | 
| 455 554 | 
             
                        @qp = Ferret::QueryParser.new(@default_search_field, @options)
         | 
| @@ -458,8 +557,7 @@ module Ferret::Index | |
| 458 557 | 
             
                      @qp.fields = @reader.get_field_names.to_a
         | 
| 459 558 | 
             
                      query = @qp.parse(query)
         | 
| 460 559 | 
             
                    end
         | 
| 461 | 
            -
             | 
| 462 | 
            -
                    return @searcher.search(query, options)
         | 
| 560 | 
            +
                    return query
         | 
| 463 561 | 
             
                  end
         | 
| 464 562 | 
             
              end
         | 
| 465 563 | 
             
            end
         | 
| @@ -343,6 +343,12 @@ module Ferret::Index | |
| 343 343 | 
             
                    end
         | 
| 344 344 | 
             
                  end
         | 
| 345 345 | 
             
                end
         | 
| 346 | 
            +
             | 
| 347 | 
            +
                # Returns true if the reader is reading from the latest version of the
         | 
| 348 | 
            +
                # index.
         | 
| 349 | 
            +
                def latest?()
         | 
| 350 | 
            +
                  SegmentInfos.read_current_version(@directory) == @segment_infos.version()
         | 
| 351 | 
            +
                end
         | 
| 346 352 |  | 
| 347 353 | 
             
                # Deletes the document numbered +doc_num+.  Once a document is deleted it
         | 
| 348 354 | 
             
                # will not appear in TermDocEnum or TermPositions enumerations.  Attempts to
         | 
    
        data/test/unit/index/tc_index.rb
    CHANGED
    
    | @@ -5,6 +5,7 @@ class IndexTest < Test::Unit::TestCase | |
| 5 5 | 
             
              include Ferret::Index
         | 
| 6 6 | 
             
              include Ferret::Analysis
         | 
| 7 7 | 
             
              include Ferret::Store
         | 
| 8 | 
            +
              include Ferret::Document
         | 
| 8 9 |  | 
| 9 10 | 
             
              def setup()
         | 
| 10 11 | 
             
                @qp = Ferret::QueryParser.new()
         | 
| @@ -289,4 +290,105 @@ class IndexTest < Test::Unit::TestCase | |
| 289 290 | 
             
                assert_equal("romeo", index[3]["f"])
         | 
| 290 291 | 
             
                index.close
         | 
| 291 292 | 
             
              end
         | 
| 293 | 
            +
             | 
| 294 | 
            +
              def test_auto_update_when_externally_modified()
         | 
| 295 | 
            +
                fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
         | 
| 296 | 
            +
                index = Index.new(:path => fs_path, :default_field => "f", :create => true)
         | 
| 297 | 
            +
                index << "document 1"
         | 
| 298 | 
            +
                assert_equal(1, index.size)
         | 
| 299 | 
            +
             | 
| 300 | 
            +
                index2 = Index.new(:path => fs_path, :default_field => "f")
         | 
| 301 | 
            +
                assert_equal(1, index2.size)
         | 
| 302 | 
            +
                index2 << "document 2"
         | 
| 303 | 
            +
                assert_equal(2, index2.size)
         | 
| 304 | 
            +
                assert_equal(2, index.size)
         | 
| 305 | 
            +
             | 
| 306 | 
            +
                iw = IndexWriter.new(fs_path, :analyzer => WhiteSpaceAnalyzer.new())
         | 
| 307 | 
            +
                doc = Document.new
         | 
| 308 | 
            +
                doc << Field.new("f", "content3", Field::Store::YES, Field::Index::TOKENIZED)
         | 
| 309 | 
            +
                iw << doc
         | 
| 310 | 
            +
                iw.close()
         | 
| 311 | 
            +
                assert_equal(3, index.size)
         | 
| 312 | 
            +
                assert_equal("content3", index[2]["f"])
         | 
| 313 | 
            +
              end
         | 
| 314 | 
            +
             | 
| 315 | 
            +
              def test_delete
         | 
| 316 | 
            +
                data = [
         | 
| 317 | 
            +
                  {:id => 0, :cat => "/cat1/subcat1"},
         | 
| 318 | 
            +
                  {:id => 1, :cat => "/cat1/subcat2"},
         | 
| 319 | 
            +
                  {:id => 2, :cat => "/cat1/subcat2"},
         | 
| 320 | 
            +
                  {:id => 3, :cat => "/cat1/subcat3"},
         | 
| 321 | 
            +
                  {:id => 4, :cat => "/cat1/subcat4"},
         | 
| 322 | 
            +
                  {:id => 5, :cat => "/cat2/subcat1"},
         | 
| 323 | 
            +
                  {:id => 6, :cat => "/cat2/subcat2"},
         | 
| 324 | 
            +
                  {:id => 7, :cat => "/cat2/subcat3"},
         | 
| 325 | 
            +
                  {:id => 8, :cat => "/cat2/subcat4"},
         | 
| 326 | 
            +
                  {:id => 9, :cat => "/cat2/subcat5"},
         | 
| 327 | 
            +
                ]
         | 
| 328 | 
            +
                index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
         | 
| 329 | 
            +
                data.each {|doc| index << doc }
         | 
| 330 | 
            +
                assert_equal(10, index.size)
         | 
| 331 | 
            +
                assert_equal(1, index.search("id:9").size)
         | 
| 332 | 
            +
                index.delete(9)
         | 
| 333 | 
            +
                assert_equal(9, index.size)
         | 
| 334 | 
            +
                assert_equal(0, index.search("id:9").size)
         | 
| 335 | 
            +
                assert_equal(1, index.search("id:8").size)
         | 
| 336 | 
            +
                index.delete("8")
         | 
| 337 | 
            +
                assert_equal(8, index.size)
         | 
| 338 | 
            +
                assert_equal(0, index.search("id:8").size)
         | 
| 339 | 
            +
                assert_equal(5, index.search("cat:/cat1*").size)
         | 
| 340 | 
            +
                index.query_delete("cat:/cat1*")
         | 
| 341 | 
            +
                assert_equal(3, index.size)
         | 
| 342 | 
            +
                assert_equal(0, index.search("cat:/cat1*").size)
         | 
| 343 | 
            +
              end
         | 
| 344 | 
            +
             | 
| 345 | 
            +
              def test_update
         | 
| 346 | 
            +
                data = [
         | 
| 347 | 
            +
                  {:id => 0, :cat => "/cat1/subcat1", :content => "content0"},
         | 
| 348 | 
            +
                  {:id => 1, :cat => "/cat1/subcat2", :content => "content1"},
         | 
| 349 | 
            +
                  {:id => 2, :cat => "/cat1/subcat2", :content => "content2"},
         | 
| 350 | 
            +
                  {:id => 3, :cat => "/cat1/subcat3", :content => "content3"},
         | 
| 351 | 
            +
                  {:id => 4, :cat => "/cat1/subcat4", :content => "content4"},
         | 
| 352 | 
            +
                  {:id => 5, :cat => "/cat2/subcat1", :content => "content5"},
         | 
| 353 | 
            +
                  {:id => 6, :cat => "/cat2/subcat2", :content => "content6"},
         | 
| 354 | 
            +
                  {:id => 7, :cat => "/cat2/subcat3", :content => "content7"},
         | 
| 355 | 
            +
                  {:id => 8, :cat => "/cat2/subcat4", :content => "content8"},
         | 
| 356 | 
            +
                  {:id => 9, :cat => "/cat2/subcat5", :content => "content9"},
         | 
| 357 | 
            +
                ]
         | 
| 358 | 
            +
                index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
         | 
| 359 | 
            +
                                  :default_field => :content)
         | 
| 360 | 
            +
                data.each { |doc| index << doc }
         | 
| 361 | 
            +
                assert_equal(10, index.size)
         | 
| 362 | 
            +
                assert_equal("content5", index["5"][:content])
         | 
| 363 | 
            +
                index.update(5, "content five")
         | 
| 364 | 
            +
                assert_equal("content five", index["5"][:content])
         | 
| 365 | 
            +
                assert_equal(nil, index["5"][:extra_content])
         | 
| 366 | 
            +
                index.update("5", {:cat => "/cat1/subcat6",
         | 
| 367 | 
            +
                                   :content => "high five",
         | 
| 368 | 
            +
                                   :extra_content => "hello"})
         | 
| 369 | 
            +
                assert_equal("hello", index["5"][:extra_content])
         | 
| 370 | 
            +
                assert_equal("high five", index["5"][:content])
         | 
| 371 | 
            +
                assert_equal("/cat1/subcat6", index["5"][:cat])
         | 
| 372 | 
            +
                assert_equal("content9", index["9"][:content])
         | 
| 373 | 
            +
                index.update(Term.new("content", "content9"), {:content => "content nine"})
         | 
| 374 | 
            +
                assert_equal("content nine", index["9"][:content])
         | 
| 375 | 
            +
                assert_equal("content0", index["0"][:content])
         | 
| 376 | 
            +
                assert_equal(nil, index["0"][:extra_content])
         | 
| 377 | 
            +
                document = index[0]
         | 
| 378 | 
            +
                document[:content] = "content zero"
         | 
| 379 | 
            +
                document[:extra_content] = "extra content"
         | 
| 380 | 
            +
                index.update(0, document)
         | 
| 381 | 
            +
                assert_equal("content zero", index["0"][:content])
         | 
| 382 | 
            +
                assert_equal("extra content", index["0"][:extra_content])
         | 
| 383 | 
            +
                assert_equal(nil, index["1"][:tag])
         | 
| 384 | 
            +
                assert_equal(nil, index["2"][:tag])
         | 
| 385 | 
            +
                assert_equal(nil, index["3"][:tag])
         | 
| 386 | 
            +
                assert_equal(nil, index["4"][:tag])
         | 
| 387 | 
            +
                index.query_update("id:<5 AND cat:>=/cat1/subcat2", {:tag => "cool"})
         | 
| 388 | 
            +
                assert_equal("cool", index["1"][:tag])
         | 
| 389 | 
            +
                assert_equal("cool", index["2"][:tag])
         | 
| 390 | 
            +
                assert_equal("cool", index["3"][:tag])
         | 
| 391 | 
            +
                assert_equal("cool", index["4"][:tag])
         | 
| 392 | 
            +
                assert_equal(4, index.search("tag:cool").size)
         | 
| 393 | 
            +
              end
         | 
| 292 394 | 
             
            end
         | 
| @@ -417,7 +417,6 @@ module IndexReaderCommon | |
| 417 417 | 
             
                ir3.close()
         | 
| 418 418 | 
             
              end
         | 
| 419 419 |  | 
| 420 | 
            -
             | 
| 421 420 | 
             
            end
         | 
| 422 421 |  | 
| 423 422 | 
             
            class SegmentReaderTest < Test::Unit::TestCase
         | 
| @@ -618,5 +617,33 @@ class IndexReaderTest < Test::Unit::TestCase | |
| 618 617 | 
             
                ir.close()
         | 
| 619 618 | 
             
                fs_dir.close()
         | 
| 620 619 | 
             
              end
         | 
| 620 | 
            +
             | 
| 621 | 
            +
              def test_latest()
         | 
| 622 | 
            +
                dpath = File.join(File.dirname(__FILE__),
         | 
| 623 | 
            +
                                   '../../temp/fsdir')
         | 
| 624 | 
            +
                fs_dir = Ferret::Store::FSDirectory.new(dpath, true)
         | 
| 625 | 
            +
             | 
| 626 | 
            +
                iw = IndexWriter.new(fs_dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
         | 
| 627 | 
            +
                doc = Document.new
         | 
| 628 | 
            +
                doc << Field.new("field", "content", Field::Store::YES, Field::Index::TOKENIZED)
         | 
| 629 | 
            +
                iw << doc
         | 
| 630 | 
            +
                iw.close()
         | 
| 631 | 
            +
             | 
| 632 | 
            +
                ir = IndexReader.open(fs_dir, false)
         | 
| 633 | 
            +
                assert(ir.latest?)
         | 
| 634 | 
            +
             | 
| 635 | 
            +
                iw = IndexWriter.new(fs_dir, :analyzer => WhiteSpaceAnalyzer.new())
         | 
| 636 | 
            +
                doc = Document.new
         | 
| 637 | 
            +
                doc << Field.new("field", "content2", Field::Store::YES, Field::Index::TOKENIZED)
         | 
| 638 | 
            +
                iw << doc
         | 
| 639 | 
            +
                iw.close()
         | 
| 640 | 
            +
             | 
| 641 | 
            +
                assert(!ir.latest?)
         | 
| 642 | 
            +
             | 
| 643 | 
            +
                ir.close()
         | 
| 644 | 
            +
                ir = IndexReader.open(fs_dir, false)
         | 
| 645 | 
            +
                assert(ir.latest?)
         | 
| 646 | 
            +
                ir.close()
         | 
| 647 | 
            +
              end
         | 
| 621 648 | 
             
            end
         | 
| 622 649 |  | 
    
        metadata
    CHANGED
    
    | @@ -3,8 +3,8 @@ rubygems_version: 0.8.11 | |
| 3 3 | 
             
            specification_version: 1
         | 
| 4 4 | 
             
            name: ferret
         | 
| 5 5 | 
             
            version: !ruby/object:Gem::Version 
         | 
| 6 | 
            -
              version: 0.1.4 | 
| 7 | 
            -
            date: 2005-11- | 
| 6 | 
            +
              version: 0.2.0
         | 
| 7 | 
            +
            date: 2005-11-12 00:00:00 +09:00
         | 
| 8 8 | 
             
            summary: Ruby indexing library.
         | 
| 9 9 | 
             
            require_paths: 
         | 
| 10 10 | 
             
              - lib
         | 
| @@ -35,6 +35,7 @@ files: | |
| 35 35 | 
             
              - MIT-LICENSE
         | 
| 36 36 | 
             
              - TODO
         | 
| 37 37 | 
             
              - TUTORIAL
         | 
| 38 | 
            +
              - ext/Makefile
         | 
| 38 39 | 
             
              - ext/index_io.c
         | 
| 39 40 | 
             
              - ext/term_buffer.c
         | 
| 40 41 | 
             
              - ext/ram_directory.c
         | 
| @@ -47,6 +48,7 @@ files: | |
| 47 48 | 
             
              - ext/ferret.h
         | 
| 48 49 | 
             
              - ext/util.c
         | 
| 49 50 | 
             
              - ext/tags
         | 
| 51 | 
            +
              - ext/ferret_ext.so
         | 
| 50 52 | 
             
              - ext/dummy.exe
         | 
| 51 53 | 
             
              - lib/ferret.rb
         | 
| 52 54 | 
             
              - lib/ferret/analysis.rb
         |