RubyGems - picolena - Versions diffs - 0.1.7 → 0.1.8 - Mend

picolena 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

data/History.txt CHANGED Viewed

@@ -1,3 +1,13 @@
+== 0.1.8  2008-05-08
+* 2 minor enhancements:
+  * New IndexerLogger with basic statistics
+  * More specs & documentation.
+* 2 bug fixes:
+  * Binary documents without extension are not considered supported anymore
+  * Ensure that index is locked system-wide by using lock file.
 == 0.1.7  2008-04-30
 * 5 minor enhancements:

data/Manifest.txt CHANGED Viewed

@@ -50,6 +50,7 @@ lib/picolena/templates/lang/ui/en.yml
 lib/picolena/templates/lang/ui/es.yml
 lib/picolena/templates/lang/ui/fr.yml
 lib/picolena/templates/lib/core_exts.rb
+lib/picolena/templates/lib/indexer_logger.rb
 lib/picolena/templates/lib/plain_text_extractor_DSL.rb
 lib/picolena/templates/lib/plain_text_extractors/adobe.pdf.rb
 lib/picolena/templates/lib/plain_text_extractors/html.rb

data/lib/picolena/templates/app/controllers/documents_controller.rb CHANGED Viewed

@@ -22,9 +22,9 @@ class DocumentsController < ApplicationController
   def show
     start=Time.now
       @query=[params[:id],params.delete(:format)].compact.join('.')
-      @sort=params[:sort]
+      @sort_by=params[:sort_by]
       page=params[:page]||1
-      finder=Finder.new(@query,@sort,page)
+      finder=Finder.new(@query,@sort_by,page)
       finder.execute!
       pager=::Paginator.new(finder.total_hits, Picolena::ResultsPerPage) do
         finder.matching_documents
@@ -64,7 +64,7 @@ class DocumentsController < ApplicationController
   def ensure_index_is_created
     Indexer.ensure_index_existence
-    while Indexer.do_not_disturb_while_indexing do
+    while Indexer.locked? do
       sleep 1
     end
   end

data/lib/picolena/templates/app/helpers/documents_helper.rb CHANGED Viewed

@@ -6,10 +6,10 @@ module DocumentsHelper
   # Very basic pagination.
   # Provides links to Next, Prev and FirstPage when needed.
-  def should_paginate(page,query, sort)
-    [(link_to("&larr;&larr;", :action => :show, :id => query, :sort=>sort) if page.number>2),
-     (link_to("&larr;", :action => :show, :id => query, :page => page.prev.number, :sort=>sort) if page.prev?),
-     (link_to("&rarr;", :action => :show, :id => query, :page => page.next.number, :sort=>sort) if page.next?)].compact.join(" | ")
+  def should_paginate(page,query, sort_by)
+    [(link_to("&larr;&larr;", :action => :show, :id => query, :sort_by=>sort_by) if page.number>2),
+     (link_to("&larr;", :action => :show, :id => query, :page => page.prev.number, :sort_by=>sort_by) if page.prev?),
+     (link_to("&rarr;", :action => :show, :id => query, :page => page.next.number, :sort_by=>sort_by) if page.next?)].compact.join(" | ")
   end
   # Returns a localized sentence like "Results 1-10 of 12 for Zimbabwe (0.472s)" or
@@ -81,7 +81,7 @@ module DocumentsHelper
   end
   def sort_by_date_or_relevance(query)
-    [link_to_unless_current('By date', document_path(query, :sort=>'by_date')),
+    [link_to_unless_current('By date', document_path(query, :sort_by=>'date')),
      link_to_unless_current('By relevance', document_path(query))].join("&nbsp;")
   end
 end

data/lib/picolena/templates/app/models/document.rb CHANGED Viewed

@@ -11,10 +11,18 @@ class Document
   end
   #Delegating properties to File::method_name(complete_path)
-  [:dirname, :basename, :extname, :ext_as_sym, :file?, :size, :ext_as_sym].each{|method_name|
+  [:dirname, :basename, :extname, :ext_as_sym, :file?, :plain_text?, :size, :ext_as_sym].each{|method_name|
     define_method(method_name){File.send(method_name,complete_path)}
   }
   alias_method :filename, :basename
+  alias_method :to_s, :complete_path
+  def inspect
+    [self,("(#{pretty_score})" if @score),("(language:#{language})" if language)].compact.join(" ")
+  end
   # Returns filename without extension
   #   "buildings.odt" => "buildings"
@@ -50,7 +58,7 @@ class Document
   #  Document.new("presentation.pdf").supported? => true
   #  Document.new("presentation.some_weird_extension").supported? => false
   def supported?
-    PlainTextExtractor.supported_extensions.include?(self.ext_as_sym)
+    PlainTextExtractor.supported_extensions.include?(self.ext_as_sym) unless ext_as_sym==:no_extension and !plain_text?
   end
   # Retrieves content as it is *now*.
@@ -91,6 +99,10 @@ class Document
     from_index[:language]
   end
+  def pretty_score
+    "%3.1f%" % (@score*100)
+  end
   # Fields that are shared between every document.
   def self.default_fields_for(complete_path)
     {
@@ -103,6 +115,7 @@ class Document
     }
   end
   private
   # FIXME: Is there a way to easily retrieve doc_id for a given document?
@@ -138,4 +151,4 @@ class Document
   def validate_in_indexed_directory
     raise ArgumentError, "required document is not in indexed directory" unless in_indexed_directory?
   end
-end
+end

data/lib/picolena/templates/app/models/finder.rb CHANGED Viewed

@@ -2,23 +2,24 @@ class Finder
   attr_reader :query
   def index
-    @@index ||= Indexer.index
+    @@index ||= Indexer.index
   end
-  def initialize(raw_query,by_date=false, page=1,results_per_page=Picolena::ResultsPerPage)
+  def initialize(raw_query,sort_by='relevance', page=1,results_per_page=Picolena::ResultsPerPage)
     @query = Query.extract_from(raw_query)
     @raw_query= raw_query
     Indexer.ensure_index_existence
+    reload_index! if should_be_reloaded?
     @per_page=results_per_page
     @offset=(page.to_i-1)*results_per_page
-    @by_date=by_date
+    @sort_by=sort_by
     index_should_have_documents
   end
   def execute!
     @matching_documents=[]
     start=Time.now
-      @total_hits = index.search_each(query, :limit => @per_page, :offset=>@offset, :sort => (sort_by_date if @by_date)){|index_id, score|
+      @total_hits = index.search_each(query, :limit => @per_page, :offset=>@offset, :sort => (sort_by_date if @sort_by=='date')){|index_id, score|
         begin
           found_doc=Document.new(index[index_id][:complete_path])
           found_doc.matching_content=index.highlight(query, index_id,
@@ -52,11 +53,21 @@ class Finder
     }
   }
-  def self.reload!
+  private
+  def reload_index!
+    Indexer.close
     @@index = nil
+    @@last_reload = Time.now
   end
-  private
+  def should_be_reloaded?
+    Indexer.reload_file_mtime > last_reload
+  end
+  def last_reload
+    @@last_reload ||= Time.at(0)
+  end
   def sort_by_date
     Ferret::Search::SortField.new(:modified, :type => :byte, :reverse => true)
@@ -65,4 +76,4 @@ class Finder
   def index_should_have_documents
     raise IndexError, "no document found" unless index.size > 0
   end
-end
+end

data/lib/picolena/templates/app/models/indexer.rb CHANGED Viewed

@@ -1,63 +1,74 @@
+# Indexer is used to index (duh!) documents contained in IndexedDirectories
+# It can create, update, delete and prune the index, and takes care that only
+# one IndexWriter exists at any given time, even when used in a multi-threaded
+# way.
+require 'indexer_logger'
 class Indexer
   # This regexp defines which files should *not* be indexed.
   @@exclude          = /(Thumbs\.db)/
   # Number of threads that will be used during indexing process
   @@threads_number = 8
-  cattr_reader :do_not_disturb_while_indexing
   class << self
+    # Finds every document included in IndexedDirectories, parses them with
+    # PlainTextExtractor and adds them to the index.
+    #
+    # Updates the index unless remove_first parameter is set to true, in which
+    # case it removes the index first before re-creating it.
     def index_every_directory(remove_first=false)
-      @@do_not_disturb_while_indexing=true
       clear! if remove_first
+      lock!
       @from_scratch = remove_first
-      # Forces Finder.searcher and Finder.index to be reloaded, by removing them from the cache.
-      Finder.reload!
-      log :debug => "Indexing every directory"
-      start=Time.now
+      logger.start_indexing
       Picolena::IndexedDirectories.each{|dir, alias_dir|
         index_directory_with_multithreads(dir)
       }
-      log :debug => "Now optimizing index"
+      logger.debug "Now optimizing index"
       index.optimize
-      @@do_not_disturb_while_indexing=false
-      log :debug => "Indexing done in #{Time.now-start} s."
+      index_time_dbm_file['last']=Time.now._dump
+      unlock!
+      logger.show_report
     end
+    # Indexes a given directory, using @@threads_number threads.
+    # To do so, it retrieves a list of every included document, cuts it in
+    # @@threads_number chunks, and creates a new indexing thread for every chunk.
     def index_directory_with_multithreads(dir)
-      log :debug => "Indexing #{dir}, #{@@threads_number} threads"
+      logger.debug "Indexing #{dir}, #{@@threads_number} threads"
       indexing_list=Dir[File.join(dir,"**/*")].select{|filename|
         File.file?(filename) && filename !~ @@exclude
       }
       indexing_list_chunks=indexing_list.in_transposed_slices(@@threads_number)
       prepare_multi_threads_environment
       indexing_list_chunks.each_with_thread{|chunk|
         chunk.each{|complete_path|
-          last_itime=index_time_dbm_file[complete_path]
-          if @from_scratch || !last_itime || File.mtime(complete_path)> Time._load(last_itime) then
+          if should_index_this_document?(complete_path) then
             add_or_update_file(complete_path)
           else
-            log :debug => "Identical : #{complete_path}"
+            logger.debug "Identical : #{complete_path}"
           end
           index_time_dbm_file[complete_path] = Time.now._dump
         }
       }
     end
+    # Retrieves content and language from a given document, and adds it to the index.
+    # Since Document#probably_unique_id is used as index :key, no document will be added
+    # twice to the index, and the old document will just get updated.
+    #
+    # If for some reason (no content found or no defined PlainTextExtractor), content cannot
+    # be found, some basic information about the document (mtime, filename, complete_path)
+    # gets indexed anyway.
     def add_or_update_file(complete_path)
-      default_fields = Document.default_fields_for(complete_path)
+      document = Document.default_fields_for(complete_path)
       begin
-        document = PlainTextExtractor.extract_content_and_language_from(complete_path)
+        document.merge! PlainTextExtractor.extract_content_and_language_from(complete_path)
         raise "empty document #{complete_path}" if document[:content].strip.empty?
-        document.merge! default_fields
-        log :debug => ["Added : #{complete_path}",document[:language] ? " (#{document[:language]})" : ""].join
+        logger.add_document document
       rescue => e
-        log :debug => "\tindexing without content: #{e.message}"
-        document = default_fields
+        logger.reject_document document, e
       end
       index << document
     end
@@ -73,11 +84,9 @@ class Indexer
     # ensures that a new Index is instantiated next time index is called.
     def close
       @@index.close rescue nil
-      # Ferret will SEGFAULT otherwise.
       @@index = nil
     end
     # Checks for indexed files that are missing from filesystem
     # and removes them from index & dbm file.
     def prune_index
@@ -85,7 +94,7 @@ class Indexer
       missing_files.each{|filename, itime|
         index.writer.delete(:complete_path, filename)
         index_time_dbm_file.delete(filename)
-        log :debug => "Removed : #{filename}"
+        logger.debug "Removed : #{filename}"
       }
       index.optimize
     end
@@ -97,6 +106,7 @@ class Indexer
       @@index ||= Ferret::Index::Index.new(default_index_params)
     end
+    # Creates the index unless it already exists.
     def ensure_index_existence
       index_every_directory(:remove_first) unless index_exists? or RAILS_ENV=="production"
     end
@@ -106,11 +116,66 @@ class Indexer
       index.size
     end
+    # Returns the time at which the index was last created/updated.
+    # Returns "none" if it doesn't exist.
+    def last_update
+      Time._load(index_time_dbm_file['last']) rescue "none"
+    end
+    # Returns the time at which the reload file was last touched.
+    # Useful to know if other processes have modified the shared index,
+    # and if the Indexer should be reloaded.
+    def reload_file_mtime
+      touch_reload_file! unless File.exists?(reload_file)
+      File.mtime(reload_file)
+    end
+    # For a given document, it retrieves the time it was last indexed, compares it to
+    # its modification time and returns false unless the file has been
+    # modified after the last indexing process.
+    def should_index_this_document?(complete_path)
+      last_itime=index_time_dbm_file[complete_path]
+      @from_scratch || !last_itime || File.mtime(complete_path)> Time._load(last_itime)
+    end
+    def locked?
+      File.exists?(lock_file)
+    end
     private
+    def touch_reload_file!
+      FileUtils.touch(reload_file)
+      # To ensure that every process can touch reload_file, even if Picolena
+      # is launched as a special user.
+      FileUtils.chmod(0666, reload_file)
+    end
+    def reload_file
+      File.join(Picolena::MetaIndexPath,'reload')
+    end
+    def lock!
+      FileUtils.touch(lock_file)
+    end
+    def unlock!
+      FileUtils.rm(lock_file)
+      # Forces Finder.index to be reloaded.
+      touch_reload_file!
+    end
+    def lock_file
+      File.join(Picolena::MetaIndexPath,'lock')
+    end
+    def logger
+      @@logger ||= IndexerLogger.new
+    end
     # Copied from Ferret book, By David Balmain
     def index_time_dbm_file
-      @@dbm_file ||= DBM.open(File.join(Picolena::IndexSavePath, 'added_at'))
+      @@dbm_file ||= DBM.open(File.join(Picolena::MetaIndexPath, 'added_at'))
     end
     def index_exists?
@@ -121,12 +186,6 @@ class Indexer
       Dir.glob(File.join(Picolena::IndexSavePath,'*.cfs')).first
     end
-    def log(hash)
-      hash.each{|level,message|
-        IndexerLogger.send(level,message)
-      }
-    end
     def default_index_params
       {
         :path        => Picolena::IndexSavePath,

data/lib/picolena/templates/app/models/query.rb CHANGED Viewed

@@ -4,6 +4,11 @@ class Query
     def extract_from(raw_query)
       parser.parse(convert_to_english(raw_query))
     end
+    # Returns terms related to content. Useful for cache highlighting
+    def content_terms(raw_query)
+      Query.extract_from(raw_query).terms(Indexer.index.searcher).select{|term| term.field==:content}.collect{|term| term.text}.uniq
+    end
     private

data/lib/picolena/templates/app/views/documents/_document.html.haml CHANGED Viewed

@@ -3,14 +3,15 @@
   =language_icon_for(document)
   %small=number_to_percentage(document.score*100, :precision=>1)
 =highlight_matching_content(document)
-%p=link_to_containing_directory(document)
--if document.supported?
-  %p
+%p
+  =link_to_containing_directory(document)
+  %br/
+  -if document.supported?
     =link_to_plain_text_content(document)
     &#45;
-    =number_to_human_size(document.size)
-    &#45;
-    =document.pretty_date
-    &#45;
     =link_to_cached_content(document,query)
-%hr/
+    &#45;
+  =number_to_human_size(document.size)
+  &#45;
+  =document.pretty_date
+%hr/

data/lib/picolena/templates/config/environment.rb CHANGED Viewed

@@ -7,8 +7,6 @@
 # Specifies gem version of Rails to use when vendor/rails is not present
 RAILS_GEM_VERSION = '2.0.2' unless defined? RAILS_GEM_VERSION
-IndexerLogger=Logger.new($stdout)
 # Bootstrap the Rails environment, frameworks, and default configuration
 require File.join(File.dirname(__FILE__), 'boot')

data/lib/picolena/templates/config/environments/development.rb CHANGED Viewed

@@ -16,6 +16,3 @@ config.action_view.cache_template_extensions         = false
 # Don't care if the mailer can't send
 config.action_mailer.raise_delivery_errors = false
-IndexerLogger.level = Logger::DEBUG

data/lib/picolena/templates/config/environments/production.rb CHANGED Viewed

@@ -17,5 +17,3 @@ config.action_view.cache_template_loading            = true
 # Disable delivery errors, bad email addresses will be ignored
 # config.action_mailer.raise_delivery_errors = false
-IndexerLogger.level = Logger::INFO

data/lib/picolena/templates/config/environments/test.rb CHANGED Viewed

@@ -20,6 +20,3 @@ config.action_controller.allow_forgery_protection    = false
 # The :test delivery method accumulates sent emails in the
 # ActionMailer::Base.deliveries array.
 config.action_mailer.delivery_method = :test
-IndexerLogger.level = Logger::WARN

data/lib/picolena/templates/config/initializers/002_load_indexed_dirs.rb CHANGED Viewed

@@ -7,4 +7,7 @@ module Picolena
   }
   IndexSavePath=File.join(IndexesSavePath,ENV["RAILS_ENV"] || "development")
+  FileUtils.mkpath IndexSavePath
+  MetaIndexPath= File.join(IndexSavePath,'meta')
+  FileUtils.mkpath MetaIndexPath
 end

data/lib/picolena/templates/config/initializers/007_load_performance_tweaks.rb CHANGED Viewed

@@ -3,4 +3,4 @@ module Picolena
   YAML.load_file('config/custom/indexing_performance.yml').each_pair{|param, value|
     IndexingConfiguration[param.to_sym]= value=~/^[\d_]+$/ ? value.to_i : value
   }
-end
+end

data/lib/picolena/templates/lang/ui/de.yml CHANGED Viewed

@@ -22,5 +22,5 @@ LIKE: WIE
 filename: filename|file|datei
 filetype: erweiterung|ext
 content: inhalt
-modified: jahr|zeit|geändert
-language: lang|sprache
+modified: jahr|zeit|geändert|geaendert|geandert
+language: lang|sprache

data/lib/picolena/templates/lang/ui/en.yml CHANGED Viewed

@@ -20,7 +20,7 @@ LIKE: LIKE
 ## Fields
 filename: filename|file
-filetype: filetype|ext
+filetype: filetype|ext|extension
 content: content
 modified: year|date|modified
 language: lang|language

data/lib/picolena/templates/lang/ui/es.yml CHANGED Viewed

@@ -20,7 +20,7 @@ LIKE: COMO
 ## Fields
 filename: filename|file|archivo
-filetype: extensión|ext
+filetype: extensión|ext|extension
 content: contenido
 modified: fecha|año|anho|modificado
 language: lang|idioma

data/lib/picolena/templates/lang/ui/fr.yml CHANGED Viewed

@@ -22,5 +22,5 @@ LIKE: COMME
 filename: filename|file|fichier
 filetype: extension|ext
 content: contenu
-modified: année|date|annee|modifie
-language: lang|langue
+modified: année|date|annee|modifie|modifié
+language: lang|langue

data/lib/picolena/templates/lib/core_exts.rb CHANGED Viewed

@@ -1,20 +1,3 @@
-class MimeType
-  @@all=[]
-  def self.all
-    @@all
-  end
-  def self.add(exts,mime_name)
-    all<<new(exts,mime_name)
-  end
-  attr_reader :exts, :name
-  def initialize(exts,mime_name)
-    @exts,@name=exts,mime_name
-  end
-end
 class String
   # Creates a "probably unique" id with the desired length, composed only of lowercase letters.
   def base26_hash(length=Picolena::HashLength)
@@ -23,6 +6,9 @@ class String
 end
 module Enumerable
+  # Similar to Enumerable#each, but creates a new thread for each element.
+  # Used for the indexer to make it multi-threaded.
+  # It ensures that threads are joined together before returning.
   def each_with_thread(&block)
     tds=self.collect{|elem|
       Thread.new(elem) {|elem|
@@ -57,17 +43,31 @@ class Array
   end
 end
+class Hash
+  def add(category)
+    self[category]||={:size=>0}
+    self[category][:size]+=1
+  end
+end
 class File
+  # Returns the filetype of filename as a symbol.
+  # Returns :no_extension unless an extension is found
+  #  >> File.ext_as_sym("test.pdf")
+  #  => :pdf
+  #  >> File.ext_as_sym("test.tar.gz")
+  #  => :gz
+  #  >> File.ext_as_sym("test")
+  #  => :no_extension
   def self.ext_as_sym(filename)
     File.extname(filename).sub(/^\./,'').downcase.to_sym rescue :no_extension
   end
-  def self.mime(filename)
-    ext=ext_as_sym(filename)
-    m=MimeType.all.find{|m| m.exts.include?(ext)}
-    m ? m.name : 'application/octet-stream'
-  end
+  # Returns a probable encoding for a given plain text file
+  # If source is a html file, it parses for metadata to retrieve encoding,
+  # and uses file -i otherwise.
+  # Returns iso-8859-15 instead of iso-8859-1, to be sure € char can be
+  # encoded
   def self.encoding(source)
     parse_for_charset="grep -io charset=[a-z0-9\\-]* | sed 's/charset=//i'"
     if File.extname(source)[0,4]==".htm" then
@@ -86,9 +86,18 @@ class File
      end
   end
+  # Returns the content of a file and removes it after.
+  # Could be used to read temporary output file written by a PlainTextExtractor.
   def self.read_and_remove(filename)
     content=read(filename)
     FileUtils.rm filename, :force=>true
     content
   end
+  # Returns nil unless filename is a plain text file.
+  # It requires file command.
+  # NOTE: What to use for Win32?
+  def self.plain_text?(filename)
+    %x{file -i "#{filename}"} =~ /: text\//
+  end
 end

data/lib/picolena/templates/lib/indexer_logger.rb ADDED Viewed

@@ -0,0 +1,45 @@
+class IndexerLogger<Logger
+  def initialize
+    super($stdout)
+    #FIXME: Should be defined in config/environments/*.rb
+    levels={
+      "development"=>Logger::DEBUG,
+      "production" =>Logger::INFO,
+      "test"       =>Logger::WARN
+    }
+    @level=levels[RAILS_ENV]
+    @found_languages={}
+    @supported_filetypes={}
+    @unsupported_filetypes={}
+  end
+  def start_indexing
+    @start_time=Time.now
+    debug "Indexing every directory"
+  end
+  def add_document(document)
+    debug ["Added : #{document[:complete_path]}",document[:language] && " ("<<document[:language]<<")"].join
+    @found_languages.add(document[:language]) if document[:language]
+    @supported_filetypes.add(document[:filetype])
+  end
+  def reject_document(document, error)
+    @unsupported_filetypes.add(document[:filetype])
+    debug "Added without content (#{error.message}) : #{document[:complete_path]}"
+  end
+  def show_report
+    describe :found_languages, :supported_filetypes, :unsupported_filetypes
+    info "Time needed              : #{Time.now-@start_time} s."
+  end
+  private
+  def describe(*instance_variable_names)
+    instance_variable_names.each{|var_name|
+      hash=instance_variable_get("@#{var_name}")
+      info var_name.to_s.humanize.ljust(25)<<": "<<hash.reject{|k,v| k.blank?}.sort_by{|k,v| v[:size]}.reverse.collect{|k,v| "#{k.downcase} (#{v[:size]})"}.join(", ") unless hash.empty?
+    }
+  end
+end

data/lib/picolena/templates/lib/plain_text_extractor_DSL.rb CHANGED Viewed

@@ -16,7 +16,6 @@ module PlainTextExtractorDSL
     @content_and_file_examples=[]
     self.instance_eval(&block)
     PlainTextExtractor.add(self)
-    MimeType.add(self.exts,self.mime_name)
   end
   def every(*exts)

data/lib/picolena/templates/lib/plain_text_extractors/plain_text.rb CHANGED Viewed

@@ -3,12 +3,12 @@ PlainTextExtractor.new {
   as "application/plain"
   aka "plain text file"
   with {|source|
+    raise "binary file" unless File.plain_text?(source)
     encoding=File.encoding(source)
-    #TODO: Return "binary file" if binary
     if encoding.empty? then
-       File.read(source)
+      File.read(source)
     else
-       %x{iconv -f #{encoding} -t utf8  "#{source}" 2>/dev/null}
+      %x{iconv -f #{encoding} -t utf8  "#{source}" 2>/dev/null}
     end
   }
   # for dependencies spec

data/lib/picolena/templates/lib/tasks/index.rake CHANGED Viewed

@@ -25,10 +25,15 @@ namespace :index do
     puts "#{Indexer.size} documents are currently indexed in #{Picolena::IndexSavePath}"
   end
+  desc 'Returns the last time the index was created/update'
+  task :last_update => :environment do
+    puts Indexer.last_update
+  end
   # Search index with query "some query" :
   # rake index:search query="some query"
   desc 'Search index'
   task :search => :environment do
-    Finder.new(ENV["query"]).matching_documents.entries.each{|doc| puts doc.to_s}
+    puts Finder.new(ENV["query"]).matching_documents.entries.collect{|doc| doc.inspect}.join("\n"<<"#"*80<<"\n")
   end
 end

data/lib/picolena/templates/spec/helpers/documents_helper_spec.rb CHANGED Viewed

@@ -1,8 +1,6 @@
 require File.dirname(__FILE__) + '/../spec_helper'
 describe DocumentsHelper do
-  it "shouldn't raise if matching not in content field"
   PlainTextExtractor.supported_extensions.each{|ext|
     it "should have an icon for .#{ext} filetype" do
       icon_for(ext).should_not be_nil

data/lib/picolena/templates/spec/models/basic_finder_spec.rb CHANGED Viewed

@@ -7,10 +7,13 @@ describe "Finder without index on disk" do
     @original_indexed_dirs=Picolena::IndexedDirectories.dup
     @new_index_path=File.join(Dir::tmpdir,'ferret_tst')
     Picolena::IndexSavePath.replace(@new_index_path)
+    Picolena::MetaIndexPath.replace(File.join(@new_index_path,'meta'))
+    FileUtils.mkpath Picolena::MetaIndexPath
   end
   before(:each) do
     Indexer.clear!
+    Finder.send(:class_variable_set,'@@last_reload',nil)
   end
   it "should create index" do
@@ -29,6 +32,7 @@ describe "Finder without index on disk" do
   after(:all) do
     Picolena::IndexedDirectories.replace(@original_indexed_dirs)
     Picolena::IndexSavePath.replace(@original_index_path)
+    Picolena::MetaIndexPath.replace(File.join(@original_index_path,'meta'))
   end
 end

data/lib/picolena/templates/spec/models/document_spec.rb CHANGED Viewed

@@ -78,6 +78,12 @@ describe Document do
     @valid_document.should be_supported
     Document.new("spec/test_dirs/indexed/others/ghjopdfg.xyz").should_not be_supported
   end
+  it "should not be considered supported if binary" do
+    Document.new("spec/test_dirs/indexed/others/BIN_FILE_WITHOUT_EXTENSION").should_not be_supported
+  end
   it "should know its language when enough content is available" do
     Document.new("spec/test_dirs/indexed/lang/goethe").language.should == "de"

data/lib/picolena/templates/spec/models/finder_spec.rb CHANGED Viewed

@@ -123,7 +123,6 @@ describe Finder do
     end
   end
-  it "should not index content of binary files"
   # Ferret sometimes SEGFAULT crashed with '*.pdf' queries
   it "should not crash while looking for *.pdf" do

data/lib/picolena/templates/spec/models/indexer_spec.rb CHANGED Viewed

@@ -4,4 +4,13 @@ describe Indexer do
   it "should have at least 32MB memory allocated" do
     Indexer.index.writer.max_buffer_memory.should > 2**25-1
   end
+  it "should know the time it was updated" do
+    Indexer.should respond_to(:last_update)
+    begin
+      Indexer.last_update.should be_kind_of(Time)
+    rescue
+      Indexer.last_update.should == "none"
+    end
+  end
 end

data/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb CHANGED Viewed

@@ -27,4 +27,9 @@ describe "PlainTextExtractors" do
       end
     }
   }
+  it "should not extract content of binary files" do
+    bin_file="spec/test_dirs/indexed/others/BIN_FILE_WITHOUT_EXTENSION"
+    lambda{PlainTextExtractor.extract_content_from(bin_file)}.should raise_error(RuntimeError, "binary file")
+  end
 end

data/lib/picolena/templates/spec/models/query_spec.rb CHANGED Viewed

@@ -37,7 +37,21 @@ describe Query do
     }
   end
-  it "should accept field terms in different languages"
+  it "should accept field terms in different languages" do
+    Globalite.language = :en
+      english_query_with_french_words = Query.extract_from("absorption language:fr extension:pdf")
+      english_query_with_german_words = Query.extract_from("Unabhängigkeit modified:>2005 filename:job.txt")
+    Globalite.language = :de
+      Query.extract_from("absorption sprache:fr erweiterung:pdf").should == english_query_with_french_words
+      Query.extract_from("Unabhängigkeit geändert:>2005 datei:job.txt").should == english_query_with_german_words
+    Globalite.language = :fr
+      Query.extract_from("absorption langue:fr extension:pdf").should == english_query_with_french_words
+      Query.extract_from("Unabhängigkeit modifié:>2005 fichier:job.txt").should == english_query_with_german_words
+    Globalite.language = :es
+      Query.extract_from("absorption idioma:fr extensión:pdf").should == english_query_with_french_words
+      Query.extract_from("Unabhängigkeit modificado:>2005 archivo:job.txt").should == english_query_with_german_words
+  end
   it "should use AND as default boolean ops" do
     query_without_and = Query.extract_from("one AND two")
@@ -62,4 +76,14 @@ describe Query do
     Query.extract_from("test").should == Query.extract_from("tesT")
     Query.extract_from("test").should_not == Query.extract_from("tesTe")
   end
-end
+  it "should be able to extract search terms related to :content" do
+    Query.content_terms("plain text").should == %w(plain text)
+    Query.content_terms("plain text extension:pdf").should == %w(plain text)
+    Query.content_terms("plain AND text").should == %w(plain text)
+    Query.content_terms("absorption OR adsorption").should ==%w(absorption adsorption)
+    Query.content_terms("filename:plain_text").should be_empty
+    Globalite.language = :en
+    Query.content_terms("LIKE absorption").include?("adsorption").should be_true
+  end
+end

data/lib/picolena/version.rb CHANGED Viewed

@@ -2,7 +2,7 @@ module Picolena #:nodoc:
   module VERSION #:nodoc:
     MAJOR = 0
     MINOR = 1
-    TINY  = 7
+    TINY  = 8
     STRING = [MAJOR, MINOR, TINY].join('.')
   end

data/website/index.html CHANGED Viewed

@@ -33,7 +33,7 @@
     <h1>Picolena</h1>
     <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/picolena"; return false'>
       <p>Get Version</p>
-      <a href="http://rubyforge.org/projects/picolena" class="numbers">0.1.7</a>
+      <a href="http://rubyforge.org/projects/picolena" class="numbers">0.1.8</a>
     </div>
     <h1>&#x2192; &#8216;picolena&#8217;</h1>

data.tar.gz.sig CHANGED Viewed

Binary file

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: picolena
 version: !ruby/object:Gem::Version
-  version: 0.1.7
+  version: 0.1.8
 platform: ruby
 authors:
 - Eric Duminil
@@ -30,7 +30,7 @@ cert_chain:
   qvI9FgPZ1QTG5uZAlBbk6d6JU2XfpA==
   -----END CERTIFICATE-----
-date: 2008-04-30 00:00:00 +02:00
+date: 2008-05-08 00:00:00 +02:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -185,6 +185,7 @@ files:
 - lib/picolena/templates/lang/ui/es.yml
 - lib/picolena/templates/lang/ui/fr.yml
 - lib/picolena/templates/lib/core_exts.rb
+- lib/picolena/templates/lib/indexer_logger.rb
 - lib/picolena/templates/lib/plain_text_extractor_DSL.rb
 - lib/picolena/templates/lib/plain_text_extractors/adobe.pdf.rb
 - lib/picolena/templates/lib/plain_text_extractors/html.rb

metadata.gz.sig CHANGED Viewed

Binary file