RubyGems - picolena - Versions diffs - 0.1.4 → 0.1.5 - Mend

picolena 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

data/History.txt CHANGED Viewed

@@ -1,3 +1,14 @@
+== 0.1.5  2008-04-
+* 1 major enhancement:
+  * yet another Indexer & Index rewrite
+* 1 minor enhancement:
+  * flags to indicate found language
+* bug fixes:
+  * No more (or just less?) index lock errors
 == 0.1.4  2008-04-23
 * 1 minor enhancement:
   * minimal MacOS support

data/Manifest.txt CHANGED Viewed

@@ -22,8 +22,6 @@ lib/picolena/templates/app/helpers/application_helper.rb
 lib/picolena/templates/app/helpers/documents_helper.rb
 lib/picolena/templates/app/models/document.rb
 lib/picolena/templates/app/models/finder.rb
-lib/picolena/templates/app/models/index_reader.rb
-lib/picolena/templates/app/models/index_writer.rb
 lib/picolena/templates/app/models/indexer.rb
 lib/picolena/templates/app/models/plain_text_extractor.rb
 lib/picolena/templates/app/models/query.rb
@@ -75,6 +73,22 @@ lib/picolena/templates/public/favicon.ico
 lib/picolena/templates/public/help/PicolenaHowTo-de.pdf
 lib/picolena/templates/public/help/PicolenaHowTo-de.tex
 lib/picolena/templates/public/images/bg.gif
+lib/picolena/templates/public/images/flags/ar.png
+lib/picolena/templates/public/images/flags/be.png
+lib/picolena/templates/public/images/flags/ca.png
+lib/picolena/templates/public/images/flags/de.png
+lib/picolena/templates/public/images/flags/el.png
+lib/picolena/templates/public/images/flags/en.png
+lib/picolena/templates/public/images/flags/es.png
+lib/picolena/templates/public/images/flags/fr.png
+lib/picolena/templates/public/images/flags/ga.png
+lib/picolena/templates/public/images/flags/hr.png
+lib/picolena/templates/public/images/flags/it.png
+lib/picolena/templates/public/images/flags/nl.png
+lib/picolena/templates/public/images/flags/pl.png
+lib/picolena/templates/public/images/flags/pt-br.png
+lib/picolena/templates/public/images/flags/pt-pt.png
+lib/picolena/templates/public/images/flags/readme.txt
 lib/picolena/templates/public/images/icons/cad.png
 lib/picolena/templates/public/images/icons/code.png
 lib/picolena/templates/public/images/icons/doc.png
@@ -120,8 +134,6 @@ lib/picolena/templates/spec/models/document_spec.rb
 lib/picolena/templates/spec/models/finder_spec.rb
 lib/picolena/templates/spec/models/host_indexing_system_spec.rb
 lib/picolena/templates/spec/models/index_directories_spec.rb
-lib/picolena/templates/spec/models/index_reader_spec.rb
-lib/picolena/templates/spec/models/index_writer_spec.rb
 lib/picolena/templates/spec/models/indexer_spec.rb
 lib/picolena/templates/spec/models/plain_text_extractor_spec.rb
 lib/picolena/templates/spec/models/query_spec.rb

data/lib/picolena/picolena_generator.rb CHANGED Viewed

@@ -163,7 +163,6 @@ EOS
     spec/test_dirs/indexed/yet_another_dir
     spec/test_dirs/not_indexed
     spec/views
-    spec/views/application
     tmp/cache
     tmp/ferret_indexes
     tmp/pids

data/lib/picolena/templates/app/helpers/documents_helper.rb CHANGED Viewed

@@ -37,6 +37,10 @@ module DocumentsHelper
     }) if document.matching_content
   end
+  def language_icon_for(document)
+    (lang=document.language) && image_tag("flags/#{lang}.png")
+  end
   # Returns icon and filename for any given document.
   def icon_and_filename_for(document)
     [icon_for(document.extname),document.filename].join("&nbsp;")

data/lib/picolena/templates/app/models/document.rb CHANGED Viewed

@@ -71,21 +71,33 @@ class Document
   # Returns the last modification date before the document got indexed.
   # Useful to know how old a document is, and to which version the cache corresponds.
   def date
-    from_index[:date].sub(/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})/,'\1-\2-\3 \4:\5:\6')
+    from_index[:modified].sub(/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})/,'\1-\2-\3 \4:\5:\6')
   end
   def mtime
-    from_index[:date].to_i
+    from_index[:modified].to_i
   end
   # Returns language.
-  def lang
-    from_index[:lang]
+  def language
+    from_index[:language]
   end
   # Returns the id with which the document is indexed.
   def index_id
-    @index_id ||= Document.find_by_complete_path(complete_path).index_id
+    @index_id ||= Finder.term_search(:complete_path, complete_path).doc
+  end
+  # Fields that are shared between every document.
+  def self.default_fields_for(complete_path)
+    {
+      :complete_path      => complete_path,
+      :probably_unique_id => complete_path.base26_hash,
+      :filename           => File.basename(complete_path),
+      :basename           => File.basename(complete_path, File.extname(complete_path)).gsub(/_/,' '),
+      :filetype           => File.extname(complete_path),
+      :modified           => File.mtime(complete_path).strftime("%Y%m%d%H%M%S")
+    }
   end
   private
@@ -93,17 +105,14 @@ class Document
   # Retrieves the document from the index.
   # Useful to get meta-info about it.
   def from_index
-    IndexReader.new[index_id]
+    Indexer.index[index_id]
   end
   def self.find_by_unique_id(some_id)
-    Finder.new("probably_unique_id:"<<some_id).matching_document
+    doc_id=Finder.term_search(:probably_unique_id, some_id).doc
+    new(Indexer.index[doc_id][:complete_path])
   end
-  def self.find_by_complete_path(complete_path)
-    Finder.new('complete_path:"'<<complete_path<<'"').matching_document
-  end
   def in_indexed_directory?
     !indexed_directory.nil?
   end

data/lib/picolena/templates/app/models/finder.rb CHANGED Viewed

@@ -2,18 +2,16 @@ class Finder
   attr_reader :query
   def index
-    # caching index @@index ||=
-    # causes ferret-0.11.6/lib/ferret/index.rb:768: [BUG] Segmentation fault
-    IndexReader.new
+    @@index ||= Indexer.index
   end
   def initialize(raw_query,page=1,results_per_page=Picolena::ResultsPerPage)
     @query = Query.extract_from(raw_query)
     @raw_query= raw_query
-    IndexReader.ensure_existence
+    Indexer.ensure_index_existence
     @per_page=results_per_page
     @offset=(page.to_i-1)*results_per_page
-    index.should_have_documents
+    index_should_have_documents
   end
   def execute!
@@ -31,9 +29,9 @@ class Finder
         found_doc.score=score
         found_doc.index_id=index_id
         @matching_documents<<found_doc
-        rescue Errno::ENOENT
-          #"File has been moved/deleted!"
-        end
+      rescue Errno::ENOENT
+        #"File has been moved/deleted!"
+      end
       }
       @executed=true
       @time_needed=Time.now-start
@@ -60,14 +58,36 @@ class Finder
    # Returns matching document for any given query only if
    # exactly one document is found.
    # Raises otherwise.
-   def matching_document
-     case matching_documents.size
-     when 0
-       raise IndexError, "No document found"
-     when 1
-       matching_documents.first
-     else
-       raise IndexError, "More than one document found"
-     end
-   end
+  def matching_document
+    case matching_documents.size
+    when 0
+      raise IndexError, "No document found"
+    when 1
+      matching_documents.first
+    else
+      raise IndexError, "More than one document found"
+    end
+  end
+  class<<self
+    def searcher
+      @@searcher ||= Ferret::Search::Searcher.new(Picolena::IndexSavePath)
+    end
+    def term_search(field,term)
+      query = Ferret::Search::TermQuery.new(field,term)
+      searcher.search(query).hits.first
+    end
+    def reload!
+      @@searcher = nil
+      @@index    = nil
+    end
+  end
+  private
+  def index_should_have_documents
+    raise IndexError, "no document found" unless index.size > 0
+  end
 end

data/lib/picolena/templates/app/models/indexer.rb CHANGED Viewed

@@ -5,54 +5,22 @@ class Indexer
   @@max_threads_number = 8
   class << self
-    def fields_for(complete_path)
-      {
-        :complete_path      => complete_path,
-        :probably_unique_id => complete_path.base26_hash,
-        :file               => File.basename(complete_path),
-        :basename           => File.basename(complete_path, File.extname(complete_path)).gsub(/_/,' '),
-        :filetype           => File.extname(complete_path),
-        :date               => File.mtime(complete_path).strftime("%Y%m%d%H%M%S")
-      }
-    end
-    def index_every_directory(update=true)
+    def index_every_directory(remove_first=false)
+      clear! if remove_first
+      # Forces Finder.searcher and Finder.index to be reloaded, by removing them from the cache.
+      Finder.reload!
       log :debug => "Indexing every directory"
       start=Time.now
-      @update = update
-      reset! unless update
       Picolena::IndexedDirectories.each{|dir, alias_dir|
         index_directory_with_multithreads(dir)
       }
-      # FIXME: with those 2 lines,
+      log :debug => "Now optimizing index"
       writer.optimize
-      writer.close
-      # launching Indexer.index_every_directory twice in a row
-      # would raise a SEGFAULT:
-      # picolena/lib/picolena/templates/app/models/indexer.rb:27: [BUG] Segmentation fault
-      # ruby 1.8.6 (2007-06-07) [i486-linux]
-      #
-      # Aborted (core dumped)
-      #
-      # But without those 2 lines, specs don't pass anymore.
-      #
       log :debug => "Indexing done in #{Time.now-start} s."
     end
     def index_directory_with_multithreads(dir)
-      # FIXME: Don't know why, but if more than one thread is created while update the index,
-      # indexer raises:
-      #
-      # current thread not owner
-      # /usr/lib/ruby/1.8/monitor.rb:278:in `mon_check_owner'
-      # /home/www/picolena/lib/picolena/templates/lib/core_exts.rb:32:in `join'
-      # ...
-      #
-      # So Index creation is multithreaded, Index update is monothreaded.
-      threads_number = @update ? 1 : @@max_threads_number
+      threads_number = @@max_threads_number
       log :debug => "Indexing #{dir}, #{threads_number} thread(s)"
       indexing_list=Dir[File.join(dir,"**/*")].select{|filename|
@@ -61,82 +29,94 @@ class Indexer
       indexing_list_chunks=indexing_list.in_transposed_slices(threads_number)
+      # It initializes an IndexWriter before launching multithreaded
+      # indexing. Otherwise, two threads could try to instantiate
+      # an IndexWriter at the same time, and get a
+      #  Ferret::Store::Lock::LockError
+      writer
       indexing_list_chunks.each_with_thread{|chunk|
         chunk.each{|filename|
-          add_or_update_file(filename)
+          add_file(filename)
         }
       }
     end
-    def add_or_update_file(complete_path)
-      should_be_added = true
-      if @update then
-        log :debug =>  "What to do with #{complete_path} ?"
-        occurences = reader.occurences_number(complete_path)
-        log :debug =>  "\tappears #{occurences} times in the index"
-        case occurences
-          when 0
-          #Nothing to do here, the file will be added.
-          when 1
-          d=Document.find_by_complete_path(complete_path)
-          if File.mtime(complete_path).strftime("%Y%m%d%H%M%S").to_i > d.mtime then
-            log :debug => "\thas been modified"
-            delete_file(complete_path)
-          else
-            should_be_added = false
-            log :debug => "\thas not been modified. leaving it"
-          end
-        else
-          delete_file(complete_path)
-        end
-      end
-      add_file(complete_path) if should_be_added
-    end
     def add_file(complete_path)
-      log :debug => "Adding #{complete_path}"
-      mime_type=File.mime(complete_path)
-      fields = fields_for(complete_path)
-      begin
-        text, lang = PlainTextExtractor.extract_content_and_language_from(complete_path)
-        raise "\tempty document #{complete_path}" if text.strip.empty?
-        fields[:content] = text
-        log :debug => "language found: #{lang}" if lang
-        fields[:lang] = lang
+      default_fields = Document.default_fields_for(complete_path)
+      begin
+        document = PlainTextExtractor.extract_content_and_language_from(complete_path)
+        raise "empty document #{complete_path}" if document[:content].strip.empty?
+        document.merge! default_fields
+        log :debug => ["Added : #{complete_path}",document[:language] ? " (#{document[:language]})" : ""].join
       rescue => e
         log :debug => "\tindexing without content: #{e.message}"
+        document = default_fields
       end
-      writer << fields
+      writer << document
     end
-    def writer
-      @@writer ||= IndexWriter.new
+    # Ensures writer is closed, and removes every index file for RAILS_ENV.
+    def clear!(all=false)
+      close
+      to_remove=all ? Picolena::IndexesSavePath : Picolena::IndexSavePath
+      Dir.glob(File.join(to_remove,'**/*')).each{|f| FileUtils.rm(f) if File.file?(f)}
     end
-    def reader
-      @@reader ||= IndexReader.new
+    # Closes the writer and
+    # ensures that a new IndexWriter is instantiated next time writer is called.
+    def close
+      @@writer.close rescue nil
+      # Ferret will SEGFAULT otherwise.
+      @@writer = nil
     end
-    def reset!
-      log :debug => "Resetting Index"
-      @@writer=nil
-      @@reader=nil
-      IndexWriter.remove
+    # Only one IndexWriter should be instantiated.
+    # If one already exists, returns it.
+    # Creates it otherwise.
+    def writer
+      @@writer ||= Ferret::Index::IndexWriter.new(default_index_params)
     end
-    def delete_file(complete_path)
-      log :debug => "\tRemoving from index"
-      reader.delete_by_complete_path(complete_path)
+    def index
+      Ferret::Index::Index.new(default_index_params)
+    end
+    def ensure_index_existence
+      index_every_directory(:remove_first) unless index_exists? or RAILS_ENV=="production"
     end
     private
+    def index_exists?
+      index_filename and File.exists?(index_filename)
+    end
+    def index_filename
+      Dir.glob(File.join(Picolena::IndexSavePath,'*.cfs')).first
+    end
     def log(hash)
       hash.each{|level,message|
         IndexerLogger.send(level,message)
       }
-    end
+    end
+    def default_index_params
+      {:path => Picolena::IndexSavePath, :analyzer => Picolena::Analyzer, :field_infos => default_field_infos}
+    end
+    def default_field_infos
+      returning Ferret::Index::FieldInfos.new do |field_infos|
+        field_infos.add_field(:complete_path,      :store => :yes, :index => :untokenized)
+        field_infos.add_field(:content,            :store => :yes, :index => :yes)
+        field_infos.add_field(:basename,           :store => :no,  :index => :yes, :boost => 1.5)
+        field_infos.add_field(:filename,           :store => :no,  :index => :yes, :boost => 1.5)
+        field_infos.add_field(:filetype,           :store => :no,  :index => :yes, :boost => 1.5)
+        field_infos.add_field(:modified,           :store => :yes, :index => :untokenized)
+        field_infos.add_field(:probably_unique_id, :store => :no,  :index => :yes)
+        field_infos.add_field(:language,           :store => :yes, :index => :yes)
+      end
+    end
   end
 end

data/lib/picolena/templates/app/models/plain_text_extractor.rb CHANGED Viewed

@@ -109,12 +109,12 @@ class PlainTextExtractor
   # and if probability score is higher than 90%.
   def extract_content_and_language
     content=extract_content
-    return [content, nil] unless [# Is LanguageRecognition turned on? (cf config/custom/picolena.rb)
-                                  Picolena::UseLanguageRecognition,
-                                  # Is a language guesser already installed?
-                                  PlainTextExtractor.language_guesser,
-                                  # Language recognition is too unreliable for small files.
-                                  content.size > 500].all?
+    return {:content => content} unless [# Is LanguageRecognition turned on? (cf config/custom/picolena.rb)
+                                         Picolena::UseLanguageRecognition,
+                                         # Is a language guesser already installed?
+                                         PlainTextExtractor.language_guesser,
+                                         # Language recognition is too unreliable for small files.
+                                         content.size > 500].all?
     language=IO.popen(PlainTextExtractor.language_guesser,'w+'){|lang_guesser|
       lang_guesser.write content
       lang_guesser.close_write
@@ -125,6 +125,6 @@ class PlainTextExtractor
         lang unless score<0.9
       end
     }
-    [content,language]
+    {:content => content, :language => language}
   end
 end

data/lib/picolena/templates/app/models/query.rb CHANGED Viewed

@@ -13,9 +13,11 @@ class Query
        /\b#{:AND.l}\b/=>'AND',
        /\b#{:OR.l}\b/=>'OR',
        /\b#{:NOT.l}\b/=>'NOT',
+       /(#{:filename.l}):/=>'filename:',
        /(#{:filetype.l}):/=>'filetype:',
        /#{:content.l}:/ => 'content:',
-       /#{:date.l}:/ => 'date:',
+       /(#{:modified.l}):/ => 'modified:',
+       /(#{:language.l}):/ => 'language:',
        /\b#{:LIKE.l}\s+(\S+)/=>'\1~'
       }
       to_en.inject(raw_query){|mem,non_english_to_english_keyword|
@@ -25,7 +27,7 @@ class Query
     # Instantiates a QueryParser once, and keeps it in cache.
     def parser
-      @@parser ||= Ferret::QueryParser.new(:fields => [:content, :file, :basename, :filetype, :date], :or_default => false, :analyzer=>Picolena::Analyzer)
+      @@parser ||= Ferret::QueryParser.new(:fields => [:content, :filename, :basename, :filetype, :modified], :or_default => false, :analyzer=>Picolena::Analyzer)
     end
   end
 end

data/lib/picolena/templates/app/views/documents/_document.html.haml CHANGED Viewed

@@ -1,5 +1,6 @@
 %h2
   =link_to icon_and_filename_for(document), download_document_path(document.probably_unique_id)
+  =language_icon_for(document)
   %small=number_to_percentage(document.score*100, :precision=>1)
 =highlight_matching_content(document)
 %p=link_to_containing_directory(document)

data/lib/picolena/templates/lang/ui/de.yml CHANGED Viewed

@@ -19,6 +19,8 @@ NOT: NICHT
 LIKE: WIE
 ## Fields
+filename: filename|file|datei
 filetype: erweiterung|ext
 content: inhalt
-date: jahr|zeit
+modified: jahr|zeit|geändert
+language: lang|sprache

data/lib/picolena/templates/lang/ui/en.yml CHANGED Viewed

@@ -19,6 +19,8 @@ NOT: NOT
 LIKE: LIKE
 ## Fields
+filename: filename|file
 filetype: filetype|ext
 content: content
-date: year|date
+modified: year|date|modified
+language: lang|language

data/lib/picolena/templates/lang/ui/es.yml CHANGED Viewed

@@ -19,6 +19,8 @@ NOT: "NO"
 LIKE: COMO
 ## Fields
+filename: filename|file|archivo
 filetype: extensión|ext
 content: contenido
-date: fecha|año|anho
+modified: fecha|año|anho|modificado
+language: lang|idioma

data/lib/picolena/templates/lang/ui/fr.yml CHANGED Viewed

@@ -19,6 +19,8 @@ NOT: NON
 LIKE: COMME
 ## Fields
+filename: filename|file|fichier
 filetype: extension|ext
 content: contenu
-date: année|date|annee
+modified: année|date|annee|modifie
+language: lang|langue

data/lib/picolena/templates/lib/tasks/index.rake CHANGED Viewed

@@ -2,17 +2,17 @@ desc 'Ferret index maintenance tasks'
 namespace :index do
   desc 'Clear indexes'
   task :clear => :environment do
-    IndexWriter.remove
+    Indexer.clear! :all
   end
   desc 'Create index'
   task :create => :environment do
-    Indexer.index_every_directory(update=false)
+    Indexer.index_every_directory(remove_first=true)
   end
   desc 'Update index'
   task :update => :environment do
-    Indexer.index_every_directory(update=true)
+    Indexer.index_every_directory
   end
   # Search index with query "some query" :

data/lib/picolena/templates/public/images/flags/ar.png ADDED Viewed

Binary file

data/lib/picolena/templates/public/images/flags/be.png ADDED Viewed

Binary file

data/lib/picolena/templates/public/images/flags/ca.png ADDED Viewed

Binary file

data/lib/picolena/templates/public/images/flags/de.png ADDED Viewed

Binary file

data/lib/picolena/templates/public/images/flags/el.png ADDED Viewed

Binary file

data/lib/picolena/templates/public/images/flags/en.png ADDED Viewed

Binary file

data/lib/picolena/templates/public/images/flags/es.png ADDED Viewed

Binary file

data/lib/picolena/templates/public/images/flags/fr.png ADDED Viewed

Binary file

data/lib/picolena/templates/public/images/flags/ga.png ADDED Viewed

Binary file

data/lib/picolena/templates/public/images/flags/hr.png ADDED Viewed

Binary file

data/lib/picolena/templates/public/images/flags/it.png ADDED Viewed

Binary file

data/lib/picolena/templates/public/images/flags/nl.png ADDED Viewed

Binary file

data/lib/picolena/templates/public/images/flags/pl.png ADDED Viewed

Binary file

data/lib/picolena/templates/public/images/flags/pt-br.png ADDED Viewed

Binary file

data/lib/picolena/templates/public/images/flags/pt-pt.png ADDED Viewed

Binary file

data/lib/picolena/templates/public/images/flags/readme.txt ADDED Viewed

@@ -0,0 +1,9 @@
+Flag icons - http://www.famfamfam.com
+These icons are public domain, and as such are free for any use (attribution appreciated but not required).
+Note that these flags are named using the ISO3166-1 alpha-2 country codes where appropriate. A list of codes can be found at http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
+If you find these icons useful, please donate via paypal to mjames@gmail.com (or click the donate button available at http://www.famfamfam.com/lab/icons/silk)
+Contact: mjames@gmail.com

data/lib/picolena/templates/spec/models/basic_finder_spec.rb CHANGED Viewed

@@ -10,14 +10,14 @@ describe "Finder without index on disk" do
   end
   before(:each) do
-    IndexWriter.remove
+    Indexer.clear!
   end
   it "should create index" do
     Picolena::IndexedDirectories.replace({'spec/test_dirs/indexed/just_one_doc'=>'//justonedoc/'})
-    lambda {@finder_with_new_index=Finder.new("test moi")}.should change(IndexReader, :exists?).from(false).to(true)
+    lambda {@finder_with_new_index=Finder.new("test moi")}.should change(Indexer, :index_exists?).from(false).to(true)
     File.exists?(File.join(@new_index_path,'_0.cfs')).should be_true
-    IndexReader.new.size.should >0
+    Indexer.index.size.should >0
   end
   it "should raise if index is still empty after trying to create it" do
@@ -35,16 +35,19 @@ end
 fields={
   # description => key
-  :content=>:content,
-  :basename=>:basename,
-  :filename=>:file,
-  :extension => :filetype,
-  :modification_time=>:date
+  :content            => :content,
+  :complete_path      => :complete_path,
+  :basename           => :basename,
+  :filename           => :filename,
+  :extension          => :filetype,
+  :modification_time  => :modified,
+  :probably_unique_id => :probably_unique_id,
+  :language           => :language
 }
 describe "Basic Finder" do
   before(:all) do
-    Indexer.index_every_directory(update=false)
+    Indexer.index_every_directory(remove_first=true)
   end
   it "should accept one parameter as query, and 2 optionals for paginating" do
@@ -82,7 +85,7 @@ describe "Basic Finder" do
   fields.each_pair do |description,field_name|
     it "should index #{description} as :#{field_name}" do
-      IndexReader.new.field_infos[field_name].should be_an_instance_of(Ferret::Index::FieldInfo)
+      Indexer.index.field_infos[field_name].should be_an_instance_of(Ferret::Index::FieldInfo)
     end
   end

data/lib/picolena/templates/spec/models/finder_spec.rb CHANGED Viewed

@@ -21,7 +21,7 @@ describe Finder do
     File.utime(0, once_upon_a_time, 'spec/test_dirs/indexed/basic/basic.pdf')
     File.utime(0, a_bit_later, 'spec/test_dirs/indexed/yet_another_dir/office2003-word-template.dot')
     File.utime(0, nineties, 'spec/test_dirs/indexed/others/placeholder.txt')
-    Indexer.index_every_directory(update=false)
+    Indexer.index_every_directory(remove_first=true)
   end
   it "should find documents according to their basename when specified with basename:query" do
@@ -30,8 +30,8 @@ describe Finder do
     matching_documents_filename.should include("crossed.text")
   end
-  it "should find documents according to their filename when specified with file:query" do
-    Finder.new("file:crossed.text").matching_documents.collect{|d| d.content}.should include("txt inside!")
+  it "should find documents according to their filename when specified with file:query or filename:query" do
+    Finder.new("filename:crossed.text").matching_documents.collect{|d| d.content}.should include("txt inside!")
     Finder.new("file:crossed.txt").matching_documents.collect{|d| d.content}.should include("text inside!")
   end
@@ -47,9 +47,9 @@ describe Finder do
   end
   it "should give a boost to basename, filename and filetype in index" do
-    index=IndexReader.new
+    index=Indexer.index
     index.field_infos[:basename].boost.should > 1.0
-    index.field_infos[:file].boost.should > 1.0
+    index.field_infos[:filename].boost.should > 1.0
     index.field_infos[:filetype].boost.should > 1.0
   end

data/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb CHANGED Viewed

@@ -2,7 +2,7 @@ require File.dirname(__FILE__) + '/../spec_helper'
 describe "PlainTextExtractors" do
   before(:all) do
-    IndexReader.ensure_existence
+    Indexer.ensure_index_existence
   end
   PlainTextExtractor.all.each{|extractor|
@@ -29,14 +29,14 @@ describe "PlainTextExtractors" do
   }
   it "should guess language when enough content is available" do
-    Document.new("spec/test_dirs/indexed/lang/goethe").lang.should == "de"
-    Document.new("spec/test_dirs/indexed/lang/shakespeare").lang.should == "en"
-    Document.new("spec/test_dirs/indexed/lang/lorca").lang.should == "es"
-    Document.new("spec/test_dirs/indexed/lang/hugo").lang.should == "fr"
+    Document.new("spec/test_dirs/indexed/lang/goethe").language.should == "de"
+    Document.new("spec/test_dirs/indexed/lang/shakespeare").language.should == "en"
+    Document.new("spec/test_dirs/indexed/lang/lorca").language.should == "es"
+    Document.new("spec/test_dirs/indexed/lang/hugo").language.should == "fr"
   end
   it "should not try to guess language when file is too small" do
-    Document.new("spec/test_dirs/indexed/basic/hello.rb").lang.should be_empty
-    Document.new("spec/test_dirs/indexed/README").lang.should be_empty
+    Document.new("spec/test_dirs/indexed/basic/hello.rb").language.should be_nil
+    Document.new("spec/test_dirs/indexed/README").language.should be_nil
   end
 end

data/lib/picolena/version.rb CHANGED Viewed

@@ -2,7 +2,7 @@ module Picolena #:nodoc:
   module VERSION #:nodoc:
     MAJOR = 0
     MINOR = 1
-    TINY  = 4
+    TINY  = 5
     STRING = [MAJOR, MINOR, TINY].join('.')
   end

data/tasks/hack.rake CHANGED Viewed

@@ -1,9 +1,9 @@
 desc 'Create development picolena structure inside lib/picolena/templates'
-task :lets_hack do
+task :lets_hack => :clean do
   picolena_root=File.join(File.dirname(__FILE__),'..')
   Dir.chdir(picolena_root){
     # Doesn't overwrite any file, Doesn't create any index, Doesn't launch any spec.
-    system("ruby bin/picolena lib/picolena/templates/spec/test_dirs/indexed --skip --no-index --no-spec --destination=lib/picolena/templates")
+    system("ruby bin/picolena lib/picolena/templates/spec/test_dirs/indexed --no-index --no-spec --destination=lib/picolena/templates")
   }
   puts <<-EXPLAIN

data/website/index.html CHANGED Viewed

@@ -33,7 +33,7 @@
     <h1>Picolena</h1>
     <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/picolena"; return false'>
       <p>Get Version</p>
-      <a href="http://rubyforge.org/projects/picolena" class="numbers">0.1.4</a>
+      <a href="http://rubyforge.org/projects/picolena" class="numbers">0.1.5</a>
     </div>
     <h1>&#x2192; &#8216;picolena&#8217;</h1>

data.tar.gz.sig CHANGED Viewed

@@ -1,2 +1 @@
-.��@�:���N6JQp�9V�"QT�k�7~4*�D��w��u���%v��[��r�Y���hB�t:Cv=�,8ڽ��c���;I��V[$y�ǌ�ϓέN�3��x+��yC�Q^ہ�C(�L)�O7�-��2ZV�L�]���i~��JK"8F�|��:�eT��Vp��ߋU��]��
-��V���[;#̧KM���$�;=X�~�>����	wYI7��3ksv��A߶�	��0�GZTi7$�����>@
+B�8Ǣ�����ԝ�ŗFA�sέ�%l�ѵ�Aw�k>�6�w���|ĝW^9>]���k��i����I٤�e�Z7٭Px���UK��+r�>P��al�<�T+eL@�HD�!�@��X�nV鐎wa<��b臋�g����,q���m�{i��2����#�m�=�܈ϲH'W�má؝=cm��ݔ�^��㩫҃L=�ˁ�"�r�L�{7�{�R'4�������k����hkx�����=��6�j

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: picolena
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.1.5
 platform: ruby
 authors:
 - Eric Duminil
@@ -30,7 +30,7 @@ cert_chain:
   qvI9FgPZ1QTG5uZAlBbk6d6JU2XfpA==
   -----END CERTIFICATE-----
-date: 2008-04-23 00:00:00 +02:00
+date: 2008-04-25 00:00:00 +02:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -107,6 +107,7 @@ extra_rdoc_files:
 - History.txt
 - Manifest.txt
 - README.txt
+- lib/picolena/templates/public/images/flags/readme.txt
 - lib/picolena/templates/public/robots.txt
 - lib/picolena/templates/spec/test_dirs/indexed/basic/crossed.txt
 - lib/picolena/templates/spec/test_dirs/indexed/basic/plain.txt
@@ -156,8 +157,6 @@ files:
 - lib/picolena/templates/app/helpers/documents_helper.rb
 - lib/picolena/templates/app/models/document.rb
 - lib/picolena/templates/app/models/finder.rb
-- lib/picolena/templates/app/models/index_reader.rb
-- lib/picolena/templates/app/models/index_writer.rb
 - lib/picolena/templates/app/models/indexer.rb
 - lib/picolena/templates/app/models/plain_text_extractor.rb
 - lib/picolena/templates/app/models/query.rb
@@ -209,6 +208,22 @@ files:
 - lib/picolena/templates/public/help/PicolenaHowTo-de.pdf
 - lib/picolena/templates/public/help/PicolenaHowTo-de.tex
 - lib/picolena/templates/public/images/bg.gif
+- lib/picolena/templates/public/images/flags/ar.png
+- lib/picolena/templates/public/images/flags/be.png
+- lib/picolena/templates/public/images/flags/ca.png
+- lib/picolena/templates/public/images/flags/de.png
+- lib/picolena/templates/public/images/flags/el.png
+- lib/picolena/templates/public/images/flags/en.png
+- lib/picolena/templates/public/images/flags/es.png
+- lib/picolena/templates/public/images/flags/fr.png
+- lib/picolena/templates/public/images/flags/ga.png
+- lib/picolena/templates/public/images/flags/hr.png
+- lib/picolena/templates/public/images/flags/it.png
+- lib/picolena/templates/public/images/flags/nl.png
+- lib/picolena/templates/public/images/flags/pl.png
+- lib/picolena/templates/public/images/flags/pt-br.png
+- lib/picolena/templates/public/images/flags/pt-pt.png
+- lib/picolena/templates/public/images/flags/readme.txt
 - lib/picolena/templates/public/images/icons/cad.png
 - lib/picolena/templates/public/images/icons/code.png
 - lib/picolena/templates/public/images/icons/doc.png
@@ -254,8 +269,6 @@ files:
 - lib/picolena/templates/spec/models/finder_spec.rb
 - lib/picolena/templates/spec/models/host_indexing_system_spec.rb
 - lib/picolena/templates/spec/models/index_directories_spec.rb
-- lib/picolena/templates/spec/models/index_reader_spec.rb
-- lib/picolena/templates/spec/models/index_writer_spec.rb
 - lib/picolena/templates/spec/models/indexer_spec.rb
 - lib/picolena/templates/spec/models/plain_text_extractor_spec.rb
 - lib/picolena/templates/spec/models/query_spec.rb

metadata.gz.sig CHANGED Viewed

Binary file

data/lib/picolena/templates/app/models/index_reader.rb DELETED Viewed

@@ -1,54 +0,0 @@
-class IndexReader < Ferret::Index::Index
-  def initialize(params={})
-    # Add needed parameters
-    params.merge!(:path => Picolena::IndexSavePath, :analyzer => Picolena::Analyzer)
-    # Creates the IndexReader
-    super(params)
-  end
-  # Returns the number of times a file is present in the index.
-  # index_reader.doc_freq(field, term) → integer
-  # Return the number of documents in which the term term appears in the field field.
-  def occurences_number(complete_path)
-    # complete_path_query = Ferret::Search::TermQuery.new(:complete_path, complete_path)
-    search_by_complete_path(complete_path).total_hits
-  end
-  def search_by_complete_path(complete_path)
-    search('complete_path:"'<<complete_path<<'"')
-  end
-  def delete_by_complete_path(complete_path)
-    search_by_complete_path(complete_path).hits.each{|hit|
-      delete(hit.doc)
-    }
-    close
-  end
-  # Validation methods.
-  def should_have_documents
-     raise IndexError, "no document found" unless has_documents?
-  end
-  # Returns true if there's at least one document indexed.
-  def has_documents?
-   size>0
-  end
- class<<self
-   def ensure_existence
-     Indexer.index_every_directory(update=false) unless exists? or RAILS_ENV=="production"
-   end
-  def exists?
-     filename and File.exists?(filename)
-  end
-  def filename
-    Dir.glob(File.join(Picolena::IndexSavePath,'*.cfs')).first
-  end
-  end
-end

data/lib/picolena/templates/app/models/index_writer.rb DELETED Viewed

@@ -1,33 +0,0 @@
-class IndexWriter < Ferret::Index::IndexWriter
-  def initialize(params={})
-    # Add needed parameters
-    params.merge!(:create_if_missing => true,
-                  :path              => Picolena::IndexSavePath,
-                  :analyzer          => Picolena::Analyzer
-                  # huge performance impact?
-                  # :auto_flush        => true
-                  )
-    # Creates the IndexWriter
-    super(params)
-    # Add required fields (content, filetype, probably_unique_id, ...)
-    add_fields!
-  end
-  def self.remove
-    Dir.glob(File.join(Picolena::IndexSavePath,'*')).each{|f| FileUtils.rm(f) if File.file?(f)}
-  end
-  private
-  def add_fields!
-    # No need to re-create any field.
-    return unless field_infos.fields.empty?
-    field_infos.add_field(:complete_path,      :store => :yes, :index => :yes)
-    field_infos.add_field(:content,            :store => :yes, :index => :yes)
-    field_infos.add_field(:basename,           :store => :no,  :index => :yes, :boost => 1.5)
-    field_infos.add_field(:file,               :store => :no,  :index => :yes, :boost => 1.5)
-    field_infos.add_field(:filetype,           :store => :no,  :index => :yes, :boost => 1.5)
-    field_infos.add_field(:date,               :store => :yes, :index => :yes)
-    field_infos.add_field(:probably_unique_id, :store => :no,  :index => :yes)
-    field_infos.add_field(:lang,               :store => :yes,  :index => :yes)
-  end
-end

data/lib/picolena/templates/spec/models/index_reader_spec.rb DELETED Viewed

@@ -1,7 +0,0 @@
-require File.dirname(__FILE__) + '/../spec_helper'
-describe IndexReader do
-  before(:each) do
-    @index_reader = IndexReader.new
-  end
-end

data/lib/picolena/templates/spec/models/index_writer_spec.rb DELETED Viewed

@@ -1,7 +0,0 @@
-require File.dirname(__FILE__) + '/../spec_helper'
-describe IndexWriter do
-  before(:each) do
-    @index_writer = IndexWriter.new
-  end
-end