RubyGems - picolena - Versions diffs - 0.1.6 → 0.1.7 - Mend

picolena 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

data/History.txt +12 -3
data/Manifest.txt +2 -0
data/bin/picolena +1 -1
data/config/files_to_clean +1 -0
data/lib/picolena/config/basic.rb +6 -2
data/lib/picolena/config/indexing_performance.yml +30 -0
data/lib/picolena/picolena_generator.rb +9 -4
data/lib/picolena/templates/app/controllers/documents_controller.rb +3 -1
data/lib/picolena/templates/app/helpers/documents_helper.rb +18 -9
data/lib/picolena/templates/app/models/document.rb +20 -3
data/lib/picolena/templates/app/models/finder.rb +19 -19
data/lib/picolena/templates/app/models/indexer.rb +36 -9
data/lib/picolena/templates/app/views/documents/_document.html.haml +7 -2
data/lib/picolena/templates/app/views/documents/cached.html.haml +2 -2
data/lib/picolena/templates/app/views/documents/show.html.haml +5 -2
data/lib/picolena/templates/config/environment.rb +1 -1
data/lib/picolena/templates/config/initializers/007_load_performance_tweaks.rb +6 -0
data/lib/picolena/templates/lib/tasks/index.rake +6 -1
data/lib/picolena/templates/lib/tasks/install_dependencies.rake +1 -1
data/lib/picolena/templates/public/stylesheets/style.css +17 -1
data/lib/picolena/templates/spec/models/basic_finder_spec.rb +4 -4
data/lib/picolena/templates/spec/models/document_spec.rb +65 -16
data/lib/picolena/templates/spec/models/finder_spec.rb +4 -3
data/lib/picolena/templates/spec/models/host_indexing_system_spec.rb +8 -0
data/lib/picolena/templates/spec/models/indexer_spec.rb +2 -2
data/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb +0 -12
data/lib/picolena/templates/spec/models/query_spec.rb +10 -1
data/lib/picolena/version.rb +1 -1
data/website/index.html +1 -1
data/website/index.txt +0 -0
data/website/index_devjavu +0 -0
data/website/javascripts/rounded_corners_lite.inc.js +0 -0
data/website/stylesheets/screen.css +0 -0
data.tar.gz.sig +0 -0
metadata +4 -2
metadata.gz.sig +3 -1

data/History.txt CHANGED Viewed

@@ -1,10 +1,19 @@
+== 0.1.7  2008-04-30
+* 5 minor enhancements:
+  * added cache highlighting à la Google
+  * rake index:update implemented as described in Ferret book by David Balmain
+  * rake index:prune removes missing files from indexer
+  * possibility to sort results by relevance / by date
+  * one configuration file for performance tweaks
 == 0.1.6  2008-04-25
 * 1 minor enhancement:
   * replaced index key by Document#probably_unique_id
 * bug fixes:
-  * Added forgotten public/images/flags to generator file.
+  * Added forgotten public/images/flags to generator file
 == 0.1.5  2008-04-25
@@ -24,7 +33,7 @@
 == 0.1.3  2008-04-20
 * 1 bug fix:
-  * removed verbose debug info.
+  * removed verbose debug info
 == 0.1.2  2008-04-20
@@ -49,7 +58,7 @@
 * 3 minor enhancements:
   * can now be installed on win32 (doesn't pass every spec though)
   * moved rails_plugins away from lib/ so that they don't get parsed by rdoc/ri
-  * shorter and prettier base26_hash id for documents.
+  * shorter and prettier base26_hash id for documents
 == 0.0.99  2008-04-06

data/Manifest.txt CHANGED Viewed

@@ -11,6 +11,7 @@ lib/picolena/USAGE
 lib/picolena/config/basic.rb
 lib/picolena/config/icons_and_filetypes.yml
 lib/picolena/config/indexed_directories.yml
+lib/picolena/config/indexing_performance.yml
 lib/picolena/config/title_and_names_and_links.yml
 lib/picolena/config/white_list_ip.yml
 lib/picolena/picolena_generator.rb
@@ -42,6 +43,7 @@ lib/picolena/templates/config/initializers/003_load_white_list_IPs.rb
 lib/picolena/templates/config/initializers/004_load_plain_text_extractors.rb
 lib/picolena/templates/config/initializers/005_load_custom_title_and_names_and_links.rb
 lib/picolena/templates/config/initializers/006_load_icons.rb
+lib/picolena/templates/config/initializers/007_load_performance_tweaks.rb
 lib/picolena/templates/config/routes.rb
 lib/picolena/templates/lang/ui/de.yml
 lib/picolena/templates/lang/ui/en.yml

data/bin/picolena CHANGED Viewed

@@ -11,7 +11,7 @@ if %w(-v --version).include? ARGV.first
   exit(0)
 end
-action= ARGV.include?("--spec-only") ? "testing" : "installing"
+action= ARGV.any?{|opt| opt[0,6]=="--spec"} ? "testing" : "installing"
 require 'rubigen/scripts/generate'
 source = RubiGen::PathSource.new(:application,

data/config/files_to_clean CHANGED Viewed

@@ -5,6 +5,7 @@ lib/picolena/templates/config/custom/indexed_directories.yml
 lib/picolena/templates/config/custom/white_list_ip.yml
 lib/picolena/templates/config/custom/title_and_names_and_links.yml
 lib/picolena/templates/config/custom/icons_and_filetypes.yml
+lib/picolena/templates/config/custom/indexing_performance.yml
 lib/picolena/templates/log
 lib/picolena/templates/spec/test_dirs/indexed/others/bäñüßé.txt
 lib/picolena/templates/tmp

data/lib/picolena/config/basic.rb CHANGED Viewed

@@ -42,5 +42,9 @@ module Picolena
   # Specify the default Levenshtein distance when using FuzzyQuery
   # see http://ferret.davebalmain.com/api/classes/Ferret/QueryParser.html for more information.
   Ferret::Search::FuzzyQuery.default_min_similarity=0.6
-  Analyzer=Ferret::Analysis::StandardAnalyzer.new
-end
+  # PerFieldAnalyzer is used to prevent queries like "language:it" from being broken by StopFilter.
+  per_field_analyzer=Ferret::Analysis::PerFieldAnalyzer.new(Ferret::Analysis::StandardAnalyzer.new)
+  per_field_analyzer[:language]=Ferret::Analysis::WhiteSpaceAnalyzer.new
+  Analyzer=per_field_analyzer
+end

data/lib/picolena/config/indexing_performance.yml ADDED Viewed

@@ -0,0 +1,30 @@
+# You probably shouldn't change these parameters
+# if you don't know what they represent.
+# For more information, refer to:
+#  http://ferret.davebalmain.com/api/classes/Ferret/Index/IndexWriter.html
+## Main performance parameters
+# Allowed memory for indexing process.
+# 128MB by default, or 2^27
+max_buffer_memory: 134_217_728
+# High value => fast indexing, slow searching
+# Low  value => slow indexing, fast searching
+# 10 by default
+merge_factor: 10
+# Maximum number of extracted terms for any given document
+max_field_length: 10_000
+## Other parameters
+# 1MB by default, or 2**20
+chunk_size: 1_048_576
+max_buffered_docs: 10_000
+# NOTE: Be extra careful with this parameter: setting it to -1 (infinite)
+# multiplied indexing time by an order of magnitude.
+# max_merge_docs: -1
+use_compound_file: true
+index_skip_interval: 128
+doc_skip_interval: 16

data/lib/picolena/picolena_generator.rb CHANGED Viewed

@@ -16,10 +16,14 @@ class PicolenaGenerator < RubiGen::Base #:nodoc:
     usage if args.empty? and !options[:spec_only]
     @destination_root = options[:destination]
-    @directories_to_index=ARGV.collect{|relative_path|
-      abs_dir=Pathname.new(relative_path).realpath.to_s
-      "\"#{abs_dir}\" : \"#{abs_dir}\""
-    }.join("\n  ")
+    @directories_to_index=if options[:spec_only] then
+       "/whatever : /whatever"
+    else
+      ARGV.collect{|relative_path|
+        abs_dir=Pathname.new(relative_path).realpath.to_s
+        "\"#{abs_dir}\" : \"#{abs_dir}\""
+      }.join("\n  ")
+    end
     extract_options
   end
@@ -63,6 +67,7 @@ class PicolenaGenerator < RubiGen::Base #:nodoc:
       m.template '../config/indexed_directories.yml', 'config/custom/indexed_directories.yml', :assigns => {:directories_to_index => @directories_to_index}
       m.template '../config/title_and_names_and_links.yml', 'config/custom/title_and_names_and_links.yml', :assigns => {:version => Picolena::VERSION::STRING}
       m.file '../config/icons_and_filetypes.yml', 'config/custom/icons_and_filetypes.yml'
+      m.file '../config/indexing_performance.yml', 'config/custom/indexing_performance.yml'
       # README, License & Rakefile
       m.file 'MIT-LICENSE', 'LICENSE'

data/lib/picolena/templates/app/controllers/documents_controller.rb CHANGED Viewed

@@ -22,8 +22,9 @@ class DocumentsController < ApplicationController
   def show
     start=Time.now
       @query=[params[:id],params.delete(:format)].compact.join('.')
+      @sort=params[:sort]
       page=params[:page]||1
-      finder=Finder.new(@query,page)
+      finder=Finder.new(@query,@sort,page)
       finder.execute!
       pager=::Paginator.new(finder.total_hits, Picolena::ResultsPerPage) do
         finder.matching_documents
@@ -47,6 +48,7 @@ class DocumentsController < ApplicationController
   # Returns the content of the document identified by probably_unique_id, as it was at the time it was indexed.
   # similar to Google cache.
   def cached
+    @query=[params[:query],params.delete(:format)].compact.join('.')
   end
   private

data/lib/picolena/templates/app/helpers/documents_helper.rb CHANGED Viewed

@@ -3,15 +3,15 @@ module DocumentsHelper
   def nothing_found?
     @matching_documents.nil? or @matching_documents.entries.empty?
   end
   # Very basic pagination.
   # Provides links to Next, Prev and FirstPage when needed.
-  def should_paginate(page,query)
-    [(link_to("&larr;&larr;", :action => :show, :id => query, :page => 1) if page.number>2),
-     (link_to("&larr;", :action => :show, :id => query, :page => page.prev.number) if page.prev?),
-     (link_to("&rarr;", :action => :show, :id => query, :page => page.next.number) if page.next?)].compact.join(" | ")
+  def should_paginate(page,query, sort)
+    [(link_to("&larr;&larr;", :action => :show, :id => query, :sort=>sort) if page.number>2),
+     (link_to("&larr;", :action => :show, :id => query, :page => page.prev.number, :sort=>sort) if page.prev?),
+     (link_to("&rarr;", :action => :show, :id => query, :page => page.next.number, :sort=>sort) if page.next?)].compact.join(" | ")
   end
   # Returns a localized sentence like "Results 1-10 of 12 for Zimbabwe (0.472s)" or
   # "Résultats 1-2 parmi 2 pour whatever (0.012s)"
   def describe_results(page, total_hits, dt, query)
@@ -24,7 +24,7 @@ module DocumentsHelper
     show_time_needed(dt)
     ].join(' ')
   end
   # Returns the time needed to process the query and launch the search, with millisecond precision: (0.472s)
   def show_time_needed(dt)
     content_tag(:small,'('<<number_with_precision(dt,3)<<'s)')
@@ -71,8 +71,17 @@ module DocumentsHelper
   end
   # For any indexed document, returns a link to show its cached content.
-  def link_to_cached_content(document)
+  def link_to_cached_content(document, query)
     link_name="("<<content_tag(:small,:cached.l)<<")"
-    link_to link_name, cached_document_path(document.probably_unique_id)
+    link_to link_name, cached_document_path(:id => document.probably_unique_id, :query => query)
+  end
+  def highlighted_cache(document, query)
+    h(document.highlighted_cache(query)).gsub(/\n/,'<br/>').gsub(/&lt;&lt;(.*?)&gt;&gt;/,content_tag(:span, '\1', :class=>"matching_content"))
+  end
+  def sort_by_date_or_relevance(query)
+    [link_to_unless_current('By date', document_path(query, :sort=>'by_date')),
+     link_to_unless_current('By relevance', document_path(query))].join("&nbsp;")
   end
 end

data/lib/picolena/templates/app/models/document.rb CHANGED Viewed

@@ -11,7 +11,7 @@ class Document
   end
   #Delegating properties to File::method_name(complete_path)
-  [:dirname, :basename, :extname, :ext_as_sym, :file?, :ext_as_sym].each{|method_name|
+  [:dirname, :basename, :extname, :ext_as_sym, :file?, :size, :ext_as_sym].each{|method_name|
     define_method(method_name){File.send(method_name,complete_path)}
   }
   alias_method :filename, :basename
@@ -63,11 +63,22 @@ class Document
   def cached
     from_index[:content]
   end
+  def highlighted_cache(raw_query)
+    #TODO: Report to Ferret. Highlight should accept :key and not only :doc_id.
+    Indexer.index.highlight(Query.extract_from(raw_query), doc_id,
+                            :field => :content, :excerpt_length => :all,
+                            :pre_tag => "<<", :post_tag => ">>"
+    ).first
+  end
-  # FIXME: Not just date anymore.
   # Returns the last modification date before the document got indexed.
   # Useful to know how old a document is, and to which version the cache corresponds.
-  def date
+  def pretty_date
+    from_index[:modified].sub(/(\d{4})(\d{2})(\d{2})\d{6}/,'\1-\2-\3')
+  end
+  def pretty_mtime
     from_index[:modified].sub(/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})/,'\1-\2-\3 \4:\5:\6')
   end
@@ -93,6 +104,12 @@ class Document
   end
   private
+  # FIXME: Is there a way to easily retrieve doc_id for a given document?
+  # Better yet, fix Index#highlight to accept :probably_unique_id and stop using :doc_id.
+  def doc_id
+    Indexer.index.search(Ferret::Search::TermQuery.new(:probably_unique_id,probably_unique_id)).hits.first.doc
+  end
   # Retrieves the document from the index.
   # Useful to get meta-info about it.

data/lib/picolena/templates/app/models/finder.rb CHANGED Viewed

@@ -5,36 +5,34 @@ class Finder
     @@index ||= Indexer.index
   end
-  def initialize(raw_query,page=1,results_per_page=Picolena::ResultsPerPage)
+  def initialize(raw_query,by_date=false, page=1,results_per_page=Picolena::ResultsPerPage)
     @query = Query.extract_from(raw_query)
     @raw_query= raw_query
     Indexer.ensure_index_existence
     @per_page=results_per_page
     @offset=(page.to_i-1)*results_per_page
+    @by_date=by_date
     index_should_have_documents
   end
   def execute!
     @matching_documents=[]
     start=Time.now
-    top_docs=index.search(query, :limit => @per_page, :offset=>@offset)
-    top_docs.hits.each{|hit|
-      index_id,score=hit.doc,hit.score
-      begin
-        found_doc=Document.new(index[index_id][:complete_path])
-        found_doc.matching_content=index.highlight(query, index_id,
-                                                   :field => :content, :excerpt_length => 80,
-                                                   :pre_tag => "<<", :post_tag => ">>"
-        ) unless @raw_query=~/^\*+\.\w*$/
-        found_doc.score=score
-        @matching_documents<<found_doc
-      rescue Errno::ENOENT
-        #"File has been moved/deleted!"
-      end
+      @total_hits = index.search_each(query, :limit => @per_page, :offset=>@offset, :sort => (sort_by_date if @by_date)){|index_id, score|
+        begin
+          found_doc=Document.new(index[index_id][:complete_path])
+          found_doc.matching_content=index.highlight(query, index_id,
+                                                     :field => :content, :excerpt_length => 80,
+                                                     :pre_tag => "<<", :post_tag => ">>"
+          )
+          found_doc.score=score
+          @matching_documents<<found_doc
+        rescue Errno::ENOENT
+          #"File has been moved/deleted!"
+        end
       }
       @executed=true
-      @time_needed=Time.now-start
-      @total_hits=top_docs.total_hits
+    @time_needed=Time.now-start
   end
   # Returns true if it has been executed.
@@ -54,13 +52,15 @@ class Finder
     }
   }
   def self.reload!
     @@index = nil
   end
   private
+  def sort_by_date
+    Ferret::Search::SortField.new(:modified, :type => :byte, :reverse => true)
+  end
   def index_should_have_documents
     raise IndexError, "no document found" unless index.size > 0

data/lib/picolena/templates/app/models/indexer.rb CHANGED Viewed

@@ -10,6 +10,7 @@ class Indexer
     def index_every_directory(remove_first=false)
       @@do_not_disturb_while_indexing=true
       clear! if remove_first
+      @from_scratch = remove_first
       # Forces Finder.searcher and Finder.index to be reloaded, by removing them from the cache.
       Finder.reload!
       log :debug => "Indexing every directory"
@@ -35,13 +36,19 @@ class Indexer
       prepare_multi_threads_environment
       indexing_list_chunks.each_with_thread{|chunk|
-        chunk.each{|filename|
-          add_file(filename)
+        chunk.each{|complete_path|
+          last_itime=index_time_dbm_file[complete_path]
+          if @from_scratch || !last_itime || File.mtime(complete_path)> Time._load(last_itime) then
+            add_or_update_file(complete_path)
+          else
+            log :debug => "Identical : #{complete_path}"
+          end
+          index_time_dbm_file[complete_path] = Time.now._dump
         }
       }
     end
-    def add_file(complete_path)
+    def add_or_update_file(complete_path)
       default_fields = Document.default_fields_for(complete_path)
       begin
         document = PlainTextExtractor.extract_content_and_language_from(complete_path)
@@ -69,6 +76,19 @@ class Indexer
       # Ferret will SEGFAULT otherwise.
       @@index = nil
     end
+    # Checks for indexed files that are missing from filesystem
+    # and removes them from index & dbm file.
+    def prune_index
+      missing_files=index_time_dbm_file.reject{|filename,itime| File.exists?(filename) && Picolena::IndexedDirectories.any?{|dir,alias_path| filename.starts_with?(dir)}}
+      missing_files.each{|filename, itime|
+        index.writer.delete(:complete_path, filename)
+        index_time_dbm_file.delete(filename)
+        log :debug => "Removed : #{filename}"
+      }
+      index.optimize
+    end
     # Only one IndexWriter should be instantiated.
     # If one index already exists, returns it.
@@ -81,11 +101,17 @@ class Indexer
       index_every_directory(:remove_first) unless index_exists? or RAILS_ENV=="production"
     end
-    def doc_count
-      index.writer.doc_count
+    # Returns how many files are indexed.
+    def size
+      index.size
     end
     private
+    # Copied from Ferret book, By David Balmain
+    def index_time_dbm_file
+      @@dbm_file ||= DBM.open(File.join(Picolena::IndexSavePath, 'added_at'))
+    end
     def index_exists?
       index_filename and File.exists?(index_filename)
@@ -108,7 +134,7 @@ class Indexer
         :field_infos => default_field_infos,
         # Great way to ensure that no file is indexed twice!
         :key         => :probably_unique_id
-        }
+        }.merge Picolena::IndexingConfiguration
     end
     def default_field_infos
@@ -120,7 +146,7 @@ class Indexer
         field_infos.add_field(:filetype,           :store => :no,  :index => :yes, :boost => 1.5)
         field_infos.add_field(:modified,           :store => :yes, :index => :untokenized)
         field_infos.add_field(:probably_unique_id, :store => :no,  :index => :untokenized)
-        field_infos.add_field(:language,           :store => :yes, :index => :yes)
+        field_infos.add_field(:language,           :store => :yes, :index => :untokenized)
       end
     end
@@ -130,7 +156,8 @@ class Indexer
       # an IndexWriter at the same time, and get a
       #  Ferret::Store::Lock::LockError
       index
-      # NOTE: is it really necessary?
+      # Opens dbm file to dump indexing time.
+      index_time_dbm_file
       # ActiveSupport sometime raises
       #  Expected Object is NOT missing constant
       # without.
@@ -140,4 +167,4 @@ class Indexer
       PlainTextExtractor
     end
   end
-end
+end

data/lib/picolena/templates/app/views/documents/_document.html.haml CHANGED Viewed

@@ -7,5 +7,10 @@
 -if document.supported?
   %p
     =link_to_plain_text_content(document)
-    =link_to_cached_content(document)
-%hr/
+    &#45;
+    =number_to_human_size(document.size)
+    &#45;
+    =document.pretty_date
+    &#45;
+    =link_to_cached_content(document,query)
+%hr/

data/lib/picolena/templates/app/views/documents/cached.html.haml CHANGED Viewed

@@ -2,7 +2,7 @@
   =link_to icon_and_filename_for(@document), download_document_path(@probably_unique_id)
   (
   =:as_it_was_indexed_on.l
-  =@document.date
+  =@document.pretty_date
   )
 %p=link_to_containing_directory(@document)
-%blockquote=h(@document.cached).gsub(/\n/,'<br/>')
+%blockquote=highlighted_cache(@document, @query)

data/lib/picolena/templates/app/views/documents/show.html.haml CHANGED Viewed

@@ -7,6 +7,9 @@
         %strong=h(@query)
         =show_time_needed(@time_needed)
       -else
-        %span{:class=>'pagination'}=should_paginate(@matching_documents, @query)
+        %span{:class=>'pagination'}=should_paginate(@matching_documents, @query, @sort)
         =describe_results(@matching_documents, @total_hits, @time_needed, h(@query))
-= render :partial =>'document', :collection => @matching_documents
+-unless nothing_found?
+  %p
+    %span{:class=>'sort_by'}=sort_by_date_or_relevance(@query)
+= render :partial =>'document', :collection => @matching_documents, :locals => { :query => @query}

data/lib/picolena/templates/config/environment.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-%w(rubygems paginator fileutils pathname logger thread).each{|lib| require lib}
+%w(rubygems paginator fileutils pathname logger thread dbm).each{|lib| require lib}
 # Uncomment below to force Rails into production mode when
 # you don't control web/app server and can't set it the proper way

data/lib/picolena/templates/config/initializers/007_load_performance_tweaks.rb ADDED Viewed

@@ -0,0 +1,6 @@
+module Picolena
+  IndexingConfiguration={}
+  YAML.load_file('config/custom/indexing_performance.yml').each_pair{|param, value|
+    IndexingConfiguration[param.to_sym]= value=~/^[\d_]+$/ ? value.to_i : value
+  }
+end

data/lib/picolena/templates/lib/tasks/index.rake CHANGED Viewed

@@ -14,10 +14,15 @@ namespace :index do
   task :update => :environment do
     Indexer.index_every_directory
   end
+  desc 'Remove unneeded files from index'
+  task :prune => :environment do
+    Indexer.prune_index
+  end
   desc 'Returns the number of indexed documents'
   task :size => :environment do
-    puts "#{Indexer.doc_count} documents are currently indexed in #{Picolena::IndexSavePath}"
+    puts "#{Indexer.size} documents are currently indexed in #{Picolena::IndexSavePath}"
   end
   # Search index with query "some query" :

data/lib/picolena/templates/lib/tasks/install_dependencies.rake CHANGED Viewed

@@ -30,7 +30,7 @@ namespace :install_dependencies do
   task :deb_packages do
     root_privileges_required!
     #TODO: Should load this list from defined PlainTextExtractor's
-    packages=%w{antiword poppler-utils odt2txt html2text catdoc unrtf mguesser}.join(" ")
+    packages=%w{antiword poppler-utils odt2txt html2text catdoc unrtf mguesser libdbm-ruby1.8}.join(" ")
     puts "Installing "<<packages
     system("apt-get install "<<packages)
   end

data/lib/picolena/templates/public/stylesheets/style.css CHANGED Viewed

@@ -82,6 +82,17 @@ h1, h2, h3, h4, h5, h6, p, form {
 	text-decoration:none;
 }
+.sort_by {
+	float:right;
+	font-size: 13px;
+	color:#000;
+}
+.sort_by a{
+	color: #EE8907;
+	text-decoration:none;
+}
 #mainimg input.btn{
    margin-right: 10px;
    height: 20px;
@@ -116,7 +127,7 @@ width: 80%;
 #results {
 	width:778px;
-	padding-top: 25px;
+	padding-top: 15px;
 }
 #results h2 a{
@@ -137,6 +148,11 @@ width: 80%;
 	padding:0px 20px;
 }
+#results .matching_content{
+	background-color:#ffff66;
+}
 #results a, #results small{
 	font-family:"Trebuchet MS";
 	font-size:11px;

data/lib/picolena/templates/spec/models/basic_finder_spec.rb CHANGED Viewed

@@ -50,16 +50,16 @@ describe "Basic Finder" do
     Indexer.index_every_directory(remove_first=true)
   end
-  it "should accept one parameter as query, and 2 optionals for paginating" do
+  it "should accept one parameter as query, 1 optional for sorting results and 2 optionals for paginating" do
     lambda {Finder.new}.should raise_error(ArgumentError, "wrong number of arguments (0 for 1)")
     # show first page with 10 results per page
     lambda {Finder.new("a b")}.should_not raise_error
     # show second page
-    lambda {Finder.new("a", 2)}.should_not raise_error
+    lambda {Finder.new("a", "by_date")}.should_not raise_error
     # show first page with 15 results
-    lambda {Finder.new("a", 1, 15)}.should_not raise_error
+    lambda {Finder.new("a",  "by_date", 1, 15)}.should_not raise_error
     # Too many parameters
-    lambda {Finder.new("a", 10, 20, 30)}.should raise_error(ArgumentError, "wrong number of arguments (4 for 3)")
+    lambda {Finder.new("a",  "by_date", 10, 20, 30)}.should raise_error(ArgumentError, "wrong number of arguments (5 for 4)")
   end
   it "should return matching documents if executed successfully" do

data/lib/picolena/templates/spec/models/document_spec.rb CHANGED Viewed

@@ -5,28 +5,30 @@ basic_pdf_attribute={
   :basename=>'basic',
   :complete_path=>File.join(RAILS_ROOT, '/spec/test_dirs/indexed/basic/basic.pdf'),
   :extname=>'.pdf',
-  :filename=>'basic.pdf'
+  :ext_as_sym => :pdf,
+  :filename=>'basic.pdf',
+  :size => 9380
 }
 describe Document do
   before(:each) do
-    @valid_random_doc=Document.find(:random) rescue Document.new("spec/test_dirs/indexed/basic/basic.pdf")
+    @valid_document=Document.new("spec/test_dirs/indexed/basic/basic.pdf")
   end
   it "should be an existing file" do
     lambda {Document.new("/patapouf.txt")}.should raise_error(Errno::ENOENT)
-    lambda {@valid_random_doc}.should_not raise_error
+    lambda {@valid_document}.should_not raise_error
     lambda {Document.new("spec/test_dirs/not_indexed/Rakefile")}.should_not raise_error(Errno::ENOENT)
   end
   it "should belong to an indexed directory" do
-    lambda {@valid_random_doc}.should_not raise_error
+    lambda {@valid_document}.should_not raise_error
     lambda {Document.new("spec/test_dirs/not_indexed/Rakefile")}.should raise_error(ArgumentError, "required document is not in indexed directory")
   end
   basic_pdf_attribute.each{|attribute,expected_value|
     it "should know its #{attribute}" do
-      @valid_random_doc.should respond_to(attribute)
+      @valid_document.should respond_to(attribute)
       @basic_pdf=Document.new('spec/test_dirs/indexed/basic/basic.pdf')
       @basic_pdf.send(attribute).should == expected_value
     end
@@ -36,23 +38,70 @@ describe Document do
     another_doc=Document.new("spec/test_dirs/indexed/basic/plain.txt")
     another_doc.content.should == "just a content test\nin a txt file"
   end
+  it "should know its cached content" do
+    another_doc=Document.new("spec/test_dirs/indexed/basic/plain.txt")
+    another_doc.cached.should == "just a content test\nin a txt file"
+  end
+  it "should know its highlighted cached content for a given query" do
+    another_doc=Document.new("spec/test_dirs/indexed/basic/plain.txt")
+    another_doc.highlighted_cache('a content test').should == "just a <<content>> <<test>>\nin a txt file"
+  end
   it "should know its alias_path" do
-    @valid_random_doc.should respond_to(:alias_path)
-    @valid_random_doc.alias_path.starts_with?("http://picolena.devjavu.com/browser/trunk/lib/picolena/templates/spec/test_dirs/indexed").should be_true
+    @valid_document.should respond_to(:alias_path)
+    @valid_document.alias_path.starts_with?("http://picolena.devjavu.com/browser/trunk/lib/picolena/templates/spec/test_dirs/indexed").should be_true
+  end
+  it "should know its probably_unique_id" do
+    @valid_document.should respond_to(:probably_unique_id)
+    @valid_document.probably_unique_id.should =~/^[a-z]+$/
+    @valid_document.probably_unique_id.size.should == Picolena::HashLength
   end
+  it "should know its modification date" do
+    @valid_document.pretty_date.class.should == String
+    @valid_document.pretty_date.should =~/^\d{4}\-\d{2}\-\d{2}$/
+  end
+  it "should know its modification time and returns it in a pretty way" do
+    @valid_document.should respond_to(:mtime)
+    @valid_document.mtime.should be_kind_of(Integer)
+    @valid_document.should respond_to(:pretty_mtime)
+    @valid_document.pretty_mtime.class.should == String
+    @valid_document.pretty_mtime.should =~/^\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}$/
+  end
+  it "should know if its content can be extracted" do
+    @valid_document.should respond_to(:supported?)
+    @valid_document.should be_supported
+    Document.new("spec/test_dirs/indexed/others/ghjopdfg.xyz").should_not be_supported
+  end
+  it "should know its language when enough content is available" do
+    Document.new("spec/test_dirs/indexed/lang/goethe").language.should == "de"
+    Document.new("spec/test_dirs/indexed/lang/shakespeare").language.should == "en"
+    Document.new("spec/test_dirs/indexed/lang/lorca").language.should == "es"
+    Document.new("spec/test_dirs/indexed/lang/hugo").language.should == "fr"
+  end if Picolena::UseLanguageRecognition
+  it "should not try to guess language when file is too small" do
+    Document.new("spec/test_dirs/indexed/basic/hello.rb").language.should be_nil
+    Document.new("spec/test_dirs/indexed/README").language.should be_nil
+  end if Picolena::UseLanguageRecognition
   it "should let finder specify its score" do
-    @valid_random_doc.should respond_to(:score)
-    @valid_random_doc.score.should be_nil
-    @valid_random_doc.score=25
-    @valid_random_doc.score.should == 25
+    @valid_document.should respond_to(:score)
+    @valid_document.score.should be_nil
+    @valid_document.score=25
+    @valid_document.score.should == 25
   end
   it "should let finder specify its matching content" do
-    @valid_random_doc.should respond_to(:matching_content)
-    @valid_random_doc.matching_content.should be_nil
-    @valid_random_doc.matching_content=["thermal cooling", "heat driven cooling"]
-    @valid_random_doc.matching_content.should include("thermal cooling")
+    @valid_document.should respond_to(:matching_content)
+    @valid_document.matching_content.should be_nil
+    @valid_document.matching_content=["thermal cooling", "heat driven cooling"]
+    @valid_document.matching_content.should include("thermal cooling")
   end
-end
+end

data/lib/picolena/templates/spec/models/finder_spec.rb CHANGED Viewed

@@ -8,9 +8,9 @@ end
 def matching_document_for(query)
-   # Returns matching document for any given query only if
-   # exactly one document is found.
-   # Specs don't pass otherwise.
+  # Returns matching document for any given query only if
+  # exactly one document is found.
+  # Specs don't pass otherwise.
   matching_documents=Finder.new(query).matching_documents
   matching_documents.size.should == 1
   matching_documents.first
@@ -19,6 +19,7 @@ end
 describe Finder do
   before(:all) do
+    Globalite.language = :en
     # SVN doesn't like non-ascii filenames.
     revert_changes!('spec/test_dirs/indexed/others/bäñüßé.txt',"just to know if files are indexed with utf8 filenames")

data/lib/picolena/templates/spec/models/host_indexing_system_spec.rb CHANGED Viewed

@@ -13,10 +13,18 @@ describe "Host indexing system" do
   it "should know which IP addresses are allowed (config/custom/white_list_ip.yml)" do
     File.should be_readable('config/custom/white_list_ip.yml')
+    ip_conf=YAML.load_file('config/custom/white_list_ip.yml')
+    ip_conf.class.should == Hash
+    ip_conf['Allow'].should_not be_nil
   end
   it "should know which directories are to be indexed (config/custom/indexed_directories.yml)" do
     File.should be_readable('config/custom/indexed_directories.yml')
+    dirs_conf=YAML.load_file('config/custom/indexed_directories.yml')
+    dirs_conf.class.should == Hash
+    %w(development test production).all?{|env|
+      dirs_conf[env].should_not be_nil
+    }
   end
   it "should be able to calculate base26 hash from strings" do

data/lib/picolena/templates/spec/models/indexer_spec.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 require File.dirname(__FILE__) + '/../spec_helper'
 describe Indexer do
-  before(:each) do
-    @indexer = Indexer.new
+  it "should have at least 32MB memory allocated" do
+    Indexer.index.writer.max_buffer_memory.should > 2**25-1
   end
 end

data/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb CHANGED Viewed

@@ -27,16 +27,4 @@ describe "PlainTextExtractors" do
       end
     }
   }
-  it "should guess language when enough content is available" do
-    Document.new("spec/test_dirs/indexed/lang/goethe").language.should == "de"
-    Document.new("spec/test_dirs/indexed/lang/shakespeare").language.should == "en"
-    Document.new("spec/test_dirs/indexed/lang/lorca").language.should == "es"
-    Document.new("spec/test_dirs/indexed/lang/hugo").language.should == "fr"
-  end if Picolena::UseLanguageRecognition
-  it "should not try to guess language when file is too small" do
-    Document.new("spec/test_dirs/indexed/basic/hello.rb").language.should be_nil
-    Document.new("spec/test_dirs/indexed/README").language.should be_nil
-  end if Picolena::UseLanguageRecognition
 end

data/lib/picolena/templates/spec/models/query_spec.rb CHANGED Viewed

@@ -1,8 +1,16 @@
 require File.dirname(__FILE__) + '/../spec_helper'
 describe Query do
-  it "should return a BooleanQuery" do
+  it "should return a BooleanQuery, a TermQuery or a RangeQuery" do
     Query.extract_from("whatever").class.should == Ferret::Search::BooleanQuery
+    Query.extract_from("lang:de").class.should  == Ferret::Search::TermQuery
+    Query.extract_from("date:<1990").class.should  == Ferret::Search::RangeQuery
+  end
+  it "should not remove stop-words from TermQuery" do
+    # it means "Italian language", but also is a stop-word.
+    Query.extract_from("lang:it").class.should  == Ferret::Search::TermQuery
+    Query.extract_from("lang:it").to_s.should   == "language:it"
   end
   it "should translate LIKE, NOT, OR and AND boolean ops to English" do
@@ -12,6 +20,7 @@ describe Query do
       :fr=>["COMME","NON","OU","ET"]
     }
+    Globalite.language = :en
     english_query_with_like_and_not=Query.extract_from("LIKE something NOT something")
     english_query_with_or=Query.extract_from("test OR another")
     english_query_with_and=Query.extract_from("test AND another")

data/lib/picolena/version.rb CHANGED Viewed

@@ -2,7 +2,7 @@ module Picolena #:nodoc:
   module VERSION #:nodoc:
     MAJOR = 0
     MINOR = 1
-    TINY  = 6
+    TINY  = 7
     STRING = [MAJOR, MINOR, TINY].join('.')
   end

data/website/index.html CHANGED Viewed

@@ -33,7 +33,7 @@
     <h1>Picolena</h1>
     <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/picolena"; return false'>
       <p>Get Version</p>
-      <a href="http://rubyforge.org/projects/picolena" class="numbers">0.1.6</a>
+      <a href="http://rubyforge.org/projects/picolena" class="numbers">0.1.7</a>
     </div>
     <h1>&#x2192; &#8216;picolena&#8217;</h1>

data/website/index.txt CHANGED Viewed

File without changes

data/website/index_devjavu CHANGED Viewed

File without changes

data/website/javascripts/rounded_corners_lite.inc.js CHANGED Viewed

File without changes

data/website/stylesheets/screen.css CHANGED Viewed

File without changes

data.tar.gz.sig CHANGED Viewed

Binary file

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: picolena
 version: !ruby/object:Gem::Version
-  version: 0.1.6
+  version: 0.1.7
 platform: ruby
 authors:
 - Eric Duminil
@@ -30,7 +30,7 @@ cert_chain:
   qvI9FgPZ1QTG5uZAlBbk6d6JU2XfpA==
   -----END CERTIFICATE-----
-date: 2008-04-25 00:00:00 +02:00
+date: 2008-04-30 00:00:00 +02:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -146,6 +146,7 @@ files:
 - lib/picolena/config/basic.rb
 - lib/picolena/config/icons_and_filetypes.yml
 - lib/picolena/config/indexed_directories.yml
+- lib/picolena/config/indexing_performance.yml
 - lib/picolena/config/title_and_names_and_links.yml
 - lib/picolena/config/white_list_ip.yml
 - lib/picolena/picolena_generator.rb
@@ -177,6 +178,7 @@ files:
 - lib/picolena/templates/config/initializers/004_load_plain_text_extractors.rb
 - lib/picolena/templates/config/initializers/005_load_custom_title_and_names_and_links.rb
 - lib/picolena/templates/config/initializers/006_load_icons.rb
+- lib/picolena/templates/config/initializers/007_load_performance_tweaks.rb
 - lib/picolena/templates/config/routes.rb
 - lib/picolena/templates/lang/ui/de.yml
 - lib/picolena/templates/lang/ui/en.yml

metadata.gz.sig CHANGED Viewed

Binary file