RubyGems - picky - Versions diffs - 0.11.2 → 0.12.0 - Mend

picky 0.11.2 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81)

data/lib/picky/Index_api.rb +49 -0
data/lib/picky/alias_instances.rb +4 -1
data/lib/picky/application.rb +16 -15
data/lib/picky/cacher/partial/{subtoken.rb → substring.rb} +19 -18
data/lib/picky/{character_substitution/european.rb → character_substituters/west_european.rb} +2 -2
data/lib/picky/configuration/index.rb +67 -0
data/lib/picky/cores.rb +3 -0
data/lib/picky/index/bundle.rb +35 -51
data/lib/picky/index/file/basic.rb +39 -5
data/lib/picky/index/file/json.rb +10 -0
data/lib/picky/index/file/marshal.rb +10 -0
data/lib/picky/index/file/text.rb +22 -0
data/lib/picky/index/files.rb +11 -36
data/lib/picky/indexed/bundle.rb +61 -0
data/lib/picky/{index → indexed}/categories.rb +1 -1
data/lib/picky/{index → indexed}/category.rb +13 -16
data/lib/picky/{index/type.rb → indexed/index.rb} +6 -6
data/lib/picky/{index/types.rb → indexed/indexes.rb} +10 -10
data/lib/picky/{index → indexed}/wrappers/exact_first.rb +8 -8
data/lib/picky/indexers/no_source_specified_error.rb +1 -1
data/lib/picky/indexers/serial.rb +64 -0
data/lib/picky/indexers/solr.rb +1 -3
data/lib/picky/indexes_api.rb +41 -0
data/lib/picky/indexing/bundle.rb +43 -13
data/lib/picky/indexing/category.rb +17 -64
data/lib/picky/indexing/{type.rb → index.rb} +13 -3
data/lib/picky/indexing/{types.rb → indexes.rb} +22 -22
data/lib/picky/loader.rb +17 -22
data/lib/picky/query/base.rb +1 -1
data/lib/picky/rack/harakiri.rb +9 -2
data/lib/picky/signals.rb +1 -1
data/lib/picky/sources/base.rb +14 -14
data/lib/picky/sources/couch.rb +8 -7
data/lib/picky/sources/csv.rb +10 -10
data/lib/picky/sources/db.rb +8 -8
data/lib/picky/sources/delicious.rb +2 -2
data/lib/picky/sources/wrappers/location.rb +3 -3
data/lib/picky/tokenizers/base.rb +1 -11
data/lib/picky/tokenizers/index.rb +0 -1
data/lib/picky/tokenizers/query.rb +0 -1
data/lib/tasks/index.rake +4 -4
data/lib/tasks/shortcuts.rake +4 -4
data/lib/tasks/try.rake +8 -8
data/project_prototype/Gemfile +1 -1
data/project_prototype/app/application.rb +13 -12
data/spec/lib/application_spec.rb +10 -38
data/spec/lib/cacher/partial/{subtoken_spec.rb → substring_spec.rb} +0 -0
data/spec/lib/{character_substitution/european_spec.rb → character_substituters/west_european_spec.rb} +6 -2
data/spec/lib/configuration/index_spec.rb +80 -0
data/spec/lib/cores_spec.rb +1 -1
data/spec/lib/index/file/text_spec.rb +1 -1
data/spec/lib/index/files_spec.rb +12 -32
data/spec/lib/indexed/bundle_spec.rb +119 -0
data/spec/lib/{indexing → indexed}/categories_spec.rb +13 -14
data/spec/lib/{index → indexed}/category_spec.rb +6 -6
data/spec/lib/{index/type_spec.rb → indexed/index_spec.rb} +3 -3
data/spec/lib/{index → indexed}/wrappers/exact_first_spec.rb +5 -5
data/spec/lib/indexers/serial_spec.rb +62 -0
data/spec/lib/indexing/bundle_partial_generation_speed_spec.rb +7 -5
data/spec/lib/indexing/bundle_spec.rb +9 -14
data/spec/lib/indexing/category_spec.rb +9 -125
data/spec/lib/indexing/{type_spec.rb → index_spec.rb} +3 -3
data/spec/lib/query/base_spec.rb +1 -1
data/spec/lib/query/full_spec.rb +1 -1
data/spec/lib/query/live_spec.rb +2 -4
data/spec/lib/sources/couch_spec.rb +5 -5
data/spec/lib/sources/db_spec.rb +6 -7
data/spec/lib/tokenizers/base_spec.rb +1 -24
data/spec/lib/tokenizers/query_spec.rb +0 -1
metadata +38 -41
data/lib/picky/bundle.rb +0 -33
data/lib/picky/configuration/indexes.rb +0 -51
data/lib/picky/configuration/queries.rb +0 -15
data/lib/picky/indexers/base.rb +0 -85
data/lib/picky/indexers/default.rb +0 -3
data/lib/picky/type.rb +0 -46
data/lib/picky/types.rb +0 -41
data/lib/tasks/cache.rake +0 -46
data/spec/lib/configuration/indexes_spec.rb +0 -28
data/spec/lib/index/bundle_spec.rb +0 -151
data/spec/lib/indexers/base_spec.rb +0 -89

data/lib/picky/configuration/queries.rb DELETED Viewed

@@ -1,15 +0,0 @@
-module Configuration
-  #
-  #
-  class Queries
-    #
-    #
-    def default_tokenizer options = {}
-      Tokenizers::Query.default = Tokenizers::Query.new(options)
-    end
-  end
-end

data/lib/picky/indexers/base.rb DELETED Viewed

@@ -1,85 +0,0 @@
-# encoding: utf-8
-module Indexers
-  # Indexer.
-  #
-  # 1. Gets data from the original table and copies it into a "snapshot table".
-  # 3. Processes the data. I.e. takes the snapshot table data words and tokenizes etc. them. Writes the result into a txt file.
-  #
-  class Base
-    def initialize type, category
-      @type       = type
-      @category   = category
-    end
-    # Convenience method for getting the right Tokenizer.
-    #
-    def tokenizer
-      @category.tokenizer
-    end
-    # Convenience methods for user subclasses.
-    #
-    # TODO Duplicate code in Index::Files.
-    #
-    # TODO Rename to prepared_index_file_name.
-    #
-    def search_index_file_name
-      @category.search_index_file_name
-    end
-    # Executes the specific strategy.
-    #
-    def index
-      process
-    end
-    # Get the source where the data is taken from.
-    #
-    def source
-      @category.source || raise_no_source
-    end
-    def raise_no_source
-      raise NoSourceSpecifiedException.new("No source given for index:#{@type.name}, category:#{@category.name}.") # TODO field.identifier
-    end
-    # Selects the original id (indexed id) and a column to process. The column data is called "token".
-    #
-    # Note: Puts together the parts first in an array, then releasing the array from time to time by joining.
-    #
-    def process
-      comma   = ?,
-      newline = ?\n
-      indexing_message
-      # TODO Move open to Index::File.
-      #
-      # @category.prepared_index do |file|
-      #   source.harvest(@type, @category) do |indexed_id, text|
-      #     tokenizer.tokenize(text).each do |token_text|
-      #       next unless token_text
-      #       file.buffer indexed_id << comma << token_text << newline
-      #     end
-      #     file.write_maybe
-      #   end
-      # end
-      #
-      File.open(search_index_file_name, 'w:binary') do |file|
-        result = []
-        source.harvest(@type, @category) do |indexed_id, text|
-          tokenizer.tokenize(text).each do |token_text|
-            next unless token_text
-            result << indexed_id << comma << token_text << newline
-          end
-          file.write(result.join) && result.clear if result.size > 100_000
-        end
-        file.write result.join
-      end
-    end
-    def indexing_message
-      timed_exclaim "INDEX #{@type.name} #{@category.name}" #:#{@category.indexed_as}." # TODO field.identifier
-    end
-  end
-end

data/lib/picky/indexers/default.rb DELETED Viewed

@@ -1,3 +0,0 @@
-module Indexers
-  Default = Base
-end

data/lib/picky/type.rb DELETED Viewed

@@ -1,46 +0,0 @@
-# This class defines the indexing and index API.
-#
-# Note: A Type holds both an Index::Type and an Indexing::Type.
-#
-class Type
-  # TODO Delegation.
-  #
-  attr_reader :name, :indexing, :index
-  def initialize name, source, options = {}
-    @name     = name
-    @indexing = Indexing::Type.new name, source, options
-    @index    = Index::Type.new    name, options
-    # Centralized registry.
-    #
-    ::Indexes.register self
-  end
-  # API.
-  #
-  # TODO Spec! Doc!
-  #
-  def category name, options = {}
-    name = name.to_sym
-    indexing.add_category name, options
-    index.add_category    name, options
-    self
-  end
-  # def location name, options = {}
-  #   grid      = options.delete :grid
-  #   precision = options.delete :precision
-  #
-  #   options[:index_tokenizer] ||= Tokenizers::Index.new # TODO Or a specific location tokenizer.
-  #   options[:query_tokenizer] ||= Tokenizers::Query.new # TODO Or a specific location tokenizer.
-  #   options[:source_wrapper]  ||= Sources::Wrappers::Location.new(options)
-  #
-  #   new_category = category name, options
-  #   :source => Sources::Wrappers::Location.new(source, grid:2), :tokenizer => Tokenizers::Index.new
-  # end
-end

data/lib/picky/types.rb DELETED Viewed

@@ -1,41 +0,0 @@
-# Comfortable API convenience class, splits methods to indexes.
-#
-class Types
-  attr_reader :types, :type_mapping
-  delegate :reload,
-           :load_from_cache,
-           :to => :@indexes
-  delegate :check_caches,
-           :find,
-           :generate_cache_only,
-           :generate_index_only,
-           :index,
-           :index_for_tests,
-           :to => :@indexings
-  def initialize
-    @types = []
-    @type_mapping = {}
-    @indexes   = Index::Types.new
-    @indexings = Indexing::Types.new
-  end
-  def register type
-    self.types << type
-    self.type_mapping[type.name] = type
-    @indexings.register type.indexing
-    @indexes.register   type.index # TODO Even necessary?
-  end
-  def [] name
-    name = name.to_sym
-    self.type_mapping[name]
-  end
-end

data/lib/tasks/cache.rake DELETED Viewed

@@ -1,46 +0,0 @@
-namespace :cache do
-  # Move to index namespace.
-  #
-  # desc "Generates the index cache files."
-  # task :generate => :application do
-  #   Indexes.generate_caches
-  #   puts "Caches generated."
-  # end
-  # desc "Generates a specific index cache file like field=books:title. Note: Index tables need to be there. Will generate just the cache."
-  # task :only => :application do
-  #   type_and_field = ENV['FIELD'] || ENV['field']
-  #   type, field = type_and_field.split ':'
-  #   Indexes.generate_cache_only type.to_sym, field.to_sym
-  # end
-  # desc 'Checks the index cache files'
-  # task :check => :application do
-  #   Indexes.check_caches
-  #   puts "All caches look ok."
-  # end
-  # desc "Removes the index cache files."
-  # task :clear => :application do
-  #   Indexes.clear_caches
-  #   puts "All index cache files removed."
-  # end
-  # desc 'Backup the index cache files'
-  # task :backup => :application do
-  #   Indexes.backup_caches
-  #   puts "Index cache files moved to the backup directory"
-  # end
-  # desc 'Restore the index cache files'
-  # task :restore => :application do
-  #   Indexes.restore_caches
-  #   puts "Index cache files restored from the backup directory"
-  # end
-end

data/spec/lib/configuration/indexes_spec.rb DELETED Viewed

@@ -1,28 +0,0 @@
-# encoding: utf-8
-require 'spec_helper'
-describe Configuration::Indexes do
-  before(:each) do
-    @config = Configuration::Indexes.new
-  end
-  describe "types" do
-    it "exists" do
-      lambda { @config.types }.should_not raise_error
-    end
-    it "is initially empty" do
-      @config.types.should be_empty
-    end
-  end
-  describe "default_tokenizer" do
-    it "is a default tokenizer" do
-      @config.default_tokenizer.should be_kind_of(Tokenizers::Index)
-    end
-    it "does not cache" do
-      @config.default_tokenizer.should_not == @config.default_tokenizer
-    end
-  end
-end

data/spec/lib/index/bundle_spec.rb DELETED Viewed

@@ -1,151 +0,0 @@
-require 'spec_helper'
-describe Index::Bundle do
-  before(:each) do
-    @category    = stub :category, :name => :some_category
-    @type        = stub :type, :name => :some_type
-    @similarity  = stub :similarity
-    @index_class = Index::Bundle
-    @index       = @index_class.new :some_name, @category, @type, @similarity
-  end
-  describe 'identifier' do
-    it 'should return a specific identifier' do
-      @index.identifier.should == 'some_type: some_name some_category'
-    end
-  end
-  describe 'initialize_index_for' do
-    context 'token not yet assigned' do
-      before(:each) do
-        @index.stub! :index => {}
-      end
-      it 'should assign it an empty array' do
-        @index.initialize_index_for :some_token
-        @index.index[:some_token].should == []
-      end
-    end
-    context 'token already assigned' do
-      before(:each) do
-        @index.stub! :index => { :some_token => :already_assigned }
-      end
-      it 'should not assign it anymore' do
-        @index.initialize_index_for :some_token
-        @index.index[:some_token].should == :already_assigned
-      end
-    end
-  end
-  # TODO
-  #
-  # describe 'retrieve' do
-  #   it 'should call the other methods correctly' do
-  #     results = stub :results
-  #     @index.stub! :execute_query => results
-  #     @index.should_receive(:extract).once.with results
-  #
-  #     @index.retrieve
-  #   end
-  # end
-  describe 'load_from_index_file' do
-    it 'should call two methods in order' do
-      @index.should_receive(:load_from_index_generation_message).once.ordered
-      @index.should_receive(:clear).once.ordered
-      @index.should_receive(:retrieve).once.ordered
-      @index.load_from_index_file
-    end
-  end
-  describe 'ids' do
-    before(:each) do
-      @index.instance_variable_set :@index, { :existing => :some_ids }
-    end
-    it 'should return an empty array if not found' do
-      @index.ids(:non_existing).should == []
-    end
-    it 'should return the ids if found' do
-      @index.ids(:existing).should == :some_ids
-    end
-  end
-  describe 'weight' do
-    before(:each) do
-      @index.instance_variable_set :@weights, { :existing => :specific }
-    end
-    it 'should return nil' do
-      @index.weight(:non_existing).should == nil
-    end
-    it 'should return the weight for the text' do
-      @index.weight(:existing).should == :specific
-    end
-  end
-  describe 'load' do
-    it 'should trigger loads' do
-      @index.should_receive(:load_index).once.with
-      @index.should_receive(:load_similarity).once.with
-      @index.should_receive(:load_weights).once.with
-      @index.load
-    end
-  end
-  describe "loading indexes" do
-    before(:each) do
-      @index.stub! :timed_exclaim
-    end
-    describe "load_index" do
-      it "uses the right file" do
-        Yajl::Parser.stub! :parse
-        File.should_receive(:open).once.with 'some/search/root/index/test/some_type/some_name_some_category_index.json', 'r'
-        @index.load_index
-      end
-    end
-    describe "load_similarity" do
-      it "uses the right file" do
-        Marshal.stub! :load
-        File.should_receive(:open).once.with 'some/search/root/index/test/some_type/some_name_some_category_similarity.dump', 'r:binary'
-        @index.load_similarity
-      end
-    end
-    describe "load_weights" do
-      it "uses the right file" do
-        Yajl::Parser.stub! :parse
-        File.should_receive(:open).once.with 'some/search/root/index/test/some_type/some_name_some_category_weights.json', 'r'
-        @index.load_weights
-      end
-    end
-  end
-  describe 'initialization' do
-    before(:each) do
-      @category = stub :category, :name => :some_category
-      @type     = stub :type, :name => :some_type
-      @index = @index_class.new :some_name, @category, @type, :similarity
-    end
-    it 'should initialize the index correctly' do
-      @index.index.should == {}
-    end
-    it 'should initialize the weights index correctly' do
-      @index.weights.should == {}
-    end
-    it 'should initialize the similarity index correctly' do
-      @index.similarity.should == {}
-    end
-    it 'should initialize the similarity strategy correctly' do
-      @index.similarity_strategy.should == :similarity
-    end
-  end
-end

data/spec/lib/indexers/base_spec.rb DELETED Viewed

@@ -1,89 +0,0 @@
-require 'spec_helper'
-describe Indexers::Base do
-  before(:each) do
-    @type  = stub :type,
-                  :name => :some_type,
-                  :snapshot_table_name => :some_prepared_table_name
-    @field = stub :field,
-                  :name => :some_field_name,
-                  :search_index_file_name => :some_search_index_name,
-                  :indexed_name => :some_indexed_field_name
-    @indexer = Indexers::Base.new @type, @field
-    @indexer.stub! :timed_exclaim
-  end
-  describe "tokenizer" do
-    it "delegates to the field" do
-      @field.should_receive(:tokenizer).once.with
-      @indexer.tokenizer
-    end
-  end
-  describe "indexing_message" do
-    it "informs the user about what it is going to index" do
-      @indexer.should_receive(:timed_exclaim).once.with 'INDEX some_type some_field_name'
-      @indexer.indexing_message
-    end
-  end
-  describe "tokenizer" do
-    it "should delegate to field" do
-      @indexer.should_receive(:tokenizer).once.with
-      @indexer.tokenizer
-    end
-  end
-  describe 'search_index_file_name' do
-    it 'should return a specific name' do
-      @indexer.search_index_file_name.should == :some_search_index_name
-    end
-  end
-  describe "index" do
-    it "should execute! the indexer" do
-      @indexer.should_receive(:process).once.with
-      @indexer.index
-    end
-  end
-  describe "source" do
-    before(:each) do
-      @source = stub :source
-    end
-    context "field has one" do
-      before(:each) do
-        @field.stub! :source => @source
-      end
-      it "should return that one" do
-        @indexer.source.should == @source
-      end
-    end
-    context "field doesn't have one" do
-      before(:each) do
-        @field.stub! :source => nil
-      end
-      it "should call raise_no_source" do
-        @indexer.should_receive(:raise_no_source).once.with
-        @indexer.source
-      end
-    end
-  end
-  describe "raise_no_source" do
-    it "should raise" do
-      lambda { @indexer.raise_no_source }.should raise_error(Indexers::NoSourceSpecifiedException)
-    end
-  end
-  describe "chunked" do
-  end
-end