RubyGems - picky - Versions diffs - 2.4.0 → 2.4.1 - Mend

picky 2.4.0 → 2.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

data/lib/picky/application.rb +2 -2
data/lib/picky/index/base.rb +83 -7
data/lib/picky/index/memory.rb +2 -2
data/lib/picky/index/redis.rb +2 -2
data/lib/picky/internals/indexers/base.rb +1 -3
data/lib/picky/internals/indexers/parallel.rb +2 -2
data/lib/picky/internals/indexers/serial.rb +2 -2
data/lib/picky/internals/indexers/solr.rb +1 -1
data/lib/picky/internals/indexing/category.rb +2 -1
data/lib/picky/loader.rb +1 -0
data/lib/picky/sources/mongo.rb +75 -0
data/spec/lib/internals/index/files_spec.rb +1 -1
data/spec/lib/internals/index/redis_spec.rb +1 -1
data/spec/lib/internals/indexing/bundle/memory_partial_generation_speed_spec.rb +1 -1
data/spec/lib/internals/indexing/bundle/memory_spec.rb +1 -1
data/spec/lib/internals/indexing/bundle/redis_spec.rb +1 -1
data/spec/lib/internals/indexing/category_spec.rb +1 -1
data/spec/lib/sources/mongo_spec.rb +50 -0
metadata +7 -4

data/lib/picky/application.rb CHANGED Viewed

@@ -15,7 +15,7 @@
 #
 # == Index::Memory.new(name)
 #
-# Next, define where your data comes from. You use the <tt>Index::Memory.new</tt> method for that:
+# Next, define where your data comes from, creating an <tt>Index</tt>. You use the <tt>Index::Memory.new</tt> method for that:
 #   my_index = Index::Memory.new :some_index_name
 # You give the index a name (or identifier), and a source (see Sources), where its data comes from. Let's do that:
 #   class MyGreatSearch < Application
@@ -82,7 +82,7 @@
 #   $ rake index
 #   $ rake start
 # Run your first query:
-#   $ curl 'localhost:8080/books/full?query=hello server'
+#   $ curl 'localhost:8080/books?query=hello server'
 #
 # Nice, right? Your first query!
 #

data/lib/picky/index/base.rb CHANGED Viewed

@@ -1,3 +1,79 @@
+# = Picky Indexes
+#
+# A Picky Index defines
+# * where its data comes from (a data source).
+# * how this data is indexed.
+# * a number of categories that may or may not map directly to data categories.
+#
+# == Howto
+#
+# This is a step-by-step description on how to create an index.
+#
+# Start by choosing an <tt>Index::Memory</tt> or an <tt>Index::Redis</tt>.
+# In the example, we will be using an in-memory index, <tt>Index::Memory</tt>.
+#
+#   books = Index::Memory.new(:books)
+#
+# That in itself won't do much good, that's why we add a data source:
+#
+#   books = Index::Memory.new(:books) do
+#     source Sources::CSV.new(:title, :author, file: 'data/books.csv')
+#   end
+#
+# In the example, we use an explicit <tt>Sources::CSV</tt> of Picky.
+# However, anything that responds to <tt>#each</tt>, and returns an object that
+# answers to <tt>#id</tt>, works.
+#
+# For example, a 3.0 ActiveRecord class:
+#
+#   books = Index::Memory.new(:books) do
+#     source Book.order('isbn ASC')
+#   end
+#
+# Now we know where the data comes from, but not how to categorize it.
+#
+# Let's add a few categories:
+#
+#   books = Index::Memory.new(:books) do
+#     source   Book.order('isbn ASC')
+#     category :title
+#     category :author
+#     category :isbn
+#   end
+#
+# Categories offer quite a few options, see <tt>Index::Base#category</tt> for details.
+#
+# After adding more options, it might look like this:
+#
+#   books = Index::Memory.new(:books) do
+#     source   Book.order('isbn ASC')
+#     category :title,
+#              partial: Partial::Substring.new(from: 1),
+#              similarity: Similarity::DoubleMetaphone.new(3),
+#              qualifiers: [:t, :title, :titulo]
+#     category :author,
+#              similarity: Similarity::Metaphone.new(2)
+#     category :isbn,
+#              partial: Partial::None.new,
+#              from: :legacy_isbn_name
+#   end
+#
+# For this to work, a <tt>Book</tt> should support methods <tt>#title</tt>, <tt>#author</tt> and <tt>#legacy_isbn_name</tt>.
+#
+# If it uses <tt>String</tt> ids, use <tt>#key_format</tt> to define a formatting method:
+#
+#   books = Index::Memory.new(:books) do
+#     key_format :to_s
+#     source     Book.order('isbn ASC')
+#     category   :title
+#     category   :author
+#     category   :isbn
+#   end
+#
+# Finally, use the index for a <tt>Search</tt>:
+#
+#   route %r{^/media$} => Search.new(books, dvds, mp3s)
+#
 module Index
   # This class defines the indexing and index API that is exposed to the user
@@ -5,7 +81,7 @@ module Index
   #
   # It provides a single front for both indexing and index options. We suggest always using the index API.
   #
-  # Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Type*.
+  # Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
   #
   class Base
@@ -52,16 +128,16 @@ module Index
       check_source internal_indexing.source
     end
-    def internal_indexing
+    def internal_indexing # :nodoc:
       @indexing
     end
-    def internal_indexed
+    def internal_indexed # :nodoc:
       @indexed
     end
     #
     # Since this is an API, we fail hard quickly.
     #
-    def check_name name
+    def check_name name # :nodoc:
       raise ArgumentError.new(<<-NAME
@@ -74,7 +150,7 @@ NAME
 ) unless name.respond_to?(:to_sym)
     end
-    def check_options options
+    def check_options options # :nodoc:
       raise ArgumentError.new(<<-OPTIONS
@@ -96,7 +172,7 @@ All the best
 OPTIONS
 ) unless options.respond_to?(:[])
     end
-    def check_source source
+    def check_source source # :nodoc:
       raise ArgumentError.new(<<-SOURCE
@@ -110,7 +186,7 @@ SOURCE
 ) unless source.respond_to?(:each) || source.respond_to?(:harvest)
     end
-    def to_stats
+    def to_stats # :nodoc:
       stats = <<-INDEX
 #{name} (#{self.class}):
   #{"source:            #{internal_indexing.source}".indented_to_s}

data/lib/picky/index/memory.rb CHANGED Viewed

@@ -17,10 +17,10 @@ module Index
     # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
     #
     def initialize name, options = {}
-      super name, options
       options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Memory
       options[:indexed_bundle_class]  ||= Internals::Indexed::Bundle::Memory
+      super name, options
     end
   end

data/lib/picky/index/redis.rb CHANGED Viewed

@@ -17,10 +17,10 @@ module Index
     # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
     #
     def initialize name, options = {}
-      super name, options
       options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Redis
       options[:indexed_bundle_class]  ||= Internals::Indexed::Bundle::Redis
+      super name, options
     end
   end

data/lib/picky/internals/indexers/base.rb CHANGED Viewed

@@ -6,9 +6,7 @@ module Indexers
   #
   class Base
-    # Selects the original id (indexed id) and a column to process. The column data is called "token".
-    #
-    # Note: Puts together the parts first in an array, then releasing the array from time to time by joining.
+    # Starts the indexing process.
     #
     def index
       indexing_message

data/lib/picky/internals/indexers/parallel.rb CHANGED Viewed

@@ -48,14 +48,14 @@ module Indexers
       flush combined
       combined.each { |_, _, file, _| file.close }
     end
-    def flush combined
+    def flush combined # :nodoc:
       combined.each do |_, cache, file, _|
         file.write(cache.join) && cache.clear
       end
     end
     #
     #
-    def indexing_message
+    def indexing_message # :nodoc:
       timed_exclaim %Q{"#{@index.name}": Starting parallel indexing.}
     end

data/lib/picky/internals/indexers/serial.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module Indexers
   # Uses a category to index its data.
   #
-  # Note: It is called serial since it indexes each
+  # Note: It is called serial since it indexes each category separately.
   #
   class Serial < Base
@@ -44,7 +44,7 @@ module Indexers
     end
     #
     #
-    def indexing_message
+    def indexing_message # :nodoc:
       timed_exclaim %Q{"#{@category.identifier}": Starting serial indexing.}
     end

data/lib/picky/internals/indexers/solr.rb CHANGED Viewed

@@ -6,7 +6,7 @@ module Indexers
   # Deprecated. Only here as an example.
   #
-  class Solr
+  class Solr # :nodoc:
     attr_reader :type, :fields, :solr

data/lib/picky/internals/indexing/category.rb CHANGED Viewed

@@ -38,7 +38,8 @@ module Internals
         weights    = options[:weights]    || Generators::Weights::Default
         similarity = options[:similarity] || Generators::Similarity::Default
-        bundle_class = options[:indexing_bundle_class] || Bundle::Memory
+        bundle_class = index.bundle_class || Bundle::Memory
         @exact   = bundle_class.new(:exact,   self, similarity, Generators::Partial::None.new, weights)
         @partial = bundle_class.new(:partial, self, Generators::Similarity::None.new, partial, weights)
       end

data/lib/picky/loader.rb CHANGED Viewed

@@ -299,6 +299,7 @@ module Loader # :nodoc:all
     load_relative 'sources/csv'
     load_relative 'sources/delicious'
     load_relative 'sources/couch'
+    load_relative 'sources/mongo'
     load_relative 'sources/wrappers/base'
     load_relative 'sources/wrappers/location'

data/lib/picky/sources/mongo.rb ADDED Viewed

@@ -0,0 +1,75 @@
+module Sources
+  # Raised when a Mongo source is instantiated without both a :url and a :db option.
+	#
+	# Important!
+  # You have to start your mongodb with --rest in order to use
+  # the rest / http interface
+	#
+  class NoMongoDBGiven < StandardError; end
+  # Important note: We're not sure if this works already.
+  #
+  # A Mongo database source.
+  #
+  # Options:
+  # * url, db
+  # Example:
+  #   Sources::Mongo.new(:collection1, :collection2, :url => 'localhost:28017', :db => 'testdatabase')
+	# Be sure to escape the URL properly, e.g. # => %23 in the databasename if needed
+	#
+  # and all the options of a <tt>RestClient::Resource</tt>.
+  # See http://github.com/archiloque/rest-client.
+  #
+  class Mongo < Base
+		@@id_key = '_id'
+    #
+    #
+    def initialize *category_names, options
+      check_gem
+			unless options[:url] && options[:db]
+				raise_no_db_given(category_names)
+			end
+		  @db         = RestClient::Resource.new options.delete(:url), options
+			@database   = options.delete(:db)
+		  @key_format = options[:key_format] && options[:key_format].to_sym || :to_sym
+		end
+    # Tries to require the rest_client gem.
+    #
+    def check_gem # :nodoc:
+      require 'rest_client'
+    rescue LoadError
+      warn_gem_missing 'rest-client', 'the MongoDB source'
+      exit 1
+    end
+		# Fetches the data, @limit=0 will return all records
+		#
+		# Limit is set to 0 by default - all collection entries will be sent.
+		# If you want to limit the results, set it to any other number, e.g. limit=15
+		# to return only 15 entries.
+		#
+		def harvest category
+			collection = (category.from || category.index_name).to_s
+			resp = @db["/#{@database}/#{category.index_name}/?@limit=0"].get
+			JSON.parse(resp)['rows'].each do |row|
+			  text = row[collection].to_s
+			  next unless text
+				index_key = row.delete(@@id_key) # TODO Still works, I removed .values
+				yield index_key, text
+			end
+		end
+    def raise_no_db_given category_names # :nodoc:
+      raise NoMongoDBGiven.new(category_names.join(', '))
+    end
+    def to_s
+      self.class.name
+    end
+  end
+end

data/spec/lib/internals/index/files_spec.rb CHANGED Viewed

@@ -3,7 +3,7 @@ require 'spec_helper'
 describe Internals::Index::Files do
   before(:each) do
-    index         = stub :index, :name => :some_index
+    index         = stub :index, :name => :some_index, :bundle_class => nil
     category      = Internals::Indexing::Category.new :some_category, index
     @files         = described_class.new :some_name, category

data/spec/lib/internals/index/redis_spec.rb CHANGED Viewed

@@ -4,7 +4,7 @@ describe Internals::Index::Redis do
   context 'indexing' do
     let(:category) do
-      index    = stub :index, :name => :some_index
+      index    = stub :index, :name => :some_index, :bundle_class => nil
       category = Internals::Indexing::Category.new :some_category, index
     end
     let(:redis) { described_class.new :some_name, category }

data/spec/lib/internals/indexing/bundle/memory_partial_generation_speed_spec.rb CHANGED Viewed

@@ -3,7 +3,7 @@ require 'spec_helper'
 describe Internals::Indexing::Bundle::Memory do
   before(:each) do
-    @index            = stub :index, :name => :some_index
+    @index            = stub :index, :name => :some_index, :bundle_class => nil
     @category         = Internals::Indexing::Category.new :some_category, @index
     @partial_strategy = Internals::Generators::Partial::Substring.new :from => 1

data/spec/lib/internals/indexing/bundle/memory_spec.rb CHANGED Viewed

@@ -3,7 +3,7 @@ require 'spec_helper'
 describe Internals::Indexing::Bundle::Memory do
   before(:each) do
-    @index = stub :index, :name => :some_index
+    @index = stub :index, :name => :some_index, :bundle_class => nil
     @category       = Internals::Indexing::Category.new :some_category, @index
     @partial     = stub :partial

data/spec/lib/internals/indexing/bundle/redis_spec.rb CHANGED Viewed

@@ -3,7 +3,7 @@ require 'spec_helper'
 describe Internals::Indexing::Bundle::Redis do
   before(:each) do
-    @index       = stub :index, :name => :some_index
+    @index       = stub :index, :name => :some_index, :bundle_class => nil
     @category    = Internals::Indexing::Category.new :some_category, @index
     @partial     = stub :partial

data/spec/lib/internals/indexing/category_spec.rb CHANGED Viewed

@@ -3,7 +3,7 @@ require 'spec_helper'
 describe Internals::Indexing::Category do
   before(:each) do
-    @index  = stub :index, :name => :some_index
+    @index  = stub :index, :name => :some_index, :bundle_class => nil
     @source = stub :some_given_source, :key_format => nil
   end
   let(:category) { described_class.new(:some_category, @index, :source => @source).tap { |c| c.stub! :timed_exclaim } }

data/spec/lib/sources/mongo_spec.rb ADDED Viewed

@@ -0,0 +1,50 @@
+require 'spec_helper'
+describe Sources::Mongo do
+  describe 'key_format' do
+    context 'default' do
+      let(:source) { Sources::Mongo.new(:a, :b, :url => 'someurl', :db => 'somedb') }
+      it 'is correct' do
+        source.key_format.should == :to_sym
+      end
+    end
+    context 'non-default' do
+      let(:source) { Sources::Mongo.new(:a, :b, :url => 'bla', :db => 'somedb', :key_format => 'some_key_method') }
+      it 'is correct' do
+        source.key_format.should == :some_key_method
+      end
+    end
+  end
+  describe 'to_s' do
+     let(:source) { Sources::Mongo.new(:a, :b, :url => 'someurl', :db => 'somedb') }
+     it 'is correct' do
+       source.to_s.should == 'Sources::Mongo'
+     end
+   end
+   context "without database" do
+     it "should fail correctly" do
+       lambda { @source = Sources::Mongo.new(:a, :b, :url => 'someurl') }.should raise_error(Sources::NoMongoDBGiven)
+     end
+   end
+   context "with database" do
+     before(:each) do
+       @source = Sources::Mongo.new :a, :b, :url => 'someurl', :db => 'somedb'
+       RestClient::Request.should_receive(:execute).any_number_of_times.and_return %{{"rows":[{"_id":"7f","a":"a data","b":"b data","c":"c data"}]}}
+     end
+     describe "harvest" do
+       it "yields the right data" do
+         category = stub :b, :from => :b, :index_name => :some_index_name
+         @source.harvest category do |id, token|
+           id.should    eql('7f')
+           token.should eql('b data')
+         end.should have(1).item
+       end
+     end
+   end
+end

metadata CHANGED Viewed

@@ -2,7 +2,7 @@
 name: picky
 version: !ruby/object:Gem::Version
   prerelease:
-  version: 2.4.0
+  version: 2.4.1
 platform: ruby
 authors:
 - Florian Hanke
@@ -10,7 +10,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-04-20 00:00:00 +10:00
+date: 2011-05-25 00:00:00 +10:00
 default_executable: picky
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -32,7 +32,7 @@ dependencies:
     requirements:
     - - "="
       - !ruby/object:Gem::Version
-        version: 2.4.0
+        version: 2.4.1
   type: :development
   version_requirements: *id002
 description: Fast Ruby semantic text search engine with comfortable single field interface.
@@ -159,6 +159,7 @@ files:
 - lib/picky/sources/csv.rb
 - lib/picky/sources/db.rb
 - lib/picky/sources/delicious.rb
+- lib/picky/sources/mongo.rb
 - lib/picky/sources/wrappers/base.rb
 - lib/picky/sources/wrappers/location.rb
 - lib/picky/statistics.rb
@@ -269,6 +270,7 @@ files:
 - spec/lib/sources/csv_spec.rb
 - spec/lib/sources/db_spec.rb
 - spec/lib/sources/delicious_spec.rb
+- spec/lib/sources/mongo_spec.rb
 - spec/lib/sources/wrappers/base_spec.rb
 - spec/lib/sources/wrappers/location_spec.rb
 - spec/lib/statistics_spec.rb
@@ -298,7 +300,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 rubyforge_project: http://rubyforge.org/projects/picky
-rubygems_version: 1.5.0
+rubygems_version: 1.6.2
 signing_key:
 specification_version: 3
 summary: "Picky: Semantic Search Engine. Clever Interface. Good Tools."
@@ -394,6 +396,7 @@ test_files:
 - spec/lib/sources/csv_spec.rb
 - spec/lib/sources/db_spec.rb
 - spec/lib/sources/delicious_spec.rb
+- spec/lib/sources/mongo_spec.rb
 - spec/lib/sources/wrappers/base_spec.rb
 - spec/lib/sources/wrappers/location_spec.rb
 - spec/lib/statistics_spec.rb