RubyGems - picky - Versions diffs - 0.12.3 → 1.0.0 - Mend

picky 0.12.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

data/lib/deployment.rb +2 -2
data/lib/picky/application.rb +172 -12
data/lib/picky/cacher/generator.rb +1 -1
data/lib/picky/calculations/location.rb +9 -1
data/lib/picky/character_substituters/west_european.rb +1 -1
data/lib/picky/configuration/index.rb +1 -1
data/lib/picky/cores.rb +1 -1
data/lib/picky/extensions/array.rb +1 -1
data/lib/picky/extensions/hash.rb +1 -1
data/lib/picky/extensions/module.rb +1 -1
data/lib/picky/extensions/object.rb +1 -1
data/lib/picky/extensions/symbol.rb +1 -1
data/lib/picky/generator.rb +2 -2
data/lib/picky/helpers/cache.rb +7 -5
data/lib/picky/helpers/gc.rb +2 -0
data/lib/picky/helpers/measuring.rb +2 -0
data/lib/picky/index/bundle.rb +1 -1
data/lib/picky/index_api.rb +33 -15
data/lib/picky/indexed/bundle.rb +1 -1
data/lib/picky/indexed/index.rb +1 -1
data/lib/picky/indexed/wrappers/bundle/location.rb +1 -1
data/lib/picky/indexers/no_source_specified_error.rb +1 -1
data/lib/picky/indexes_api.rb +1 -1
data/lib/picky/indexing/bundle.rb +1 -1
data/lib/picky/indexing/index.rb +1 -1
data/lib/picky/loader.rb +1 -1
data/lib/picky/loggers/search.rb +1 -1
data/lib/picky/performant.rb +3 -0
data/lib/picky/query/allocation.rb +1 -1
data/lib/picky/query/allocations.rb +1 -1
data/lib/picky/query/base.rb +48 -16
data/lib/picky/query/combination.rb +1 -1
data/lib/picky/query/combinations.rb +1 -1
data/lib/picky/query/full.rb +7 -2
data/lib/picky/query/live.rb +9 -7
data/lib/picky/query/qualifiers.rb +6 -2
data/lib/picky/query/solr.rb +1 -1
data/lib/picky/query/token.rb +2 -1
data/lib/picky/query/tokens.rb +4 -1
data/lib/picky/query/weigher.rb +1 -1
data/lib/picky/query/weights.rb +1 -1
data/lib/picky/rack/harakiri.rb +14 -5
data/lib/picky/results/base.rb +1 -1
data/lib/picky/routing.rb +1 -1
data/lib/picky/solr/schema_generator.rb +2 -1
data/lib/picky/sources/base.rb +39 -25
data/lib/picky/sources/couch.rb +22 -8
data/lib/picky/sources/csv.rb +29 -6
data/lib/picky/sources/db.rb +46 -30
data/lib/picky/sources/delicious.rb +12 -2
data/lib/picky/sources/wrappers/base.rb +3 -1
data/lib/picky/tokenizers/base.rb +1 -1
data/project_prototype/Gemfile +1 -1
data/project_prototype/app/README +0 -1
data/spec/lib/calculations/location_spec.rb +28 -16
data/spec/lib/index_api_spec.rb +64 -0
data/spec/lib/indexed/index_spec.rb +2 -2
data/spec/lib/indexed/wrappers/exact_first_spec.rb +2 -2
data/spec/lib/indexing/index_spec.rb +2 -2
data/spec/lib/rack/harakiri_spec.rb +22 -10
metadata +7 -4

data/lib/picky/query/base.rb CHANGED Viewed

@@ -1,7 +1,23 @@
+# = Picky Queries
+#
+# A Picky Query is an object which:
+# * holds one or more indexes
+# * offers an interface to query these indexes.
+#
+# You connect URL paths to indexes via a Query.
+#
+# We recommend not using this directly, but connecting it to a URL and querying through that URL
+# (Protip: Use "curl 'localhost:8080/query/path?query=exampletext'" in a Terminal).
+#
+# There are two flavors of queries:
+# * Query::Full (Full results with all infos)
+# * Query::Live (Same as the Full results without result ids. Useful for query result counters.)
+#
 module Query
-  # Base query class.
+  # The base query class.
   #
-  # Initialized with the index types it should search on.
+  # Not directly instantiated. However, its methods are used by its subclasses, Full and Live.
   #
   class Base
@@ -11,11 +27,11 @@ module Query
     attr_accessor :reduce_to_amount, :weights
     # Takes:
-    #  * A number of indexes
-    #  * Options hash (optional) with:
-    #    * weigher:   A weigher. Query::Weigher by default.
-    #    * tokenizer: Tokenizers::Query.default by default.
-    #    * weights:   A hash of weights, or a Query::Weights object.
+    # * A number of indexes
+    # * Options hash (optional) with:
+    #   * weigher:   A weigher. Query::Weigher by default.
+    #   * tokenizer: Tokenizers::Query.default by default.
+    #   * weights:   A hash of weights, or a Query::Weights object.
     #
     def initialize *index_type_definitions
       options      = Hash === index_type_definitions.last ? index_type_definitions.pop : {}
@@ -27,13 +43,21 @@ module Query
       @weights     = Hash === weights ? Weights.new(weights) : weights
     end
-    # Convenience method.
+    # Search through this method.
+    #
+    # Parameters:
+    # * text: The search text.
+    # * offset = 0: _optional_ The offset from which position to return the ids. Useful for pagination.
+    #
+    # Note: The Routing uses this method after unravelling the HTTP request.
     #
     def search_with_text text, offset = 0
       search tokenized(text), offset
     end
-    # This runs the actual search.
+    # Runs the actual search using Query::Tokens.
+    #
+    # Note: Internal method, use #search_with_text.
     #
     def search tokens, offset = 0
       results = nil
@@ -46,7 +70,9 @@ module Query
       results
     end
-    # Return nil if no results have been found.
+    # Execute a search using Query::Tokens.
+    #
+    # Note: Internal method, use #search_with_text.
     #
     def execute tokens, offset
       results_from offset, sorted_allocations(tokens)
@@ -54,12 +80,18 @@ module Query
     # Returns an empty result with default values.
     #
+    # Parameters:
+    # * offset = 0: _optional_ The offset to use for the empty results.
+    #
     def empty_results offset = 0
       result_type.new offset
     end
     # Delegates the tokenizing to the query tokenizer.
     #
+    # Parameters:
+    # * text: The text to tokenize.
+    #
     def tokenized text
       @tokenizer.tokenize text
     end
@@ -72,7 +104,7 @@ module Query
     #
     # TODO Rename: allocations
     #
-    def sorted_allocations tokens
+    def sorted_allocations tokens # :nodoc:
       # Get the allocations.
       #
       # TODO Pass in reduce_to_amount (aka max_allocations)
@@ -105,18 +137,18 @@ module Query
       #
       allocations
     end
-    def reduce allocations
+    def reduce allocations # :nodoc:
       allocations.reduce_to reduce_to_amount if reduce_to_amount
     end
-    def remove_identifiers?
+    def remove_identifiers? # :nodoc:
       identifiers_to_remove.present?
     end
-    def remove_from allocations
+    def remove_from allocations # :nodoc:
       allocations.remove(identifiers_to_remove) if remove_identifiers?
     end
     # Override. TODO No, redesign.
     #
-    def identifiers_to_remove
+    def identifiers_to_remove # :nodoc:
       @identifiers_to_remove ||= []
     end
@@ -126,7 +158,7 @@ module Query
     #
     # TODO Move to results. result_type.from allocations, offset
     #
-    def results_from offset = 0, allocations = nil
+    def results_from offset = 0, allocations = nil # :nodoc:
       results = result_type.new offset, allocations
       results.prepare!
       results

data/lib/picky/query/combination.rb CHANGED Viewed

@@ -7,7 +7,7 @@ module Query
   #
   # An allocation consists of a number of combinations.
   #
-  class Combination
+  class Combination # :nodoc:all
     attr_reader :token, :bundle, :category_name

data/lib/picky/query/combinations.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module Query
   #
   # They are the core of an allocation.
   #
-  class Combinations
+  class Combinations # :nodoc:all
     attr_reader :combinations

data/lib/picky/query/full.rb CHANGED Viewed

@@ -1,10 +1,15 @@
 module Query
-  # This is the query class for performing full fledged queries.
+  # This Query class performs full queries.
+  #
+  # It includes in its results:
+  # * A count of results.
+  # * All possible combinations with their weights.
+  # * The top X result ids.
   #
   class Full < Base
-    # Generates full results.
+    # Returns Results::Full as its result type.
     #
     def result_type
       Results::Full

data/lib/picky/query/live.rb CHANGED Viewed

@@ -1,17 +1,19 @@
 module Query
-  # This is the query class for live queries.
+  # This Query class performs live queries.
   #
-  # It does:
-  #  * Return a count of results.
+  # It is useful for updating counters, or any job where you don't need the result ids.
   #
-  # It does NOT:
-  #  * Sort results geographically.
-  #  * Do any postprocessing.
+  # It includes in its results:
+  # * A count of results.
+  # * All possible combinations with their weights.
+  #
+  # But not:
+  # * The top X result ids.
   #
   class Live < Base
-    # Generates results from allocations.
+    # Returns Results::Live as its result type.
     #
     def result_type
       Results::Live

data/lib/picky/query/qualifiers.rb CHANGED Viewed

@@ -1,9 +1,13 @@
 # coding: utf-8
+#
+#
+#
 module Query
   # A single qualifier.
   #
-  class Qualifier
+  class Qualifier # :nodoc:all
     attr_reader :normalized_qualifier, :codes
@@ -28,7 +32,7 @@ module Query
   # Collection class for qualifiers.
   #
-  class Qualifiers
+  class Qualifiers # :nodoc:all
     include Singleton

data/lib/picky/query/solr.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module Query
   #
   #
-  class Solr < Base
+  class Solr < Base # :nodoc:all
     attr_reader :server, :index_types

data/lib/picky/query/token.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 module Query
   # This is a query token. Together with other tokens it makes up a query.
   #
   # It remembers the original form, and a normalized form.
@@ -7,7 +8,7 @@ module Query
   #
   # TODO Make partial / similarity char configurable.
   #
-  class Token
+  class Token # :nodoc:all
     attr_reader :text, :original
     attr_writer :similar

data/lib/picky/query/tokens.rb CHANGED Viewed

@@ -1,10 +1,13 @@
 # encoding: utf-8
 #
+#
+#
 module Query
   # This class primarily handles switching through similar token constellations.
   #
-  class Tokens
+  class Tokens # :nodoc:all
     # Basically delegates to its internal tokens array.
     #

data/lib/picky/query/weigher.rb CHANGED Viewed

@@ -2,7 +2,7 @@ module Query
   # Weighs the given tokens, generates Allocations -> Allocation -> Combinations.
   #
-  class Weigher
+  class Weigher # :nodoc:all
     attr_reader :indexes

data/lib/picky/query/weights.rb CHANGED Viewed

@@ -2,7 +2,7 @@ module Query
   # Calculates weights for certain combinations.
   #
-  class Weights
+  class Weights # :nodoc:all
     #
     #

data/lib/picky/rack/harakiri.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-module Rack
+module Rack # :nodoc:
   # Simple Rack Middleware to kill Unicorns after X requests.
   #
@@ -16,7 +16,6 @@ module Rack
     # Set the amount of requests before the Unicorn commits Harakiri.
     #
     cattr_accessor :after
-    attr_reader :quit_after_requests
     def initialize app
       @app = app
@@ -25,8 +24,10 @@ module Rack
       @quit_after_requests = self.class.after || 50
     end
-    # Harakiri is a middleware, so it passes the call on after checking if it
-    # is time to honorably retire.
+    # #call interface method.
+    #
+    # Harakiri is a middleware, so it delegates to the app or
+    # the next middleware after checking if it is time to honorably retire.
     #
     def call env
       harakiri
@@ -37,9 +38,17 @@ module Rack
     #
     # If yes, kills itself (Unicorn will answer the request, honorably).
     #
+    # Note: Sends its process a QUIT signal if it is time.
+    #
     def harakiri
       @requests = @requests + 1
-      Process.kill(:QUIT, Process.pid) if @requests >= @quit_after_requests
+      Process.kill(:QUIT, Process.pid) if harakiri?
+    end
+    # Is it time to honorably retire?
+    #
+    def harakiri?
+      @requests >= @quit_after_requests
     end
   end

data/lib/picky/results/base.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-module Results
+module Results # :nodoc:all
   # This is the internal results object. Usually, to_marshal, or to_json
   # is called on it to get a string for the answer.

data/lib/picky/routing.rb CHANGED Viewed

@@ -2,7 +2,7 @@ require 'rack/mount'
 #
 #
-class Routing
+class Routing # :nodoc:all
   @@defaults = {
     query_key:    'query'.freeze,

data/lib/picky/solr/schema_generator.rb CHANGED Viewed

@@ -1,4 +1,5 @@
-module Solr
+module Solr # :nodoc:all
   class SchemaGenerator
     attr_reader :types

data/lib/picky/sources/base.rb CHANGED Viewed

@@ -1,50 +1,64 @@
+# = Data Sources
+#
+# Currently, Picky offers the following Sources:
+# * CSV (comma – or other – separated file)
+# * Couch (CouchDB, key-value store)
+# * DB (Databases, foremost MySQL)
+# * Delicious (http://del.icio.us, online bookmarking service)
+# See also:
+# http://github.com/floere/picky/wiki/Sources-Configuration
+#
+# Don't worry if your source isn't here. Adding your own is easy:
+# http://github.com/floere/picky/wiki/Contributing-sources
+#
 module Sources
   # Sources are where your data comes from.
   #
-  # Basically, a source has 1-3 methods.
-  # * harvest: Used by the indexer to gather data.
-  #            Yields an indexed_id (string or integer) and a string value.
+  # A source has 1 mandatory and 2 optional methods:
+  # * connect_backend (_optional_): called once for each type/category pair.
+  # * harvest: Used by the indexer to gather data. Yields an indexed_id (string or integer) and a string value.
+  # * take_snapshot (_optional_): called once for each type.
+  #
+  # This base class "implements" all these methods, but they don't do anything.
+  # Subclass this class <tt>class MySource < Base</tt> and override the methods in your source to do something.
   #
-  # * connect_backend: Optional, called once for each type/category pair.
-  # * take_snapshot: Optional, called once for each type.
   class Base
-    # Note: Default methods do nothing.
-    #
     # Connect to the backend.
     #
-    # Note: Called once per index/category combination
-    #       before harvesting.
+    # Called once per index/category combination before harvesting.
     #
-    # For example, the db backend connects the db adapter.
+    # Examples:
+    # * The DB backend connects the DB adapter.
+    # * We open a connection to a key value store.
+    # * We open a file with data.
     #
     def connect_backend
     end
-    # Used to take a snapshot of your data if it is fast changing.
-    # e.g. in a database, a table based on the source's select
-    # statement is created.
+    # Called by the indexer when gathering data.
     #
-    # Note: Called before harvesting.
+    # Yields the data (id, text for id) for the given type and category.
     #
-    def take_snapshot type
+    # When implementing or overriding your own,
+    # be sure to <tt>yield(id, text_for_id)</tt> (or <tt>block.call(id, text_for_id)</tt>)
+    # for the given type symbol and category symbol.
+    #
+    def harvest index, category # :yields: id, text_for_id
+      # This concrete implementation yields "nothing", override in subclasses.
     end
-    # Called by the indexer when gathering data.
+    # Used to take a snapshot of your data if it is fast changing.
     #
-    # Yields the data (id, text for id) for the given type and category.
+    # Called once for each type before harvesting.
     #
-    # When implementing or overriding your own,
-    # be sure to <tt>yield</tt> (or <tt>block.call</tt>) an id (as string or integer)
-    # and a corresponding text for the given type symbol and
-    # category symbol.
+    # Example:
+    # * In a DB source, a table based on the source's select statement is created.
     #
-    def harvest type, category
-      # yields nothing
+    def take_snapshot index
     end
   end

data/lib/picky/sources/couch.rb CHANGED Viewed

@@ -1,11 +1,23 @@
 module Sources
-  # Describes a Couch database
-  # Give it a databse url and optionally username and password
+  # Raised when a Couch source is instantiated without a url.
+  #
+  # Example:
+  #   Sources::Couch.new(:column1, :column2) # without url option
   #
   class NoCouchDBGiven < StandardError; end
+  # A Couch database source.
+  #
+  # Options:
+  # * url
+  # and all the options of a <tt>RestClient::Resource</tt>.
+  # See http://github.com/archiloque/rest-client.
+  #
+  # Examples:
+  #  Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984')
+  #  Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984', user:'someuser', password:'somepassword')
+  #
   class Couch < Base
     def initialize *category_names, options
@@ -14,7 +26,9 @@ module Sources
       @db = RestClient::Resource.new options.delete(:url), options
     end
-    def check_gem
+    # Tries to require the rest_client gem.
+    #
+    def check_gem # :nodoc:
       require 'rest_client'
     rescue LoadError
       puts "Rest-client gem missing!\nTo use the CouchDB source, you need to:\n  1. Add the following line to Gemfile:\n     gem 'rest-client'\n  2. Then, run:\n     bundle update\n"
@@ -29,15 +43,15 @@ module Sources
         yield doc['_id'].to_i, doc[category_name] || next
       end
     end
-    def get_data &block
+    def get_data &block # :nodoc:
       resp = @db['_all_docs?include_docs=true'].get
       JSON.parse(resp)['rows'].
         map{|row| row['doc']}.
         each &block
     end
-    def raise_no_db_given category_names
+    def raise_no_db_given category_names # :nodoc:
       raise NoCouchDBGiven.new(category_names.join(', '))
     end
   end

data/lib/picky/sources/csv.rb CHANGED Viewed

@@ -1,13 +1,36 @@
 module Sources
-  # Describes a CSV source, a file with csv in it.
-  # Give it a sequence of category names and a file option with the filename.
+  # Raised when a CSV source is instantiated without a file.
+  #
+  # Example:
+  #   Sources::CSV.new(:column1, :column2) # without file option
   #
   class NoCSVFileGiven < StandardError; end
+  # Describes a CSV source, a file with comma separated values in it.
+  #
+  # The first column is implicitly assumed to be the id column.
+  #
+  # It takes the same options as the Ruby 1.9 CSV class.
+  #
+  # Examples:
+  #  Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv')
+  #  Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', col_sep:';')
+  #  Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', row_sep:"\n")
+  #
   class CSV < Base
-    attr_reader :file_name, :csv_options, :category_names
+    # The CSV file's path, relative to PICKY_ROOT.
+    #
+    attr_reader :file_name
+    # The options that were passed into #new.
+    #
+    attr_reader :csv_options
+    # The data category names.
+    #
+    attr_reader :category_names
     def initialize *category_names, options
       require 'csv'
@@ -17,9 +40,9 @@ module Sources
       @file_name      = @csv_options.delete(:file) || raise_no_file_given(category_names)
     end
+    # Raises a NoCSVFileGiven exception.
     #
-    #
-    def raise_no_file_given category_names
+    def raise_no_file_given category_names # :nodoc:
       raise NoCSVFileGiven.new(category_names.join(', '))
     end
@@ -38,7 +61,7 @@ module Sources
     #
     #
-    def get_data &block
+    def get_data &block # :nodoc:
       ::CSV.foreach file_name, csv_options, &block
     end