RubyGems - picky - Version diff - 0.0.9 → 0.1.0 - Mend

picky 0.0.9 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41)

data/lib/picky/application.rb +38 -37
data/lib/picky/cacher/partial/default.rb +1 -3
data/lib/picky/cacher/partial/subtoken.rb +44 -18
data/lib/picky/configuration/field.rb +6 -2
data/lib/picky/configuration/indexes.rb +16 -7
data/lib/picky/configuration/queries.rb +3 -13
data/lib/picky/extensions/symbol.rb +19 -4
data/lib/picky/generator.rb +9 -0
data/lib/picky/helpers/measuring.rb +3 -3
data/lib/picky/index/bundle.rb +5 -4
data/lib/picky/index/category.rb +14 -7
data/lib/picky/index/combined.rb +6 -1
data/lib/picky/indexers/no_source_specified_error.rb +2 -0
data/lib/picky/indexes.rb +3 -9
data/lib/picky/query/allocation.rb +1 -1
data/lib/picky/query/allocations.rb +2 -2
data/lib/picky/rack/harakiri.rb +10 -8
data/lib/picky/routing.rb +19 -21
data/lib/picky/solr/schema_generator.rb +4 -4
data/lib/picky/sources/base.rb +16 -4
data/lib/picky/sources/csv.rb +3 -0
data/lib/picky/sources/db.rb +30 -22
data/lib/picky/tokenizers/base.rb +7 -5
data/lib/picky/tokenizers/index.rb +5 -5
data/lib/picky/tokenizers/query.rb +9 -9
data/prototype_project/app/application.rb +36 -29
data/prototype_project/app/db.yml +1 -1
data/prototype_project/config.ru +3 -2
data/spec/ext/performant_spec.rb +2 -2
data/spec/lib/application_spec.rb +54 -8
data/spec/lib/cacher/partial/default_spec.rb +15 -0
data/spec/lib/cacher/partial/subtoken_spec.rb +54 -2
data/spec/lib/extensions/symbol_spec.rb +124 -30
data/spec/lib/index/bundle_partial_generation_speed_spec.rb +1 -1
data/spec/lib/query/allocations_spec.rb +5 -5
data/spec/lib/query/combinations_spec.rb +3 -3
data/spec/lib/rack/harakiri_spec.rb +29 -0
data/spec/lib/routing_spec.rb +22 -98
data/spec/lib/tokenizers/index_spec.rb +1 -1
data/spec/specific/speed_spec.rb +4 -5
metadata +7 -3

data/lib/picky/indexers/no_source_specified_error.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 module Indexers
+  # Raised if no source is available on a field.
+  #
   class NoSourceSpecifiedException < StandardError; end
 end

data/lib/picky/indexes.rb CHANGED Viewed

@@ -171,16 +171,10 @@ module Indexes
     end
   end
-  # Loads all index definitions.
-  #
-  def self.setup
-    self.types        ||= []
-    self.type_mapping ||= {}
-    configuration.types.each do |type|
-      add type.generate
-    end
-  end
   def self.add type
+    self.type_mapping ||= {}
+    self.types        ||= []
     self.type_mapping[type.name] = type
     self.types << type
   end

data/lib/picky/query/allocation.rb CHANGED Viewed

@@ -61,7 +61,7 @@ module Query
     # Transform the allocation into result form.
     #
     def to_result
-      [self.result_type, self.score, self.count, @combinations.to_result, self.ids] if self.count > 0
+      [self.result_type, self.score, count, @combinations.to_result, self.ids] if count = self.count > 0
     end
     # Json representation of this allocation.

data/lib/picky/query/allocations.rb CHANGED Viewed

@@ -66,8 +66,8 @@ module Query
       # TODO can there be no @allocations???
       return [] if @allocations.empty?
       ids = @allocations.first.ids
-      indexes = Array.new(ids.size) { |i| i }.sort_by { rand }
-      indexes.first(amount).map { |i| ids[i] }
+      indexes = Array.new(ids.size) { |id| id }.sort_by { rand }
+      indexes.first(amount).map { |id| ids[id] }
     end
     # This is the main method of this class that will replace ids and count.

data/lib/picky/rack/harakiri.rb CHANGED Viewed

@@ -1,22 +1,24 @@
-# Simple Rack Middleware to kill Unicorns after X requests.
-#
-# Use as follows in e.g. your rackup File:
-#
-# Rack::Harakiri.after = 50
-# use Rack::Harakiri
-#
 module Rack
+  # Simple Rack Middleware to kill Unicorns after X requests.
+  #
+  # Use as follows in e.g. your rackup File:
+  #
+  # Rack::Harakiri.after = 50
+  # use Rack::Harakiri
+  #
   class Harakiri
     # Set the amount of requests before the Unicorn commits Harakiri.
     #
     cattr_accessor :after
+    attr_reader :quit_after_requests
     def initialize app
       @app = app
       @requests            = 0
-      @quit_after_requests = @@after || 50
+      @quit_after_requests = self.class.after || 50
     end
     def call env

data/lib/picky/routing.rb CHANGED Viewed

@@ -14,14 +14,6 @@ class Routing
     @defaults = @@defaults.dup
   end
-  # #
-  # #
-  # def define_using &block
-  #   reset_routes
-  #   instance_eval &block
-  #   routes.freeze
-  # end
   #
   #
   def reset_routes
@@ -60,21 +52,27 @@ class Routing
   #
   #
-  def route url, query, route_options = {}
-    query.tokenizer = @defaults[:tokenizer] if @defaults[:tokenizer]
-    routes.add_route generate_app(query, route_options), default_options(url, route_options)
+  def route options = {}
+    mappings, route_options = split options
+    mappings.each do |url, query|
+      route_one url, query, route_options
+    end
   end
-  #
-  #
-  def live url, *indexes_and_options
-    route_options = Hash === indexes_and_options.last ? indexes_and_options.pop : {}
-    route url, Query::Live.new(*indexes_and_options), route_options
+  def split options
+    mappings      = {}
+    route_options = {}
+    options.each_pair do |key, value|
+      if Regexp === key or String === key
+        mappings[key] = value
+      else
+        route_options[key] = value
+      end
+    end
+    [mappings, route_options]
   end
-  #
-  #
-  def full url, *indexes_and_options
-    route_options = Hash === indexes_and_options.last ? indexes_and_options.pop : {}
-    route url, Query::Full.new(*indexes_and_options), route_options
+  def route_one url, query, route_options = {}
+    query.tokenizer = @defaults[:tokenizer] if @defaults[:tokenizer]
+    routes.add_route generate_app(query, route_options), default_options(url, route_options)
   end
   #
   #

data/lib/picky/solr/schema_generator.rb CHANGED Viewed

@@ -54,8 +54,8 @@ module Solr
     def read_template
       template_path = File.join PICKY_ROOT, 'solr', 'conf', 'schema.xml.erb'
       schema = ''
-      File.open(template_path, 'r') do |f|
-        schema = f.read
+      File.open(template_path, 'r') do |file|
+        schema = file.read
       end
       schema
     end
@@ -64,8 +64,8 @@ module Solr
     #
     def write result
       schema_path = File.join PICKY_ROOT, 'solr', 'conf', 'schema.xml'
-      File.open(schema_path, 'w') do |f|
-        f << result
+      File.open(schema_path, 'w') do |file|
+        file << result
       end
     end

data/lib/picky/sources/base.rb CHANGED Viewed

@@ -1,16 +1,28 @@
 module Sources
+  # Sources are where your data comes from.
+  # Harvest is the most important method as it is used always to get data.
+  #
   class Base
-    def connect_backend
+    # Note: Methods listed for illustrative purposes.
+    #
+    # Yield the data (id, text for id) for the given type and field.
+    #
+    def harvest type, field
+      # yields nothing
     end
-    def take_snapshot type
+    # Connects to the backend.
+    #
+    def connect_backend
     end
-    def harvest type, field
+    # Take a snapshot of your data, if it is fast changing.
+    #
+    def take_snapshot type
     end

data/lib/picky/sources/csv.rb CHANGED Viewed

@@ -2,6 +2,9 @@ require 'csv'
 module Sources
+  # Describes a CSV source, a file with csv in it.
+  # Give it a sequence of field names and a file option with the filename.
+  #
   class NoCSVFileGiven < StandardError; end
   class CSV < Base

data/lib/picky/sources/db.rb CHANGED Viewed

@@ -1,13 +1,16 @@
 module Sources
+  # Describes a database source. Just give it a select statement
+  # (with id in it), and a file option or the options from an AR config file.
+  #
   class DB < Base
     attr_reader :select_statement, :database, :connection_options
-    def initialize select_statement, with_options = { :file => 'app/db.yml' }
+    def initialize select_statement, options = { :file => 'app/db.yml' }
       @select_statement = select_statement
       @database         = create_database_adapter
-      configure with_options
+      @options          = options
     end
     # Get a configured Database backend.
@@ -34,7 +37,7 @@ module Sources
     #
     def configure options
       @connection_options = if filename = options[:file]
-        File.open(File.join(PICKY_ROOT, filename)) { |f| YAML::load(f) }
+        File.open(File.join(PICKY_ROOT, filename)) { |file| YAML::load(file) }
       else
         options
       end
@@ -44,6 +47,7 @@ module Sources
     # Connect the backend.
     #
     def connect_backend
+      configure @options
       return if PICKY_ENVIRONMENT.to_s == 'test' # TODO Unclean.
       raise "Database backend not configured" unless connection_options
       database.establish_connection connection_options
@@ -55,15 +59,17 @@ module Sources
       connect_backend
       origin = snapshot_table_name type
-      database.connection.execute "DROP TABLE IF EXISTS #{origin}"
-      database.connection.execute "CREATE TABLE #{origin} AS #{select_statement}"
-      database.connection.execute "ALTER TABLE #{origin} CHANGE COLUMN id indexed_id INTEGER"
-      database.connection.execute "ALTER TABLE #{origin} ADD COLUMN id INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT"
+      on_database = database.connection
+      on_database.execute "DROP TABLE IF EXISTS #{origin}"
+      on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
+      on_database.execute "ALTER TABLE #{origin} CHANGE COLUMN id indexed_id INTEGER"
+      on_database.execute "ALTER TABLE #{origin} ADD COLUMN id INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT"
       # Execute any special queries this type needs executed.
       #
-      database.connection.execute type.after_indexing if type.after_indexing
+      on_database.execute type.after_indexing if type.after_indexing
     end
     # Counts all the entries that are used for the index.
@@ -86,6 +92,8 @@ module Sources
     # Example:
     #   "SELECT indexed_id, value FROM bla_table st WHERE kind = 'bla'"
     #
+    # TODO Perhaps it should be just harvest field.
+    #
     def harvest type, field
       connect_backend
@@ -98,24 +106,12 @@ module Sources
       end
     end
-    # Override in subclasses.
-    #
-    def chunksize
-      25_000
-    end
     # Gets database from the backend.
     #
     def get_data type, field, offset
       database.connection.execute harvest_statement_with_offset(type, field, offset)
     end
-    # Base harvest statement for dbs.
-    #
-    def harvest_statement type, field
-      "SELECT indexed_id, #{field.name} FROM #{snapshot_table_name(type)} st"
-    end
     # Builds a harvest statement for getting data to index.
     #
     # TODO Use the adapter for this.
@@ -128,6 +124,18 @@ module Sources
       "#{statement} st.id > #{offset} LIMIT #{chunksize}"
     end
+    # Base harvest statement for dbs.
+    #
+    def harvest_statement type, field
+      "SELECT indexed_id, #{field.name} FROM #{snapshot_table_name(type)} st"
+    end
+    # Override in subclasses.
+    #
+    def chunksize
+      25_000
+    end
   end
 end

data/lib/picky/tokenizers/base.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 module Tokenizers
+  # Defines tokenizing processes used both in indexing and querying.
+  #
   class Base
     # Stopwords.
@@ -21,7 +23,7 @@ module Tokenizers
     # Contraction.
     #
-    def self.contract_expressions what, to_what
+    def self.contracts_expressions what, to_what
       define_method :contract do |text|
         text.gsub! what, to_what
       end
@@ -32,7 +34,7 @@ module Tokenizers
     #
     # TODO Should there be a legal?
     #
-    def self.illegal_characters regexp
+    def self.removes_characters regexp
       define_method :remove_illegals do |text|
         text.gsub! regexp, ''
       end
@@ -41,7 +43,7 @@ module Tokenizers
     # Splitting.
     #
-    def self.split_text_on regexp
+    def self.splits_text_on regexp
       define_method :split do |text|
         text.split regexp
       end
@@ -50,7 +52,7 @@ module Tokenizers
     # Normalizing.
     #
-    def self.normalize_words regexp_replaces
+    def self.normalizes_words regexp_replaces
       define_method :normalize_with_patterns do |text|
         regexp_replaces.each do |regex, replace|
           # This should be sufficient
@@ -65,7 +67,7 @@ module Tokenizers
     # Illegal after normalizing.
     #
-    def self.illegal_characters_after_splitting regexp
+    def self.removes_characters_after_splitting regexp
       define_method :remove_after_normalizing_illegals do |text|
         text.gsub! regexp, ''
       end

data/lib/picky/tokenizers/index.rb CHANGED Viewed

@@ -9,12 +9,12 @@ module Tokenizers
     # Default handling definitions. Override in config.
     #
-    illegal_characters(//)
+    removes_characters(//)
     stopwords(//)
-    contract_expressions(//, '')
-    split_text_on(/\s/)
-    normalize_words([])
-    illegal_characters_after_splitting(//)
+    contracts_expressions(//, '')
+    splits_text_on(/\s/)
+    normalizes_words([])
+    removes_characters_after_splitting(//)
     # Default indexing preprocessing hook.
     #

data/lib/picky/tokenizers/query.rb CHANGED Viewed

@@ -4,12 +4,12 @@ module Tokenizers
   # There are a few class methods that you can use to configure how a query works.
   #
-  # illegal_characters regexp
+  # removes_characters regexp
   # illegal_after_normalizing regexp
   # stopwords regexp
-  # contract_expressions regexp, to_string
-  # split_text_on regexp
-  # normalize_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
+  # contracts_expressions regexp, to_string
+  # splits_text_on regexp
+  # normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
   #
   class Query < Base
@@ -17,12 +17,12 @@ module Tokenizers
     # Default query tokenizer behaviour. Override in config.
     #
-    illegal_characters(//)
+    removes_characters(//)
     stopwords(//)
-    contract_expressions(//, '')
-    split_text_on(/\s/)
-    normalize_words([])
-    illegal_characters_after_splitting(//)
+    contracts_expressions(//, '')
+    splits_text_on(/\s/)
+    normalizes_words([])
+    removes_characters_after_splitting(//)
     def preprocess text
       remove_illegals text             # Remove illegal characters

data/prototype_project/app/application.rb CHANGED Viewed

@@ -4,43 +4,50 @@
 #
 # Have fun with Picky!
 #
-class PickySearch < Application # The App Constant needs to be identical in application.ru.
+class PickySearch < Application # The App Constant needs to be identical in config.ru.
   # This is an example with books that you can adapt.
   #
   # Note: Much more is possible, but let's start out super easy.
   #
-  # Ask me if you have questions or specific requests!
+  # Ask me if you have questions or specific requests.
   #
-  indexes do
-    illegal_characters(/[^a-zA-Z0-9\s\/\-\"\&\.]/)
-    stopwords(/\b(and|the|of|it|in|for)\b/)
-    split_text_on(/[\s\/\-\"\&\.]/)
+  indexing.removes_characters(/[^a-zA-Z0-9\s\/\-\"\&\.]/)
+  indexing.stopwords(/\b(and|the|of|it|in|for)\b/)
+  indexing.splits_text_on(/[\s\/\-\"\&\.]/)
-    add_index :books,
-              Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
-              field(:title, :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title word indexed.
-              field(:author),
-              field(:isbn,  :partial => Partial::None.new) # Partially searching on an ISBN makes not much sense.
-  end
+  books_index = index :books,
+                      Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
+                      field(:title, :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title word indexed.
+                      field(:author),
+                      field(:isbn,  :partial => Partial::None.new) # Partially searching on an ISBN makes not much sense.
-  queries do
-    maximum_tokens 5
-    # Note that Picky needs the following characters to
-    # pass through, as they are control characters: *"~:
-    #
-    illegal_characters(/[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/)
-    stopwords(/\b(and|the|of|it|in|for)\b/)
-    split_text_on(/[\s\/\-\,\&]+/)
-    # The example defines two queries that use the same index(es).
-    #
-    # A Full query returns ids, combinations, and counts.
-    # A Live query does return all that Full returns, without ids.
-    #
-    route %r{^/books/full}, Query::Full.new(Indexes[:books])
-    route %r{^/books/live}, Query::Live.new(Indexes[:books])
-  end
+  # Defines the maximum tokens (words) that pass through to the engine.
+  #
+  querying.maximum_tokens 5
+  # Note that Picky needs the following characters to
+  # pass through, as they are control characters: *"~:
+  #
+  querying.removes_characters(/[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/)
+  querying.stopwords(/\b(and|the|of|it|in|for)\b/)
+  querying.splits_text_on(/[\s\/\-\,\&]+/)
+  # The example defines two queries that use the same index(es).
+  #
+  # A Full query returns ids, combinations, and counts.
+  # A Live query does return all that Full returns, without ids.
+  #
+  # Note: You can pass a query multiple indexes and it will combine them.
+  #
+  full_books = Query::Full.new books_index
+  live_books = Query::Live.new books_index
+  # Routing is simple.
+  # A path regexp pointing to a query that will be run.
+  #
+  route %r{^/books/full} => full_books
+  route %r{^/books/live} => live_books
 end