RubyGems - picky - Versions diffs - 2.5.2 → 2.6.0 - Mend

picky 2.5.2 → 2.6.0

Files changed (255) hide show

data/lib/picky/adapters/rack/base.rb +23 -0
data/lib/picky/adapters/rack/live_parameters.rb +33 -0
data/lib/picky/adapters/rack/query.rb +65 -0
data/lib/picky/adapters/rack.rb +30 -0
data/lib/picky/application.rb +5 -5
data/lib/picky/backend/backend.rb +108 -0
data/lib/picky/backend/file/basic.rb +101 -0
data/lib/picky/backend/file/json.rb +34 -0
data/lib/picky/backend/file/marshal.rb +34 -0
data/lib/picky/backend/file/text.rb +56 -0
data/lib/picky/backend/files.rb +30 -0
data/lib/picky/backend/redis/basic.rb +85 -0
data/lib/picky/backend/redis/list_hash.rb +49 -0
data/lib/picky/backend/redis/string_hash.rb +40 -0
data/lib/picky/backend/redis.rb +40 -0
data/lib/picky/calculations/location.rb +57 -0
data/lib/picky/categories.rb +62 -0
data/lib/picky/categories_indexed.rb +93 -0
data/lib/picky/categories_indexing.rb +12 -0
data/lib/picky/category.rb +127 -0
data/lib/picky/category_indexed.rb +64 -0
data/lib/picky/category_indexing.rb +145 -0
data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
data/lib/picky/extensions/class.rb +11 -0
data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
data/lib/picky/frontend_adapters/rack.rb +146 -0
data/lib/picky/generators/aliases.rb +3 -3
data/lib/picky/generators/base.rb +15 -0
data/lib/picky/generators/partial/default.rb +5 -0
data/lib/picky/generators/partial/none.rb +31 -0
data/lib/picky/generators/partial/strategy.rb +25 -0
data/lib/picky/generators/partial/substring.rb +118 -0
data/lib/picky/generators/partial_generator.rb +15 -0
data/lib/picky/generators/similarity/default.rb +7 -0
data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
data/lib/picky/generators/similarity/metaphone.rb +28 -0
data/lib/picky/generators/similarity/none.rb +31 -0
data/lib/picky/generators/similarity/phonetic.rb +65 -0
data/lib/picky/generators/similarity/soundex.rb +28 -0
data/lib/picky/generators/similarity/strategy.rb +9 -0
data/lib/picky/generators/similarity_generator.rb +15 -0
data/lib/picky/generators/strategy.rb +14 -0
data/lib/picky/generators/weights/default.rb +7 -0
data/lib/picky/generators/weights/logarithmic.rb +39 -0
data/lib/picky/generators/weights/strategy.rb +9 -0
data/lib/picky/generators/weights_generator.rb +15 -0
data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
data/lib/picky/index/base.rb +119 -104
data/lib/picky/index/base_indexed.rb +27 -0
data/lib/picky/index/base_indexing.rb +119 -0
data/lib/picky/index/memory.rb +6 -18
data/lib/picky/index/redis.rb +6 -18
data/lib/picky/indexed/bundle/base.rb +110 -0
data/lib/picky/indexed/bundle/memory.rb +91 -0
data/lib/picky/indexed/bundle/redis.rb +45 -0
data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
data/lib/picky/indexed/wrappers/category/location.rb +25 -0
data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
data/lib/picky/indexes.rb +73 -0
data/lib/picky/indexes_indexed.rb +29 -0
data/lib/picky/indexes_indexing.rb +49 -0
data/lib/picky/indexing/bundle/base.rb +212 -0
data/lib/picky/indexing/bundle/memory.rb +25 -0
data/lib/picky/indexing/bundle/redis.rb +24 -0
data/lib/picky/indexing/bundle/super_base.rb +61 -0
data/lib/picky/indexing/wrappers/category/location.rb +25 -0
data/lib/picky/interfaces/live_parameters.rb +8 -8
data/lib/picky/loader.rb +89 -95
data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
data/lib/picky/query/allocation.rb +84 -0
data/lib/picky/query/allocations.rb +114 -0
data/lib/picky/query/combination.rb +76 -0
data/lib/picky/query/combinations/base.rb +70 -0
data/lib/picky/query/combinations/memory.rb +48 -0
data/lib/picky/query/combinations/redis.rb +86 -0
data/lib/picky/query/indexes.rb +195 -0
data/lib/picky/query/qualifiers.rb +76 -0
data/lib/picky/query/token.rb +198 -0
data/lib/picky/query/tokens.rb +103 -0
data/lib/picky/{internals/query → query}/weights.rb +0 -0
data/lib/picky/results.rb +1 -1
data/lib/picky/search.rb +6 -6
data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
data/lib/picky/sources/db.rb +7 -7
data/lib/picky/sources/wrappers/location.rb +2 -2
data/lib/picky/tokenizers/base.rb +224 -0
data/lib/picky/tokenizers/index.rb +30 -0
data/lib/picky/tokenizers/location.rb +49 -0
data/lib/picky/tokenizers/query.rb +55 -0
data/lib/tasks/index.rake +4 -3
data/lib/tasks/try.rake +2 -2
data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
data/spec/lib/application_spec.rb +3 -3
data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
data/spec/lib/generators/aliases_spec.rb +3 -3
data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
data/spec/lib/index/base_spec.rb +10 -53
data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
data/spec/lib/indexes_class_spec.rb +30 -0
data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
data/spec/lib/query/allocation_spec.rb +1 -1
data/spec/lib/query/allocations_spec.rb +1 -1
data/spec/lib/query/combination_spec.rb +5 -5
data/spec/lib/query/combinations/base_spec.rb +1 -1
data/spec/lib/query/combinations/memory_spec.rb +1 -1
data/spec/lib/query/combinations/redis_spec.rb +1 -1
data/spec/lib/query/indexes_spec.rb +1 -1
data/spec/lib/query/qualifiers_spec.rb +4 -4
data/spec/lib/query/token_spec.rb +3 -3
data/spec/lib/query/tokens_spec.rb +32 -32
data/spec/lib/search_spec.rb +5 -5
data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
data/spec/lib/sources/db_spec.rb +4 -8
data/spec/lib/sources/wrappers/location_spec.rb +1 -1
data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
metadata +214 -215
data/lib/picky/aliases.rb +0 -4
data/lib/picky/index_bundle.rb +0 -48
data/lib/picky/indexed/indexes.rb +0 -59
data/lib/picky/indexing/indexes.rb +0 -87
data/lib/picky/internals/adapters/rack/base.rb +0 -27
data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
data/lib/picky/internals/adapters/rack/query.rb +0 -69
data/lib/picky/internals/adapters/rack.rb +0 -34
data/lib/picky/internals/calculations/location.rb +0 -59
data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
data/lib/picky/internals/generators/base.rb +0 -19
data/lib/picky/internals/generators/partial/default.rb +0 -7
data/lib/picky/internals/generators/partial/none.rb +0 -35
data/lib/picky/internals/generators/partial/strategy.rb +0 -29
data/lib/picky/internals/generators/partial/substring.rb +0 -122
data/lib/picky/internals/generators/partial_generator.rb +0 -19
data/lib/picky/internals/generators/similarity/default.rb +0 -9
data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
data/lib/picky/internals/generators/similarity/none.rb +0 -35
data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
data/lib/picky/internals/generators/similarity_generator.rb +0 -19
data/lib/picky/internals/generators/strategy.rb +0 -18
data/lib/picky/internals/generators/weights/default.rb +0 -9
data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
data/lib/picky/internals/generators/weights/strategy.rb +0 -11
data/lib/picky/internals/generators/weights_generator.rb +0 -19
data/lib/picky/internals/index/backend.rb +0 -112
data/lib/picky/internals/index/file/basic.rb +0 -105
data/lib/picky/internals/index/file/json.rb +0 -38
data/lib/picky/internals/index/file/marshal.rb +0 -38
data/lib/picky/internals/index/file/text.rb +0 -60
data/lib/picky/internals/index/files.rb +0 -34
data/lib/picky/internals/index/redis/basic.rb +0 -89
data/lib/picky/internals/index/redis/list_hash.rb +0 -53
data/lib/picky/internals/index/redis/string_hash.rb +0 -44
data/lib/picky/internals/index/redis.rb +0 -44
data/lib/picky/internals/indexed/bundle/base.rb +0 -114
data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
data/lib/picky/internals/indexed/categories.rb +0 -140
data/lib/picky/internals/indexed/category.rb +0 -111
data/lib/picky/internals/indexed/index.rb +0 -63
data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
data/lib/picky/internals/indexing/bundle/base.rb +0 -216
data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
data/lib/picky/internals/indexing/category.rb +0 -153
data/lib/picky/internals/indexing/index.rb +0 -142
data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
data/lib/picky/internals/query/allocation.rb +0 -88
data/lib/picky/internals/query/allocations.rb +0 -118
data/lib/picky/internals/query/combination.rb +0 -80
data/lib/picky/internals/query/combinations/base.rb +0 -74
data/lib/picky/internals/query/combinations/memory.rb +0 -52
data/lib/picky/internals/query/combinations/redis.rb +0 -90
data/lib/picky/internals/query/indexes.rb +0 -199
data/lib/picky/internals/query/qualifiers.rb +0 -82
data/lib/picky/internals/query/token.rb +0 -202
data/lib/picky/internals/query/tokens.rb +0 -109
data/lib/picky/internals/shared/category.rb +0 -52
data/lib/picky/internals/tokenizers/base.rb +0 -228
data/lib/picky/internals/tokenizers/index.rb +0 -34
data/lib/picky/internals/tokenizers/location.rb +0 -54
data/lib/picky/internals/tokenizers/query.rb +0 -59
data/lib/picky/internals.rb +0 -2
data/spec/lib/aliases_spec.rb +0 -9
data/spec/lib/index_bundle_spec.rb +0 -69

data/lib/picky/query/tokens.rb ADDED Viewed

@@ -0,0 +1,103 @@
+# encoding: utf-8
+#
+module Query
+  # Wraps an array of query tokens and offers operations on them as a
+  # group (tokenizing, capping, rejecting, partializing, combinations).
+  #
+  # This class primarily handles switching through similar token constellations.
+  #
+  class Tokens # :nodoc:all
+    # The wrapped array of tokens.
+    #
+    # Note: #== reads this on both sides of the comparison, so it
+    # needs to be publicly readable.
+    #
+    attr_reader :tokens
+    # Basically delegates to its internal tokens array.
+    #
+    # NOTE(review): delegate is not defined in this file - presumably it is
+    # ActiveSupport's Module#delegate. Also, :exit is part of the list although
+    # Array has no public #exit - confirm that this entry is intended.
+    #
+    self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
+    # Create a new Tokens object with the array of tokens passed in.
+    #
+    # Note: The array is stored as is (not copied), so the in-place
+    # operations below also change the array that was passed in.
+    #
+    def initialize tokens = []
+      @tokens = tokens
+    end
+    # Creates a new Tokens object from a number of Strings.
+    #
+    # Options:
+    #  * downcase: Whether to downcase the passed strings (default is true)
+    #
+    # Note: Uses collect!, so the passed words array is modified in place
+    # and becomes the internal tokens array.
+    #
+    def self.processed words, downcase = true
+      new words.collect! { |word| Token.processed word, downcase }
+    end
+    # Tokenizes each token.
+    #
+    # Note: Passed tokenizer needs to offer #normalize(text).
+    #
+    # Returns the internal tokens array (the result of Array#each).
+    #
+    def tokenize_with tokenizer
+      @tokens.each { |token| token.tokenize_with(tokenizer) }
+    end
+    # Generates an array in the form of
+    # [
+    #  [combination],                           # of token 1
+    #  [combination, combination, combination], # of token 2
+    #  [combination, combination]               # of token 3
+    # ]
+    #
+    # Tokens for which token.possible_combinations_in(index) returns
+    # nil (or false) do not contribute an entry to the result.
+    #
+    def possible_combinations_in index
+      @tokens.inject([]) do |combinations, token|
+        possible_combinations = token.possible_combinations_in index
+        # TODO Could move the ignore_unassigned_tokens here!
+        #
+        # Note: Optimization for ignoring tokens that allocate to nothing and
+        # can be ignored.
+        # For example in a special search, where "florian" is not
+        # mapped to any category.
+        #
+        possible_combinations ? combinations << possible_combinations : combinations
+      end
+    end
+    # Makes the last of the tokens partial.
+    #
+    # Does nothing if there are no tokens.
+    #
+    def partialize_last
+      @tokens.last.partial = true unless empty?
+    end
+    # Caps the tokens to the maximum.
+    #
+    # Removes (in place) all tokens from position maximum onwards,
+    # but only if there are more than maximum tokens.
+    #
+    def cap maximum
+      @tokens.slice!(maximum..-1) if cap?(maximum)
+    end
+    # Whether there are more tokens than the given maximum.
+    #
+    def cap? maximum
+      @tokens.size > maximum
+    end
+    # Rejects blank tokens.
+    #
+    # Note: Works in place. As it returns the result of Array#reject!,
+    # the return value is nil if no token was removed.
+    #
+    def reject
+      @tokens.reject! &:blank?
+    end
+    # Returns a solr query.
+    #
+    # It consists of each token's #to_solr, joined by a space.
+    #
+    def to_solr_query
+      @tokens.map(&:to_solr).join ' '
+    end
+    # Returns an array with the #original of each token.
+    #
+    def originals
+      @tokens.map(&:original)
+    end
+    # Two Tokens objects are equal if their token arrays are equal.
+    #
+    # Note: Needs the other object to respond to #tokens.
+    #
+    def == other
+      self.tokens == other.tokens
+    end
+    # Just join the token original texts.
+    #
+    def to_s
+      originals.join ' '
+    end
+  end
+end

data/lib/picky/{internals/query → query}/weights.rb RENAMED Viewed

File without changes

data/lib/picky/results.rb CHANGED Viewed

@@ -10,7 +10,7 @@ class Results
   # Takes instances of Query::Allocations as param.
   #
-  def initialize amount = 0, offset = 0, allocations = Internals::Query::Allocations.new
+  def initialize amount = 0, offset = 0, allocations = Query::Allocations.new
     @offset      = offset
     @amount      = amount
     @allocations = allocations

data/lib/picky/search.rb CHANGED Viewed

@@ -35,7 +35,7 @@ class Search
   def initialize *index_definitions
     options      = Hash === index_definitions.last ? index_definitions.pop : {}
-    @indexes  = Internals::Query::Indexes.new *index_definitions, combinations_type_for(index_definitions)
+    @indexes  = Query::Indexes.new *index_definitions, combinations_type_for(index_definitions)
     searching options[:tokenizer]
     boost     options[:weights]
@@ -54,11 +54,11 @@ class Search
     @tokenizer = if options.respond_to?(:tokenize)
       options
     else
-      options && Internals::Tokenizers::Query.new(options)
+      options && Tokenizers::Query.new(options)
     end
   end
   def tokenizer
-    @tokenizer || Internals::Tokenizers::Query.default
+    @tokenizer || Tokenizers::Query.default
   end
   # TODO Doc. Spec.
   #
@@ -82,14 +82,14 @@ class Search
   # Picky will raise a Query::Indexes::DifferentTypesError.
   #
   @@mapping = {
-    Index::Memory => Internals::Query::Combinations::Memory,
-    Index::Redis  => Internals::Query::Combinations::Redis
+    Index::Memory => Query::Combinations::Memory,
+    Index::Redis  => Query::Combinations::Redis
   }
   def combinations_type_for index_definitions_ary
     index_types = index_definitions_ary.map(&:class)
     index_types.uniq!
     raise_different(index_types) if index_types.size > 1
-    !index_types.empty? && @@mapping[*index_types] || Internals::Query::Combinations::Memory
+    !index_types.empty? && @@mapping[*index_types] || Query::Combinations::Memory
   end
   # Currently it isn't possible using Memory and Redis etc.
   # indexes in the same query index group.

data/lib/picky/{internals/solr → solr}/schema_generator.rb RENAMED Viewed

File without changes

data/lib/picky/sources/db.rb CHANGED Viewed

@@ -87,7 +87,7 @@ module Sources
     def take_snapshot index
       connect_backend
-      origin = snapshot_table_name index
+      origin = snapshot_table_name index.name
       on_database = database.connection
       # Drop the table if it exists.
@@ -109,16 +109,16 @@ module Sources
     # Counts all the entries that are used for the index.
     #
-    def count index
+    def count index_name
       connect_backend
-      database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index)}").to_i
+      database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
     end
     # The name of the snapshot table created by Picky.
     #
-    def snapshot_table_name index
-      "picky_#{index.name}_index"
+    def snapshot_table_name index_name
+      "picky_#{index_name}_index"
     end
     # Harvests the data to index in chunks.
@@ -126,7 +126,7 @@ module Sources
     def harvest category, &block
       connect_backend
-      (0..count(category.index)).step(chunksize) do |offset|
+      (0..count(category.index_name)).step(chunksize) do |offset|
         get_data category, offset, &block
       end
     end
@@ -166,7 +166,7 @@ module Sources
     # The harvest statement used to pull data from the snapshot table.
     #
     def harvest_statement category
-      "SELECT id, #{category.from} FROM #{snapshot_table_name(category.index)} st"
+      "SELECT id, #{category.from} FROM #{snapshot_table_name(category.index_name)} st"
     end
     # The amount of records that are loaded each chunk.

data/lib/picky/sources/wrappers/location.rb CHANGED Viewed

@@ -10,7 +10,7 @@ module Sources
       def initialize source, grid, precision = 1
         super source
-        @calculation = Internals::Calculations::Location.new grid, precision
+        @calculation = Calculations::Location.new grid, precision
       end
       # Yield the data (id, text for id) for the given category.
@@ -42,7 +42,7 @@ module Sources
         # TODO Move to the right place.
         #
-        category.exact[:location_minimum] = minimum
+        category.indexing_exact[:location_minimum] = minimum
       end
     end

data/lib/picky/tokenizers/base.rb ADDED Viewed

@@ -0,0 +1,224 @@
+module Tokenizers # :nodoc:all
+  # Defines tokenizing processes used both in indexing and querying.
+  #
+  # The steps run by #tokenize are, in this order:
+  # preprocess, pretokenize, tokens_for, process.
+  #
+  # The configuration methods (stopwords, removes_characters, splits_text_on,
+  # normalizes_words, ...) each store their argument in an instance variable
+  # which the corresponding processing method reads later.
+  #
+  class Base
+    # TODO Move EMPTY_STRING top level.
+    #
+    EMPTY_STRING = ''.freeze
+    # Returns a multi-line, human readable summary of the configuration
+    # of this tokenizer.
+    #
+    # Options that have not been configured are shown as '-'.
+    #
+    def to_s
+      # Extracts the line number from the reject condition's #to_s.
+      # Is nil if that string does not contain ":<digits> (lambda)".
+      #
+      reject_condition_location = @reject_condition.to_s[/:(\d+) \(lambda\)/, 1]
+      <<-TOKENIZER
+Removes characters:        #{@removes_characters_regexp ? "/#{@removes_characters_regexp.source}/" : '-'}
+Stopwords:                 #{@remove_stopwords_regexp ? "/#{@remove_stopwords_regexp.source}/" : '-'}
+Splits text on:            #{@splits_text_on.respond_to?(:source) ? "/#{@splits_text_on.source}/" : (@splits_text_on ? @splits_text_on : '-')}
+Removes chars after split: #{@removes_characters_after_splitting_regexp ? "/#{@removes_characters_after_splitting_regexp.source}/" : '-'}
+Normalizes words:          #{@normalizes_words_regexp_replaces ? @normalizes_words_regexp_replaces : '-'}
+Rejects tokens?            #{reject_condition_location ? "Yes, see line #{reject_condition_location} in app/application.rb" : '-'}
+Substitutes chars?         #{@substituter ? "Yes, using #{@substituter}." : '-' }
+Case sensitive?            #{@case_sensitive ? "Yes." : "-"}
+      TOKENIZER
+    end
+    # Stopwords.
+    #
+    # We only allow regexps (even if string would be okay
+    # too for gsub! - it's too hard to understand)
+    #
+    # Raises an ArgumentError (see check_argument_in) if the
+    # argument is not a Regexp.
+    #
+    def stopwords regexp
+      check_argument_in __method__, Regexp, regexp
+      @remove_stopwords_regexp = regexp
+    end
+    # Removes everything that matches the stopwords regexp from the text.
+    #
+    # Note: Works in place (gsub!). Does nothing if no stopwords regexp
+    # has been configured. Returns the text.
+    #
+    def remove_stopwords text
+      text.gsub! @remove_stopwords_regexp, EMPTY_STRING if @remove_stopwords_regexp
+      text
+    end
+    # Matches a text that consists of just a single word: word characters
+    # and colons, optionally followed by one of . * ~ and by one
+    # whitespace character.
+    #
+    @@non_single_stopword_regexp = /^\b[\w:]+?\b[\.\*\~]?\s?$/
+    # Removes the stopwords from the text, except if the text is a single
+    # word (see the regexp above), in which case it is returned untouched.
+    #
+    def remove_non_single_stopwords text
+      return text if text.match @@non_single_stopword_regexp
+      remove_stopwords text
+    end
+    # Illegals.
+    #
+    # We only allow regexps (even if string would be okay
+    # too for gsub! - it's too hard to understand)
+    #
+    # Raises an ArgumentError (see check_argument_in) if the
+    # argument is not a Regexp.
+    #
+    def removes_characters regexp
+      check_argument_in __method__, Regexp, regexp
+      @removes_characters_regexp = regexp
+    end
+    # Removes everything that matches the removes_characters regexp
+    # from the text.
+    #
+    # Note: Works in place (gsub!). Does nothing if no such regexp
+    # has been configured. Returns the text.
+    #
+    def remove_illegals text
+      text.gsub! @removes_characters_regexp, EMPTY_STRING if @removes_characters_regexp
+      text
+    end
+    # Splitting.
+    #
+    # We allow Strings and Regexps.
+    # Note: We do not test against to_str since symbols do not work with String#split.
+    #
+    # Raises an ArgumentError for any other type of argument.
+    #
+    def splits_text_on regexp_or_string
+      raise ArgumentError.new "#{__method__} takes a Regexp or String as argument, not a #{regexp_or_string.class}." unless Regexp === regexp_or_string || String === regexp_or_string
+      @splits_text_on = regexp_or_string
+    end
+    # Splits the text on the configured pattern (see splits_text_on)
+    # and returns the resulting array.
+    #
+    def split text
+      text.split @splits_text_on
+    end
+    # Normalizing.
+    #
+    # We only allow arrays.
+    #
+    # More precisely: Anything that responds to to_ary. Each element is
+    # used as a [regexp, replacement] pair in normalize_with_patterns.
+    #
+    def normalizes_words regexp_replaces
+      raise ArgumentError.new "#{__method__} takes an Array of replaces as argument, not a #{regexp_replaces.class}." unless regexp_replaces.respond_to?(:to_ary)
+      @normalizes_words_regexp_replaces = regexp_replaces
+    end
+    # Normalizes the text in place using the configured patterns, then
+    # removes the characters configured with removes_characters_after_splitting.
+    #
+    # Note: Returns the text untouched - without the removal step - if
+    # no patterns have been configured.
+    #
+    def normalize_with_patterns text
+      return text unless @normalizes_words_regexp_replaces
+      # gsub! returns nil if nothing has been replaced, so the loop
+      # stops after the first pattern that actually changed the text.
+      #
+      @normalizes_words_regexp_replaces.each do |regex, replace|
+        # This should be sufficient
+        #
+        text.gsub!(regex, replace) and break
+      end
+      remove_after_normalizing_illegals text
+      text
+    end
+    # Illegal after normalizing.
+    #
+    # We only allow regexps (even if string would be okay
+    # too for gsub! - it's too hard to understand)
+    #
+    # Raises an ArgumentError (see check_argument_in) if the
+    # argument is not a Regexp.
+    #
+    def removes_characters_after_splitting regexp
+      check_argument_in __method__, Regexp, regexp
+      @removes_characters_after_splitting_regexp = regexp
+    end
+    # Removes everything that matches the removes_characters_after_splitting
+    # regexp from the text, in place.
+    #
+    # Note: Unlike the other remove methods this one does not return the
+    # text: The result is that of gsub! (nil if nothing has been replaced),
+    # or nil if no regexp has been configured.
+    #
+    def remove_after_normalizing_illegals text
+      text.gsub! @removes_characters_after_splitting_regexp, EMPTY_STRING if @removes_characters_after_splitting_regexp
+    end
+    # Substitute Characters with this substituter.
+    #
+    # Default is European Character substitution.
+    #
+    # Raises an ArgumentError if the substituter does not respond
+    # to #substitute.
+    #
+    def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
+      raise ArgumentError.new "The substitutes_characters_with option needs a character substituter, which responds to #substitute." unless substituter.respond_to?(:substitute)
+      @substituter = substituter
+    end
+    # Returns the result of the substituter's #substitute for the text,
+    # or the text itself if no substituter has been configured.
+    #
+    # Note: "substituter??" is the predicate substituter? followed by
+    # the ternary operator.
+    #
+    def substitute_characters text
+      substituter?? substituter.substitute(text) : text
+    end
+    # Reject tokens after tokenizing based on the given criteria.
+    #
+    # Note: Currently only for indexing.
+    #
+    def reject_token_if &condition
+      @reject_condition = condition
+    end
+    # Removes (in place) all tokens for which the reject condition holds.
+    #
+    # Note: As it returns the result of Array#reject!, the return value
+    # is nil if no token was removed.
+    #
+    def reject tokens
+      tokens.reject! &@reject_condition
+    end
+    # Sets whether this tokenizer is case sensitive.
+    #
+    def case_sensitive case_sensitive
+      @case_sensitive = case_sensitive
+    end
+    # Whether to downcase: true unless configured as case sensitive.
+    #
+    def downcase?
+      !@case_sensitive
+    end
+    # Checks if the right argument type has been given.
+    #
+    # Raises an ArgumentError unless type === argument.
+    #
+    # Note: The condition block is accepted, but not used.
+    #
+    def check_argument_in method, type, argument, &condition
+      raise ArgumentError.new "Application##{method} takes a #{type} as argument, not a #{argument.class}." unless type === argument
+    end
+    # Returns a number of tokens, generated from the given text.
+    #
+    # Note:
+    #  * preprocess, pretokenize are hooks
+    #
+    # Returns empty_tokens if the preprocessed text is blank, or
+    # if no words remain after pretokenizing.
+    #
+    # NOTE(review): tokens_for and empty_tokens are not defined in this
+    # class - presumably subclasses need to provide them.
+    #
+    def tokenize text
+      text   = preprocess text  # processing the text
+      return empty_tokens if text.blank?
+      words  = pretokenize text # splitting and preparations for tokenizing
+      return empty_tokens if words.empty?
+      tokens = tokens_for words # creating tokens / strings
+               process tokens   # processing tokens / strings
+    end
+    # The configured character substituter (nil if none has been set).
+    # The predicate form substituter? returns the very same object.
+    #
+    attr_reader :substituter
+    alias substituter? substituter
+    # Configures the tokenizer from an options hash.
+    #
+    # Options (each is passed to the method of the same name, if given):
+    #  * removes_characters
+    #  * contracts_expressions
+    #  * stopwords
+    #  * normalizes_words
+    #  * removes_characters_after_splitting
+    #  * substitutes_characters_with
+    #  * case_sensitive (applied unless nil, so false is applied too)
+    #  * splits_text_on (default is /\s/)
+    #  * reject_token_if or rejects_token_if (default is :blank?)
+    #
+    # NOTE(review): contracts_expressions is not defined in this class as
+    # shown - confirm that it exists elsewhere, otherwise passing the
+    # contracts_expressions option fails.
+    #
+    def initialize options = {}
+      removes_characters options[:removes_characters]                                 if options[:removes_characters]
+      contracts_expressions *options[:contracts_expressions]                          if options[:contracts_expressions]
+      stopwords options[:stopwords]                                                   if options[:stopwords]
+      normalizes_words options[:normalizes_words]                                     if options[:normalizes_words]
+      removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
+      substitutes_characters_with options[:substitutes_characters_with]               if options[:substitutes_characters_with]
+      case_sensitive options[:case_sensitive]                                         unless options[:case_sensitive].nil?
+      # Defaults.
+      #
+      splits_text_on options[:splits_text_on] || /\s/
+      reject_token_if &(options[:reject_token_if] || options[:rejects_token_if] || :blank?) # TODO Decide on using an s or not.
+    end
+    # Default preprocessing hook.
+    #
+    # Does:
+    # 1. Character substitution.
+    # 2. Remove illegal expressions.
+    # 3. Remove non-single stopwords. (Stopwords that occur with other words)
+    #
+    # Note: Steps 2 and 3 change the text in place, which is why
+    # their return values are not used here.
+    #
+    def preprocess text
+      text = substitute_characters text
+      remove_illegals text
+      # We do not remove single stopwords e.g. in the indexer for
+      # an entirely different reason than in the query tokenizer.
+      # An indexed thing with just name "UND" (a possible stopword)
+      # should not lose its name.
+      #
+      remove_non_single_stopwords text
+      text
+    end
+    # Pretokenizing.
+    #
+    # Does:
+    # 1. Split the text into words.
+    # 2. Normalize each word.
+    #
+    # Returns the array of words (each word is normalized in place).
+    #
+    def pretokenize text
+      words = split text
+      words.collect! do |word|
+        normalize_with_patterns word
+        word
+      end
+    end
+    # Basic postprocessing (overridden in both query/index tokenizers).
+    #
+    # Returns the tokens (rejecting works in place).
+    #
+    def process tokens
+      reject tokens # Reject any tokens that don't meet criteria
+      tokens
+    end
+    # # Converts words into real tokens.
+    # #
+    # def tokens_for words
+    #   Query::Tokens.new words.collect! { |word| token_for word }
+    # end
+    # Turns non-blank text into symbols.
+    #
+    # Returns nil for blank text.
+    #
+    def symbolize text
+      text.blank? ? nil : text.to_sym
+    end
+  end
+end

data/lib/picky/tokenizers/index.rb ADDED Viewed

@@ -0,0 +1,30 @@
+module Tokenizers
+  # The tokenizer used for indexing.
+  #
+  # Subclass it for specialised indexing tokenizers and
+  # define which one to use in the configuration.
+  #
+  class Index < Base
+    class << self
+      # Sets the default indexing tokenizer.
+      #
+      attr_writer :default
+      # The default indexing tokenizer.
+      #
+      # Note: If none has been set, a new one without
+      # options is created on first access and remembered.
+      #
+      def default
+        @default ||= new
+      end
+    end
+    # Turns the words into symbols (in indexing, symbols
+    # are used instead of real token objects).
+    #
+    # Note: Works in place - the words are downcased (if this
+    # tokenizer downcases) and the passed array is changed.
+    #
+    def tokens_for words
+      words.map! do |word|
+        word.downcase! if downcase?
+        word.to_sym
+      end
+    end
+    # No tokens: An empty array.
+    #
+    def empty_tokens
+      Array.new
+    end
+  end
+end

data/lib/picky/tokenizers/location.rb ADDED Viewed

@@ -0,0 +1,49 @@
+module Tokenizers
+  # A tokenizer for location values, using a Calculations::Location
+  # to recalculate them.
+  #
+  # NOTE(review): #tokenize is marked as unfinished (see its TODO) and
+  # refers to several names that are not defined in this class as shown.
+  #
+  class Location < Base
+    # The Calculations::Location created in #initialize.
+    #
+    attr_reader :calculation
+    # Options (besides the ones handled by Base#initialize):
+    #  * grid: Passed on to Calculations::Location.new
+    #  * precision: Passed on to Calculations::Location.new (default is 1)
+    #
+    def initialize options = {}
+      super options
+      grid      = options[:grid]
+      precision = options[:precision] || 1
+      @calculation = Calculations::Location.new grid, precision
+      # 1.0 / 0 is Infinity - presumably the starting value for finding the minimum.
+      #
+      @minimum = 1.0 / 0
+      @locations = []
+    end
+    # TODO Work on this!
+    #
+    # Yields indexed_id and each recalculated location (as a String)
+    # to the given block.
+    #
+    # NOTE(review):
+    #  * The text parameter is not used in the body.
+    #  * source, category, locations and (outside the harvest block) minimum
+    #    are not defined in this class as shown - confirm where they are
+    #    supposed to come from.
+    #  * @minimum and @locations from #initialize are not read here.
+    #
+    def tokenize text
+      # Gather min/max.
+      #
+      source.harvest category do |indexed_id, location|
+        location = location.to_f
+        # NOTE(review): The assignment makes minimum a local variable of this
+        # block, so it looks like it is nil when compared - confirm the intent.
+        #
+        minimum = location if location < minimum
+        locations << [indexed_id, location]
+      end
+      calculation.minimum = minimum
+      # Recalculate locations.
+      #
+      locations.each do |indexed_id, location|
+        calculation.recalculated_range(location).each do |new_location|
+          yield indexed_id, new_location.to_s
+        end
+      end
+      # TODO Move to the right place.
+      #
+      category.indexing_exact[:location_minimum] = minimum
+    end
+  end
+end

data/lib/picky/tokenizers/query.rb ADDED Viewed

@@ -0,0 +1,55 @@
+# encoding: utf-8
+#
+module Tokenizers
+  # There are a few class methods that you can use to configure how a query works.
+  #
+  # removes_characters regexp
+  # illegal_after_normalizing regexp
+  # stopwords regexp
+  # contracts_expressions regexp, to_string
+  # splits_text_on regexp
+  # normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
+  #
+  class Query < Base
+    def self.default= new_default
+      @default = new_default
+    end
+    def self.default
+      @default ||= new
+    end
+    attr_reader :maximum_tokens
+    def initialize options = {}
+      super options
+      @maximum_tokens = options[:maximum_tokens] || 5
+    end
+    # Let each token process itself.
+    # Reject, limit, and partialize tokens.
+    #
+    # In querying we work with real tokens (in indexing it's just symbols).
+    #
+    def process tokens
+      tokens.reject                # Reject any tokens that don't meet criteria.
+      tokens.cap maximum_tokens    # Cut off superfluous tokens.
+      tokens.partialize_last       # Set certain tokens as partial.
+      tokens
+    end
+    # Converts words into real tokens.
+    #
+    def tokens_for words
+      ::Query::Tokens.processed words, downcase?
+    end
+    # Returns a tokens object.
+    #
+    def empty_tokens
+      ::Query::Tokens.new
+    end
+  end
+end

data/lib/tasks/index.rake CHANGED Viewed

@@ -23,9 +23,10 @@ namespace :index do
   desc "Generates a specific index from index snapshots (category optional)."
   task :specific, [:index, :category] => :application do |_, options|
     index, category = options.index, options.category
-    specific_index = Indexes.find index.to_sym, (category && category.to_sym)
-    specific_index.index!
-    specific_index.cache!
+    specific = Indexes[index]
+    specific = specific[category] if category
+    specific.index
   end
 end

data/lib/tasks/try.rake CHANGED Viewed

@@ -6,7 +6,7 @@ namespace :try do
   task :index, [:text, :index, :category] => :application do |_, options|
     text, index, category = options.text, options.index, options.category
-    tokenizer = category ? Indexes.find(index, category).tokenizer : Internals::Tokenizers::Index.default
+    tokenizer = category ? Indexes.find(index, category).tokenizer : Tokenizers::Index.default
     puts "\"#{text}\" is saved in the index as              #{tokenizer.tokenize(text.dup).to_a}"
   end
@@ -15,7 +15,7 @@ namespace :try do
   task :query, [:text] => :application do |_, options|
     text = options.text
-    puts "\"#{text}\" as a search will be preprocessed into #{Internals::Tokenizers::Query.default.tokenize(text.dup).to_a.map(&:to_s).map(&:to_sym)}"
+    puts "\"#{text}\" as a search will be preprocessed into #{Tokenizers::Query.default.tokenize(text.dup).to_a.map(&:to_s).map(&:to_sym)}"
     puts
     puts "(category qualifiers, e.g. title: are removed if they do not exist as a qualifier, so 'toitle:bla' -> 'bla')"
   end

data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb RENAMED Viewed

@@ -2,7 +2,7 @@
 #
 require 'spec_helper'
-describe Internals::Adapters::Rack::Base do
+describe Adapters::Rack::Base do
   before(:each) do
     @adapter = described_class.new

data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb RENAMED Viewed

@@ -2,7 +2,7 @@
 #
 require 'spec_helper'
-describe Internals::Adapters::Rack::LiveParameters do
+describe Adapters::Rack::LiveParameters do
   let(:live_parameters) { stub :live_parameters }
   let(:adapter) { described_class.new live_parameters }

data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb RENAMED Viewed

@@ -2,7 +2,7 @@
 #
 require 'spec_helper'
-describe Internals::Adapters::Rack::Query do
+describe Adapters::Rack::Query do
   before(:each) do
     @query   = stub :query

data/spec/lib/application_spec.rb CHANGED Viewed

@@ -15,8 +15,8 @@ describe Application do
           route %r{^/books} => Search.new(books)
         end
-        Internals::Tokenizers::Index.default.tokenize 'some text'
-        Internals::Tokenizers::Query.default.tokenize 'some text'
+        Tokenizers::Index.default.tokenize 'some text'
+        Tokenizers::Query.default.tokenize 'some text'
       }.should_not raise_error
     end
     it "should run ok" do
@@ -105,7 +105,7 @@ describe Application do
       lambda { Application.rack_adapter }.should_not raise_error
     end
     it "should return a new FrontendAdapters::Rack instance" do
-      Application.rack_adapter.should be_kind_of(Internals::FrontendAdapters::Rack)
+      Application.rack_adapter.should be_kind_of(FrontendAdapters::Rack)
     end
     it "should cache the instance" do
       Application.rack_adapter.should == Application.rack_adapter

data/spec/lib/{internals/index → backend}/file/basic_spec.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 require 'spec_helper'
-describe Internals::Index::File::Basic do
+describe Backend::File::Basic do
   let(:file) { described_class.new 'some/cache/path/to/file' }

data/spec/lib/{internals/index → backend}/file/json_spec.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 require 'spec_helper'
-describe Internals::Index::File::JSON do
+describe Backend::File::JSON do
   before(:each) do
     @file = described_class.new "some_cache_path"