RubyGems - picky - Versions diffs - 1.4.1 → 1.4.2 - Mend

picky 1.4.1 → 1.4.2

Files changed (229) hide show

data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
data/lib/picky/application.rb +18 -19
data/lib/picky/cores.rb +1 -1
data/lib/picky/generators/aliases.rb +3 -0
data/lib/picky/index/base.rb +179 -0
data/lib/picky/index/memory.rb +28 -0
data/lib/picky/index/redis.rb +28 -0
data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
data/lib/picky/indexed/indexes.rb +11 -7
data/lib/picky/indexing/indexes.rb +14 -8
data/lib/picky/internals/adapters/rack/base.rb +27 -0
data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
data/lib/picky/internals/adapters/rack/query.rb +63 -0
data/lib/picky/internals/adapters/rack.rb +34 -0
data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
data/lib/picky/internals/generators/base.rb +19 -0
data/lib/picky/internals/generators/partial/default.rb +7 -0
data/lib/picky/internals/generators/partial/none.rb +35 -0
data/lib/picky/internals/generators/partial/strategy.rb +29 -0
data/lib/picky/internals/generators/partial/substring.rb +122 -0
data/lib/picky/internals/generators/partial_generator.rb +19 -0
data/lib/picky/internals/generators/similarity/default.rb +9 -0
data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
data/lib/picky/internals/generators/similarity/none.rb +35 -0
data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
data/lib/picky/internals/generators/similarity_generator.rb +19 -0
data/lib/picky/internals/generators/strategy.rb +18 -0
data/lib/picky/internals/generators/weights/default.rb +9 -0
data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
data/lib/picky/internals/generators/weights/strategy.rb +11 -0
data/lib/picky/internals/generators/weights_generator.rb +19 -0
data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
data/lib/picky/internals/index/backend.rb +113 -0
data/lib/picky/internals/index/file/basic.rb +101 -0
data/lib/picky/internals/index/file/json.rb +38 -0
data/lib/picky/internals/index/file/marshal.rb +38 -0
data/lib/picky/internals/index/file/text.rb +60 -0
data/lib/picky/internals/index/files.rb +24 -0
data/lib/picky/internals/index/redis/basic.rb +77 -0
data/lib/picky/internals/index/redis/list_hash.rb +46 -0
data/lib/picky/internals/index/redis/string_hash.rb +35 -0
data/lib/picky/internals/index/redis.rb +44 -0
data/lib/picky/internals/indexed/bundle/base.rb +72 -0
data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
data/lib/picky/internals/indexed/categories.rb +135 -0
data/lib/picky/internals/indexed/category.rb +90 -0
data/lib/picky/internals/indexed/index.rb +57 -0
data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
data/lib/picky/internals/indexing/bundle/base.rb +219 -0
data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
data/lib/picky/internals/indexing/categories.rb +42 -0
data/lib/picky/internals/indexing/category.rb +120 -0
data/lib/picky/internals/indexing/index.rb +67 -0
data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
data/lib/picky/internals/query/allocation.rb +88 -0
data/lib/picky/internals/query/allocations.rb +137 -0
data/lib/picky/internals/query/combination.rb +80 -0
data/lib/picky/internals/query/combinations/base.rb +84 -0
data/lib/picky/internals/query/combinations/memory.rb +58 -0
data/lib/picky/internals/query/combinations/redis.rb +59 -0
data/lib/picky/internals/query/indexes.rb +180 -0
data/lib/picky/internals/query/qualifiers.rb +81 -0
data/lib/picky/internals/query/token.rb +215 -0
data/lib/picky/internals/query/tokens.rb +89 -0
data/lib/picky/{query → internals/query}/weights.rb +0 -0
data/lib/picky/internals/results/base.rb +106 -0
data/lib/picky/internals/results/full.rb +17 -0
data/lib/picky/internals/results/live.rb +17 -0
data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
data/lib/picky/internals/tokenizers/base.rb +166 -0
data/lib/picky/internals/tokenizers/index.rb +63 -0
data/lib/picky/internals/tokenizers/query.rb +79 -0
data/lib/picky/loader.rb +148 -112
data/lib/picky/query/base.rb +57 -26
data/lib/picky/query/full.rb +1 -1
data/lib/picky/query/live.rb +1 -1
data/lib/picky/sources/db.rb +27 -6
data/lib/tasks/index.rake +3 -3
data/lib/tasks/try.rake +2 -2
data/spec/lib/aliases_spec.rb +9 -0
data/spec/lib/application_spec.rb +3 -3
data/spec/lib/generators/aliases_spec.rb +1 -0
data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
data/spec/lib/index_bundle_spec.rb +71 -0
data/spec/lib/indexed/indexes_spec.rb +61 -0
data/spec/lib/indexing/indexes_spec.rb +94 -24
data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
data/spec/lib/internals/results/base_spec.rb +105 -0
data/spec/lib/internals/results/full_spec.rb +78 -0
data/spec/lib/internals/results/live_spec.rb +88 -0
data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
data/spec/lib/query/allocation_spec.rb +12 -12
data/spec/lib/query/allocations_spec.rb +19 -19
data/spec/lib/query/base_spec.rb +28 -4
data/spec/lib/query/combination_spec.rb +8 -9
data/spec/lib/query/combinations/base_spec.rb +116 -0
data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
data/spec/lib/query/combinations/redis_spec.rb +132 -0
data/spec/lib/query/full_spec.rb +2 -2
data/spec/lib/query/indexes_spec.rb +81 -0
data/spec/lib/query/live_spec.rb +3 -3
data/spec/lib/query/qualifiers_spec.rb +6 -6
data/spec/lib/query/token_spec.rb +38 -38
data/spec/lib/query/tokens_spec.rb +35 -35
data/spec/lib/sources/db_spec.rb +23 -18
metadata +212 -181
data/lib/picky/adapters/rack/base.rb +0 -23
data/lib/picky/adapters/rack/live_parameters.rb +0 -33
data/lib/picky/adapters/rack/query.rb +0 -59
data/lib/picky/adapters/rack.rb +0 -28
data/lib/picky/cacher/convenience.rb +0 -3
data/lib/picky/cacher/generator.rb +0 -15
data/lib/picky/cacher/partial/default.rb +0 -5
data/lib/picky/cacher/partial/none.rb +0 -31
data/lib/picky/cacher/partial/strategy.rb +0 -21
data/lib/picky/cacher/partial/substring.rb +0 -118
data/lib/picky/cacher/partial_generator.rb +0 -15
data/lib/picky/cacher/similarity/default.rb +0 -7
data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
data/lib/picky/cacher/similarity/none.rb +0 -31
data/lib/picky/cacher/similarity/strategy.rb +0 -9
data/lib/picky/cacher/similarity_generator.rb +0 -15
data/lib/picky/cacher/strategy.rb +0 -12
data/lib/picky/cacher/weights/default.rb +0 -7
data/lib/picky/cacher/weights/logarithmic.rb +0 -39
data/lib/picky/cacher/weights/strategy.rb +0 -9
data/lib/picky/cacher/weights_generator.rb +0 -15
data/lib/picky/frontend_adapters/rack.rb +0 -150
data/lib/picky/index/bundle.rb +0 -54
data/lib/picky/index/file/basic.rb +0 -97
data/lib/picky/index/file/json.rb +0 -34
data/lib/picky/index/file/marshal.rb +0 -34
data/lib/picky/index/file/text.rb +0 -56
data/lib/picky/index/files.rb +0 -118
data/lib/picky/index_api.rb +0 -175
data/lib/picky/indexed/bundle.rb +0 -54
data/lib/picky/indexed/categories.rb +0 -131
data/lib/picky/indexed/category.rb +0 -85
data/lib/picky/indexed/index.rb +0 -39
data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
data/lib/picky/indexing/bundle.rb +0 -213
data/lib/picky/indexing/categories.rb +0 -38
data/lib/picky/indexing/category.rb +0 -117
data/lib/picky/indexing/index.rb +0 -55
data/lib/picky/query/allocation.rb +0 -82
data/lib/picky/query/allocations.rb +0 -130
data/lib/picky/query/combination.rb +0 -74
data/lib/picky/query/combinations.rb +0 -105
data/lib/picky/query/qualifiers.rb +0 -77
data/lib/picky/query/token.rb +0 -202
data/lib/picky/query/tokens.rb +0 -86
data/lib/picky/query/weigher.rb +0 -165
data/lib/picky/results/base.rb +0 -102
data/lib/picky/results/full.rb +0 -13
data/lib/picky/results/live.rb +0 -13
data/lib/picky/tokenizers/base.rb +0 -161
data/lib/picky/tokenizers/index.rb +0 -58
data/lib/picky/tokenizers/query.rb +0 -74
data/spec/lib/cacher/partial/default_spec.rb +0 -15
data/spec/lib/cacher/partial/none_spec.rb +0 -17
data/spec/lib/cacher/weights_generator_spec.rb +0 -21
data/spec/lib/results/base_spec.rb +0 -257
data/spec/lib/results/live_spec.rb +0 -15

data/lib/picky/internals/query/token.rb ADDED Viewed

@@ -0,0 +1,215 @@
+module Internals
+  module Query
+    # This is a query token. Together with other tokens it makes up a query.
+    #
+    # It remembers the original form, and a normalized form.
+    #
+    # It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
+    #
+    # TODO Make partial / similarity char configurable.
+    #
+    class Token # :nodoc:all
+      attr_reader :text, :original
+      attr_writer :similar
+      delegate :blank?, :to => :text
+      # Normal initializer.
+      #
+      # Note: Use this if you do not want a qualified and normalized token.
+      #
+      def initialize text
+        @text = text
+      end
+      # Returns a qualified and normalized token.
+      #
+      # Note: Use this in the search engine if you need a qualified
+      #       and normalized token. I.e. one prepared for a search.
+      #
+      def self.processed text
+        token = new text
+        token.qualify
+        token.extract_original
+        token.partialize
+        token.similarize
+        token.remove_illegals
+        token
+      end
+      # This returns a predefined category name if the user has given one.
+      #
+      def user_defined_category_name
+        @qualifier
+      end
+      # Extracts a qualifier for this token and pre-assigns an allocation.
+      #
+      # Note: Removes the qualifier if it is not allowed.
+      #
+      def qualify
+        @qualifier, @text = split @text
+        @qualifier = Query::Qualifiers.instance.normalize @qualifier
+      end
+      def extract_original
+        @original = @text.dup
+      end
+      # Partial is a conditional setter.
+      #
+      # It is only settable if it hasn't been set yet.
+      #
+      def partial= partial
+        @partial = partial if @partial.nil?
+      end
+      def partial?
+        !@similar && @partial
+      end
+      # If the text ends with *, partialize it. If with ", don't.
+      #
+      @@no_partial = /\"\Z/
+      @@partial    = /\*\Z/
+      def partialize
+        self.partial = false and return if @text =~ @@no_partial
+        self.partial = true if @text =~ @@partial
+      end
+      # If the text ends with ~ similarize it. If with ", don't.
+      #
+      @@no_similar = /\"\Z/
+      @@similar    = /\~\Z/
+      def similarize
+        self.similar = false and return if @text =~ @@no_similar
+        self.similar = true if @text =~ @@similar
+      end
+      def similar?
+        @similar
+      end
+      # Normalizes this token's text.
+      #
+      @@illegals = /["*~]/
+      def remove_illegals
+        @text.gsub! @@illegals, '' unless @text.blank?
+      end
+      # Visitor for tokenizer.
+      #
+      # TODO Rewrite!!!
+      #
+      def tokenize_with tokenizer
+        @text = tokenizer.normalize @text
+      end
+      # TODO spec!
+      #
+      # TODO Rewrite!!
+      #
+      def tokenized tokenizer
+        tokenizer.tokenize(@text.to_s).each do |text|
+          yield text
+        end
+      end
+      # Returns an array of possible combinations.
+      #
+      def possible_combinations_in type
+        type.possible_combinations self
+      end
+      # Returns a token with the next similar text.
+      #
+      # TODO Rewrite this. It is hard to understand. Also spec performance.
+      #
+      def next_similar_token category
+        token = self.dup
+        token if token.next_similar category.bundle_for(token)
+      end
+      # Sets and returns the next similar word.
+      #
+      # Note: Also overrides the original.
+      #
+      def next_similar bundle
+        @text = @original = (similarity(bundle).shift || return) if similar?
+      end
+      # Lazy similar reader.
+      #
+      def similarity bundle = nil
+        @similarity || @similarity = generate_similarity_for(bundle)
+      end
+      # Returns an enumerator that traverses over the similar.
+      #
+      # Note: The dup isn't too nice – since it is needed on account of the shift, above.
+      #       (We avoid a StopIteration exception. Which of both is less evil?)
+      #
+      def generate_similarity_for bundle
+        bundle.similar(@text).dup || []
+      end
+      # Generates a solr term from this token.
+      #
+      # E.g. "name:heroes~0.75"
+      #
+      @@solr_fuzzy_mapping = {
+        1 => :'',
+        2 => :'',
+        3 => :'',
+        4 => :'~0.74',
+        5 => :'~0.78',
+        6 => :'~0.81',
+        7 => :'~0.83',
+        8 => :'~0.85',
+        9 => :'~0.87',
+       10 => :'~0.89'
+      }
+      @@solr_fuzzy_mapping.default = :'~0.9'
+      def to_solr
+        blank? ? '' : (to_s + @@solr_fuzzy_mapping[@text.size].to_s)
+      end
+      #
+      #
+      def to_result
+        [@original, @text]
+      end
+      # Internal identifier.
+      #
+      # TODO Uh.
+      #
+      def identifier
+        "#{similar?? :similarity : :index}:#{@text}"
+      end
+      # Displays the qualifier text and the text, joined.
+      #
+      # e.g. name:meier
+      #
+      def to_s
+        [@qualifier, @text].compact.join ':'
+      end
+      private
+        # Splits text into a qualifier and text.
+        #
+        # Returns [qualifier, text].
+        #
+        def split unqualified_text
+          qualifier, text = (unqualified_text || '').split(':', 2)
+          if text.blank?
+            [nil, (qualifier || '')]
+          else
+            [qualifier, text]
+          end
+        end
+    end
+  end
+end

data/lib/picky/internals/query/tokens.rb ADDED Viewed

@@ -0,0 +1,89 @@
+# encoding: utf-8
+#
+module Internals
+  #
+  #
+  module Query
+    # This class primarily handles switching through similar token constellations.
+    #
+    class Tokens # :nodoc:all
+      # Basically delegates to its internal tokens array.
+      #
+      self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
+      #
+      #
+      def initialize tokens = []
+        @tokens = tokens
+      end
+      #
+      #
+      def tokenize_with tokenizer
+        @tokens.each { |token| token.tokenize_with(tokenizer) }
+      end
+      # Generates an array in the form of
+      # [
+      #  [combination],                           # of token 1
+      #  [combination, combination, combination], # of token 2
+      #  [combination, combination]               # of token 3
+      # ]
+      #
+      # TODO If we want token behaviour defined per Query, we can
+      #      compact! here
+      #
+      def possible_combinations_in type
+        @tokens.inject([]) do |combinations, token|
+          combinations << token.possible_combinations_in(type)
+        end
+        # TODO compact! if ignore_unassigned_tokens
+      end
+      # Makes the last of the tokens partial.
+      #
+      def partialize_last
+        @tokens.last.partial = true unless empty?
+      end
+      # Caps the tokens to the maximum.
+      #
+      def cap maximum
+        @tokens.slice!(maximum..-1) if cap?(maximum)
+      end
+      def cap? maximum
+        @tokens.size > maximum
+      end
+      # Rejects blank tokens.
+      #
+      def reject
+        @tokens.reject! &:blank?
+      end
+      # Returns a solr query.
+      #
+      def to_solr_query
+        @tokens.map(&:to_solr).join ' '
+      end
+      #
+      #
+      def originals
+        @tokens.map(&:original)
+      end
+      # Just join the token original texts.
+      #
+      def to_s
+        originals.join ' '
+      end
+    end
+  end
+end

data/lib/picky/{query → internals/query}/weights.rb RENAMED Viewed

File without changes

data/lib/picky/internals/results/base.rb ADDED Viewed

@@ -0,0 +1,106 @@
+module Internals
+  module Results # :nodoc:all
+    # This is the internal results object. Usually, to_marshal, or to_json
+    # is called on it to get a string for the answer.
+    #
+    class Base
+      # Duration is set externally by the query.
+      #
+      attr_writer :duration
+      attr_reader :allocations, :offset
+      # Takes instances of Query::Allocations as param.
+      #
+      def initialize offset = 0, allocations = Query::Allocations.new
+        @offset = offset
+        @allocations = allocations # || Query::Allocations.new
+      end
+      # Create new results and calculate the ids.
+      #
+      def self.from offset, allocations
+        results = new offset, allocations
+        results.prepare!
+        results
+      end
+      #
+      #
+      def serialize
+        { allocations: allocations.to_result,
+          offset:      offset,
+          duration:    duration,
+          total:       total }
+      end
+      # The default format is json.
+      #
+      def to_response options = {}
+        to_json options
+      end
+      # Convert to json format.
+      #
+      def to_json options = {}
+        serialize.to_json options
+      end
+      # This starts the actual processing.
+      #
+      # Without this, the allocations are not processed,
+      # and no ids are calculated.
+      #
+      def prepare!
+        allocations.process! self.max_results, self.offset
+      end
+      # Duration default is 0.
+      #
+      def duration
+        @duration || 0
+      end
+      # The total results. Delegates to the allocations.
+      #
+      # Caches.
+      #
+      def total
+        @total || @total = allocations.total || 0
+      end
+      # How many results are returned.
+      #
+      # Set in config using
+      #   Results::Full.max_results = 20
+      #
+      class_inheritable_accessor :max_results
+      def max_results
+        self.class.max_results
+      end
+      # Convenience methods.
+      #
+      # Delegates to allocations.
+      #
+      def ids amount = 20
+        allocations.ids amount
+      end
+      # Gets an amout of random ids from the allocations.
+      #
+      # Note: Basically delegates to the allocations.
+      #
+      def random_ids amount = 1
+        allocations.random_ids amount
+      end
+      # Human readable log.
+      #
+      def to_log query
+        "|#{Time.now.to_s(:db)}|#{'%8f' % duration}|#{'%-50s' % query}|#{'%8d' % total}|#{'%4d' % offset}|#{'%2d' % allocations.size}|"
+      end
+    end
+  end
+end

data/lib/picky/internals/results/full.rb ADDED Viewed

@@ -0,0 +1,17 @@
+module Internals
+  module Results
+    # Full results are limited to maximally 20 results (by default).
+    #
+    class Full < Base
+      self.max_results = 20
+      def to_log *args
+        ?> + super
+      end
+    end
+  end
+end

data/lib/picky/internals/results/live.rb ADDED Viewed

@@ -0,0 +1,17 @@
+module Internals
+  module Results
+    # Live results are not returning any results.
+    #
+    class Live < Base
+      self.max_results = 0
+      def to_log *args
+        ?. + super
+      end
+    end
+  end
+end

data/lib/picky/{solr → internals/solr}/schema_generator.rb RENAMED Viewed

File without changes

data/lib/picky/internals/tokenizers/base.rb ADDED Viewed

@@ -0,0 +1,166 @@
+module Internals
+  module Tokenizers # :nodoc:all
+    # Defines tokenizing processes used both in indexing and querying.
+    #
+    class Base
+      # TODO Move EMPTY_STRING top level.
+      #
+      EMPTY_STRING = ''.freeze
+      # Stopwords.
+      #
+      def stopwords regexp
+        @remove_stopwords_regexp = regexp
+      end
+      def remove_stopwords text
+        text.gsub! @remove_stopwords_regexp, EMPTY_STRING if @remove_stopwords_regexp
+        text
+      end
+      @@non_single_stopword_regexp = /^\b[\w:]+?\b[\.\*\~]?\s?$/
+      def remove_non_single_stopwords text
+        return text if text.match @@non_single_stopword_regexp
+        remove_stopwords text
+      end
+      # Illegals.
+      #
+      # TODO Should there be a legal?
+      #
+      def removes_characters regexp
+        @removes_characters_regexp = regexp
+      end
+      def remove_illegals text
+        text.gsub! @removes_characters_regexp, EMPTY_STRING if @removes_characters_regexp
+        text
+      end
+      # Splitting.
+      #
+      def splits_text_on regexp
+        @splits_text_on_regexp = regexp
+      end
+      def split text
+        text.split @splits_text_on_regexp
+      end
+      # Normalizing.
+      #
+      def normalizes_words regexp_replaces
+        @normalizes_words_regexp_replaces = regexp_replaces
+      end
+      def normalize_with_patterns text
+        return text unless @normalizes_words_regexp_replaces
+        @normalizes_words_regexp_replaces.each do |regex, replace|
+          # This should be sufficient
+          #
+          text.gsub!(regex, replace) and break
+        end
+        remove_after_normalizing_illegals text
+        text
+      end
+      # Illegal after normalizing.
+      #
+      def removes_characters_after_splitting regexp
+        @removes_characters_after_splitting_regexp = regexp
+      end
+      def remove_after_normalizing_illegals text
+        text.gsub! @removes_characters_after_splitting_regexp, EMPTY_STRING if @removes_characters_after_splitting_regexp
+      end
+      # Substitute Characters with this substituter.
+      #
+      # Default is European Character substitution.
+      #
+      def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
+        # TODO Raise if it doesn't quack substitute?
+        @substituter = substituter
+      end
+      def substitute_characters text
+        substituter?? substituter.substitute(text) : text
+      end
+      # Reject tokens after tokenizing based on the given criteria.
+      #
+      # Note: Currently only for indexing. TODO Redesign and write for both!
+      #
+      def reject_token_if &condition
+        @reject_condition = condition
+      end
+      def reject tokens
+        tokens.reject! &@reject_condition
+      end
+      # Returns a number of tokens, generated from the given text.
+      #
+      # Note:
+      #  * preprocess, pretokenize are hooks
+      #
+      def tokenize text
+        text   = preprocess text  # processing the text
+        return empty_tokens if text.blank?
+        words  = pretokenize text # splitting and preparations for tokenizing
+        return empty_tokens if words.empty?
+        tokens = tokens_for words # creating tokens / strings
+                 process tokens   # processing tokens / strings
+      end
+      attr_reader :substituter
+      alias substituter? substituter
+      def initialize options = {}
+        removes_characters options[:removes_characters]                                 if options[:removes_characters]
+        contracts_expressions *options[:contracts_expressions]                          if options[:contracts_expressions]
+        stopwords options[:stopwords]                                                   if options[:stopwords]
+        normalizes_words options[:normalizes_words]                                     if options[:normalizes_words]
+        removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
+        substitutes_characters_with options[:substitutes_characters_with]               if options[:substitutes_characters_with]
+        # Defaults.
+        #
+        splits_text_on options[:splits_text_on] || /\s/
+        reject_token_if &(options[:reject_token_if] || :blank?)
+      end
+      # Hooks.
+      #
+      # Preprocessing.
+      #
+      def preprocess text; end
+      # Pretokenizing.
+      #
+      def pretokenize text; end
+      # Postprocessing.
+      #
+      def process tokens
+        reject tokens # Reject any tokens that don't meet criteria
+        tokens
+      end
+      # Converts words into real tokens.
+      #
+      def tokens_for words
+        Internals::Query::Tokens.new words.collect! { |word| token_for word }
+      end
+      # Turns non-blank text into symbols.
+      #
+      def symbolize text
+        text.blank? ? nil : text.to_sym
+      end
+      # Returns a tokens object.
+      #
+      def empty_tokens
+        Internals::Query::Tokens.new
+      end
+    end
+  end
+end

data/lib/picky/internals/tokenizers/index.rb ADDED Viewed

@@ -0,0 +1,63 @@
+module Internals
+  module Tokenizers
+    # The base indexing tokenizer.
+    #
+    # Override in indexing subclasses and define in configuration.
+    #
+    class Index < Base
+      def self.default= new_default
+        @default = new_default
+      end
+      def self.default
+        @default ||= new
+      end
+      # Default indexing preprocessing hook.
+      #
+      # Does:
+      # 1. Character substitution.
+      # 2. Downcasing.
+      # 3. Remove illegal expressions.
+      # 4. Remove non-single stopwords. (Stopwords that occur with other words)
+      #
+      def preprocess text
+        text = substitute_characters text
+        text.downcase!
+        remove_illegals text
+        # we do not remove single stopwords for an entirely different
+        # reason than in the query tokenizer.
+        # An indexed thing with just name "UND" (a possible stopword) should not lose its name.
+        #
+        remove_non_single_stopwords text
+        text
+      end
+      # Default indexing pretokenizing hook.
+      #
+      # Does:
+      # 1. Split the text into words.
+      # 2. Normalize each word.
+      #
+      def pretokenize text
+        words = split text
+        words.collect! do |word|
+          normalize_with_patterns word
+          word
+        end
+      end
+      # Does not actually return a token, but a
+      # symbol "token".
+      #
+      def token_for text
+        symbolize text
+      end
+    end
+  end
+end