RubyGems - picky - Versions diffs - 2.7.0 → 3.0.0.pre1 - Mend

picky 2.7.0 → 3.0.0.pre1

Files changed (213) hide show

data/lib/picky/adapters/rack/base.rb +20 -16
data/lib/picky/adapters/rack/live_parameters.rb +28 -24
data/lib/picky/adapters/rack/search.rb +67 -0
data/lib/picky/adapters/rack.rb +27 -23
data/lib/picky/application.rb +246 -236
data/lib/picky/backend/base.rb +115 -119
data/lib/picky/backend/file/basic.rb +102 -98
data/lib/picky/backend/file/json.rb +27 -23
data/lib/picky/backend/file/marshal.rb +32 -28
data/lib/picky/backend/file/text.rb +45 -41
data/lib/picky/backend/files.rb +19 -15
data/lib/picky/backend/redis/basic.rb +76 -72
data/lib/picky/backend/redis/list_hash.rb +40 -36
data/lib/picky/backend/redis/string_hash.rb +30 -26
data/lib/picky/backend/redis.rb +32 -28
data/lib/picky/bundle.rb +82 -57
data/lib/{bundling.rb → picky/bundling.rb} +0 -0
data/lib/picky/calculations/location.rb +51 -47
data/lib/picky/categories.rb +60 -56
data/lib/picky/categories_indexed.rb +73 -82
data/lib/picky/categories_indexing.rb +12 -8
data/lib/picky/category.rb +109 -120
data/lib/picky/category_indexed.rb +39 -41
data/lib/picky/category_indexing.rb +123 -125
data/lib/picky/character_substituters/west_european.rb +32 -26
data/lib/{constants.rb → picky/constants.rb} +0 -0
data/lib/picky/cores.rb +96 -92
data/lib/{deployment.rb → picky/deployment.rb} +0 -0
data/lib/picky/frontend_adapters/rack.rb +133 -118
data/lib/picky/generators/aliases.rb +5 -3
data/lib/picky/generators/base.rb +11 -7
data/lib/picky/generators/partial/default.rb +7 -3
data/lib/picky/generators/partial/none.rb +24 -20
data/lib/picky/generators/partial/strategy.rb +20 -16
data/lib/picky/generators/partial/substring.rb +94 -90
data/lib/picky/generators/partial_generator.rb +11 -7
data/lib/picky/generators/similarity/default.rb +9 -5
data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
data/lib/picky/generators/similarity/metaphone.rb +20 -16
data/lib/picky/generators/similarity/none.rb +23 -19
data/lib/picky/generators/similarity/phonetic.rb +49 -45
data/lib/picky/generators/similarity/soundex.rb +20 -16
data/lib/picky/generators/similarity/strategy.rb +10 -6
data/lib/picky/generators/similarity_generator.rb +11 -7
data/lib/picky/generators/strategy.rb +14 -10
data/lib/picky/generators/weights/default.rb +9 -5
data/lib/picky/generators/weights/logarithmic.rb +30 -26
data/lib/picky/generators/weights/strategy.rb +10 -6
data/lib/picky/generators/weights_generator.rb +11 -7
data/lib/picky/helpers/measuring.rb +20 -16
data/lib/picky/indexed/bundle/base.rb +39 -37
data/lib/picky/indexed/bundle/memory.rb +68 -64
data/lib/picky/indexed/bundle/redis.rb +73 -69
data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
data/lib/picky/indexed/wrappers/category/location.rb +17 -13
data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
data/lib/picky/indexers/base.rb +26 -22
data/lib/picky/indexers/parallel.rb +62 -58
data/lib/picky/indexers/serial.rb +41 -37
data/lib/picky/indexes/index.rb +400 -0
data/lib/picky/indexes/index_indexed.rb +24 -0
data/lib/picky/indexes/index_indexing.rb +138 -0
data/lib/picky/indexes/memory.rb +20 -0
data/lib/picky/indexes/redis.rb +20 -0
data/lib/picky/indexes.rb +68 -61
data/lib/picky/indexes_indexed.rb +16 -12
data/lib/picky/indexes_indexing.rb +41 -37
data/lib/picky/indexing/bundle/base.rb +216 -205
data/lib/picky/indexing/bundle/memory.rb +16 -11
data/lib/picky/indexing/bundle/redis.rb +14 -12
data/lib/picky/indexing/wrappers/category/location.rb +17 -13
data/lib/picky/interfaces/live_parameters.rb +159 -154
data/lib/picky/loader.rb +267 -304
data/lib/picky/loggers/search.rb +20 -13
data/lib/picky/no_source_specified_exception.rb +7 -3
data/lib/picky/performant.rb +6 -2
data/lib/picky/query/allocation.rb +71 -67
data/lib/picky/query/allocations.rb +99 -94
data/lib/picky/query/combination.rb +70 -66
data/lib/picky/query/combinations/base.rb +56 -52
data/lib/picky/query/combinations/memory.rb +36 -32
data/lib/picky/query/combinations/redis.rb +66 -62
data/lib/picky/query/indexes.rb +175 -160
data/lib/picky/query/qualifier_category_mapper.rb +43 -0
data/lib/picky/query/token.rb +165 -172
data/lib/picky/query/tokens.rb +86 -82
data/lib/picky/query/weights.rb +44 -48
data/lib/picky/query.rb +5 -1
data/lib/picky/rack/harakiri.rb +51 -47
data/lib/picky/results.rb +81 -77
data/lib/picky/search.rb +169 -158
data/lib/picky/sinatra.rb +34 -0
data/lib/picky/sources/base.rb +73 -70
data/lib/picky/sources/couch.rb +61 -57
data/lib/picky/sources/csv.rb +68 -64
data/lib/picky/sources/db.rb +139 -135
data/lib/picky/sources/delicious.rb +52 -48
data/lib/picky/sources/mongo.rb +68 -63
data/lib/picky/sources/wrappers/base.rb +20 -16
data/lib/picky/sources/wrappers/location.rb +37 -33
data/lib/picky/statistics.rb +46 -43
data/lib/picky/tasks.rb +3 -0
data/lib/picky/tokenizers/base.rb +192 -187
data/lib/picky/tokenizers/index.rb +25 -21
data/lib/picky/tokenizers/location.rb +33 -29
data/lib/picky/tokenizers/query.rb +49 -43
data/lib/picky.rb +21 -13
data/lib/tasks/application.rake +1 -1
data/lib/tasks/index.rake +3 -3
data/lib/tasks/routes.rake +1 -1
data/lib/tasks/server.rake +1 -1
data/spec/lib/adapters/rack/base_spec.rb +1 -1
data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
data/spec/lib/adapters/rack/query_spec.rb +1 -1
data/spec/lib/application_spec.rb +39 -32
data/spec/lib/backend/file/basic_spec.rb +2 -2
data/spec/lib/backend/file/json_spec.rb +2 -2
data/spec/lib/backend/file/marshal_spec.rb +2 -2
data/spec/lib/backend/file/text_spec.rb +1 -1
data/spec/lib/backend/files_spec.rb +14 -24
data/spec/lib/backend/redis/basic_spec.rb +2 -2
data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
data/spec/lib/backend/redis_spec.rb +20 -13
data/spec/lib/calculations/location_spec.rb +1 -1
data/spec/lib/categories_indexed_spec.rb +16 -34
data/spec/lib/category_indexed_spec.rb +9 -27
data/spec/lib/category_indexing_spec.rb +2 -3
data/spec/lib/category_spec.rb +10 -10
data/spec/lib/character_substituters/west_european_spec.rb +6 -5
data/spec/lib/cores_spec.rb +17 -17
data/spec/lib/extensions/symbol_spec.rb +15 -1
data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
data/spec/lib/generators/aliases_spec.rb +3 -3
data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
data/spec/lib/generators/partial/default_spec.rb +3 -3
data/spec/lib/generators/partial/none_spec.rb +2 -2
data/spec/lib/generators/partial/substring_spec.rb +1 -1
data/spec/lib/generators/partial_generator_spec.rb +3 -3
data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
data/spec/lib/generators/similarity/none_spec.rb +1 -1
data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
data/spec/lib/generators/similarity_generator_spec.rb +2 -2
data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
data/spec/lib/generators/weights_generator_spec.rb +1 -1
data/spec/lib/helpers/measuring_spec.rb +2 -2
data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
data/spec/lib/indexers/base_spec.rb +1 -1
data/spec/lib/indexers/parallel_spec.rb +1 -1
data/spec/lib/indexers/serial_spec.rb +1 -1
data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
data/spec/lib/indexes_class_spec.rb +2 -2
data/spec/lib/indexes_indexed_spec.rb +1 -1
data/spec/lib/indexes_indexing_spec.rb +1 -1
data/spec/lib/indexes_spec.rb +1 -1
data/spec/lib/indexing/bundle/base_spec.rb +7 -5
data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
data/spec/lib/loader_spec.rb +17 -19
data/spec/lib/loggers/search_spec.rb +2 -2
data/spec/lib/query/allocation_spec.rb +1 -1
data/spec/lib/query/allocations_spec.rb +1 -1
data/spec/lib/query/combination_spec.rb +4 -4
data/spec/lib/query/combinations/base_spec.rb +1 -1
data/spec/lib/query/combinations/memory_spec.rb +1 -1
data/spec/lib/query/combinations/redis_spec.rb +1 -1
data/spec/lib/query/indexes_spec.rb +7 -2
data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
data/spec/lib/query/token_spec.rb +32 -53
data/spec/lib/query/tokens_spec.rb +30 -35
data/spec/lib/query/weights_spec.rb +16 -16
data/spec/lib/rack/harakiri_spec.rb +5 -5
data/spec/lib/results_spec.rb +1 -1
data/spec/lib/search_spec.rb +24 -22
data/spec/lib/sinatra_spec.rb +36 -0
data/spec/lib/sources/base_spec.rb +1 -1
data/spec/lib/sources/couch_spec.rb +9 -9
data/spec/lib/sources/csv_spec.rb +7 -7
data/spec/lib/sources/db_spec.rb +2 -2
data/spec/lib/sources/delicious_spec.rb +5 -5
data/spec/lib/sources/mongo_spec.rb +7 -7
data/spec/lib/sources/wrappers/base_spec.rb +2 -2
data/spec/lib/sources/wrappers/location_spec.rb +1 -1
data/spec/lib/statistics_spec.rb +1 -1
data/spec/lib/tokenizers/base_spec.rb +2 -2
data/spec/lib/tokenizers/index_spec.rb +1 -1
data/spec/lib/tokenizers/query_spec.rb +1 -1
metadata +30 -30
data/lib/picky/adapters/rack/query.rb +0 -65
data/lib/picky/index/base.rb +0 -409
data/lib/picky/index/base_indexed.rb +0 -29
data/lib/picky/index/base_indexing.rb +0 -127
data/lib/picky/index/memory.rb +0 -16
data/lib/picky/index/redis.rb +0 -16
data/lib/picky/query/qualifiers.rb +0 -76
data/lib/picky/query/solr.rb +0 -60
data/lib/picky/signals.rb +0 -8
data/lib/picky-tasks.rb +0 -6
data/lib/tasks/spec.rake +0 -11
data/spec/lib/query/qualifiers_spec.rb +0 -31

data/lib/picky/sources/wrappers/location.rb CHANGED Viewed

@@ -1,48 +1,52 @@
-module Sources
+module Picky
-  module Wrappers
+  module Sources
-    # Should this actually just be a tokenizer?
-    #
-    class Location < Base
+    module Wrappers
-      attr_reader :calculation
-      def initialize source, grid, precision = 1
-        super source
-        @calculation = Calculations::Location.new grid, precision
-      end
-      # Yield the data (id, text for id) for the given category.
+      # Should this actually just be a tokenizer?
       #
-      def harvest category
-        minimum = 1.0/0
+      class Location < Base
-        # Cache. TODO Make option?
-        #
-        locations = []
+        attr_reader :calculation
-        # Gather min/max.
-        #
-        source.harvest category do |indexed_id, location|
-          location = location.to_f
-          minimum = location if location < minimum
-          locations << [indexed_id, location]
+        def initialize source, grid, precision = 1
+          super source
+          @calculation = Calculations::Location.new grid, precision
         end
-        calculation.minimum = minimum
-        # Recalculate locations.
+        # Yield the data (id, text for id) for the given category.
         #
-        locations.each do |indexed_id, location|
-          calculation.recalculated_range(location).each do |new_location|
-            yield indexed_id, new_location.to_s
+        def harvest category
+          minimum = 1.0/0
+          # Cache.
+          #
+          locations = []
+          # Gather min/max.
+          #
+          source.harvest category do |indexed_id, location|
+            location = location.to_f
+            minimum = location if location < minimum
+            locations << [indexed_id, location]
           end
+          calculation.minimum = minimum
+          # Recalculate locations.
+          #
+          locations.each do |indexed_id, location|
+            calculation.recalculated_range(location).each do |new_location|
+              yield indexed_id, new_location.to_s
+            end
+          end
+          # TODO Move to the right place.
+          #
+          category.indexing_exact[:location_minimum] = minimum
         end
-        # TODO Move to the right place.
-        #
-        category.indexing_exact[:location_minimum] = minimum
       end
     end

data/lib/picky/statistics.rb CHANGED Viewed

@@ -1,60 +1,63 @@
 # encoding: utf-8
 #
+module Picky
-# Gathers various statistics.
-#
-class Statistics # :nodoc:all
+  # Gathers various statistics.
+  #
+  class Statistics # :nodoc:all
-  def initialize
-    @indexes = ["\033[1mIndexes analysis\033[m:"]
-  end
+    def initialize
+      @indexes = ["\033[1mIndexes analysis\033[m:"]
+    end
-  def preamble
-    loc = lines_of_code File.open('app/application.rb').read
+    def preamble
+      loc = lines_of_code File.open('app/application.rb').read
-    @preamble ||= <<-PREAMBLE
-\033[1mApplication(s)\033[m
-  Definition LOC:  #{"%4d" % loc}
-  Indexes defined: #{"%4d" % Indexes.size}
-PREAMBLE
-  end
+      @preamble ||= <<-PREAMBLE
+  \033[1mApplication(s)\033[m
+    Definition LOC:  #{"%4d" % loc}
+    Indexes defined: #{"%4d" % Indexes.size}
+  PREAMBLE
+    end
-  # Gathers information about the application.
-  #
-  def application
-    preamble
-    @application = Application.apps.map &:indented_to_s
-  end
+    # Gathers information about the application.
+    #
+    def application
+      preamble
+      @application = Application.apps.map &:indented_to_s
+    end
-  # Gathers information about the indexes.
-  #
-  def analyze object
-    object.each_category do |category|
-      @indexes << <<-ANALYSIS
-#{"#{category.index_name}".indented_to_s}\n
-#{"#{category.name}".indented_to_s(4)}\n
-#{"exact\n#{Analyzer.new.analyze(category.indexed_exact).indented_to_s}".indented_to_s(6)}\n
-#{"partial\n#{Analyzer.new.analyze(category.indexed_partial).indented_to_s}".indented_to_s(6)}
-ANALYSIS
+    # Gathers information about the indexes.
+    #
+    def analyze object
+      object.each_category do |category|
+        @indexes << <<-ANALYSIS
+  #{"#{category.index_name}".indented_to_s}\n
+  #{"#{category.name}".indented_to_s(4)}\n
+  #{"exact\n#{Analyzer.new.analyze(category.indexed_exact).indented_to_s}".indented_to_s(6)}\n
+  #{"partial\n#{Analyzer.new.analyze(category.indexed_partial).indented_to_s}".indented_to_s(6)}
+  ANALYSIS
+      end
     end
-  end
-  # Outputs all gathered statistics.
-  #
-  def to_s
-    <<-STATS
+    # Outputs all gathered statistics.
+    #
+    def to_s
+      <<-STATS
-Picky Configuration:
+  Picky Configuration:
-#{[@preamble, @application, @indexes.join("\n")].compact.join("\n")}
-STATS
-  end
+  #{[@preamble, @application, @indexes.join("\n")].compact.join("\n")}
+  STATS
+    end
-  # Internal methods.
-  #
+    # Internal methods.
+    #
+    def lines_of_code text
+      text.scan(/^\s*[^#\s].*$/).size
+    end
-  def lines_of_code text
-    text.scan(/^\s*[^#\s].*$/).size
   end
 end

data/lib/picky/tasks.rb ADDED Viewed

@@ -0,0 +1,3 @@
+all_rake_files = File.expand_path '../../tasks/*.rake', __FILE__
+Dir[all_rake_files].each { |rakefile| load rakefile }

data/lib/picky/tokenizers/base.rb CHANGED Viewed

@@ -1,16 +1,18 @@
-module Tokenizers # :nodoc:all
+module Picky
-  # Defines tokenizing processes used both in indexing and querying.
-  #
-  class Base
+  module Tokenizers # :nodoc:all
-    # TODO Move EMPTY_STRING top level.
+    # Defines tokenizing processes used both in indexing and querying.
     #
-    EMPTY_STRING = ''.freeze
+    class Base
-    def to_s
-      reject_condition_location = @reject_condition.to_s[/:(\d+) \(lambda\)/, 1]
-      <<-TOKENIZER
+      # TODO Move EMPTY_STRING top level.
+      #
+      EMPTY_STRING = ''.freeze
+      def to_s
+        reject_condition_location = @reject_condition.to_s[/:(\d+) \(lambda\)/, 1]
+        <<-TOKENIZER
 Removes characters:        #{@removes_characters_regexp ? "/#{@removes_characters_regexp.source}/" : '-'}
 Stopwords:                 #{@remove_stopwords_regexp ? "/#{@remove_stopwords_regexp.source}/" : '-'}
 Splits text on:            #{@splits_text_on.respond_to?(:source) ? "/#{@splits_text_on.source}/" : (@splits_text_on ? @splits_text_on : '-')}
@@ -19,204 +21,207 @@ Normalizes words:          #{@normalizes_words_regexp_replaces ? @normalizes_wor
 Rejects tokens?            #{reject_condition_location ? "Yes, see line #{reject_condition_location} in app/application.rb" : '-'}
 Substitutes chars?         #{@substituter ? "Yes, using #{@substituter}." : '-' }
 Case sensitive?            #{@case_sensitive ? "Yes." : "-"}
-      TOKENIZER
-    end
+        TOKENIZER
+      end
-    # Stopwords.
-    #
-    # We only allow regexps (even if string would be okay
-    # too for gsub! - it's too hard to understand)
-    #
-    def stopwords regexp
-      check_argument_in __method__, Regexp, regexp
-      @remove_stopwords_regexp = regexp
-    end
-    def remove_stopwords text
-      text.gsub! @remove_stopwords_regexp, EMPTY_STRING if @remove_stopwords_regexp
-      text
-    end
-    @@non_single_stopword_regexp = /^\b[\w:]+?\b[\.\*\~]?\s?$/
-    def remove_non_single_stopwords text
-      return text if text.match @@non_single_stopword_regexp
-      remove_stopwords text
-    end
+      # Stopwords.
+      #
+      # We only allow regexps (even if string would be okay
+      # too for gsub! - it's too hard to understand)
+      #
+      def stopwords regexp
+        check_argument_in __method__, Regexp, regexp
+        @remove_stopwords_regexp = regexp
+      end
+      def remove_stopwords text
+        text.gsub! @remove_stopwords_regexp, EMPTY_STRING if @remove_stopwords_regexp
+        text
+      end
+      @@non_single_stopword_regexp = /^\b[\w:]+?\b[\.\*\~]?\s?$/
+      def remove_non_single_stopwords text
+        return text if text.match @@non_single_stopword_regexp
+        remove_stopwords text
+      end
-    # Illegals.
-    #
-    # We only allow regexps (even if string would be okay
-    # too for gsub! - it's too hard to understand)
-    #
-    def removes_characters regexp
-      check_argument_in __method__, Regexp, regexp
-      @removes_characters_regexp = regexp
-    end
-    def remove_illegals text
-      text.gsub! @removes_characters_regexp, EMPTY_STRING if @removes_characters_regexp
-      text
-    end
+      # Illegals.
+      #
+      # We only allow regexps (even if string would be okay
+      # too for gsub! - it's too hard to understand)
+      #
+      def removes_characters regexp
+        check_argument_in __method__, Regexp, regexp
+        @removes_characters_regexp = regexp
+      end
+      def remove_illegals text
+        text.gsub! @removes_characters_regexp, EMPTY_STRING if @removes_characters_regexp
+        text
+      end
-    # Splitting.
-    #
-    # We allow Strings and Regexps.
-    # Note: We do not test against to_str since symbols do not work with String#split.
-    #
-    def splits_text_on regexp_or_string
-      raise ArgumentError.new "#{__method__} takes a Regexp or String as argument, not a #{regexp_or_string.class}." unless Regexp === regexp_or_string || String === regexp_or_string
-      @splits_text_on = regexp_or_string
-    end
-    def split text
-      text.split @splits_text_on
-    end
+      # Splitting.
+      #
+      # We allow Strings and Regexps.
+      # Note: We do not test against to_str since symbols do not work with String#split.
+      #
+      def splits_text_on regexp_or_string
+        raise ArgumentError.new "#{__method__} takes a Regexp or String as argument, not a #{regexp_or_string.class}." unless Regexp === regexp_or_string || String === regexp_or_string
+        @splits_text_on = regexp_or_string
+      end
+      def split text
+        text.split @splits_text_on
+      end
-    # Normalizing.
-    #
-    # We only allow arrays.
-    #
-    def normalizes_words regexp_replaces
-      raise ArgumentError.new "#{__method__} takes an Array of replaces as argument, not a #{regexp_replaces.class}." unless regexp_replaces.respond_to?(:to_ary)
-      @normalizes_words_regexp_replaces = regexp_replaces
-    end
-    def normalize_with_patterns text
-      return text unless @normalizes_words_regexp_replaces
+      # Normalizing.
+      #
+      # We only allow arrays.
+      #
+      def normalizes_words regexp_replaces
+        raise ArgumentError.new "#{__method__} takes an Array of replaces as argument, not a #{regexp_replaces.class}." unless regexp_replaces.respond_to?(:to_ary)
+        @normalizes_words_regexp_replaces = regexp_replaces
+      end
+      def normalize_with_patterns text
+        return text unless @normalizes_words_regexp_replaces
-      @normalizes_words_regexp_replaces.each do |regex, replace|
-        # This should be sufficient
-        #
-        text.gsub!(regex, replace) and break
+        @normalizes_words_regexp_replaces.each do |regex, replace|
+          # This should be sufficient
+          #
+          text.gsub!(regex, replace) and break
+        end
+        remove_after_normalizing_illegals text
+        text
       end
-      remove_after_normalizing_illegals text
-      text
-    end
-    # Illegal after normalizing.
-    #
-    # We only allow regexps (even if string would be okay
-    # too for gsub! - it's too hard to understand)
-    #
-    def removes_characters_after_splitting regexp
-      check_argument_in __method__, Regexp, regexp
-      @removes_characters_after_splitting_regexp = regexp
-    end
-    def remove_after_normalizing_illegals text
-      text.gsub! @removes_characters_after_splitting_regexp, EMPTY_STRING if @removes_characters_after_splitting_regexp
-    end
+      # Illegal after normalizing.
+      #
+      # We only allow regexps (even if string would be okay
+      # too for gsub! - it's too hard to understand)
+      #
+      def removes_characters_after_splitting regexp
+        check_argument_in __method__, Regexp, regexp
+        @removes_characters_after_splitting_regexp = regexp
+      end
+      def remove_after_normalizing_illegals text
+        text.gsub! @removes_characters_after_splitting_regexp, EMPTY_STRING if @removes_characters_after_splitting_regexp
+      end
-    # Substitute Characters with this substituter.
-    #
-    # Default is European Character substitution.
-    #
-    def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
-      raise ArgumentError.new "The substitutes_characters_with option needs a character substituter, which responds to #substitute." unless substituter.respond_to?(:substitute)
-      @substituter = substituter
-    end
-    def substitute_characters text
-      substituter?? substituter.substitute(text) : text
-    end
+      # Substitute Characters with this substituter.
+      #
+      # Default is European Character substitution.
+      #
+      def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
+        raise ArgumentError.new "The substitutes_characters_with option needs a character substituter, which responds to #substitute." unless substituter.respond_to?(:substitute)
+        @substituter = substituter
+      end
+      def substitute_characters text
+        substituter?? substituter.substitute(text) : text
+      end
-    # Reject tokens after tokenizing based on the given criteria.
-    #
-    # Note: Currently only for indexing.
-    #
-    def reject_token_if &condition
-      @reject_condition = condition
-    end
-    def reject tokens
-      tokens.reject! &@reject_condition
-    end
+      # Reject tokens after tokenizing based on the given criteria.
+      #
+      # Note: Currently only for indexing.
+      #
+      def reject_token_if &condition
+        @reject_condition = condition
+      end
+      def reject tokens
+        tokens.reject! &@reject_condition
+      end
-    def case_sensitive case_sensitive
-      @case_sensitive = case_sensitive
-    end
-    def downcase?
-      !@case_sensitive
-    end
+      def case_sensitive case_sensitive
+        @case_sensitive = case_sensitive
+      end
+      def downcase?
+        !@case_sensitive
+      end
-    # Checks if the right argument type has been given.
-    #
-    def check_argument_in method, type, argument, &condition
-      raise ArgumentError.new "Application##{method} takes a #{type} as argument, not a #{argument.class}." unless type === argument
-    end
+      # Checks if the right argument type has been given.
+      #
+      def check_argument_in method, type, argument, &condition
+        raise ArgumentError.new "Application##{method} takes a #{type} as argument, not a #{argument.class}." unless type === argument
+      end
-    # Returns a number of tokens, generated from the given text.
-    #
-    # Note:
-    #  * preprocess, pretokenize are hooks
-    #
-    def tokenize text
-      text   = preprocess text  # processing the text
-      return empty_tokens if text.blank?
-      words  = pretokenize text # splitting and preparations for tokenizing
-      return empty_tokens if words.empty?
-      tokens = tokens_for words # creating tokens / strings
-               process tokens   # processing tokens / strings
-    end
+      # Returns a number of tokens, generated from the given text.
+      #
+      # Note:
+      #  * preprocess, pretokenize are hooks
+      #
+      def tokenize text
+        text   = preprocess text  # processing the text
+        return empty_tokens if text.blank?
+        words  = pretokenize text # splitting and preparations for tokenizing
+        return empty_tokens if words.empty?
+        tokens = tokens_for words # creating tokens / strings
+                 process tokens   # processing tokens / strings
+      end
+      attr_reader :substituter
+      alias substituter? substituter
-    attr_reader :substituter
-    alias substituter? substituter
+      def initialize options = {}
+        removes_characters options[:removes_characters]                                 if options[:removes_characters]
+        contracts_expressions *options[:contracts_expressions]                          if options[:contracts_expressions]
+        stopwords options[:stopwords]                                                   if options[:stopwords]
+        normalizes_words options[:normalizes_words]                                     if options[:normalizes_words]
+        removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
+        substitutes_characters_with options[:substitutes_characters_with]               if options[:substitutes_characters_with]
+        case_sensitive options[:case_sensitive]                                         unless options[:case_sensitive].nil?
-    def initialize options = {}
-      removes_characters options[:removes_characters]                                 if options[:removes_characters]
-      contracts_expressions *options[:contracts_expressions]                          if options[:contracts_expressions]
-      stopwords options[:stopwords]                                                   if options[:stopwords]
-      normalizes_words options[:normalizes_words]                                     if options[:normalizes_words]
-      removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
-      substitutes_characters_with options[:substitutes_characters_with]               if options[:substitutes_characters_with]
-      case_sensitive options[:case_sensitive]                                         unless options[:case_sensitive].nil?
+        # Defaults.
+        #
+        splits_text_on options[:splits_text_on] || /\s/
+        reject_token_if &(options[:reject_token_if] || options[:rejects_token_if] || :blank?) # TODO Decide on using an s or not.
+      end
-      # Defaults.
+      # Default preprocessing hook.
       #
-      splits_text_on options[:splits_text_on] || /\s/
-      reject_token_if &(options[:reject_token_if] || options[:rejects_token_if] || :blank?) # TODO Decide on using an s or not.
-    end
+      # Does:
+      # 1. Character substitution.
+      # 2. Remove illegal expressions.
+      # 3. Remove non-single stopwords. (Stopwords that occur with other words)
+      #
+      def preprocess text
+        text = substitute_characters text
+        remove_illegals text
+        # We do not remove single stopwords e.g. in the indexer for
+        # an entirely different reason than in the query tokenizer.
+        # An indexed thing with just name "UND" (a possible stopword)
+        # should not lose its name.
+        #
+        remove_non_single_stopwords text
+        text
+      end
+      # Pretokenizing.
+      #
+      # Does:
+      # 1. Split the text into words.
+      # 2. Normalize each word.
+      #
+      def pretokenize text
+        words = split text
+        words.collect! do |word|
+          normalize_with_patterns word
+          word
+        end
+      end
+      # Basic postprocessing (overridden in both query/index tokenizers).
+      #
+      def process tokens
+        reject tokens # Reject any tokens that don't meet criteria
+        tokens
+      end
-    # Default preprocessing hook.
-    #
-    # Does:
-    # 1. Character substitution.
-    # 2. Remove illegal expressions.
-    # 3. Remove non-single stopwords. (Stopwords that occur with other words)
-    #
-    def preprocess text
-      text = substitute_characters text
-      remove_illegals text
-      # We do not remove single stopwords e.g. in the indexer for
-      # an entirely different reason than in the query tokenizer.
-      # An indexed thing with just name "UND" (a possible stopword)
-      # should not lose its name.
-      #
-      remove_non_single_stopwords text
-      text
-    end
-    # Pretokenizing.
-    #
-    # Does:
-    # 1. Split the text into words.
-    # 2. Normalize each word.
-    #
-    def pretokenize text
-      words = split text
-      words.collect! do |word|
-        normalize_with_patterns word
-        word
+      # # Converts words into real tokens.
+      # #
+      # def tokens_for words
+      #   Query::Tokens.new words.collect! { |word| token_for word }
+      # end
+      # Turns non-blank text into symbols.
+      #
+      def symbolize text
+        text.blank? ? nil : text.to_sym
       end
-    end
-    # Basic postprocessing (overridden in both query/index tokenizers).
-    #
-    def process tokens
-      reject tokens # Reject any tokens that don't meet criteria
-      tokens
-    end
-    # # Converts words into real tokens.
-    # #
-    # def tokens_for words
-    #   Query::Tokens.new words.collect! { |word| token_for word }
-    # end
-    # Turns non-blank text into symbols.
-    #
-    def symbolize text
-      text.blank? ? nil : text.to_sym
     end
   end

data/lib/picky/tokenizers/index.rb CHANGED Viewed

@@ -1,28 +1,32 @@
-module Tokenizers
+module Picky
-  # The base indexing tokenizer.
-  #
-  # Override in indexing subclasses and define in configuration.
-  #
-  class Index < Base
+  module Tokenizers
-    def self.default= new_default
-      @default = new_default
-    end
-    def self.default
-      @default ||= new
-    end
-    # Does not actually return a token, but a
-    # symbol "token".
+    # The base indexing tokenizer.
     #
-    def tokens_for words
-      words.collect! { |word| word.downcase! if downcase?; word.to_sym }
-    end
-    # Returns empty tokens.
+    # Override in indexing subclasses and define in configuration.
     #
-    def empty_tokens
-      []
+    class Index < Base
+      def self.default= new_default
+        @default = new_default
+      end
+      def self.default
+        @default ||= new
+      end
+      # Does not actually return a token, but a
+      # symbol "token".
+      #
+      def tokens_for words
+        words.collect! { |word| word.downcase! if downcase?; word.to_sym }
+      end
+      # Returns empty tokens.
+      #
+      def empty_tokens
+        []
+      end
     end
   end