picky 2.5.2 → 2.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/adapters/rack/base.rb +23 -0
- data/lib/picky/adapters/rack/live_parameters.rb +33 -0
- data/lib/picky/adapters/rack/query.rb +65 -0
- data/lib/picky/adapters/rack.rb +30 -0
- data/lib/picky/application.rb +5 -5
- data/lib/picky/backend/backend.rb +108 -0
- data/lib/picky/backend/file/basic.rb +101 -0
- data/lib/picky/backend/file/json.rb +34 -0
- data/lib/picky/backend/file/marshal.rb +34 -0
- data/lib/picky/backend/file/text.rb +56 -0
- data/lib/picky/backend/files.rb +30 -0
- data/lib/picky/backend/redis/basic.rb +85 -0
- data/lib/picky/backend/redis/list_hash.rb +49 -0
- data/lib/picky/backend/redis/string_hash.rb +40 -0
- data/lib/picky/backend/redis.rb +40 -0
- data/lib/picky/calculations/location.rb +57 -0
- data/lib/picky/categories.rb +62 -0
- data/lib/picky/categories_indexed.rb +93 -0
- data/lib/picky/categories_indexing.rb +12 -0
- data/lib/picky/category.rb +127 -0
- data/lib/picky/category_indexed.rb +64 -0
- data/lib/picky/category_indexing.rb +145 -0
- data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
- data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
- data/lib/picky/extensions/class.rb +11 -0
- data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
- data/lib/picky/frontend_adapters/rack.rb +146 -0
- data/lib/picky/generators/aliases.rb +3 -3
- data/lib/picky/generators/base.rb +15 -0
- data/lib/picky/generators/partial/default.rb +5 -0
- data/lib/picky/generators/partial/none.rb +31 -0
- data/lib/picky/generators/partial/strategy.rb +25 -0
- data/lib/picky/generators/partial/substring.rb +118 -0
- data/lib/picky/generators/partial_generator.rb +15 -0
- data/lib/picky/generators/similarity/default.rb +7 -0
- data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
- data/lib/picky/generators/similarity/metaphone.rb +28 -0
- data/lib/picky/generators/similarity/none.rb +31 -0
- data/lib/picky/generators/similarity/phonetic.rb +65 -0
- data/lib/picky/generators/similarity/soundex.rb +28 -0
- data/lib/picky/generators/similarity/strategy.rb +9 -0
- data/lib/picky/generators/similarity_generator.rb +15 -0
- data/lib/picky/generators/strategy.rb +14 -0
- data/lib/picky/generators/weights/default.rb +7 -0
- data/lib/picky/generators/weights/logarithmic.rb +39 -0
- data/lib/picky/generators/weights/strategy.rb +9 -0
- data/lib/picky/generators/weights_generator.rb +15 -0
- data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
- data/lib/picky/index/base.rb +119 -104
- data/lib/picky/index/base_indexed.rb +27 -0
- data/lib/picky/index/base_indexing.rb +119 -0
- data/lib/picky/index/memory.rb +6 -18
- data/lib/picky/index/redis.rb +6 -18
- data/lib/picky/indexed/bundle/base.rb +110 -0
- data/lib/picky/indexed/bundle/memory.rb +91 -0
- data/lib/picky/indexed/bundle/redis.rb +45 -0
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
- data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
- data/lib/picky/indexed/wrappers/category/location.rb +25 -0
- data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
- data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
- data/lib/picky/indexes.rb +73 -0
- data/lib/picky/indexes_indexed.rb +29 -0
- data/lib/picky/indexes_indexing.rb +49 -0
- data/lib/picky/indexing/bundle/base.rb +212 -0
- data/lib/picky/indexing/bundle/memory.rb +25 -0
- data/lib/picky/indexing/bundle/redis.rb +24 -0
- data/lib/picky/indexing/bundle/super_base.rb +61 -0
- data/lib/picky/indexing/wrappers/category/location.rb +25 -0
- data/lib/picky/interfaces/live_parameters.rb +8 -8
- data/lib/picky/loader.rb +89 -95
- data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
- data/lib/picky/query/allocation.rb +84 -0
- data/lib/picky/query/allocations.rb +114 -0
- data/lib/picky/query/combination.rb +76 -0
- data/lib/picky/query/combinations/base.rb +70 -0
- data/lib/picky/query/combinations/memory.rb +48 -0
- data/lib/picky/query/combinations/redis.rb +86 -0
- data/lib/picky/query/indexes.rb +195 -0
- data/lib/picky/query/qualifiers.rb +76 -0
- data/lib/picky/query/token.rb +198 -0
- data/lib/picky/query/tokens.rb +103 -0
- data/lib/picky/{internals/query → query}/weights.rb +0 -0
- data/lib/picky/results.rb +1 -1
- data/lib/picky/search.rb +6 -6
- data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
- data/lib/picky/sources/db.rb +7 -7
- data/lib/picky/sources/wrappers/location.rb +2 -2
- data/lib/picky/tokenizers/base.rb +224 -0
- data/lib/picky/tokenizers/index.rb +30 -0
- data/lib/picky/tokenizers/location.rb +49 -0
- data/lib/picky/tokenizers/query.rb +55 -0
- data/lib/tasks/index.rake +4 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
- data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
- data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
- data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
- data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
- data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
- data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
- data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
- data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
- data/spec/lib/index/base_spec.rb +10 -53
- data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
- data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
- data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
- data/spec/lib/indexes_class_spec.rb +30 -0
- data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
- data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
- data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
- data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +5 -5
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/query/qualifiers_spec.rb +4 -4
- data/spec/lib/query/token_spec.rb +3 -3
- data/spec/lib/query/tokens_spec.rb +32 -32
- data/spec/lib/search_spec.rb +5 -5
- data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/sources/db_spec.rb +4 -8
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
- metadata +214 -215
- data/lib/picky/aliases.rb +0 -4
- data/lib/picky/index_bundle.rb +0 -48
- data/lib/picky/indexed/indexes.rb +0 -59
- data/lib/picky/indexing/indexes.rb +0 -87
- data/lib/picky/internals/adapters/rack/base.rb +0 -27
- data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
- data/lib/picky/internals/adapters/rack/query.rb +0 -69
- data/lib/picky/internals/adapters/rack.rb +0 -34
- data/lib/picky/internals/calculations/location.rb +0 -59
- data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
- data/lib/picky/internals/generators/base.rb +0 -19
- data/lib/picky/internals/generators/partial/default.rb +0 -7
- data/lib/picky/internals/generators/partial/none.rb +0 -35
- data/lib/picky/internals/generators/partial/strategy.rb +0 -29
- data/lib/picky/internals/generators/partial/substring.rb +0 -122
- data/lib/picky/internals/generators/partial_generator.rb +0 -19
- data/lib/picky/internals/generators/similarity/default.rb +0 -9
- data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/none.rb +0 -35
- data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
- data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
- data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
- data/lib/picky/internals/generators/similarity_generator.rb +0 -19
- data/lib/picky/internals/generators/strategy.rb +0 -18
- data/lib/picky/internals/generators/weights/default.rb +0 -9
- data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
- data/lib/picky/internals/generators/weights/strategy.rb +0 -11
- data/lib/picky/internals/generators/weights_generator.rb +0 -19
- data/lib/picky/internals/index/backend.rb +0 -112
- data/lib/picky/internals/index/file/basic.rb +0 -105
- data/lib/picky/internals/index/file/json.rb +0 -38
- data/lib/picky/internals/index/file/marshal.rb +0 -38
- data/lib/picky/internals/index/file/text.rb +0 -60
- data/lib/picky/internals/index/files.rb +0 -34
- data/lib/picky/internals/index/redis/basic.rb +0 -89
- data/lib/picky/internals/index/redis/list_hash.rb +0 -53
- data/lib/picky/internals/index/redis/string_hash.rb +0 -44
- data/lib/picky/internals/index/redis.rb +0 -44
- data/lib/picky/internals/indexed/bundle/base.rb +0 -114
- data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
- data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
- data/lib/picky/internals/indexed/categories.rb +0 -140
- data/lib/picky/internals/indexed/category.rb +0 -111
- data/lib/picky/internals/indexed/index.rb +0 -63
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
- data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/internals/indexing/bundle/base.rb +0 -216
- data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
- data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
- data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
- data/lib/picky/internals/indexing/category.rb +0 -153
- data/lib/picky/internals/indexing/index.rb +0 -142
- data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/query/allocation.rb +0 -88
- data/lib/picky/internals/query/allocations.rb +0 -118
- data/lib/picky/internals/query/combination.rb +0 -80
- data/lib/picky/internals/query/combinations/base.rb +0 -74
- data/lib/picky/internals/query/combinations/memory.rb +0 -52
- data/lib/picky/internals/query/combinations/redis.rb +0 -90
- data/lib/picky/internals/query/indexes.rb +0 -199
- data/lib/picky/internals/query/qualifiers.rb +0 -82
- data/lib/picky/internals/query/token.rb +0 -202
- data/lib/picky/internals/query/tokens.rb +0 -109
- data/lib/picky/internals/shared/category.rb +0 -52
- data/lib/picky/internals/tokenizers/base.rb +0 -228
- data/lib/picky/internals/tokenizers/index.rb +0 -34
- data/lib/picky/internals/tokenizers/location.rb +0 -54
- data/lib/picky/internals/tokenizers/query.rb +0 -59
- data/lib/picky/internals.rb +0 -2
- data/spec/lib/aliases_spec.rb +0 -9
- data/spec/lib/index_bundle_spec.rb +0 -69
@@ -0,0 +1,103 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Query

  # Wraps an array of tokens and primarily handles switching through
  # similar token constellations.
  #
  class Tokens # :nodoc:all

    # The wrapped array of tokens.
    #
    # Needed by #==, which compares the wrapped arrays of two instances.
    #
    attr_reader :tokens

    # Basically delegates to its internal tokens array.
    #
    # NOTE(review): `delegate` is not defined in this class; presumably it
    # comes from the project's Module extension — confirm.
    #
    self.delegate(*[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten)

    # Create a new Tokens object with the array of tokens passed in.
    #
    def initialize tokens = []
      @tokens = tokens
    end

    # Creates a new Tokens object from a number of Strings.
    #
    # Options:
    #  * downcase: Whether to downcase the passed strings (default is true)
    #
    # Note: The passed words array is modified in place (collect!).
    #
    def self.processed words, downcase = true
      new words.collect! { |word| Token.processed word, downcase }
    end

    # Tokenizes each token.
    #
    # Note: Passed tokenizer needs to offer #normalize(text).
    #
    def tokenize_with tokenizer
      @tokens.each { |token| token.tokenize_with(tokenizer) }
    end

    # Generates an array in the form of
    # [
    #  [combination],                           # of token 1
    #  [combination, combination, combination], # of token 2
    #  [combination, combination]               # of token 3
    # ]
    #
    def possible_combinations_in index
      @tokens.inject([]) do |combinations, token|
        possible_combinations = token.possible_combinations_in index

        # TODO Could move the ignore_unassigned_tokens here!
        #
        # Note: Optimization for ignoring tokens that allocate to nothing and
        # can be ignored.
        # For example in a special search, where "florian" is not
        # mapped to any category.
        #
        possible_combinations ? combinations << possible_combinations : combinations
      end
    end

    # Makes the last of the tokens partial.
    #
    # Does nothing if there are no tokens.
    #
    def partialize_last
      @tokens.last.partial = true unless empty?
    end

    # Caps the tokens to the maximum.
    #
    # Removes every token from position maximum onwards, in place.
    #
    def cap maximum
      @tokens.slice!(maximum..-1) if cap?(maximum)
    end

    # Whether there are more tokens than the given maximum.
    #
    def cap? maximum
      @tokens.size > maximum
    end

    # Rejects blank tokens.
    #
    # Note: Modifies the wrapped array in place.
    #
    def reject
      @tokens.reject!(&:blank?)
    end

    # Returns a solr query.
    #
    # Joins each token's #to_solr with a space.
    #
    def to_solr_query
      @tokens.map(&:to_solr).join ' '
    end

    # Returns an array with each token's #original.
    #
    def originals
      @tokens.map(&:original)
    end

    # Two Tokens are equal if their wrapped token arrays are equal.
    #
    # Returns false for objects that do not expose #tokens.
    #
    def == other
      other.respond_to?(:tokens) && tokens == other.tokens
    end

    # Just join the token original texts.
    #
    def to_s
      originals.join ' '
    end

  end

end
|
File without changes
|
data/lib/picky/results.rb
CHANGED
@@ -10,7 +10,7 @@ class Results
|
|
10
10
|
|
11
11
|
# Takes instances of Query::Allocations as param.
|
12
12
|
#
|
13
|
-
def initialize amount = 0, offset = 0, allocations =
|
13
|
+
def initialize amount = 0, offset = 0, allocations = Query::Allocations.new
|
14
14
|
@offset = offset
|
15
15
|
@amount = amount
|
16
16
|
@allocations = allocations
|
data/lib/picky/search.rb
CHANGED
@@ -35,7 +35,7 @@ class Search
|
|
35
35
|
def initialize *index_definitions
|
36
36
|
options = Hash === index_definitions.last ? index_definitions.pop : {}
|
37
37
|
|
38
|
-
@indexes =
|
38
|
+
@indexes = Query::Indexes.new *index_definitions, combinations_type_for(index_definitions)
|
39
39
|
searching options[:tokenizer]
|
40
40
|
boost options[:weights]
|
41
41
|
|
@@ -54,11 +54,11 @@ class Search
|
|
54
54
|
@tokenizer = if options.respond_to?(:tokenize)
|
55
55
|
options
|
56
56
|
else
|
57
|
-
options &&
|
57
|
+
options && Tokenizers::Query.new(options)
|
58
58
|
end
|
59
59
|
end
|
60
60
|
def tokenizer
|
61
|
-
@tokenizer ||
|
61
|
+
@tokenizer || Tokenizers::Query.default
|
62
62
|
end
|
63
63
|
# TODO Doc. Spec.
|
64
64
|
#
|
@@ -82,14 +82,14 @@ class Search
|
|
82
82
|
# Picky will raise a Query::Indexes::DifferentTypesError.
|
83
83
|
#
|
84
84
|
@@mapping = {
|
85
|
-
Index::Memory =>
|
86
|
-
Index::Redis =>
|
85
|
+
Index::Memory => Query::Combinations::Memory,
|
86
|
+
Index::Redis => Query::Combinations::Redis
|
87
87
|
}
|
88
88
|
def combinations_type_for index_definitions_ary
|
89
89
|
index_types = index_definitions_ary.map(&:class)
|
90
90
|
index_types.uniq!
|
91
91
|
raise_different(index_types) if index_types.size > 1
|
92
|
-
!index_types.empty? && @@mapping[*index_types] ||
|
92
|
+
!index_types.empty? && @@mapping[*index_types] || Query::Combinations::Memory
|
93
93
|
end
|
94
94
|
# Currently it isn't possible using Memory and Redis etc.
|
95
95
|
# indexes in the same query index group.
|
File without changes
|
data/lib/picky/sources/db.rb
CHANGED
@@ -87,7 +87,7 @@ module Sources
|
|
87
87
|
def take_snapshot index
|
88
88
|
connect_backend
|
89
89
|
|
90
|
-
origin = snapshot_table_name index
|
90
|
+
origin = snapshot_table_name index.name
|
91
91
|
on_database = database.connection
|
92
92
|
|
93
93
|
# Drop the table if it exists.
|
@@ -109,16 +109,16 @@ module Sources
|
|
109
109
|
|
110
110
|
# Counts all the entries that are used for the index.
|
111
111
|
#
|
112
|
-
def count
|
112
|
+
def count index_name
|
113
113
|
connect_backend
|
114
114
|
|
115
|
-
database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(
|
115
|
+
database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
|
116
116
|
end
|
117
117
|
|
118
118
|
# The name of the snapshot table created by Picky.
|
119
119
|
#
|
120
|
-
def snapshot_table_name
|
121
|
-
"picky_#{
|
120
|
+
def snapshot_table_name index_name
|
121
|
+
"picky_#{index_name}_index"
|
122
122
|
end
|
123
123
|
|
124
124
|
# Harvests the data to index in chunks.
|
@@ -126,7 +126,7 @@ module Sources
|
|
126
126
|
def harvest category, &block
|
127
127
|
connect_backend
|
128
128
|
|
129
|
-
(0..count(category.
|
129
|
+
(0..count(category.index_name)).step(chunksize) do |offset|
|
130
130
|
get_data category, offset, &block
|
131
131
|
end
|
132
132
|
end
|
@@ -166,7 +166,7 @@ module Sources
|
|
166
166
|
# The harvest statement used to pull data from the snapshot table.
|
167
167
|
#
|
168
168
|
def harvest_statement category
|
169
|
-
"SELECT id, #{category.from} FROM #{snapshot_table_name(category.
|
169
|
+
"SELECT id, #{category.from} FROM #{snapshot_table_name(category.index_name)} st"
|
170
170
|
end
|
171
171
|
|
172
172
|
# The amount of records that are loaded each chunk.
|
@@ -10,7 +10,7 @@ module Sources
|
|
10
10
|
|
11
11
|
def initialize source, grid, precision = 1
|
12
12
|
super source
|
13
|
-
@calculation =
|
13
|
+
@calculation = Calculations::Location.new grid, precision
|
14
14
|
end
|
15
15
|
|
16
16
|
# Yield the data (id, text for id) for the given category.
|
@@ -42,7 +42,7 @@ module Sources
|
|
42
42
|
|
43
43
|
# TODO Move to the right place.
|
44
44
|
#
|
45
|
-
category.
|
45
|
+
category.indexing_exact[:location_minimum] = minimum
|
46
46
|
end
|
47
47
|
|
48
48
|
end
|
@@ -0,0 +1,224 @@
|
|
1
|
+
module Tokenizers # :nodoc:all

  # Defines tokenizing processes used both in indexing and querying.
  #
  # Note: #tokenize calls #tokens_for and #empty_tokens, which are not
  # defined in this class. They need to be provided by subclasses.
  #
  class Base

    # TODO Move EMPTY_STRING top level.
    #
    EMPTY_STRING = ''.freeze

    # Returns a multi-line summary of how this tokenizer is configured.
    #
    def to_s
      # Extracts the line number from the inspect string of a lambda.
      #
      reject_condition_location = @reject_condition.to_s[/:(\d+) \(lambda\)/, 1]
      <<-TOKENIZER
Removes characters: #{@removes_characters_regexp ? "/#{@removes_characters_regexp.source}/" : '-'}
Stopwords: #{@remove_stopwords_regexp ? "/#{@remove_stopwords_regexp.source}/" : '-'}
Splits text on: #{@splits_text_on.respond_to?(:source) ? "/#{@splits_text_on.source}/" : (@splits_text_on ? @splits_text_on : '-')}
Removes chars after split: #{@removes_characters_after_splitting_regexp ? "/#{@removes_characters_after_splitting_regexp.source}/" : '-'}
Normalizes words: #{@normalizes_words_regexp_replaces ? @normalizes_words_regexp_replaces : '-'}
Rejects tokens? #{reject_condition_location ? "Yes, see line #{reject_condition_location} in app/application.rb" : '-'}
Substitutes chars? #{@substituter ? "Yes, using #{@substituter}." : '-' }
Case sensitive? #{@case_sensitive ? "Yes." : "-"}
      TOKENIZER
    end

    # Stopwords.
    #
    # We only allow regexps (even if string would be okay
    # too for gsub! - it's too hard to understand)
    #
    # Raises an ArgumentError if something other than a Regexp is given.
    #
    def stopwords regexp
      check_argument_in __method__, Regexp, regexp
      @remove_stopwords_regexp = regexp
    end

    # Removes the configured stopwords from the text, in place.
    #
    # Returns the text.
    #
    def remove_stopwords text
      text.gsub! @remove_stopwords_regexp, EMPTY_STRING if @remove_stopwords_regexp
      text
    end

    # Matches a text that consists of a single word
    # (optionally followed by one of . * ~ and a whitespace).
    #
    @@non_single_stopword_regexp = /^\b[\w:]+?\b[\.\*\~]?\s?$/

    # Removes stopwords, except if the text is just a single word.
    #
    # Returns the text.
    #
    def remove_non_single_stopwords text
      return text if text.match @@non_single_stopword_regexp
      remove_stopwords text
    end

    # Illegals.
    #
    # We only allow regexps (even if string would be okay
    # too for gsub! - it's too hard to understand)
    #
    # Raises an ArgumentError if something other than a Regexp is given.
    #
    def removes_characters regexp
      check_argument_in __method__, Regexp, regexp
      @removes_characters_regexp = regexp
    end

    # Removes the configured illegal characters from the text, in place.
    #
    # Returns the text.
    #
    def remove_illegals text
      text.gsub! @removes_characters_regexp, EMPTY_STRING if @removes_characters_regexp
      text
    end

    # Splitting.
    #
    # We allow Strings and Regexps.
    # Note: We do not test against to_str since symbols do not work with String#split.
    #
    # Raises an ArgumentError for anything else.
    #
    def splits_text_on regexp_or_string
      unless Regexp === regexp_or_string || String === regexp_or_string
        raise ArgumentError, "#{__method__} takes a Regexp or String as argument, not a #{regexp_or_string.class}."
      end
      @splits_text_on = regexp_or_string
    end

    # Splits the text on the configured Regexp or String.
    #
    def split text
      text.split @splits_text_on
    end

    # Normalizing.
    #
    # We only allow arrays.
    #
    # Raises an ArgumentError if the argument does not respond to #to_ary.
    #
    def normalizes_words regexp_replaces
      unless regexp_replaces.respond_to?(:to_ary)
        raise ArgumentError, "#{__method__} takes an Array of replaces as argument, not a #{regexp_replaces.class}."
      end
      @normalizes_words_regexp_replaces = regexp_replaces
    end

    # Normalizes the text in place using the configured patterns, then
    # removes the characters configured to be removed after splitting.
    #
    # Note: The character removal also runs if no normalizing patterns
    # are configured. The two options are independent of each other.
    #
    # Returns the text.
    #
    def normalize_with_patterns text
      if @normalizes_words_regexp_replaces
        @normalizes_words_regexp_replaces.each do |regex, replace|
          # Only the first pattern that matches is applied.
          # This should be sufficient.
          #
          break if text.gsub!(regex, replace)
        end
      end
      remove_after_normalizing_illegals text
      text
    end

    # Illegal after normalizing.
    #
    # We only allow regexps (even if string would be okay
    # too for gsub! - it's too hard to understand)
    #
    # Raises an ArgumentError if something other than a Regexp is given.
    #
    def removes_characters_after_splitting regexp
      check_argument_in __method__, Regexp, regexp
      @removes_characters_after_splitting_regexp = regexp
    end

    # Removes the characters configured to be removed after splitting,
    # in place.
    #
    # Returns the text (like #remove_illegals and #remove_stopwords),
    # not the nil that gsub! returns if nothing was replaced.
    #
    def remove_after_normalizing_illegals text
      text.gsub! @removes_characters_after_splitting_regexp, EMPTY_STRING if @removes_characters_after_splitting_regexp
      text
    end

    # Substitute Characters with this substituter.
    #
    # Default is European Character substitution.
    #
    # Raises an ArgumentError if the substituter does not respond to #substitute.
    #
    def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
      unless substituter.respond_to?(:substitute)
        raise ArgumentError, "The substitutes_characters_with option needs a character substituter, which responds to #substitute."
      end
      @substituter = substituter
    end

    # Returns the substituted text if a substituter is configured,
    # the given text otherwise.
    #
    def substitute_characters text
      substituter? ? substituter.substitute(text) : text
    end

    # Reject tokens after tokenizing based on the given criteria.
    #
    # Note: Currently only for indexing.
    #
    def reject_token_if &condition
      @reject_condition = condition
    end

    # Removes the tokens for which the reject condition holds, in place.
    #
    def reject tokens
      tokens.reject!(&@reject_condition)
    end

    # Sets whether this tokenizer is case sensitive.
    #
    def case_sensitive case_sensitive
      @case_sensitive = case_sensitive
    end

    # Whether to downcase, i.e. true unless case sensitive.
    #
    def downcase?
      !@case_sensitive
    end

    # Checks if the right argument type has been given.
    #
    # Raises an ArgumentError if the argument is not of the given type.
    #
    # Note: The condition block is currently not used.
    #
    def check_argument_in method, type, argument, &condition
      raise ArgumentError, "Application##{method} takes a #{type} as argument, not a #{argument.class}." unless type === argument
    end


    # Returns a number of tokens, generated from the given text.
    #
    # Note:
    #  * preprocess, pretokenize are hooks
    #
    def tokenize text
      text   = preprocess text # processing the text
      return empty_tokens if text.blank?
      words  = pretokenize text # splitting and preparations for tokenizing
      return empty_tokens if words.empty?
      tokens = tokens_for words # creating tokens / strings
      process tokens # processing tokens / strings
    end

    attr_reader :substituter
    alias substituter? substituter

    # Configures the tokenizer from the given options hash.
    #
    # Each option is passed to the method of the same name.
    #
    # NOTE(review): #contracts_expressions is not defined in this class, so
    # passing :contracts_expressions raises a NoMethodError unless a subclass
    # or another file defines it — confirm.
    #
    def initialize options = {}
      removes_characters options[:removes_characters] if options[:removes_characters]
      contracts_expressions(*options[:contracts_expressions]) if options[:contracts_expressions]
      stopwords options[:stopwords] if options[:stopwords]
      normalizes_words options[:normalizes_words] if options[:normalizes_words]
      removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
      substitutes_characters_with options[:substitutes_characters_with] if options[:substitutes_characters_with]
      case_sensitive options[:case_sensitive] unless options[:case_sensitive].nil?

      # Defaults.
      #
      splits_text_on options[:splits_text_on] || /\s/
      reject_token_if(&(options[:reject_token_if] || options[:rejects_token_if] || :blank?)) # TODO Decide on using an s or not.
    end

    # Default preprocessing hook.
    #
    # Does:
    #  1. Character substitution.
    #  2. Remove illegal expressions.
    #  3. Remove non-single stopwords. (Stopwords that occur with other words)
    #
    def preprocess text
      text = substitute_characters text
      remove_illegals text
      # We do not remove single stopwords e.g. in the indexer for
      # an entirely different reason than in the query tokenizer.
      # An indexed thing with just name "UND" (a possible stopword)
      # should not lose its name.
      #
      remove_non_single_stopwords text
      text
    end

    # Pretokenizing.
    #
    # Does:
    #  1. Split the text into words.
    #  2. Normalize each word.
    #
    def pretokenize text
      words = split text
      words.collect! do |word|
        normalize_with_patterns word
        word
      end
    end

    # Basic postprocessing (overridden in both query/index tokenizers).
    #
    def process tokens
      reject tokens # Reject any tokens that don't meet criteria
      tokens
    end

    # # Converts words into real tokens.
    # #
    # def tokens_for words
    #   Query::Tokens.new words.collect! { |word| token_for word }
    # end

    # Turns non-blank text into symbols.
    #
    # Returns nil for blank text.
    #
    def symbolize text
      text.blank? ? nil : text.to_sym
    end

  end

end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Tokenizers

  # The base indexing tokenizer.
  #
  # Override in indexing subclasses and define in configuration.
  #
  class Index < Base

    class << self

      # Sets the default indexing tokenizer.
      #
      attr_writer :default

      # Returns the default indexing tokenizer.
      #
      # If none has been set, a new instance is created and remembered.
      #
      def default
        @default ||= new
      end

    end

    # Does not actually return a token, but a
    # symbol "token".
    #
    # Note: Replaces the words in the given array in place, downcasing
    # each word first unless the tokenizer is case sensitive.
    #
    def tokens_for words
      words.collect! do |word|
        word.downcase! if downcase?
        word.to_sym
      end
    end

    # Returns empty tokens.
    #
    def empty_tokens
      []
    end

  end

end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Tokenizers

  # Tokenizer for location values.
  #
  # Harvests [indexed_id, location] pairs, tracks the smallest location
  # seen, and yields the recalculated locations of each pair.
  #
  # Options read here:
  #   * grid:      passed through to Calculations::Location.
  #   * precision: passed through to Calculations::Location (default 1).
  #
  class Location < Base

    attr_reader :calculation

    def initialize options = {}
      super options

      grid      = options[:grid]
      precision = options[:precision] || 1

      @calculation = Calculations::Location.new grid, precision

      # Positive infinity, so that the first harvested location always
      # becomes the minimum.
      #
      @minimum = 1.0 / 0

      @locations = []
    end

    # TODO Work on this!
    #
    # Yields indexed_id and the stringified recalculated location for every
    # harvested location. Returns the minimum that was stored on the
    # category.
    #
    # NOTE(review): source and category are not defined in this class; they
    # are presumably expected to come from Base or the caller - confirm.
    # The text parameter is not used.
    #
    def tokenize text

      # Gather the minimum and remember all locations.
      #
      # The instance variables are used explicitly: writing
      # "minimum = location if location < minimum" creates a fresh local
      # variable that is nil inside its own condition (so the comparison
      # raises), and no locations/minimum readers exist on this class.
      #
      source.harvest category do |indexed_id, location|
        location = location.to_f
        @minimum = location if location < @minimum
        @locations << [indexed_id, location]
      end

      calculation.minimum = @minimum

      # Recalculate locations.
      #
      @locations.each do |indexed_id, location|
        calculation.recalculated_range(location).each do |new_location|
          yield indexed_id, new_location.to_s
        end
      end

      # TODO Move to the right place.
      #
      category.indexing_exact[:location_minimum] = @minimum
    end

  end

end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Tokenizers

  # The query tokenizer.
  #
  # There are a few class methods that you can use to configure how a query works:
  #
  #   removes_characters regexp
  #   illegal_after_normalizing regexp
  #   stopwords regexp
  #   contracts_expressions regexp, to_string
  #   splits_text_on regexp
  #   normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
  #
  class Query < Base

    class << self
      # Sets the tokenizer that is handed out by Query.default.
      #
      attr_writer :default

      # The default query tokenizer. If none has been set, a new instance
      # is created on first access and remembered.
      #
      def default
        @default ||= new
      end
    end

    attr_reader :maximum_tokens

    # Options read here:
    #   * maximum_tokens: upper limit of tokens kept per query (default 5).
    #
    def initialize options = {}
      super(options)
      @maximum_tokens = options[:maximum_tokens] || 5
    end

    # Lets the tokens object process itself: reject, limit, and partialize.
    #
    # In querying we work with real tokens (in indexing it's just symbols).
    # Returns the same tokens object that was passed in.
    #
    def process tokens
      tokens.tap do |list|
        list.reject              # Reject any tokens that don't meet criteria.
        list.cap maximum_tokens  # Cut off superfluous tokens.
        list.partialize_last     # Set certain tokens as partial.
      end
    end

    # Converts words into real tokens.
    #
    def tokens_for words
      ::Query::Tokens.processed(words, downcase?)
    end

    # Returns an empty tokens object.
    #
    def empty_tokens
      ::Query::Tokens.new
    end

  end

end
|
data/lib/tasks/index.rake
CHANGED
@@ -23,9 +23,10 @@ namespace :index do
|
|
23
23
|
desc "Generates a specific index from index snapshots (category optional)."
|
24
24
|
task :specific, [:index, :category] => :application do |_, options|
|
25
25
|
index, category = options.index, options.category
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
|
27
|
+
specific = Indexes[index]
|
28
|
+
specific = specific[category] if category
|
29
|
+
specific.index
|
29
30
|
end
|
30
31
|
|
31
32
|
end
|
data/lib/tasks/try.rake
CHANGED
@@ -6,7 +6,7 @@ namespace :try do
|
|
6
6
|
task :index, [:text, :index, :category] => :application do |_, options|
|
7
7
|
text, index, category = options.text, options.index, options.category
|
8
8
|
|
9
|
-
tokenizer = category ? Indexes.find(index, category).tokenizer :
|
9
|
+
tokenizer = category ? Indexes.find(index, category).tokenizer : Tokenizers::Index.default
|
10
10
|
|
11
11
|
puts "\"#{text}\" is saved in the index as #{tokenizer.tokenize(text.dup).to_a}"
|
12
12
|
end
|
@@ -15,7 +15,7 @@ namespace :try do
|
|
15
15
|
task :query, [:text] => :application do |_, options|
|
16
16
|
text = options.text
|
17
17
|
|
18
|
-
puts "\"#{text}\" as a search will be preprocessed into #{
|
18
|
+
puts "\"#{text}\" as a search will be preprocessed into #{Tokenizers::Query.default.tokenize(text.dup).to_a.map(&:to_s).map(&:to_sym)}"
|
19
19
|
puts
|
20
20
|
puts "(category qualifiers, e.g. title: are removed if they do not exist as a qualifier, so 'toitle:bla' -> 'bla')"
|
21
21
|
end
|
@@ -15,8 +15,8 @@ describe Application do
|
|
15
15
|
|
16
16
|
route %r{^/books} => Search.new(books)
|
17
17
|
end
|
18
|
-
|
19
|
-
|
18
|
+
Tokenizers::Index.default.tokenize 'some text'
|
19
|
+
Tokenizers::Query.default.tokenize 'some text'
|
20
20
|
}.should_not raise_error
|
21
21
|
end
|
22
22
|
it "should run ok" do
|
@@ -105,7 +105,7 @@ describe Application do
|
|
105
105
|
lambda { Application.rack_adapter }.should_not raise_error
|
106
106
|
end
|
107
107
|
it "should return a new FrontendAdapters::Rack instance" do
|
108
|
-
Application.rack_adapter.should be_kind_of(
|
108
|
+
Application.rack_adapter.should be_kind_of(FrontendAdapters::Rack)
|
109
109
|
end
|
110
110
|
it "should cache the instance" do
|
111
111
|
Application.rack_adapter.should == Application.rack_adapter
|