picky 2.5.2 → 2.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/adapters/rack/base.rb +23 -0
- data/lib/picky/adapters/rack/live_parameters.rb +33 -0
- data/lib/picky/adapters/rack/query.rb +65 -0
- data/lib/picky/adapters/rack.rb +30 -0
- data/lib/picky/application.rb +5 -5
- data/lib/picky/backend/backend.rb +108 -0
- data/lib/picky/backend/file/basic.rb +101 -0
- data/lib/picky/backend/file/json.rb +34 -0
- data/lib/picky/backend/file/marshal.rb +34 -0
- data/lib/picky/backend/file/text.rb +56 -0
- data/lib/picky/backend/files.rb +30 -0
- data/lib/picky/backend/redis/basic.rb +85 -0
- data/lib/picky/backend/redis/list_hash.rb +49 -0
- data/lib/picky/backend/redis/string_hash.rb +40 -0
- data/lib/picky/backend/redis.rb +40 -0
- data/lib/picky/calculations/location.rb +57 -0
- data/lib/picky/categories.rb +62 -0
- data/lib/picky/categories_indexed.rb +93 -0
- data/lib/picky/categories_indexing.rb +12 -0
- data/lib/picky/category.rb +127 -0
- data/lib/picky/category_indexed.rb +64 -0
- data/lib/picky/category_indexing.rb +145 -0
- data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
- data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
- data/lib/picky/extensions/class.rb +11 -0
- data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
- data/lib/picky/frontend_adapters/rack.rb +146 -0
- data/lib/picky/generators/aliases.rb +3 -3
- data/lib/picky/generators/base.rb +15 -0
- data/lib/picky/generators/partial/default.rb +5 -0
- data/lib/picky/generators/partial/none.rb +31 -0
- data/lib/picky/generators/partial/strategy.rb +25 -0
- data/lib/picky/generators/partial/substring.rb +118 -0
- data/lib/picky/generators/partial_generator.rb +15 -0
- data/lib/picky/generators/similarity/default.rb +7 -0
- data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
- data/lib/picky/generators/similarity/metaphone.rb +28 -0
- data/lib/picky/generators/similarity/none.rb +31 -0
- data/lib/picky/generators/similarity/phonetic.rb +65 -0
- data/lib/picky/generators/similarity/soundex.rb +28 -0
- data/lib/picky/generators/similarity/strategy.rb +9 -0
- data/lib/picky/generators/similarity_generator.rb +15 -0
- data/lib/picky/generators/strategy.rb +14 -0
- data/lib/picky/generators/weights/default.rb +7 -0
- data/lib/picky/generators/weights/logarithmic.rb +39 -0
- data/lib/picky/generators/weights/strategy.rb +9 -0
- data/lib/picky/generators/weights_generator.rb +15 -0
- data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
- data/lib/picky/index/base.rb +119 -104
- data/lib/picky/index/base_indexed.rb +27 -0
- data/lib/picky/index/base_indexing.rb +119 -0
- data/lib/picky/index/memory.rb +6 -18
- data/lib/picky/index/redis.rb +6 -18
- data/lib/picky/indexed/bundle/base.rb +110 -0
- data/lib/picky/indexed/bundle/memory.rb +91 -0
- data/lib/picky/indexed/bundle/redis.rb +45 -0
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
- data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
- data/lib/picky/indexed/wrappers/category/location.rb +25 -0
- data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
- data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
- data/lib/picky/indexes.rb +73 -0
- data/lib/picky/indexes_indexed.rb +29 -0
- data/lib/picky/indexes_indexing.rb +49 -0
- data/lib/picky/indexing/bundle/base.rb +212 -0
- data/lib/picky/indexing/bundle/memory.rb +25 -0
- data/lib/picky/indexing/bundle/redis.rb +24 -0
- data/lib/picky/indexing/bundle/super_base.rb +61 -0
- data/lib/picky/indexing/wrappers/category/location.rb +25 -0
- data/lib/picky/interfaces/live_parameters.rb +8 -8
- data/lib/picky/loader.rb +89 -95
- data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
- data/lib/picky/query/allocation.rb +84 -0
- data/lib/picky/query/allocations.rb +114 -0
- data/lib/picky/query/combination.rb +76 -0
- data/lib/picky/query/combinations/base.rb +70 -0
- data/lib/picky/query/combinations/memory.rb +48 -0
- data/lib/picky/query/combinations/redis.rb +86 -0
- data/lib/picky/query/indexes.rb +195 -0
- data/lib/picky/query/qualifiers.rb +76 -0
- data/lib/picky/query/token.rb +198 -0
- data/lib/picky/query/tokens.rb +103 -0
- data/lib/picky/{internals/query → query}/weights.rb +0 -0
- data/lib/picky/results.rb +1 -1
- data/lib/picky/search.rb +6 -6
- data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
- data/lib/picky/sources/db.rb +7 -7
- data/lib/picky/sources/wrappers/location.rb +2 -2
- data/lib/picky/tokenizers/base.rb +224 -0
- data/lib/picky/tokenizers/index.rb +30 -0
- data/lib/picky/tokenizers/location.rb +49 -0
- data/lib/picky/tokenizers/query.rb +55 -0
- data/lib/tasks/index.rake +4 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
- data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
- data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
- data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
- data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
- data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
- data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
- data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
- data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
- data/spec/lib/index/base_spec.rb +10 -53
- data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
- data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
- data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
- data/spec/lib/indexes_class_spec.rb +30 -0
- data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
- data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
- data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
- data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +5 -5
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/query/qualifiers_spec.rb +4 -4
- data/spec/lib/query/token_spec.rb +3 -3
- data/spec/lib/query/tokens_spec.rb +32 -32
- data/spec/lib/search_spec.rb +5 -5
- data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/sources/db_spec.rb +4 -8
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
- metadata +214 -215
- data/lib/picky/aliases.rb +0 -4
- data/lib/picky/index_bundle.rb +0 -48
- data/lib/picky/indexed/indexes.rb +0 -59
- data/lib/picky/indexing/indexes.rb +0 -87
- data/lib/picky/internals/adapters/rack/base.rb +0 -27
- data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
- data/lib/picky/internals/adapters/rack/query.rb +0 -69
- data/lib/picky/internals/adapters/rack.rb +0 -34
- data/lib/picky/internals/calculations/location.rb +0 -59
- data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
- data/lib/picky/internals/generators/base.rb +0 -19
- data/lib/picky/internals/generators/partial/default.rb +0 -7
- data/lib/picky/internals/generators/partial/none.rb +0 -35
- data/lib/picky/internals/generators/partial/strategy.rb +0 -29
- data/lib/picky/internals/generators/partial/substring.rb +0 -122
- data/lib/picky/internals/generators/partial_generator.rb +0 -19
- data/lib/picky/internals/generators/similarity/default.rb +0 -9
- data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/none.rb +0 -35
- data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
- data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
- data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
- data/lib/picky/internals/generators/similarity_generator.rb +0 -19
- data/lib/picky/internals/generators/strategy.rb +0 -18
- data/lib/picky/internals/generators/weights/default.rb +0 -9
- data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
- data/lib/picky/internals/generators/weights/strategy.rb +0 -11
- data/lib/picky/internals/generators/weights_generator.rb +0 -19
- data/lib/picky/internals/index/backend.rb +0 -112
- data/lib/picky/internals/index/file/basic.rb +0 -105
- data/lib/picky/internals/index/file/json.rb +0 -38
- data/lib/picky/internals/index/file/marshal.rb +0 -38
- data/lib/picky/internals/index/file/text.rb +0 -60
- data/lib/picky/internals/index/files.rb +0 -34
- data/lib/picky/internals/index/redis/basic.rb +0 -89
- data/lib/picky/internals/index/redis/list_hash.rb +0 -53
- data/lib/picky/internals/index/redis/string_hash.rb +0 -44
- data/lib/picky/internals/index/redis.rb +0 -44
- data/lib/picky/internals/indexed/bundle/base.rb +0 -114
- data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
- data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
- data/lib/picky/internals/indexed/categories.rb +0 -140
- data/lib/picky/internals/indexed/category.rb +0 -111
- data/lib/picky/internals/indexed/index.rb +0 -63
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
- data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/internals/indexing/bundle/base.rb +0 -216
- data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
- data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
- data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
- data/lib/picky/internals/indexing/category.rb +0 -153
- data/lib/picky/internals/indexing/index.rb +0 -142
- data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/query/allocation.rb +0 -88
- data/lib/picky/internals/query/allocations.rb +0 -118
- data/lib/picky/internals/query/combination.rb +0 -80
- data/lib/picky/internals/query/combinations/base.rb +0 -74
- data/lib/picky/internals/query/combinations/memory.rb +0 -52
- data/lib/picky/internals/query/combinations/redis.rb +0 -90
- data/lib/picky/internals/query/indexes.rb +0 -199
- data/lib/picky/internals/query/qualifiers.rb +0 -82
- data/lib/picky/internals/query/token.rb +0 -202
- data/lib/picky/internals/query/tokens.rb +0 -109
- data/lib/picky/internals/shared/category.rb +0 -52
- data/lib/picky/internals/tokenizers/base.rb +0 -228
- data/lib/picky/internals/tokenizers/index.rb +0 -34
- data/lib/picky/internals/tokenizers/location.rb +0 -54
- data/lib/picky/internals/tokenizers/query.rb +0 -59
- data/lib/picky/internals.rb +0 -2
- data/spec/lib/aliases_spec.rb +0 -9
- data/spec/lib/index_bundle_spec.rb +0 -69
@@ -0,0 +1,40 @@
|
|
1
|
+
module Backend

  class Redis

    # Stores one value per key inside a single hash that lives
    # under +namespace+ in the backend (written with hset, read
    # with hget).
    #
    class StringHash < Basic

      # Replaces whatever is stored under the namespace with the
      # given hash: the old entry is cleared, then every pair is
      # written with its own hset call.
      #
      # Note: We could use multi, but it did not help.
      #
      def dump(hash)
        clear
        hash.each_pair { |key, value| backend.hset(namespace, key, value) }
      end

      # Removes the whole hash stored under the namespace.
      #
      def clear
        backend.del(namespace)
      end

      # Collections are not available on a StringHash; always raises.
      #
      # NOTE(review): the message still names Index::Redis::ListHash
      # although this class is declared inside Backend - confirm
      # whether the text should be updated.
      #
      def collection(sym)
        raise "Can't retrieve a collection from a StringHash. Use Index::Redis::ListHash."
      end

      # Fetches the single value stored for the given key.
      #
      def member(sym)
        backend.hget(namespace, sym)
      end

    end

  end

end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Backend

  # Backend that keeps the four parts of a bundle (index, weights,
  # similarity, configuration) in Redis-specific containers.
  #
  # TODO Needs a reconnect to be run after forking.
  #
  class Redis < Backend

    # bundle_name: Name of the bundle, used as part of each key.
    # category:    The category; its identifier prefixes each key.
    #
    def initialize bundle_name, category
      super bundle_name, category

      # Refine a few Redis "types".
      #
      # Each container is namespaced "<category identifier>:<bundle name>:<part>".
      #
      @index         = Redis::ListHash.new   "#{category.identifier}:#{bundle_name}:index"
      @weights       = Redis::StringHash.new "#{category.identifier}:#{bundle_name}:weights"
      @similarity    = Redis::ListHash.new   "#{category.identifier}:#{bundle_name}:similarity"
      @configuration = Redis::StringHash.new "#{category.identifier}:#{bundle_name}:configuration"
    end

    # Delegate to the right collection.
    #
    def ids sym
      index.collection sym
    end

    # Delegate to the right member value.
    #
    # Note: Converts to float, but only if a value is stored.
    #
    # Returns nil when there is no weight for the given key.
    # Converting unconditionally would turn a missing weight
    # into 0.0, which is truthy, so callers that test the weight
    # for presence (e.g. weight(token) && ...) could never
    # detect a missing entry.
    #
    def weight sym
      stored = weights.member sym
      stored && stored.to_f
    end

    # Delegate to a member value.
    #
    def setting sym
      configuration.member sym
    end

  end

end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Calculations # :nodoc:all

  # Translates a one-dimensional location into a cell number
  # on Picky's internal one-dimensional "grid".
  #
  # Example: given a minimum of 9 and a grid length of 1,
  # the location 12.3456 ends up in cell 3.
  #
  class Location

    attr_reader :minimum, :precision, :grid

    # user_grid: The grid length as given by the user.
    # precision: How many cells a range extends to each side
    #            (1 if none is given).
    #
    # The internal grid length is user_grid / (precision + 0.5).
    #
    def initialize(user_grid, precision = nil)
      @user_grid = user_grid
      @precision = precision ? precision : 1
      @grid      = @user_grid / (@precision + 0.5)
    end

    # Stores the minimum, lowered by two offsets:
    # * one user grid, as a margin, and
    # * one internal grid, so that the index key never falls on 0
    #   (to_i maps by default to 0).
    #
    def minimum=(minimum)
      @minimum = minimum - @user_grid - @grid
    end

    # Lowers the stored minimum by the given length.
    #
    def add_margin(length)
      @minimum = @minimum - length
    end

    # The range of cells around the cell of the given location.
    #
    def recalculated_range(location)
      cell = recalculate(location)
      range(cell)
    end

    # The range reaching +precision+ cells to either side
    # of the given cell.
    #
    def range(around_location)
      Range.new(around_location - @precision, around_location + @precision)
    end

    # The cell number of the given location: its distance from the
    # minimum, measured in internal grid lengths, rounded down.
    #
    def recalculate(location)
      distance = location - @minimum
      (distance / @grid).floor
    end

  end

end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
class Categories

  attr_reader :categories, :category_hash

  # Class-level macros, defined elsewhere in the project.
  # NOTE(review): presumably each/first/map are forwarded to the
  # categories array and reindex is sent to each category - confirm
  # against the Module extensions.
  #
  delegate :each, :first, :map, :to => :categories

  each_delegate :reindex, :to => :categories

  # A list of indexed categories.
  #
  # Options:
  #  * ignore_unassigned_tokens: Ignore the given token if it cannot be
  #    matched to a category. Stored as false unless given.
  #
  #    By default, a token that does not match any category makes the
  #    query return nothing (since a single token cannot be matched).
  #    With this option set to true, such a token is simply ignored.
  #
  #    Use this if only a few matched words are important: for the
  #    query "Jonathan Myers 86455 Las Cucarachas" you might only want
  #    to match the zipcode, to display advertisements for it.
  #
  def initialize(options = {})
    clear

    @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
  end

  # Resets both containers: the ordered list and the lookup by name.
  #
  def clear
    @categories    = []
    @category_hash = {}
  end

  # Looks up a category by name (the name is converted with to_sym).
  #
  # Raises if no category is registered under that name.
  #
  def [](category_name)
    category_name = category_name.to_sym
    found = category_hash[category_name]
    found || raise_not_found(category_name)
  end

  # Raises an error that lists the names of all known categories.
  #
  def raise_not_found(category_name)
    known = categories.map(&:name).join('", "')
    raise "Index category \"#{category_name}\" not found. Possible categories: \"#{known}\"."
  end

  # Registers the category in the list and under its name.
  #
  def <<(category)
    categories << category
    category_hash[category.name] = category
  end

  # All categories, comma separated.
  #
  def to_s
    categories.join ', '
  end

end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
class Categories

  attr_reader :ignore_unassigned_tokens

  # NOTE(review): each_delegate is a project-defined macro; presumably
  # it sends these methods to every category - confirm.
  #
  each_delegate :load_from_cache,
                :analyze,
                :to => :categories

  # Return all possible combinations for the given token.
  #
  # This checks if it needs to also search through similar
  # tokens, if for example, the token is one with ~.
  # If yes, it puts together all solutions.
  #
  def possible_combinations_for token
    token.similar? ? similar_possible_for(token) : possible_for(token)
  end

  # Gets all similar tokens and puts together the possible combinations
  # for each found similar token.
  #
  def similar_possible_for token
    # Get as many tokens as necessary
    #
    tokens = similar_tokens_for token

    # possible combinations
    #
    inject_possible_for tokens
  end

  # Collects, over all categories, the chain of similar tokens of the
  # given token, leaving out those with the same text as the original.
  #
  def similar_tokens_for token
    text = token.text
    categories.inject([]) do |result, category|
      next_token = token
      # Note: We could also break off here if not all the available
      #       similars are needed.
      #       Wait for a concrete case that needs this before taking
      #       action.
      #
      # The loop condition already guarantees next_token is set.
      #
      while next_token = next_token.next_similar_token(category)
        result << next_token if next_token.text != text
      end
      result
    end
  end

  # Concatenates the possible combinations of all given tokens.
  #
  # possible_for returns nil (not []) for a token that is to be
  # ignored. Such a token contributes nothing here; adding nil to
  # the result array would raise a TypeError.
  #
  def inject_possible_for tokens
    tokens.inject([]) do |result, token|
      possible = possible_categories token
      result + (possible_for(token, possible) || [])
    end
  end

  # Returns possible Combinations for the token.
  #
  # Note: The preselected_categories param is an optimization.
  #
  # Note: Returns [] if no categories matched (will produce no result).
  #       Returns nil if this token needs to be removed from the query.
  #       (Also none of the categories matched, but the ignore unassigned
  #       tokens option is true)
  #
  def possible_for token, preselected_categories = nil
    possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
      combination = category.combination_for token
      combination ? combinations << combination : combinations
    end
    # This is an optimization to mark tokens that are ignored.
    #
    return if ignore_unassigned_tokens && possible.empty?
    possible # wrap in combinations
  end

  # This returns the possible categories for this token.
  # If the user has already preselected a category for this token,
  # like "artist:moby", if not just return all for the given token,
  # since all are possible.
  #
  # Note: Once I thought this was called too often. But it is not (18.01.2011).
  #
  def possible_categories token
    user_defined_categories(token) || categories
  end

  # This returns the array of categories if the user has defined
  # an existing category.
  #
  # Note: Returns nil if the user did not define one
  #       or [] if he/she has defined a non-existing one.
  #
  def user_defined_categories token
    names = token.user_defined_category_names
    names && names.map { |name| category_hash[name] }.compact
  end

end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
class Category

  attr_reader :name, :index

  # Mandatory params:
  #  * name: Category name to use as identifier and file names.
  #  * index: Index to which this category is attached to.
  #
  # Options:
  #  * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
  #  * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
  #  * from: The source category identifier to take the data from.
  #
  # Advanced Options:
  #  * source: Use if the category should use a different source.
  #  * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
  #  * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
  #  * key_format: What this category's keys are formatted with (default is :to_i)
  #
  def initialize(name, index, options = {})
    @name  = name
    @index = index

    # Settings used while indexing.
    #
    @source     = options[:source]
    @from       = options[:from]
    @tokenizer  = options[:tokenizer]
    @key_format = options[:key_format]

    # Generator strategies, falling back to the defaults.
    #
    # TODO Push into Bundle. At least the weights.
    #
    partial_strategy    = options[:partial]    || Generators::Partial::Default
    weights_strategy    = options[:weights]    || Generators::Weights::Default
    similarity_strategy = options[:similarity] || Generators::Similarity::Default

    # Indexing bundles: the exact one never generates partials,
    # the partial one never generates similarity.
    #
    @indexing_exact   = index.indexing_bundle_class.new(
      :exact, self, similarity_strategy, Generators::Partial::None.new, weights_strategy
    )
    @indexing_partial = index.indexing_bundle_class.new(
      :partial, self, Generators::Similarity::None.new, partial_strategy, weights_strategy
    )

    # Settings used while searching.
    #
    # TODO Push the defaults out into the index.
    #
    @partial_strategy = partial_strategy # TODO Duplicate work.

    @indexed_exact   = index.indexed_bundle_class.new(:exact, self, similarity_strategy)
    @indexed_partial = index.indexed_bundle_class.new(:partial, self, similarity_strategy)

    # @exact   = exact_lambda.call(@exact, @partial)   if exact_lambda   = options[:exact_lambda]
    # @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]

    # Registers the qualifiers for this category; without explicit
    # ones, the category name itself is used.
    #
    # TODO Extract? Yes.
    #
    qualifiers = generate_qualifiers_from(options) || [name]
    Query::Qualifiers.add(name, qualifiers)
  end

  # Indexes and reloads the category.
  #
  def reindex
    index
    reload
  end

  # The name of this category.
  #
  def category_name
    name
  end

  # The name of the index this category is attached to.
  #
  def index_name
    @index.name
  end

  # Path and partial filename of a specific index on this category.
  #
  def index_path(bundle_name, type)
    [index_directory, "#{name}_#{bundle_name}_#{type}"].join('/')
  end

  # Path and partial filename of the prepared index on this category.
  # Computed once, then cached.
  #
  def prepared_index_path
    @prepared_index_path ||= [index_directory, "prepared_#{name}_index"].join('/')
  end

  # Opens the (cached) prepared index text file for indexing,
  # handing the given block on to it.
  #
  def prepared_index_file(&block)
    @prepared_index_file ||= Backend::File::Text.new(prepared_index_path)
    @prepared_index_file.open_for_indexing(&block)
  end

  # The index directory for this category:
  # <PICKY_ROOT>/index/<PICKY_ENVIRONMENT>/<index name>.
  # Computed once, then cached.
  #
  def index_directory
    @index_directory ||= [PICKY_ROOT, 'index', PICKY_ENVIRONMENT, @index.name].join('/')
  end

  # Creates the index directory including all necessary paths above it.
  #
  def prepare_index_directory
    FileUtils.mkdir_p(index_directory)
  end

  # Identifier for internal use: "<index name>:<category name>".
  # Computed once, then cached.
  #
  # TODO What internal use?
  #
  def identifier
    @identifier ||= [@index.name, name].join(':')
  end

  # Multi-line description of the category and its two bundles.
  #
  # NOTE(review): exact and partial are not defined in the visible
  # parts of this class - confirm they exist. The leading whitespace
  # of the heredoc lines could not be recovered from the source this
  # was reviewed from; the text is reproduced as it appeared there.
  #
  def to_info
    <<-CATEGORY
Category(#{name}):
Exact:
#{exact.indented_to_s(4)}
Partial:
#{partial.indented_to_s(4)}
CATEGORY
  end

  # Short description of the category.
  #
  def to_s
    "Category(#{name})"
  end

end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
#
|
2
|
+
#
|
3
|
+
class Category

  attr_reader :indexed_exact

  # Extracts the qualifiers from the options: the :qualifiers list
  # if given, else the single :qualifier wrapped in an array, else
  # a falsy value.
  #
  # TODO Move to Index.
  #
  def generate_qualifiers_from(options)
    options[:qualifiers] || (options[:qualifier] && [options[:qualifier]])
  end

  # Loads the exact and the partial index from cache.
  #
  def load_from_cache
    timed_exclaim "\"#{identifier}\": Loading index from cache."
    indexed_exact.load
    indexed_partial.load
  end
  alias reload load_from_cache

  # Stores the analysis of both bundles in the given collector,
  # under this category's identifier, and returns the collector.
  #
  # Note: The idea is not to run this while the search engine is running.
  #
  def analyze(collector)
    analysis = {}
    analysis[:exact]   = Analyzer.new.analyze(indexed_exact)
    analysis[:partial] = Analyzer.new.analyze(indexed_partial)

    collector[identifier] = analysis
    collector
  end

  # The weight for this token's text, taken from the bundle
  # that fits the token.
  #
  def weight(token)
    bundle = bundle_for(token)
    bundle.weight(token.text)
  end

  # The ids for this token's text, taken from the bundle
  # that fits the token.
  #
  def ids(token)
    bundle = bundle_for(token)
    bundle.ids(token.text)
  end

  # The bundle to use for this token: the partial one for
  # partial tokens, the exact one for all others.
  #
  def bundle_for(token)
    if token.partial?
      indexed_partial
    else
      indexed_exact
    end
  end

  # The partial strategy defines whether to really use the partial index
  # or to answer partial queries from the exact index.
  #
  def indexed_partial
    if @partial_strategy.use_exact_for_partial?
      @indexed_exact
    else
      @indexed_partial
    end
  end

  # A combination of the token with this category, but only
  # if there is a weight for the token. Otherwise the falsy
  # weight itself is returned.
  #
  def combination_for(token)
    weight(token) && Query::Combination.new(token, self)
  end

end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
#
|
2
|
+
#
|
3
|
+
class Category

  attr_reader :indexing_exact, :indexing_partial

  # Prepares and caches this category.
  #
  # This one should be used by users.
  #
  def index
    prepare
    cache
  end

  # The source of this category, or, if it has none of its own,
  # the source of its index.
  #
  def source
    @source || @index.source
  end

  # The key format, looked up in this order:
  # the source's (if it offers one), the one given
  # explicitly to this category, the index's.
  #
  def key_format
    from_source = source.respond_to?(:key_format) && source.key_format
    from_source || @key_format || @index.key_format
  end

  # Where the data is taken from: the explicit :from
  # setting, or the category's own name.
  #
  def from
    @from || name
  end

  # The indexer is lazily generated and cached.
  #
  # Sources that respond to each get a parallel indexer,
  # all others a serial one.
  #
  def indexer
    @indexer ||= if source.respond_to?(:each)
      Indexers::Parallel.new(self)
    else
      Indexers::Serial.new(self)
    end
  end

  # TODO This is a hack to get the parallel indexer working.
  #
  def categories
    [self]
  end

  # Returns an appropriate tokenizer.
  # If one isn't set on this category, will try the index,
  # and finally the default index tokenizer.
  #
  def tokenizer
    @tokenizer || @index.tokenizer || Tokenizers::Index.default
  end

  # Backup the caches.
  # (Revert with restore_caches)
  #
  def backup_caches
    timed_exclaim("Backing up #{identifier}.")
    indexing_exact.backup
    indexing_partial.backup
  end

  # Restore the caches.
  # (Revert with backup_caches)
  #
  def restore_caches
    timed_exclaim("Restoring #{identifier}.")
    indexing_exact.restore
    indexing_partial.restore
  end

  # Checks the caches for existence.
  #
  def check_caches
    timed_exclaim("Checking #{identifier}.")
    indexing_exact.raise_unless_cache_exists
    indexing_partial.raise_unless_cache_exists
  end

  # Deletes the caches.
  #
  def clear_caches
    timed_exclaim("Deleting #{identifier}.")
    indexing_exact.delete
    indexing_partial.delete
  end

  # Tells both indexing bundles which key format to use
  # (see #key_format).
  #
  # TODO Make this more dynamic.
  #
  def configure
    indexing_exact[:key_format]   = key_format
    indexing_partial[:key_format] = key_format
  end

  # Indexes, creates the "prepared_..." file.
  #
  # TODO This step could already prepare the id (if a
  #      per category key_format is not really needed).
  #
  def prepare
    prepare_index_directory
    indexer.index
  end

  # Generates all caches for this category.
  #
  def cache
    prepare_index_directory
    generate_caches
  end

  # Generate the cache data, step by step, then dump it.
  #
  def generate_caches
    configure
    generate_caches_from_source
    generate_partial
    generate_caches_from_memory
    dump_caches
    timed_exclaim("\"#{identifier}\": Caching finished.")
  end

  # The exact bundle builds its caches from the source.
  #
  def generate_caches_from_source
    indexing_exact.generate_caches_from_source
  end

  # The partial bundle is generated from the exact bundle's index.
  #
  def generate_partial
    exact_index = indexing_exact.index
    indexing_partial.generate_partial_from(exact_index)
  end

  # The partial bundle builds its caches from memory.
  #
  def generate_caches_from_memory
    indexing_partial.generate_caches_from_memory
  end

  # Dumps both bundles, the exact one first.
  #
  def dump_caches
    indexing_exact.dump
    indexing_partial.dump
  end

end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# The original Class class.
|
2
|
+
#
|
3
|
+
class Class # :nodoc:all

  # For each given name, defines a class-level method of that name
  # which passes its arguments and block on to the method of the
  # same name on whatever +self.instance+ returns.
  #
  # The generated methods are evaluated under the file name
  # "(__DELEGATION__)".
  #
  def instance_delegate(*names)
    names.each do |name|
      forwarder = "def self.#{name}(*args, &block)\n" \
                  "self.instance.__send__(#{name.inspect}, *args, &block)\n" \
                  "end\n"
      module_eval(forwarder, "(__DELEGATION__)", 1)
    end
  end

end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|