picky 2.5.2 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/adapters/rack/base.rb +23 -0
- data/lib/picky/adapters/rack/live_parameters.rb +33 -0
- data/lib/picky/adapters/rack/query.rb +65 -0
- data/lib/picky/adapters/rack.rb +30 -0
- data/lib/picky/application.rb +5 -5
- data/lib/picky/backend/backend.rb +108 -0
- data/lib/picky/backend/file/basic.rb +101 -0
- data/lib/picky/backend/file/json.rb +34 -0
- data/lib/picky/backend/file/marshal.rb +34 -0
- data/lib/picky/backend/file/text.rb +56 -0
- data/lib/picky/backend/files.rb +30 -0
- data/lib/picky/backend/redis/basic.rb +85 -0
- data/lib/picky/backend/redis/list_hash.rb +49 -0
- data/lib/picky/backend/redis/string_hash.rb +40 -0
- data/lib/picky/backend/redis.rb +40 -0
- data/lib/picky/calculations/location.rb +57 -0
- data/lib/picky/categories.rb +62 -0
- data/lib/picky/categories_indexed.rb +93 -0
- data/lib/picky/categories_indexing.rb +12 -0
- data/lib/picky/category.rb +127 -0
- data/lib/picky/category_indexed.rb +64 -0
- data/lib/picky/category_indexing.rb +145 -0
- data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
- data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
- data/lib/picky/extensions/class.rb +11 -0
- data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
- data/lib/picky/frontend_adapters/rack.rb +146 -0
- data/lib/picky/generators/aliases.rb +3 -3
- data/lib/picky/generators/base.rb +15 -0
- data/lib/picky/generators/partial/default.rb +5 -0
- data/lib/picky/generators/partial/none.rb +31 -0
- data/lib/picky/generators/partial/strategy.rb +25 -0
- data/lib/picky/generators/partial/substring.rb +118 -0
- data/lib/picky/generators/partial_generator.rb +15 -0
- data/lib/picky/generators/similarity/default.rb +7 -0
- data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
- data/lib/picky/generators/similarity/metaphone.rb +28 -0
- data/lib/picky/generators/similarity/none.rb +31 -0
- data/lib/picky/generators/similarity/phonetic.rb +65 -0
- data/lib/picky/generators/similarity/soundex.rb +28 -0
- data/lib/picky/generators/similarity/strategy.rb +9 -0
- data/lib/picky/generators/similarity_generator.rb +15 -0
- data/lib/picky/generators/strategy.rb +14 -0
- data/lib/picky/generators/weights/default.rb +7 -0
- data/lib/picky/generators/weights/logarithmic.rb +39 -0
- data/lib/picky/generators/weights/strategy.rb +9 -0
- data/lib/picky/generators/weights_generator.rb +15 -0
- data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
- data/lib/picky/index/base.rb +119 -104
- data/lib/picky/index/base_indexed.rb +27 -0
- data/lib/picky/index/base_indexing.rb +119 -0
- data/lib/picky/index/memory.rb +6 -18
- data/lib/picky/index/redis.rb +6 -18
- data/lib/picky/indexed/bundle/base.rb +110 -0
- data/lib/picky/indexed/bundle/memory.rb +91 -0
- data/lib/picky/indexed/bundle/redis.rb +45 -0
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
- data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
- data/lib/picky/indexed/wrappers/category/location.rb +25 -0
- data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
- data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
- data/lib/picky/indexes.rb +73 -0
- data/lib/picky/indexes_indexed.rb +29 -0
- data/lib/picky/indexes_indexing.rb +49 -0
- data/lib/picky/indexing/bundle/base.rb +212 -0
- data/lib/picky/indexing/bundle/memory.rb +25 -0
- data/lib/picky/indexing/bundle/redis.rb +24 -0
- data/lib/picky/indexing/bundle/super_base.rb +61 -0
- data/lib/picky/indexing/wrappers/category/location.rb +25 -0
- data/lib/picky/interfaces/live_parameters.rb +8 -8
- data/lib/picky/loader.rb +89 -95
- data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
- data/lib/picky/query/allocation.rb +84 -0
- data/lib/picky/query/allocations.rb +114 -0
- data/lib/picky/query/combination.rb +76 -0
- data/lib/picky/query/combinations/base.rb +70 -0
- data/lib/picky/query/combinations/memory.rb +48 -0
- data/lib/picky/query/combinations/redis.rb +86 -0
- data/lib/picky/query/indexes.rb +195 -0
- data/lib/picky/query/qualifiers.rb +76 -0
- data/lib/picky/query/token.rb +198 -0
- data/lib/picky/query/tokens.rb +103 -0
- data/lib/picky/{internals/query → query}/weights.rb +0 -0
- data/lib/picky/results.rb +1 -1
- data/lib/picky/search.rb +6 -6
- data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
- data/lib/picky/sources/db.rb +7 -7
- data/lib/picky/sources/wrappers/location.rb +2 -2
- data/lib/picky/tokenizers/base.rb +224 -0
- data/lib/picky/tokenizers/index.rb +30 -0
- data/lib/picky/tokenizers/location.rb +49 -0
- data/lib/picky/tokenizers/query.rb +55 -0
- data/lib/tasks/index.rake +4 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
- data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
- data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
- data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
- data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
- data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
- data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
- data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
- data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
- data/spec/lib/index/base_spec.rb +10 -53
- data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
- data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
- data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
- data/spec/lib/indexes_class_spec.rb +30 -0
- data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
- data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
- data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
- data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +5 -5
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/query/qualifiers_spec.rb +4 -4
- data/spec/lib/query/token_spec.rb +3 -3
- data/spec/lib/query/tokens_spec.rb +32 -32
- data/spec/lib/search_spec.rb +5 -5
- data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/sources/db_spec.rb +4 -8
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
- metadata +214 -215
- data/lib/picky/aliases.rb +0 -4
- data/lib/picky/index_bundle.rb +0 -48
- data/lib/picky/indexed/indexes.rb +0 -59
- data/lib/picky/indexing/indexes.rb +0 -87
- data/lib/picky/internals/adapters/rack/base.rb +0 -27
- data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
- data/lib/picky/internals/adapters/rack/query.rb +0 -69
- data/lib/picky/internals/adapters/rack.rb +0 -34
- data/lib/picky/internals/calculations/location.rb +0 -59
- data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
- data/lib/picky/internals/generators/base.rb +0 -19
- data/lib/picky/internals/generators/partial/default.rb +0 -7
- data/lib/picky/internals/generators/partial/none.rb +0 -35
- data/lib/picky/internals/generators/partial/strategy.rb +0 -29
- data/lib/picky/internals/generators/partial/substring.rb +0 -122
- data/lib/picky/internals/generators/partial_generator.rb +0 -19
- data/lib/picky/internals/generators/similarity/default.rb +0 -9
- data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/none.rb +0 -35
- data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
- data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
- data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
- data/lib/picky/internals/generators/similarity_generator.rb +0 -19
- data/lib/picky/internals/generators/strategy.rb +0 -18
- data/lib/picky/internals/generators/weights/default.rb +0 -9
- data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
- data/lib/picky/internals/generators/weights/strategy.rb +0 -11
- data/lib/picky/internals/generators/weights_generator.rb +0 -19
- data/lib/picky/internals/index/backend.rb +0 -112
- data/lib/picky/internals/index/file/basic.rb +0 -105
- data/lib/picky/internals/index/file/json.rb +0 -38
- data/lib/picky/internals/index/file/marshal.rb +0 -38
- data/lib/picky/internals/index/file/text.rb +0 -60
- data/lib/picky/internals/index/files.rb +0 -34
- data/lib/picky/internals/index/redis/basic.rb +0 -89
- data/lib/picky/internals/index/redis/list_hash.rb +0 -53
- data/lib/picky/internals/index/redis/string_hash.rb +0 -44
- data/lib/picky/internals/index/redis.rb +0 -44
- data/lib/picky/internals/indexed/bundle/base.rb +0 -114
- data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
- data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
- data/lib/picky/internals/indexed/categories.rb +0 -140
- data/lib/picky/internals/indexed/category.rb +0 -111
- data/lib/picky/internals/indexed/index.rb +0 -63
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
- data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/internals/indexing/bundle/base.rb +0 -216
- data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
- data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
- data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
- data/lib/picky/internals/indexing/category.rb +0 -153
- data/lib/picky/internals/indexing/index.rb +0 -142
- data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/query/allocation.rb +0 -88
- data/lib/picky/internals/query/allocations.rb +0 -118
- data/lib/picky/internals/query/combination.rb +0 -80
- data/lib/picky/internals/query/combinations/base.rb +0 -74
- data/lib/picky/internals/query/combinations/memory.rb +0 -52
- data/lib/picky/internals/query/combinations/redis.rb +0 -90
- data/lib/picky/internals/query/indexes.rb +0 -199
- data/lib/picky/internals/query/qualifiers.rb +0 -82
- data/lib/picky/internals/query/token.rb +0 -202
- data/lib/picky/internals/query/tokens.rb +0 -109
- data/lib/picky/internals/shared/category.rb +0 -52
- data/lib/picky/internals/tokenizers/base.rb +0 -228
- data/lib/picky/internals/tokenizers/index.rb +0 -34
- data/lib/picky/internals/tokenizers/location.rb +0 -54
- data/lib/picky/internals/tokenizers/query.rb +0 -59
- data/lib/picky/internals.rb +0 -2
- data/spec/lib/aliases_spec.rb +0 -9
- data/spec/lib/index_bundle_spec.rb +0 -69
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
module Backend
|
|
2
|
+
|
|
3
|
+
class Redis
|
|
4
|
+
|
|
5
|
+
class StringHash < Basic
|
|
6
|
+
|
|
7
|
+
# Replaces the content stored under this namespace with the given hash,
# writing one Redis hash field per key/value pair.
#
# Returns the hash that was passed in.
#
# Note: Wrapping the writes in a multi was tried, but it did not help.
#
def dump(hash)
  clear
  hash.each_pair { |field, content| backend.hset(namespace, field, content) }
end
|
|
17
|
+
|
|
18
|
+
# Removes everything stored under this namespace by deleting its key
# in the backend.
#
def clear
  backend.del(namespace)
end
|
|
23
|
+
|
|
24
|
+
# A StringHash holds single values only, so asking it for a collection
# is a programming error: this method always raises.
#
# The message points to the list-based sibling class. It names the
# Backend namespace, since the Redis classes live under Backend.
#
def collection sym
  raise "Can't retrieve a collection from a StringHash. Use Backend::Redis::ListHash."
end
|
|
29
|
+
|
|
30
|
+
# Looks up the single value stored for the given field of this
# namespace's hash.
#
def member(sym)
  backend.hget(namespace, sym)
end
|
|
35
|
+
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
module Backend
|
|
2
|
+
|
|
3
|
+
# TODO Needs a reconnect to be run after forking.
|
|
4
|
+
#
|
|
5
|
+
class Redis < Backend
|
|
6
|
+
|
|
7
|
+
# Sets up the four Redis-backed stores of a bundle. Each store gets a
# namespace of the form "<category identifier>:<bundle name>:<type>".
#
def initialize(bundle_name, category)
  super(bundle_name, category)

  # Builds the namespace for one of the stores.
  #
  namespace_for = lambda { |type| "#{category.identifier}:#{bundle_name}:#{type}" }

  # Refine a few Redis "types".
  #
  @index         = Redis::ListHash.new(namespace_for.call(:index))
  @weights       = Redis::StringHash.new(namespace_for.call(:weights))
  @similarity    = Redis::ListHash.new(namespace_for.call(:similarity))
  @configuration = Redis::StringHash.new(namespace_for.call(:configuration))
end
|
|
17
|
+
|
|
18
|
+
# Returns the collection stored in the index for the given symbol.
#
def ids(sym)
  index.collection(sym)
end
|
|
23
|
+
|
|
24
|
+
# Returns the weight stored for the given symbol.
#
# Note: Converts a found value to float.
#
# Note: A missing value is passed through unchanged (nil is not turned
#       into 0.0). Category#combination_for tests the weight for
#       truthiness, and 0.0 is truthy in Ruby, so converting a miss
#       would make every token look like a match.
#       (Assumes weights.member returns nil for an unknown key.)
#
def weight sym
  stored = weights.member(sym)
  stored && stored.to_f
end
|
|
31
|
+
|
|
32
|
+
# Returns the configuration value stored for the given symbol.
#
def setting(sym)
  configuration.member(sym)
end
|
|
37
|
+
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
module Calculations # :nodoc:all

  # Maps a 1-d location onto the Picky internal 1-d "grid".
  #
  # For example, a location x == 12.3456 is mapped to 3
  # if the minimum is 9 and the grid length is 1.
  #
  class Location

    attr_reader :minimum, :precision, :grid

    # user_grid: The grid length as given by the user.
    # precision: Defaults to 1 if none (or nil) is given.
    #
    # The internal grid length is user_grid / (precision + 0.5).
    #
    def initialize(user_grid, precision = nil)
      @user_grid = user_grid
      @precision = precision || 1
      @grid      = @user_grid / (@precision + 0.5)
    end

    # Stores the minimum, lowered by
    # * one user grid (as a margin), and
    # * one internal grid, so that the index key never falls on 0.
    #   Why? to_i maps by default to 0.
    #
    def minimum=(minimum)
      @minimum = minimum - @user_grid - @grid
    end

    # Lowers the stored minimum by the given length.
    #
    def add_margin(length)
      @minimum = @minimum - length
    end

    # The range of grid positions around the recalculated location.
    #
    def recalculated_range(location)
      range(recalculate(location))
    end

    # The range reaching precision steps to each side of the given position.
    #
    def range(around_location)
      lower = around_location - @precision
      upper = around_location + @precision
      lower..upper
    end

    # The grid position of the location, counted from the stored minimum.
    #
    def recalculate(location)
      offset = location - @minimum
      (offset / @grid).floor
    end

  end

end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
class Categories

  attr_reader :categories, :category_hash

  delegate :each, :first, :map, :to => :categories

  each_delegate :reindex, :to => :categories

  # A list of indexed categories.
  #
  # Options:
  #  * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
  #                              By default, a token that does not match any category makes
  #                              the query return nothing (since a single token cannot be
  #                              matched). If this option is true, any token that cannot be
  #                              matched to a category is simply ignored.
  #                              Use this if only a few matched words are important, like for
  #                              example of the query "Jonathan Myers 86455 Las Cucarachas"
  #                              you only want to match the zipcode, to have the search engine
  #                              display advertisements on the side for the zipcode.
  #                              Nifty! :)
  #
  def initialize(options = {})
    clear

    @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
  end

  # Resets the category list and the name => category lookup to empty.
  #
  def clear
    @categories    = []
    @category_hash = {}
  end

  # Looks up a category by name (anything responding to #to_sym).
  #
  # Raises if no category was added under that name.
  #
  def [](category_name)
    key = category_name.to_sym
    category_hash[key] || raise_not_found(key)
  end

  # Raises an error that lists the names of all known categories.
  #
  def raise_not_found(category_name)
    known_names = categories.map(&:name).join('", "')
    raise %Q{Index category "#{category_name}" not found. Possible categories: "#{known_names}".}
  end

  # Appends the category to the list and registers it under its name.
  #
  def <<(category)
    categories << category
    category_hash[category.name] = category
  end

  # All categories, separated by commas.
  #
  def to_s
    categories.join(', ')
  end

end
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
class Categories
|
|
2
|
+
|
|
3
|
+
attr_reader :ignore_unassigned_tokens
|
|
4
|
+
|
|
5
|
+
each_delegate :load_from_cache,
|
|
6
|
+
:analyze,
|
|
7
|
+
:to => :categories
|
|
8
|
+
|
|
9
|
+
# Returns all possible combinations for the given token.
#
# A similar token (for example, one with ~) is also searched through
# its similar tokens, and all solutions are put together. Any other
# token is looked up directly.
#
def possible_combinations_for(token)
  return similar_possible_for(token) if token.similar?

  possible_for(token)
end
|
|
18
|
+
# Collects the tokens similar to the given one, then gathers the
# possible combinations of each of them.
#
def similar_possible_for(token)
  inject_possible_for(similar_tokens_for(token))
end
|
|
29
|
+
# Walks, category by category, the chain of similar tokens starting at
# the given token. Collects every token whose text differs from the
# original token's text.
#
def similar_tokens_for(token)
  original_text = token.text
  categories.each_with_object([]) do |category, similars|
    current = token
    # Note: We could also break off here if not all the available
    #       similars are needed.
    #       Wait for a concrete case that needs this before taking
    #       action.
    #
    while (current = current.next_similar_token(category))
      similars << current unless current.text == original_text
    end
  end
end
|
|
44
|
+
# Puts together the possible combinations of all given tokens in a
# single array.
#
# Note: possible_for returns nil (not []) for a token that is to be
#       ignored. Such a token contributes nothing here, instead of
#       making Array#+ raise a TypeError.
#
def inject_possible_for tokens
  tokens.inject([]) do |result, token|
    possible = possible_categories token
    result + (possible_for(token, possible) || [])
  end
end
|
|
50
|
+
|
|
51
|
+
# Returns the possible combinations for the token: one for each
# candidate category that yields a combination for it.
#
# Note: The preselected_categories param is an optimization. If it is
#       not given, the candidates are the token's possible categories.
#
# Note: Returns [] if no categories matched (will produce no result).
#       Returns nil if this token needs to be removed from the query.
#       (Also none of the categories matched, but the ignore unassigned
#       tokens option is true)
#
def possible_for(token, preselected_categories = nil)
  candidates = preselected_categories || possible_categories(token)
  possible = candidates.each_with_object([]) do |category, combinations|
    combination = category.combination_for(token)
    combinations << combination if combination
  end
  # This is an optimization to mark tokens that are ignored.
  #
  return nil if ignore_unassigned_tokens && possible.empty?
  possible # wrap in combinations
end
|
|
70
|
+
# Returns the categories that are possible for this token.
#
# If the user has preselected categories for the token, like in
# "artist:moby", these are returned. Otherwise all categories are
# returned, since all are possible.
#
# Note: Once I thought this was called too often. But it is not (18.01.2011).
#
def possible_categories(token)
  preselected = user_defined_categories(token)
  preselected ? preselected : categories
end
|
|
80
|
+
# Maps the category names the user has defined on the token to the
# categories registered under these names. Names without a registered
# category are dropped.
#
# Note: Returns nil if the user did not define one
#       or [] if he/she has defined a non-existing one.
#
def user_defined_categories(token)
  names = token.user_defined_category_names
  return names unless names

  names.map { |name| category_hash[name] }.compact
end
|
|
92
|
+
|
|
93
|
+
end
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
class Category

  attr_reader :name, :index

  # Mandatory params:
  #  * name: Category name to use as identifier and file names.
  #  * index: Index to which this category is attached to.
  #
  # Options:
  #  * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
  #  * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
  #  * from: The source category identifier to take the data from.
  #
  # Advanced Options:
  #  * source: Use if the category should use a different source.
  #  * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
  #  * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
  #  * key_format: What this category's keys are formatted with (default is :to_i)
  #
  # Also registers the category's qualifiers with Query::Qualifiers
  # (the category name itself, if the options define none).
  #
  def initialize(name, index, options = {})
    @name  = name
    @index = index

    # Indexing.
    #
    @source     = options[:source]
    @from       = options[:from]
    @tokenizer  = options[:tokenizer]
    @key_format = options[:key_format]

    # TODO Push into Bundle. At least the weights.
    #
    partial    = options[:partial]    || Generators::Partial::Default
    weights    = options[:weights]    || Generators::Weights::Default
    similarity = options[:similarity] || Generators::Similarity::Default

    @indexing_exact = index.indexing_bundle_class.new(
      :exact, self, similarity, Generators::Partial::None.new, weights
    )
    @indexing_partial = index.indexing_bundle_class.new(
      :partial, self, Generators::Similarity::None.new, partial, weights
    )

    # Indexed.
    #
    # TODO Push the defaults out into the index.
    #
    @partial_strategy = partial # TODO Duplicate work.

    @indexed_exact   = index.indexed_bundle_class.new(:exact, self, similarity)
    @indexed_partial = index.indexed_bundle_class.new(:partial, self, similarity)

    # @exact   = exact_lambda.call(@exact, @partial)   if exact_lambda   = options[:exact_lambda]
    # @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]

    # TODO Extract? Yes.
    #
    qualifiers = generate_qualifiers_from(options) || [name]
    Query::Qualifiers.add(name, qualifiers)
  end

  # Indexes and reloads the category.
  #
  # NOTE(review): "index" is presumably meant to run Category#index from
  # the indexing part of this class (prepare and cache), which would
  # replace the attr_reader of the same name above - confirm.
  #
  def reindex
    index
    reload
  end

  # The name of this category.
  #
  def category_name
    name
  end

  # The name of the index this category is attached to.
  #
  def index_name
    @index.name
  end

  # Path and partial filename of a specific index on this category.
  #
  def index_path(bundle_name, type)
    "#{index_directory}/#{name}_#{bundle_name}_#{type}"
  end

  # Path and partial filename of the prepared index on this category.
  # Built once, then cached.
  #
  def prepared_index_path
    @prepared_index_path ||= "#{index_directory}/prepared_#{name}_index"
  end

  # Opens the prepared index file (a text file, created once and then
  # cached) for indexing, handing it the given block.
  #
  def prepared_index_file(&block)
    @prepared_index_file ||= Backend::File::Text.new(prepared_index_path)
    @prepared_index_file.open_for_indexing(&block)
  end

  # The index directory for this category. Built once, then cached.
  #
  def index_directory
    @index_directory ||= "#{PICKY_ROOT}/index/#{PICKY_ENVIRONMENT}/#{@index.name}"
  end

  # Creates the index directory including all necessary paths above it.
  #
  def prepare_index_directory
    FileUtils.mkdir_p(index_directory)
  end

  # Identifier for internal use: "<index name>:<category name>".
  # Built once, then cached.
  #
  # TODO What internal use?
  #
  def identifier
    @identifier ||= "#{@index.name}:#{name}"
  end

  # A multi-line description of this category and its bundles.
  #
  # NOTE(review): Calls #exact and #partial, which are not defined in
  # the parts of this class visible here - confirm that they exist.
  #
  def to_info
    <<-CATEGORY
Category(#{name}):
Exact:
#{exact.indented_to_s(4)}
Partial:
#{partial.indented_to_s(4)}
    CATEGORY
  end

  # A short description of this category.
  #
  def to_s
    "Category(#{name})"
  end

end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
#
|
|
2
|
+
#
|
|
3
|
+
class Category

  attr_reader :indexed_exact

  # Extracts the qualifiers from the options: either the :qualifiers
  # option as is, or the single :qualifier option wrapped in an array.
  # Returns a falsy value if neither is given.
  #
  # TODO Move to Index.
  #
  def generate_qualifiers_from(options)
    explicit = options[:qualifiers]
    return explicit if explicit

    single = options[:qualifier]
    single && [single]
  end

  # Loads the index from cache.
  #
  def load_from_cache
    timed_exclaim %Q{"#{identifier}": Loading index from cache.}
    indexed_exact.load
    indexed_partial.load
  end
  alias reload load_from_cache

  # Analyzes the exact and the partial bundle. The result is stored in
  # the collector under this category's identifier, and the collector
  # is returned.
  #
  # Note: The idea is not to run this while the search engine is running.
  #
  def analyze(collector)
    collector.tap do |collected|
      collected[identifier] = {
        :exact   => Analyzer.new.analyze(indexed_exact),
        :partial => Analyzer.new.analyze(indexed_partial)
      }
    end
  end

  # Gets the weight for this token's text.
  #
  def weight(token)
    bundle_for(token).weight(token.text)
  end

  # Gets the ids for this token's text.
  #
  def ids(token)
    bundle_for(token).ids(token.text)
  end

  # Returns the right index bundle for this token:
  # the partial one for a partial token, else the exact one.
  #
  def bundle_for(token)
    if token.partial?
      indexed_partial
    else
      indexed_exact
    end
  end

  # The partial strategy defines whether to really use the partial index.
  # If it says to use the exact one for partial, the exact bundle is returned.
  #
  def indexed_partial
    return @indexed_exact if @partial_strategy.use_exact_for_partial?

    @indexed_partial
  end

  # Returns a combination of the token and this category, but only
  # if the token has a weight here. Otherwise the falsy weight is returned.
  #
  def combination_for(token)
    found = weight(token)
    return found unless found

    Query::Combination.new(token, self)
  end

end
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
#
|
|
2
|
+
#
|
|
3
|
+
class Category
|
|
4
|
+
|
|
5
|
+
attr_reader :indexing_exact,
|
|
6
|
+
:indexing_partial
|
|
7
|
+
|
|
8
|
+
# Runs the two indexing steps for this category, in order:
# prepare, then cache.
#
# This one should be used by users.
#
def index
  prepare
  cache
end
|
|
16
|
+
|
|
17
|
+
# Return an appropriate source.
|
|
18
|
+
#
|
|
19
|
+
def source
|
|
20
|
+
@source || @index.source
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Return the key format.
|
|
24
|
+
#
|
|
25
|
+
# If the source has no key format, then
|
|
26
|
+
# check for an explicit key format, and
|
|
27
|
+
# if none is defined, ask the index for
|
|
28
|
+
# one.
|
|
29
|
+
#
|
|
30
|
+
def key_format
|
|
31
|
+
source.respond_to?(:key_format) && source.key_format || @key_format || @index.key_format
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# Where the data is taken from.
|
|
35
|
+
#
|
|
36
|
+
def from
|
|
37
|
+
@from || name
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# The indexer is lazily generated and cached.
|
|
41
|
+
#
|
|
42
|
+
def indexer
|
|
43
|
+
@indexer ||= source.respond_to?(:each) ? Indexers::Parallel.new(self) : Indexers::Serial.new(self)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# TODO This is a hack to get the parallel indexer working.
|
|
47
|
+
#
|
|
48
|
+
def categories
|
|
49
|
+
[self]
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# Returns an appropriate tokenizer.
|
|
53
|
+
# If one isn't set on this category, will try the index,
|
|
54
|
+
# and finally the default index tokenizer.
|
|
55
|
+
#
|
|
56
|
+
def tokenizer
|
|
57
|
+
@tokenizer || @index.tokenizer || Tokenizers::Index.default
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# Backup the caches.
|
|
61
|
+
# (Revert with restore_caches)
|
|
62
|
+
#
|
|
63
|
+
def backup_caches
|
|
64
|
+
timed_exclaim "Backing up #{identifier}."
|
|
65
|
+
indexing_exact.backup
|
|
66
|
+
indexing_partial.backup
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# Restore the caches.
|
|
70
|
+
# (Revert with backup_caches)
|
|
71
|
+
#
|
|
72
|
+
def restore_caches
|
|
73
|
+
timed_exclaim "Restoring #{identifier}."
|
|
74
|
+
indexing_exact.restore
|
|
75
|
+
indexing_partial.restore
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
# Checks the caches for existence.
|
|
79
|
+
#
|
|
80
|
+
def check_caches
|
|
81
|
+
timed_exclaim "Checking #{identifier}."
|
|
82
|
+
indexing_exact.raise_unless_cache_exists
|
|
83
|
+
indexing_partial.raise_unless_cache_exists
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
# Deletes the caches.
|
|
87
|
+
#
|
|
88
|
+
def clear_caches
|
|
89
|
+
timed_exclaim "Deleting #{identifier}."
|
|
90
|
+
indexing_exact.delete
|
|
91
|
+
indexing_partial.delete
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
# We need to set what formatting method should be used.
|
|
95
|
+
# Uses the one defined in the indexer.
|
|
96
|
+
#
|
|
97
|
+
# TODO Make this more dynamic.
|
|
98
|
+
#
|
|
99
|
+
def configure
|
|
100
|
+
indexing_exact[:key_format] = self.key_format
|
|
101
|
+
indexing_partial[:key_format] = self.key_format
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Indexes, creates the "prepared_..." file.
|
|
105
|
+
#
|
|
106
|
+
# TODO This step could already prepare the id (if a
|
|
107
|
+
# per category key_format is not really needed).
|
|
108
|
+
#
|
|
109
|
+
def prepare
|
|
110
|
+
prepare_index_directory
|
|
111
|
+
indexer.index
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
# Generates all caches for this category.
|
|
115
|
+
#
|
|
116
|
+
def cache
|
|
117
|
+
prepare_index_directory
|
|
118
|
+
generate_caches
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# Generate the cache data.
|
|
122
|
+
#
|
|
123
|
+
def generate_caches
|
|
124
|
+
configure
|
|
125
|
+
generate_caches_from_source
|
|
126
|
+
generate_partial
|
|
127
|
+
generate_caches_from_memory
|
|
128
|
+
dump_caches
|
|
129
|
+
timed_exclaim %Q{"#{identifier}": Caching finished.}
|
|
130
|
+
end
|
|
131
|
+
def generate_caches_from_source
|
|
132
|
+
indexing_exact.generate_caches_from_source
|
|
133
|
+
end
|
|
134
|
+
def generate_partial
|
|
135
|
+
indexing_partial.generate_partial_from indexing_exact.index
|
|
136
|
+
end
|
|
137
|
+
def generate_caches_from_memory
|
|
138
|
+
indexing_partial.generate_caches_from_memory
|
|
139
|
+
end
|
|
140
|
+
def dump_caches
|
|
141
|
+
indexing_exact.dump
|
|
142
|
+
indexing_partial.dump
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
end
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# The original Class class.
|
|
2
|
+
#
|
|
3
|
+
class Class # :nodoc:all

  # Defines, for every given method name, a class-level method that forwards
  # the call - arguments and block included - to +self.instance+.
  #
  # The receiving class is expected to provide an +instance+ class method;
  # it is looked up on each call, not when the delegator is defined.
  #
  # The delegators are created with define_singleton_method rather than by
  # evaluating generated source text, so any method name works, including
  # names that cannot be written after +def self.+.
  #
  # Returns the given method names, as Array#each does.
  #
  def instance_delegate *methods
    methods.each do |name|
      define_singleton_method(name) do |*args, &block|
        self.instance.__send__(name, *args, &block)
      end
    end
  end

end
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|