picky 2.7.0 → 3.0.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/adapters/rack/base.rb +20 -16
- data/lib/picky/adapters/rack/live_parameters.rb +28 -24
- data/lib/picky/adapters/rack/search.rb +67 -0
- data/lib/picky/adapters/rack.rb +27 -23
- data/lib/picky/application.rb +246 -236
- data/lib/picky/backend/base.rb +115 -119
- data/lib/picky/backend/file/basic.rb +102 -98
- data/lib/picky/backend/file/json.rb +27 -23
- data/lib/picky/backend/file/marshal.rb +32 -28
- data/lib/picky/backend/file/text.rb +45 -41
- data/lib/picky/backend/files.rb +19 -15
- data/lib/picky/backend/redis/basic.rb +76 -72
- data/lib/picky/backend/redis/list_hash.rb +40 -36
- data/lib/picky/backend/redis/string_hash.rb +30 -26
- data/lib/picky/backend/redis.rb +32 -28
- data/lib/picky/bundle.rb +82 -57
- data/lib/{bundling.rb → picky/bundling.rb} +0 -0
- data/lib/picky/calculations/location.rb +51 -47
- data/lib/picky/categories.rb +60 -56
- data/lib/picky/categories_indexed.rb +73 -82
- data/lib/picky/categories_indexing.rb +12 -8
- data/lib/picky/category.rb +109 -120
- data/lib/picky/category_indexed.rb +39 -41
- data/lib/picky/category_indexing.rb +123 -125
- data/lib/picky/character_substituters/west_european.rb +32 -26
- data/lib/{constants.rb → picky/constants.rb} +0 -0
- data/lib/picky/cores.rb +96 -92
- data/lib/{deployment.rb → picky/deployment.rb} +0 -0
- data/lib/picky/frontend_adapters/rack.rb +133 -118
- data/lib/picky/generators/aliases.rb +5 -3
- data/lib/picky/generators/base.rb +11 -7
- data/lib/picky/generators/partial/default.rb +7 -3
- data/lib/picky/generators/partial/none.rb +24 -20
- data/lib/picky/generators/partial/strategy.rb +20 -16
- data/lib/picky/generators/partial/substring.rb +94 -90
- data/lib/picky/generators/partial_generator.rb +11 -7
- data/lib/picky/generators/similarity/default.rb +9 -5
- data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
- data/lib/picky/generators/similarity/metaphone.rb +20 -16
- data/lib/picky/generators/similarity/none.rb +23 -19
- data/lib/picky/generators/similarity/phonetic.rb +49 -45
- data/lib/picky/generators/similarity/soundex.rb +20 -16
- data/lib/picky/generators/similarity/strategy.rb +10 -6
- data/lib/picky/generators/similarity_generator.rb +11 -7
- data/lib/picky/generators/strategy.rb +14 -10
- data/lib/picky/generators/weights/default.rb +9 -5
- data/lib/picky/generators/weights/logarithmic.rb +30 -26
- data/lib/picky/generators/weights/strategy.rb +10 -6
- data/lib/picky/generators/weights_generator.rb +11 -7
- data/lib/picky/helpers/measuring.rb +20 -16
- data/lib/picky/indexed/bundle/base.rb +39 -37
- data/lib/picky/indexed/bundle/memory.rb +68 -64
- data/lib/picky/indexed/bundle/redis.rb +73 -69
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
- data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
- data/lib/picky/indexed/wrappers/category/location.rb +17 -13
- data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
- data/lib/picky/indexers/base.rb +26 -22
- data/lib/picky/indexers/parallel.rb +62 -58
- data/lib/picky/indexers/serial.rb +41 -37
- data/lib/picky/indexes/index.rb +400 -0
- data/lib/picky/indexes/index_indexed.rb +24 -0
- data/lib/picky/indexes/index_indexing.rb +138 -0
- data/lib/picky/indexes/memory.rb +20 -0
- data/lib/picky/indexes/redis.rb +20 -0
- data/lib/picky/indexes.rb +68 -61
- data/lib/picky/indexes_indexed.rb +16 -12
- data/lib/picky/indexes_indexing.rb +41 -37
- data/lib/picky/indexing/bundle/base.rb +216 -205
- data/lib/picky/indexing/bundle/memory.rb +16 -11
- data/lib/picky/indexing/bundle/redis.rb +14 -12
- data/lib/picky/indexing/wrappers/category/location.rb +17 -13
- data/lib/picky/interfaces/live_parameters.rb +159 -154
- data/lib/picky/loader.rb +267 -304
- data/lib/picky/loggers/search.rb +20 -13
- data/lib/picky/no_source_specified_exception.rb +7 -3
- data/lib/picky/performant.rb +6 -2
- data/lib/picky/query/allocation.rb +71 -67
- data/lib/picky/query/allocations.rb +99 -94
- data/lib/picky/query/combination.rb +70 -66
- data/lib/picky/query/combinations/base.rb +56 -52
- data/lib/picky/query/combinations/memory.rb +36 -32
- data/lib/picky/query/combinations/redis.rb +66 -62
- data/lib/picky/query/indexes.rb +175 -160
- data/lib/picky/query/qualifier_category_mapper.rb +43 -0
- data/lib/picky/query/token.rb +165 -172
- data/lib/picky/query/tokens.rb +86 -82
- data/lib/picky/query/weights.rb +44 -48
- data/lib/picky/query.rb +5 -1
- data/lib/picky/rack/harakiri.rb +51 -47
- data/lib/picky/results.rb +81 -77
- data/lib/picky/search.rb +169 -158
- data/lib/picky/sinatra.rb +34 -0
- data/lib/picky/sources/base.rb +73 -70
- data/lib/picky/sources/couch.rb +61 -57
- data/lib/picky/sources/csv.rb +68 -64
- data/lib/picky/sources/db.rb +139 -135
- data/lib/picky/sources/delicious.rb +52 -48
- data/lib/picky/sources/mongo.rb +68 -63
- data/lib/picky/sources/wrappers/base.rb +20 -16
- data/lib/picky/sources/wrappers/location.rb +37 -33
- data/lib/picky/statistics.rb +46 -43
- data/lib/picky/tasks.rb +3 -0
- data/lib/picky/tokenizers/base.rb +192 -187
- data/lib/picky/tokenizers/index.rb +25 -21
- data/lib/picky/tokenizers/location.rb +33 -29
- data/lib/picky/tokenizers/query.rb +49 -43
- data/lib/picky.rb +21 -13
- data/lib/tasks/application.rake +1 -1
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/routes.rake +1 -1
- data/lib/tasks/server.rake +1 -1
- data/spec/lib/adapters/rack/base_spec.rb +1 -1
- data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/adapters/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +39 -32
- data/spec/lib/backend/file/basic_spec.rb +2 -2
- data/spec/lib/backend/file/json_spec.rb +2 -2
- data/spec/lib/backend/file/marshal_spec.rb +2 -2
- data/spec/lib/backend/file/text_spec.rb +1 -1
- data/spec/lib/backend/files_spec.rb +14 -24
- data/spec/lib/backend/redis/basic_spec.rb +2 -2
- data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
- data/spec/lib/backend/redis_spec.rb +20 -13
- data/spec/lib/calculations/location_spec.rb +1 -1
- data/spec/lib/categories_indexed_spec.rb +16 -34
- data/spec/lib/category_indexed_spec.rb +9 -27
- data/spec/lib/category_indexing_spec.rb +2 -3
- data/spec/lib/category_spec.rb +10 -10
- data/spec/lib/character_substituters/west_european_spec.rb +6 -5
- data/spec/lib/cores_spec.rb +17 -17
- data/spec/lib/extensions/symbol_spec.rb +15 -1
- data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
- data/spec/lib/generators/partial/default_spec.rb +3 -3
- data/spec/lib/generators/partial/none_spec.rb +2 -2
- data/spec/lib/generators/partial/substring_spec.rb +1 -1
- data/spec/lib/generators/partial_generator_spec.rb +3 -3
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/none_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
- data/spec/lib/generators/similarity_generator_spec.rb +2 -2
- data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/generators/weights_generator_spec.rb +1 -1
- data/spec/lib/helpers/measuring_spec.rb +2 -2
- data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
- data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/base_spec.rb +1 -1
- data/spec/lib/indexers/parallel_spec.rb +1 -1
- data/spec/lib/indexers/serial_spec.rb +1 -1
- data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
- data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
- data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
- data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
- data/spec/lib/indexes_class_spec.rb +2 -2
- data/spec/lib/indexes_indexed_spec.rb +1 -1
- data/spec/lib/indexes_indexing_spec.rb +1 -1
- data/spec/lib/indexes_spec.rb +1 -1
- data/spec/lib/indexing/bundle/base_spec.rb +7 -5
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
- data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
- data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
- data/spec/lib/loader_spec.rb +17 -19
- data/spec/lib/loggers/search_spec.rb +2 -2
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +4 -4
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +7 -2
- data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
- data/spec/lib/query/token_spec.rb +32 -53
- data/spec/lib/query/tokens_spec.rb +30 -35
- data/spec/lib/query/weights_spec.rb +16 -16
- data/spec/lib/rack/harakiri_spec.rb +5 -5
- data/spec/lib/results_spec.rb +1 -1
- data/spec/lib/search_spec.rb +24 -22
- data/spec/lib/sinatra_spec.rb +36 -0
- data/spec/lib/sources/base_spec.rb +1 -1
- data/spec/lib/sources/couch_spec.rb +9 -9
- data/spec/lib/sources/csv_spec.rb +7 -7
- data/spec/lib/sources/db_spec.rb +2 -2
- data/spec/lib/sources/delicious_spec.rb +5 -5
- data/spec/lib/sources/mongo_spec.rb +7 -7
- data/spec/lib/sources/wrappers/base_spec.rb +2 -2
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/statistics_spec.rb +1 -1
- data/spec/lib/tokenizers/base_spec.rb +2 -2
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/lib/tokenizers/query_spec.rb +1 -1
- metadata +30 -30
- data/lib/picky/adapters/rack/query.rb +0 -65
- data/lib/picky/index/base.rb +0 -409
- data/lib/picky/index/base_indexed.rb +0 -29
- data/lib/picky/index/base_indexing.rb +0 -127
- data/lib/picky/index/memory.rb +0 -16
- data/lib/picky/index/redis.rb +0 -16
- data/lib/picky/query/qualifiers.rb +0 -76
- data/lib/picky/query/solr.rb +0 -60
- data/lib/picky/signals.rb +0 -8
- data/lib/picky-tasks.rb +0 -6
- data/lib/tasks/spec.rake +0 -11
- data/spec/lib/query/qualifiers_spec.rb +0 -31
data/lib/picky/categories.rb
CHANGED
@@ -1,63 +1,67 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
attr_reader :categories, :category_hash
|
4
|
-
|
5
|
-
delegate :each,
|
6
|
-
:first,
|
7
|
-
:map,
|
8
|
-
:to => :categories
|
9
|
-
|
10
|
-
each_delegate :reindex,
|
11
|
-
:each_category,
|
12
|
-
:to => :categories
|
13
|
-
|
14
|
-
# A list of indexed categories.
|
15
|
-
#
|
16
|
-
# Options:
|
17
|
-
# * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
|
18
|
-
# The default behaviour is that if a token does not match to
|
19
|
-
# any category, the query will not return anything (since a
|
20
|
-
# single token cannot be matched). If you set this option to
|
21
|
-
# true, any token that cannot be matched to a category will be
|
22
|
-
# simply ignored.
|
23
|
-
# Use this if only a few matched words are important, like for
|
24
|
-
# example of the query "Jonathan Myers 86455 Las Cucarachas"
|
25
|
-
# you only want to match the zipcode, to have the search engine
|
26
|
-
# display advertisements on the side for the zipcode.
|
27
|
-
# Nifty! :)
|
28
|
-
#
|
29
|
-
def initialize options = {}
|
30
|
-
clear_categories
|
31
|
-
|
32
|
-
@ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
|
33
|
-
end
|
1
|
+
module Picky
|
34
2
|
|
35
|
-
|
36
|
-
#
|
37
|
-
def clear_categories
|
38
|
-
@categories = []
|
39
|
-
@category_hash = {}
|
40
|
-
end
|
3
|
+
class Categories
|
41
4
|
|
42
|
-
|
43
|
-
#
|
44
|
-
def << category
|
45
|
-
categories << category
|
46
|
-
category_hash[category.name] = category
|
47
|
-
end
|
5
|
+
attr_reader :categories, :category_hash
|
48
6
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
7
|
+
delegate :each,
|
8
|
+
:first,
|
9
|
+
:map,
|
10
|
+
:to => :categories
|
11
|
+
|
12
|
+
each_delegate :reindex,
|
13
|
+
:each_category,
|
14
|
+
:to => :categories
|
15
|
+
|
16
|
+
# A list of indexed categories.
|
17
|
+
#
|
18
|
+
# Options:
|
19
|
+
# * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
|
20
|
+
# The default behaviour is that if a token does not match to
|
21
|
+
# any category, the query will not return anything (since a
|
22
|
+
# single token cannot be matched). If you set this option to
|
23
|
+
# true, any token that cannot be matched to a category will be
|
24
|
+
# simply ignored.
|
25
|
+
# Use this if only a few matched words are important, like for
|
26
|
+
# example of the query "Jonathan Myers 86455 Las Cucarachas"
|
27
|
+
# you only want to match the zipcode, to have the search engine
|
28
|
+
# display advertisements on the side for the zipcode.
|
29
|
+
# Nifty! :)
|
30
|
+
#
|
31
|
+
def initialize options = {}
|
32
|
+
clear_categories
|
33
|
+
|
34
|
+
@ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
|
35
|
+
end
|
36
|
+
|
37
|
+
# Clears both the array of categories and the hash of categories.
|
38
|
+
#
|
39
|
+
def clear_categories
|
40
|
+
@categories = []
|
41
|
+
@category_hash = {}
|
42
|
+
end
|
43
|
+
|
44
|
+
# Add the given category to the list of categories.
|
45
|
+
#
|
46
|
+
def << category
|
47
|
+
categories << category
|
48
|
+
category_hash[category.name] = category
|
49
|
+
end
|
50
|
+
|
51
|
+
# Find a given category in the categories.
|
52
|
+
#
|
53
|
+
def [] category_name
|
54
|
+
category_name = category_name.to_sym
|
55
|
+
category_hash[category_name] || raise_not_found(category_name)
|
56
|
+
end
|
57
|
+
def raise_not_found category_name
|
58
|
+
raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
|
59
|
+
end
|
60
|
+
|
61
|
+
def to_s
|
62
|
+
categories.join(', ')
|
63
|
+
end
|
58
64
|
|
59
|
-
def to_s
|
60
|
-
categories.join(', ')
|
61
65
|
end
|
62
66
|
|
63
67
|
end
|
@@ -1,98 +1,89 @@
|
|
1
|
-
|
1
|
+
module Picky
|
2
2
|
|
3
|
-
|
3
|
+
class Categories
|
4
4
|
|
5
|
-
|
6
|
-
:analyze,
|
7
|
-
:to => :categories
|
5
|
+
attr_reader :ignore_unassigned_tokens
|
8
6
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
# tokens, if for example, the token is one with ~.
|
13
|
-
# If yes, it puts together all solutions.
|
14
|
-
#
|
15
|
-
def possible_combinations_for token
|
16
|
-
token.similar? ? similar_possible_for(token) : possible_for(token)
|
17
|
-
end
|
7
|
+
each_delegate :load_from_cache,
|
8
|
+
:analyze,
|
9
|
+
:to => :categories
|
18
10
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
11
|
+
# Return all possible combinations for the given token.
|
12
|
+
#
|
13
|
+
# This checks if it needs to also search through similar
|
14
|
+
# tokens, if for example, the token is one with ~.
|
15
|
+
# If yes, it puts together all solutions.
|
16
|
+
#
|
17
|
+
def possible_combinations token
|
18
|
+
token.similar? ? similar_possible_for(token) : possible_for(token)
|
19
|
+
end
|
26
20
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
# Note: We could also break off here if not all the available
|
34
|
-
# similars are needed.
|
35
|
-
# Wait for a concrete case that needs this before taking
|
36
|
-
# action.
|
37
|
-
#
|
38
|
-
while next_token = next_token.next_similar_token(category)
|
39
|
-
result << next_token if next_token && next_token.text != text
|
40
|
-
end
|
41
|
-
result
|
21
|
+
# Gets all similar tokens and puts together the possible combinations
|
22
|
+
# for each found similar token.
|
23
|
+
#
|
24
|
+
def similar_possible_for token
|
25
|
+
tokens = similar_tokens_for token
|
26
|
+
inject_possible_for tokens
|
42
27
|
end
|
43
|
-
end
|
44
28
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
29
|
+
# Returns all possible similar tokens for the given token.
|
30
|
+
#
|
31
|
+
def similar_tokens_for token
|
32
|
+
text = token.text
|
33
|
+
categories.inject([]) do |result, category|
|
34
|
+
next_token = token
|
35
|
+
# Note: We could also break off here if not all the available
|
36
|
+
# similars are needed.
|
37
|
+
# Wait for a concrete case that needs this before taking
|
38
|
+
# action.
|
39
|
+
#
|
40
|
+
while next_token = next_token.next_similar_token(category)
|
41
|
+
result << next_token if next_token && next_token.text != text
|
42
|
+
end
|
43
|
+
result
|
44
|
+
end
|
51
45
|
end
|
52
|
-
end
|
53
46
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
# tokens option is true)
|
62
|
-
#
|
63
|
-
def possible_for token, preselected_categories = nil
|
64
|
-
possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
|
65
|
-
combination = category.combination_for token
|
66
|
-
combination ? combinations << combination : combinations
|
47
|
+
#
|
48
|
+
#
|
49
|
+
def inject_possible_for tokens
|
50
|
+
tokens.inject([]) do |result, token|
|
51
|
+
possible = possible_categories token
|
52
|
+
result + possible_for(token, possible)
|
53
|
+
end
|
67
54
|
end
|
68
|
-
|
55
|
+
|
56
|
+
# Returns possible Combinations for the token.
|
69
57
|
#
|
70
|
-
|
71
|
-
|
72
|
-
|
58
|
+
# Note: The preselected_categories param is an optimization.
|
59
|
+
#
|
60
|
+
# Note: Returns [] if no categories matched (will produce no result).
|
61
|
+
# Returns nil if this token needs to be removed from the query.
|
62
|
+
# (Also none of the categories matched, but the ignore unassigned
|
63
|
+
# tokens option is true)
|
64
|
+
#
|
65
|
+
def possible_for token, preselected_categories = nil
|
66
|
+
possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
|
67
|
+
combination = category.combination_for token
|
68
|
+
combination ? combinations << combination : combinations
|
69
|
+
end
|
70
|
+
# This is an optimization to mark tokens that are ignored.
|
71
|
+
#
|
72
|
+
return if ignore_unassigned_tokens && possible.empty?
|
73
|
+
possible
|
74
|
+
end
|
73
75
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
76
|
+
# This returns the possible categories for this token.
|
77
|
+
# If the user has already preselected a category for this token,
|
78
|
+
# like "artist:moby", if not just return all for the given token,
|
79
|
+
# since all are possible.
|
80
|
+
#
|
81
|
+
# Note: Once I thought this was called too often. But it is not (18.01.2011).
|
82
|
+
#
|
83
|
+
def possible_categories token
|
84
|
+
token.user_defined_categories || categories
|
85
|
+
end
|
84
86
|
|
85
|
-
# This returns the array of categories if the user has defined
|
86
|
-
# an existing category.
|
87
|
-
#
|
88
|
-
# Note: Returns nil if the user did not define one
|
89
|
-
# or [] if he/she has defined a non-existing one.
|
90
|
-
#
|
91
|
-
def user_defined_categories token
|
92
|
-
names = token.user_defined_category_names
|
93
|
-
names && names.map do |name|
|
94
|
-
category_hash[name]
|
95
|
-
end.compact
|
96
87
|
end
|
97
88
|
|
98
89
|
end
|
@@ -1,10 +1,14 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
class Categories
|
4
|
+
|
5
|
+
each_delegate :cache,
|
6
|
+
:check,
|
7
|
+
:clear,
|
8
|
+
:backup,
|
9
|
+
:restore,
|
10
|
+
:to => :categories
|
11
|
+
|
12
|
+
end
|
9
13
|
|
10
14
|
end
|
data/lib/picky/category.rb
CHANGED
@@ -1,139 +1,128 @@
|
|
1
|
-
|
1
|
+
module Picky
|
2
2
|
|
3
|
-
|
3
|
+
class Category
|
4
4
|
|
5
|
-
|
6
|
-
# * name: Category name to use as identifier and file names.
|
7
|
-
# * index: Index to which this category is attached to.
|
8
|
-
#
|
9
|
-
# Options:
|
10
|
-
# * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
|
11
|
-
# * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
|
12
|
-
# * from: The source category identifier to take the data from.
|
13
|
-
#
|
14
|
-
# Advanced Options:
|
15
|
-
# * source: Use if the category should use a different source.
|
16
|
-
# * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
|
17
|
-
# * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
|
18
|
-
# * key_format: What this category's keys are formatted with (default is :to_i)
|
19
|
-
#
|
20
|
-
def initialize name, index, options = {}
|
21
|
-
@name = name
|
22
|
-
@index = index
|
5
|
+
attr_reader :name
|
23
6
|
|
24
|
-
#
|
7
|
+
# Mandatory params:
|
8
|
+
# * name: Category name to use as identifier and file names.
|
9
|
+
# * index: Index to which this category is attached to.
|
25
10
|
#
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
# TODO Push into Bundle. At least the weights.
|
11
|
+
# Options:
|
12
|
+
# * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
|
13
|
+
# * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
|
14
|
+
# * from: The source category identifier to take the data from.
|
32
15
|
#
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
@indexing_partial = index.indexing_bundle_class.new(:partial, self, weights, partial, Generators::Similarity::None.new)
|
39
|
-
|
40
|
-
# Indexed.
|
16
|
+
# Advanced Options:
|
17
|
+
# * source: Use if the category should use a different source.
|
18
|
+
# * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
|
19
|
+
# * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
|
20
|
+
# * key_format: What this category's keys are formatted with (default is :to_i)
|
41
21
|
#
|
42
|
-
|
22
|
+
def initialize name, index, options = {}
|
23
|
+
@name = name
|
24
|
+
@index = index
|
25
|
+
|
26
|
+
# Indexing.
|
27
|
+
#
|
28
|
+
@source = options[:source]
|
29
|
+
@from = options[:from]
|
30
|
+
@tokenizer = options[:tokenizer]
|
31
|
+
@key_format = options[:key_format]
|
32
|
+
@qualifiers = extract_qualifiers_from options
|
33
|
+
|
34
|
+
weights = options[:weights] || Generators::Weights::Default
|
35
|
+
partial = options[:partial] || Generators::Partial::Default
|
36
|
+
similarity = options[:similarity] || Generators::Similarity::Default
|
37
|
+
|
38
|
+
@indexing_exact = index.indexing_bundle_class.new :exact, self, weights, Generators::Partial::None.new, similarity, options
|
39
|
+
@indexing_partial = index.indexing_bundle_class.new :partial, self, weights, partial, Generators::Similarity::None.new, options
|
40
|
+
|
41
|
+
# Indexed.
|
42
|
+
#
|
43
|
+
@indexed_exact = index.indexed_bundle_class.new :exact, self, similarity
|
44
|
+
if partial.use_exact_for_partial?
|
45
|
+
@indexed_partial = @indexed_exact
|
46
|
+
else
|
47
|
+
@indexed_partial = index.indexed_bundle_class.new :partial, self, similarity
|
48
|
+
end
|
49
|
+
|
50
|
+
# @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
|
51
|
+
# @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
|
52
|
+
end
|
53
|
+
|
54
|
+
# Indexes and reloads the category.
|
43
55
|
#
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
# @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
|
50
|
-
# @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
|
56
|
+
def reindex
|
57
|
+
index
|
58
|
+
reload
|
59
|
+
end
|
51
60
|
|
52
|
-
#
|
61
|
+
# Index name.
|
53
62
|
#
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
# TODO Move to Index.
|
58
|
-
#
|
59
|
-
def generate_qualifiers_from options
|
60
|
-
options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
|
61
|
-
end
|
63
|
+
def index_name
|
64
|
+
@index.name
|
65
|
+
end
|
62
66
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
end
|
75
|
-
|
76
|
-
# Index name.
|
77
|
-
#
|
78
|
-
def index_name
|
79
|
-
@index.name
|
80
|
-
end
|
67
|
+
# Returns the qualifiers if set or
|
68
|
+
# just the name if not.
|
69
|
+
#
|
70
|
+
def qualifiers
|
71
|
+
@qualifiers || [name]
|
72
|
+
end
|
73
|
+
# Extract qualifiers from the options.
|
74
|
+
#
|
75
|
+
def extract_qualifiers_from options
|
76
|
+
options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
|
77
|
+
end
|
81
78
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
79
|
+
# The category itself just yields itself.
|
80
|
+
#
|
81
|
+
def each_category
|
82
|
+
yield self
|
83
|
+
end
|
87
84
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
85
|
+
# Path and partial filename of the prepared index on this category.
|
86
|
+
#
|
87
|
+
def prepared_index_path
|
88
|
+
@prepared_index_path ||= "#{index_directory}/prepared_#{name}_index"
|
89
|
+
end
|
90
|
+
# Get an opened index file.
|
91
|
+
#
|
92
|
+
# Note: If you don't use it with the block, do not forget to close it.
|
93
|
+
#
|
94
|
+
def prepared_index_file &block
|
95
|
+
@prepared_index_file ||= Backend::File::Text.new prepared_index_path
|
96
|
+
@prepared_index_file.open &block
|
97
|
+
end
|
98
|
+
# Creates the index directory including all necessary paths above it.
|
99
|
+
#
|
100
|
+
# Note: Interface method called by any indexers.
|
101
|
+
#
|
102
|
+
def prepare_index_directory
|
103
|
+
FileUtils.mkdir_p index_directory
|
104
|
+
end
|
108
105
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
106
|
+
# The index directory for this category.
|
107
|
+
#
|
108
|
+
# TODO Push down into files?
|
109
|
+
#
|
110
|
+
def index_directory
|
111
|
+
@index_directory ||= "#{PICKY_ROOT}/index/#{PICKY_ENVIRONMENT}/#{@index.name}"
|
112
|
+
end
|
114
113
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
# * partial index
|
121
|
-
# * similarity index
|
122
|
-
#
|
123
|
-
def index_path bundle_name, type
|
124
|
-
"#{index_directory}/#{name}_#{bundle_name}_#{type}"
|
125
|
-
end
|
114
|
+
# Identifier for technical output.
|
115
|
+
#
|
116
|
+
def identifier
|
117
|
+
"#{@index.identifier}:#{name}"
|
118
|
+
end
|
126
119
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
120
|
+
#
|
121
|
+
#
|
122
|
+
def to_s
|
123
|
+
"#{self.class}(#{identifier})"
|
124
|
+
end
|
132
125
|
|
133
|
-
#
|
134
|
-
#
|
135
|
-
def to_s
|
136
|
-
"Category(#{name})"
|
137
126
|
end
|
138
127
|
|
139
128
|
end
|
@@ -1,48 +1,46 @@
|
|
1
|
-
|
2
|
-
#
|
3
|
-
class Category
|
1
|
+
module Picky
|
4
2
|
|
5
|
-
attr_reader :indexed_exact
|
6
|
-
|
7
|
-
# Loads the index from cache.
|
8
|
-
#
|
9
|
-
def load_from_cache
|
10
|
-
timed_exclaim %Q{"#{identifier}": Loading index from cache.}
|
11
|
-
indexed_exact.load
|
12
|
-
indexed_partial.load
|
13
|
-
end
|
14
|
-
alias reload load_from_cache
|
15
|
-
|
16
|
-
# Gets the weight for this token's text.
|
17
|
-
#
|
18
|
-
def weight token
|
19
|
-
bundle_for(token).weight token.text
|
20
|
-
end
|
21
|
-
|
22
|
-
# Gets the ids for this token's text.
|
23
3
|
#
|
24
|
-
def ids token
|
25
|
-
bundle_for(token).ids token.text
|
26
|
-
end
|
27
|
-
|
28
|
-
# Returns the right index bundle for this token.
|
29
4
|
#
|
30
|
-
|
31
|
-
|
32
|
-
|
5
|
+
class Category
|
6
|
+
|
7
|
+
attr_reader :indexed_exact,
|
8
|
+
:indexed_partial
|
9
|
+
|
10
|
+
# Loads the index from cache.
|
11
|
+
#
|
12
|
+
def load_from_cache
|
13
|
+
timed_exclaim %Q{"#{identifier}": Loading index from cache.}
|
14
|
+
indexed_exact.load
|
15
|
+
indexed_partial.load
|
16
|
+
end
|
17
|
+
alias reload load_from_cache
|
18
|
+
|
19
|
+
# Gets the weight for this token's text.
|
20
|
+
#
|
21
|
+
def weight token
|
22
|
+
bundle_for(token).weight token.text
|
23
|
+
end
|
24
|
+
|
25
|
+
# Gets the ids for this token's text.
|
26
|
+
#
|
27
|
+
def ids token
|
28
|
+
bundle_for(token).ids token.text
|
29
|
+
end
|
30
|
+
|
31
|
+
# Returns the right index bundle for this token.
|
32
|
+
#
|
33
|
+
def bundle_for token
|
34
|
+
token.partial? ? indexed_partial : indexed_exact
|
35
|
+
end
|
36
|
+
|
37
|
+
# Returns a combination for the token,
|
38
|
+
# or nil, if there is none.
|
39
|
+
#
|
40
|
+
def combination_for token
|
41
|
+
weight(token) && Query::Combination.new(token, self)
|
42
|
+
end
|
33
43
|
|
34
|
-
# The partial strategy defines whether to
|
35
|
-
# really use the partial index.
|
36
|
-
#
|
37
|
-
def indexed_partial
|
38
|
-
@partial_strategy.use_exact_for_partial? ? @indexed_exact : @indexed_partial
|
39
|
-
end
|
40
|
-
|
41
|
-
# Returns a combination for the token,
|
42
|
-
# or nil, if there is none.
|
43
|
-
#
|
44
|
-
def combination_for token
|
45
|
-
weight(token) && Query::Combination.new(token, self)
|
46
44
|
end
|
47
45
|
|
48
46
|
end
|