picky 2.7.0 → 3.0.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/adapters/rack/base.rb +20 -16
- data/lib/picky/adapters/rack/live_parameters.rb +28 -24
- data/lib/picky/adapters/rack/search.rb +67 -0
- data/lib/picky/adapters/rack.rb +27 -23
- data/lib/picky/application.rb +246 -236
- data/lib/picky/backend/base.rb +115 -119
- data/lib/picky/backend/file/basic.rb +102 -98
- data/lib/picky/backend/file/json.rb +27 -23
- data/lib/picky/backend/file/marshal.rb +32 -28
- data/lib/picky/backend/file/text.rb +45 -41
- data/lib/picky/backend/files.rb +19 -15
- data/lib/picky/backend/redis/basic.rb +76 -72
- data/lib/picky/backend/redis/list_hash.rb +40 -36
- data/lib/picky/backend/redis/string_hash.rb +30 -26
- data/lib/picky/backend/redis.rb +32 -28
- data/lib/picky/bundle.rb +82 -57
- data/lib/{bundling.rb → picky/bundling.rb} +0 -0
- data/lib/picky/calculations/location.rb +51 -47
- data/lib/picky/categories.rb +60 -56
- data/lib/picky/categories_indexed.rb +73 -82
- data/lib/picky/categories_indexing.rb +12 -8
- data/lib/picky/category.rb +109 -120
- data/lib/picky/category_indexed.rb +39 -41
- data/lib/picky/category_indexing.rb +123 -125
- data/lib/picky/character_substituters/west_european.rb +32 -26
- data/lib/{constants.rb → picky/constants.rb} +0 -0
- data/lib/picky/cores.rb +96 -92
- data/lib/{deployment.rb → picky/deployment.rb} +0 -0
- data/lib/picky/frontend_adapters/rack.rb +133 -118
- data/lib/picky/generators/aliases.rb +5 -3
- data/lib/picky/generators/base.rb +11 -7
- data/lib/picky/generators/partial/default.rb +7 -3
- data/lib/picky/generators/partial/none.rb +24 -20
- data/lib/picky/generators/partial/strategy.rb +20 -16
- data/lib/picky/generators/partial/substring.rb +94 -90
- data/lib/picky/generators/partial_generator.rb +11 -7
- data/lib/picky/generators/similarity/default.rb +9 -5
- data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
- data/lib/picky/generators/similarity/metaphone.rb +20 -16
- data/lib/picky/generators/similarity/none.rb +23 -19
- data/lib/picky/generators/similarity/phonetic.rb +49 -45
- data/lib/picky/generators/similarity/soundex.rb +20 -16
- data/lib/picky/generators/similarity/strategy.rb +10 -6
- data/lib/picky/generators/similarity_generator.rb +11 -7
- data/lib/picky/generators/strategy.rb +14 -10
- data/lib/picky/generators/weights/default.rb +9 -5
- data/lib/picky/generators/weights/logarithmic.rb +30 -26
- data/lib/picky/generators/weights/strategy.rb +10 -6
- data/lib/picky/generators/weights_generator.rb +11 -7
- data/lib/picky/helpers/measuring.rb +20 -16
- data/lib/picky/indexed/bundle/base.rb +39 -37
- data/lib/picky/indexed/bundle/memory.rb +68 -64
- data/lib/picky/indexed/bundle/redis.rb +73 -69
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
- data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
- data/lib/picky/indexed/wrappers/category/location.rb +17 -13
- data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
- data/lib/picky/indexers/base.rb +26 -22
- data/lib/picky/indexers/parallel.rb +62 -58
- data/lib/picky/indexers/serial.rb +41 -37
- data/lib/picky/indexes/index.rb +400 -0
- data/lib/picky/indexes/index_indexed.rb +24 -0
- data/lib/picky/indexes/index_indexing.rb +138 -0
- data/lib/picky/indexes/memory.rb +20 -0
- data/lib/picky/indexes/redis.rb +20 -0
- data/lib/picky/indexes.rb +68 -61
- data/lib/picky/indexes_indexed.rb +16 -12
- data/lib/picky/indexes_indexing.rb +41 -37
- data/lib/picky/indexing/bundle/base.rb +216 -205
- data/lib/picky/indexing/bundle/memory.rb +16 -11
- data/lib/picky/indexing/bundle/redis.rb +14 -12
- data/lib/picky/indexing/wrappers/category/location.rb +17 -13
- data/lib/picky/interfaces/live_parameters.rb +159 -154
- data/lib/picky/loader.rb +267 -304
- data/lib/picky/loggers/search.rb +20 -13
- data/lib/picky/no_source_specified_exception.rb +7 -3
- data/lib/picky/performant.rb +6 -2
- data/lib/picky/query/allocation.rb +71 -67
- data/lib/picky/query/allocations.rb +99 -94
- data/lib/picky/query/combination.rb +70 -66
- data/lib/picky/query/combinations/base.rb +56 -52
- data/lib/picky/query/combinations/memory.rb +36 -32
- data/lib/picky/query/combinations/redis.rb +66 -62
- data/lib/picky/query/indexes.rb +175 -160
- data/lib/picky/query/qualifier_category_mapper.rb +43 -0
- data/lib/picky/query/token.rb +165 -172
- data/lib/picky/query/tokens.rb +86 -82
- data/lib/picky/query/weights.rb +44 -48
- data/lib/picky/query.rb +5 -1
- data/lib/picky/rack/harakiri.rb +51 -47
- data/lib/picky/results.rb +81 -77
- data/lib/picky/search.rb +169 -158
- data/lib/picky/sinatra.rb +34 -0
- data/lib/picky/sources/base.rb +73 -70
- data/lib/picky/sources/couch.rb +61 -57
- data/lib/picky/sources/csv.rb +68 -64
- data/lib/picky/sources/db.rb +139 -135
- data/lib/picky/sources/delicious.rb +52 -48
- data/lib/picky/sources/mongo.rb +68 -63
- data/lib/picky/sources/wrappers/base.rb +20 -16
- data/lib/picky/sources/wrappers/location.rb +37 -33
- data/lib/picky/statistics.rb +46 -43
- data/lib/picky/tasks.rb +3 -0
- data/lib/picky/tokenizers/base.rb +192 -187
- data/lib/picky/tokenizers/index.rb +25 -21
- data/lib/picky/tokenizers/location.rb +33 -29
- data/lib/picky/tokenizers/query.rb +49 -43
- data/lib/picky.rb +21 -13
- data/lib/tasks/application.rake +1 -1
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/routes.rake +1 -1
- data/lib/tasks/server.rake +1 -1
- data/spec/lib/adapters/rack/base_spec.rb +1 -1
- data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/adapters/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +39 -32
- data/spec/lib/backend/file/basic_spec.rb +2 -2
- data/spec/lib/backend/file/json_spec.rb +2 -2
- data/spec/lib/backend/file/marshal_spec.rb +2 -2
- data/spec/lib/backend/file/text_spec.rb +1 -1
- data/spec/lib/backend/files_spec.rb +14 -24
- data/spec/lib/backend/redis/basic_spec.rb +2 -2
- data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
- data/spec/lib/backend/redis_spec.rb +20 -13
- data/spec/lib/calculations/location_spec.rb +1 -1
- data/spec/lib/categories_indexed_spec.rb +16 -34
- data/spec/lib/category_indexed_spec.rb +9 -27
- data/spec/lib/category_indexing_spec.rb +2 -3
- data/spec/lib/category_spec.rb +10 -10
- data/spec/lib/character_substituters/west_european_spec.rb +6 -5
- data/spec/lib/cores_spec.rb +17 -17
- data/spec/lib/extensions/symbol_spec.rb +15 -1
- data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
- data/spec/lib/generators/partial/default_spec.rb +3 -3
- data/spec/lib/generators/partial/none_spec.rb +2 -2
- data/spec/lib/generators/partial/substring_spec.rb +1 -1
- data/spec/lib/generators/partial_generator_spec.rb +3 -3
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/none_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
- data/spec/lib/generators/similarity_generator_spec.rb +2 -2
- data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/generators/weights_generator_spec.rb +1 -1
- data/spec/lib/helpers/measuring_spec.rb +2 -2
- data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
- data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/base_spec.rb +1 -1
- data/spec/lib/indexers/parallel_spec.rb +1 -1
- data/spec/lib/indexers/serial_spec.rb +1 -1
- data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
- data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
- data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
- data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
- data/spec/lib/indexes_class_spec.rb +2 -2
- data/spec/lib/indexes_indexed_spec.rb +1 -1
- data/spec/lib/indexes_indexing_spec.rb +1 -1
- data/spec/lib/indexes_spec.rb +1 -1
- data/spec/lib/indexing/bundle/base_spec.rb +7 -5
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
- data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
- data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
- data/spec/lib/loader_spec.rb +17 -19
- data/spec/lib/loggers/search_spec.rb +2 -2
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +4 -4
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +7 -2
- data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
- data/spec/lib/query/token_spec.rb +32 -53
- data/spec/lib/query/tokens_spec.rb +30 -35
- data/spec/lib/query/weights_spec.rb +16 -16
- data/spec/lib/rack/harakiri_spec.rb +5 -5
- data/spec/lib/results_spec.rb +1 -1
- data/spec/lib/search_spec.rb +24 -22
- data/spec/lib/sinatra_spec.rb +36 -0
- data/spec/lib/sources/base_spec.rb +1 -1
- data/spec/lib/sources/couch_spec.rb +9 -9
- data/spec/lib/sources/csv_spec.rb +7 -7
- data/spec/lib/sources/db_spec.rb +2 -2
- data/spec/lib/sources/delicious_spec.rb +5 -5
- data/spec/lib/sources/mongo_spec.rb +7 -7
- data/spec/lib/sources/wrappers/base_spec.rb +2 -2
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/statistics_spec.rb +1 -1
- data/spec/lib/tokenizers/base_spec.rb +2 -2
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/lib/tokenizers/query_spec.rb +1 -1
- metadata +30 -30
- data/lib/picky/adapters/rack/query.rb +0 -65
- data/lib/picky/index/base.rb +0 -409
- data/lib/picky/index/base_indexed.rb +0 -29
- data/lib/picky/index/base_indexing.rb +0 -127
- data/lib/picky/index/memory.rb +0 -16
- data/lib/picky/index/redis.rb +0 -16
- data/lib/picky/query/qualifiers.rb +0 -76
- data/lib/picky/query/solr.rb +0 -60
- data/lib/picky/signals.rb +0 -8
- data/lib/picky-tasks.rb +0 -6
- data/lib/tasks/spec.rake +0 -11
- data/spec/lib/query/qualifiers_spec.rb +0 -31
data/lib/picky/categories.rb
CHANGED
|
@@ -1,63 +1,67 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
attr_reader :categories, :category_hash
|
|
4
|
-
|
|
5
|
-
delegate :each,
|
|
6
|
-
:first,
|
|
7
|
-
:map,
|
|
8
|
-
:to => :categories
|
|
9
|
-
|
|
10
|
-
each_delegate :reindex,
|
|
11
|
-
:each_category,
|
|
12
|
-
:to => :categories
|
|
13
|
-
|
|
14
|
-
# A list of indexed categories.
|
|
15
|
-
#
|
|
16
|
-
# Options:
|
|
17
|
-
# * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
|
|
18
|
-
# The default behaviour is that if a token does not match to
|
|
19
|
-
# any category, the query will not return anything (since a
|
|
20
|
-
# single token cannot be matched). If you set this option to
|
|
21
|
-
# true, any token that cannot be matched to a category will be
|
|
22
|
-
# simply ignored.
|
|
23
|
-
# Use this if only a few matched words are important, like for
|
|
24
|
-
# example of the query "Jonathan Myers 86455 Las Cucarachas"
|
|
25
|
-
# you only want to match the zipcode, to have the search engine
|
|
26
|
-
# display advertisements on the side for the zipcode.
|
|
27
|
-
# Nifty! :)
|
|
28
|
-
#
|
|
29
|
-
def initialize options = {}
|
|
30
|
-
clear_categories
|
|
31
|
-
|
|
32
|
-
@ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
|
|
33
|
-
end
|
|
1
|
+
module Picky
|
|
34
2
|
|
|
35
|
-
|
|
36
|
-
#
|
|
37
|
-
def clear_categories
|
|
38
|
-
@categories = []
|
|
39
|
-
@category_hash = {}
|
|
40
|
-
end
|
|
3
|
+
class Categories
|
|
41
4
|
|
|
42
|
-
|
|
43
|
-
#
|
|
44
|
-
def << category
|
|
45
|
-
categories << category
|
|
46
|
-
category_hash[category.name] = category
|
|
47
|
-
end
|
|
5
|
+
attr_reader :categories, :category_hash
|
|
48
6
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
7
|
+
delegate :each,
|
|
8
|
+
:first,
|
|
9
|
+
:map,
|
|
10
|
+
:to => :categories
|
|
11
|
+
|
|
12
|
+
each_delegate :reindex,
|
|
13
|
+
:each_category,
|
|
14
|
+
:to => :categories
|
|
15
|
+
|
|
16
|
+
# A list of indexed categories.
|
|
17
|
+
#
|
|
18
|
+
# Options:
|
|
19
|
+
# * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
|
|
20
|
+
# The default behaviour is that if a token does not match to
|
|
21
|
+
# any category, the query will not return anything (since a
|
|
22
|
+
# single token cannot be matched). If you set this option to
|
|
23
|
+
# true, any token that cannot be matched to a category will be
|
|
24
|
+
# simply ignored.
|
|
25
|
+
# Use this if only a few matched words are important, like for
|
|
26
|
+
# example of the query "Jonathan Myers 86455 Las Cucarachas"
|
|
27
|
+
# you only want to match the zipcode, to have the search engine
|
|
28
|
+
# display advertisements on the side for the zipcode.
|
|
29
|
+
# Nifty! :)
|
|
30
|
+
#
|
|
31
|
+
def initialize options = {}
|
|
32
|
+
clear_categories
|
|
33
|
+
|
|
34
|
+
@ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Clears both the array of categories and the hash of categories.
|
|
38
|
+
#
|
|
39
|
+
def clear_categories
|
|
40
|
+
@categories = []
|
|
41
|
+
@category_hash = {}
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Add the given category to the list of categories.
|
|
45
|
+
#
|
|
46
|
+
def << category
|
|
47
|
+
categories << category
|
|
48
|
+
category_hash[category.name] = category
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# Find a given category in the categories.
|
|
52
|
+
#
|
|
53
|
+
def [] category_name
|
|
54
|
+
category_name = category_name.to_sym
|
|
55
|
+
category_hash[category_name] || raise_not_found(category_name)
|
|
56
|
+
end
|
|
57
|
+
def raise_not_found category_name
|
|
58
|
+
raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def to_s
|
|
62
|
+
categories.join(', ')
|
|
63
|
+
end
|
|
58
64
|
|
|
59
|
-
def to_s
|
|
60
|
-
categories.join(', ')
|
|
61
65
|
end
|
|
62
66
|
|
|
63
67
|
end
|
|
@@ -1,98 +1,89 @@
|
|
|
1
|
-
|
|
1
|
+
module Picky
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
class Categories
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
:analyze,
|
|
7
|
-
:to => :categories
|
|
5
|
+
attr_reader :ignore_unassigned_tokens
|
|
8
6
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
# tokens, if for example, the token is one with ~.
|
|
13
|
-
# If yes, it puts together all solutions.
|
|
14
|
-
#
|
|
15
|
-
def possible_combinations_for token
|
|
16
|
-
token.similar? ? similar_possible_for(token) : possible_for(token)
|
|
17
|
-
end
|
|
7
|
+
each_delegate :load_from_cache,
|
|
8
|
+
:analyze,
|
|
9
|
+
:to => :categories
|
|
18
10
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
11
|
+
# Return all possible combinations for the given token.
|
|
12
|
+
#
|
|
13
|
+
# This checks if it needs to also search through similar
|
|
14
|
+
# tokens, if for example, the token is one with ~.
|
|
15
|
+
# If yes, it puts together all solutions.
|
|
16
|
+
#
|
|
17
|
+
def possible_combinations token
|
|
18
|
+
token.similar? ? similar_possible_for(token) : possible_for(token)
|
|
19
|
+
end
|
|
26
20
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
# Note: We could also break off here if not all the available
|
|
34
|
-
# similars are needed.
|
|
35
|
-
# Wait for a concrete case that needs this before taking
|
|
36
|
-
# action.
|
|
37
|
-
#
|
|
38
|
-
while next_token = next_token.next_similar_token(category)
|
|
39
|
-
result << next_token if next_token && next_token.text != text
|
|
40
|
-
end
|
|
41
|
-
result
|
|
21
|
+
# Gets all similar tokens and puts together the possible combinations
|
|
22
|
+
# for each found similar token.
|
|
23
|
+
#
|
|
24
|
+
def similar_possible_for token
|
|
25
|
+
tokens = similar_tokens_for token
|
|
26
|
+
inject_possible_for tokens
|
|
42
27
|
end
|
|
43
|
-
end
|
|
44
28
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
29
|
+
# Returns all possible similar tokens for the given token.
|
|
30
|
+
#
|
|
31
|
+
def similar_tokens_for token
|
|
32
|
+
text = token.text
|
|
33
|
+
categories.inject([]) do |result, category|
|
|
34
|
+
next_token = token
|
|
35
|
+
# Note: We could also break off here if not all the available
|
|
36
|
+
# similars are needed.
|
|
37
|
+
# Wait for a concrete case that needs this before taking
|
|
38
|
+
# action.
|
|
39
|
+
#
|
|
40
|
+
while next_token = next_token.next_similar_token(category)
|
|
41
|
+
result << next_token if next_token && next_token.text != text
|
|
42
|
+
end
|
|
43
|
+
result
|
|
44
|
+
end
|
|
51
45
|
end
|
|
52
|
-
end
|
|
53
46
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
# tokens option is true)
|
|
62
|
-
#
|
|
63
|
-
def possible_for token, preselected_categories = nil
|
|
64
|
-
possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
|
|
65
|
-
combination = category.combination_for token
|
|
66
|
-
combination ? combinations << combination : combinations
|
|
47
|
+
#
|
|
48
|
+
#
|
|
49
|
+
def inject_possible_for tokens
|
|
50
|
+
tokens.inject([]) do |result, token|
|
|
51
|
+
possible = possible_categories token
|
|
52
|
+
result + possible_for(token, possible)
|
|
53
|
+
end
|
|
67
54
|
end
|
|
68
|
-
|
|
55
|
+
|
|
56
|
+
# Returns possible Combinations for the token.
|
|
69
57
|
#
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
58
|
+
# Note: The preselected_categories param is an optimization.
|
|
59
|
+
#
|
|
60
|
+
# Note: Returns [] if no categories matched (will produce no result).
|
|
61
|
+
# Returns nil if this token needs to be removed from the query.
|
|
62
|
+
# (Also none of the categories matched, but the ignore unassigned
|
|
63
|
+
# tokens option is true)
|
|
64
|
+
#
|
|
65
|
+
def possible_for token, preselected_categories = nil
|
|
66
|
+
possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
|
|
67
|
+
combination = category.combination_for token
|
|
68
|
+
combination ? combinations << combination : combinations
|
|
69
|
+
end
|
|
70
|
+
# This is an optimization to mark tokens that are ignored.
|
|
71
|
+
#
|
|
72
|
+
return if ignore_unassigned_tokens && possible.empty?
|
|
73
|
+
possible
|
|
74
|
+
end
|
|
73
75
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
76
|
+
# This returns the possible categories for this token.
|
|
77
|
+
# If the user has already preselected a category for this token,
|
|
78
|
+
# like "artist:moby", if not just return all for the given token,
|
|
79
|
+
# since all are possible.
|
|
80
|
+
#
|
|
81
|
+
# Note: Once I thought this was called too often. But it is not (18.01.2011).
|
|
82
|
+
#
|
|
83
|
+
def possible_categories token
|
|
84
|
+
token.user_defined_categories || categories
|
|
85
|
+
end
|
|
84
86
|
|
|
85
|
-
# This returns the array of categories if the user has defined
|
|
86
|
-
# an existing category.
|
|
87
|
-
#
|
|
88
|
-
# Note: Returns nil if the user did not define one
|
|
89
|
-
# or [] if he/she has defined a non-existing one.
|
|
90
|
-
#
|
|
91
|
-
def user_defined_categories token
|
|
92
|
-
names = token.user_defined_category_names
|
|
93
|
-
names && names.map do |name|
|
|
94
|
-
category_hash[name]
|
|
95
|
-
end.compact
|
|
96
87
|
end
|
|
97
88
|
|
|
98
89
|
end
|
|
@@ -1,10 +1,14 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
1
|
+
module Picky
|
|
2
|
+
|
|
3
|
+
class Categories
|
|
4
|
+
|
|
5
|
+
each_delegate :cache,
|
|
6
|
+
:check,
|
|
7
|
+
:clear,
|
|
8
|
+
:backup,
|
|
9
|
+
:restore,
|
|
10
|
+
:to => :categories
|
|
11
|
+
|
|
12
|
+
end
|
|
9
13
|
|
|
10
14
|
end
|
data/lib/picky/category.rb
CHANGED
|
@@ -1,139 +1,128 @@
|
|
|
1
|
-
|
|
1
|
+
module Picky
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
class Category
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
# * name: Category name to use as identifier and file names.
|
|
7
|
-
# * index: Index to which this category is attached to.
|
|
8
|
-
#
|
|
9
|
-
# Options:
|
|
10
|
-
# * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
|
|
11
|
-
# * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
|
|
12
|
-
# * from: The source category identifier to take the data from.
|
|
13
|
-
#
|
|
14
|
-
# Advanced Options:
|
|
15
|
-
# * source: Use if the category should use a different source.
|
|
16
|
-
# * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
|
|
17
|
-
# * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
|
|
18
|
-
# * key_format: What this category's keys are formatted with (default is :to_i)
|
|
19
|
-
#
|
|
20
|
-
def initialize name, index, options = {}
|
|
21
|
-
@name = name
|
|
22
|
-
@index = index
|
|
5
|
+
attr_reader :name
|
|
23
6
|
|
|
24
|
-
#
|
|
7
|
+
# Mandatory params:
|
|
8
|
+
# * name: Category name to use as identifier and file names.
|
|
9
|
+
# * index: Index to which this category is attached to.
|
|
25
10
|
#
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
# TODO Push into Bundle. At least the weights.
|
|
11
|
+
# Options:
|
|
12
|
+
# * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
|
|
13
|
+
# * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
|
|
14
|
+
# * from: The source category identifier to take the data from.
|
|
32
15
|
#
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
@indexing_partial = index.indexing_bundle_class.new(:partial, self, weights, partial, Generators::Similarity::None.new)
|
|
39
|
-
|
|
40
|
-
# Indexed.
|
|
16
|
+
# Advanced Options:
|
|
17
|
+
# * source: Use if the category should use a different source.
|
|
18
|
+
# * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
|
|
19
|
+
# * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
|
|
20
|
+
# * key_format: What this category's keys are formatted with (default is :to_i)
|
|
41
21
|
#
|
|
42
|
-
|
|
22
|
+
def initialize name, index, options = {}
|
|
23
|
+
@name = name
|
|
24
|
+
@index = index
|
|
25
|
+
|
|
26
|
+
# Indexing.
|
|
27
|
+
#
|
|
28
|
+
@source = options[:source]
|
|
29
|
+
@from = options[:from]
|
|
30
|
+
@tokenizer = options[:tokenizer]
|
|
31
|
+
@key_format = options[:key_format]
|
|
32
|
+
@qualifiers = extract_qualifiers_from options
|
|
33
|
+
|
|
34
|
+
weights = options[:weights] || Generators::Weights::Default
|
|
35
|
+
partial = options[:partial] || Generators::Partial::Default
|
|
36
|
+
similarity = options[:similarity] || Generators::Similarity::Default
|
|
37
|
+
|
|
38
|
+
@indexing_exact = index.indexing_bundle_class.new :exact, self, weights, Generators::Partial::None.new, similarity, options
|
|
39
|
+
@indexing_partial = index.indexing_bundle_class.new :partial, self, weights, partial, Generators::Similarity::None.new, options
|
|
40
|
+
|
|
41
|
+
# Indexed.
|
|
42
|
+
#
|
|
43
|
+
@indexed_exact = index.indexed_bundle_class.new :exact, self, similarity
|
|
44
|
+
if partial.use_exact_for_partial?
|
|
45
|
+
@indexed_partial = @indexed_exact
|
|
46
|
+
else
|
|
47
|
+
@indexed_partial = index.indexed_bundle_class.new :partial, self, similarity
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
|
|
51
|
+
# @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Indexes and reloads the category.
|
|
43
55
|
#
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
# @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
|
|
50
|
-
# @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
|
|
56
|
+
def reindex
|
|
57
|
+
index
|
|
58
|
+
reload
|
|
59
|
+
end
|
|
51
60
|
|
|
52
|
-
#
|
|
61
|
+
# Index name.
|
|
53
62
|
#
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
# TODO Move to Index.
|
|
58
|
-
#
|
|
59
|
-
def generate_qualifiers_from options
|
|
60
|
-
options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
|
|
61
|
-
end
|
|
63
|
+
def index_name
|
|
64
|
+
@index.name
|
|
65
|
+
end
|
|
62
66
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
end
|
|
75
|
-
|
|
76
|
-
# Index name.
|
|
77
|
-
#
|
|
78
|
-
def index_name
|
|
79
|
-
@index.name
|
|
80
|
-
end
|
|
67
|
+
# Returns the qualifiers if set or
|
|
68
|
+
# just the name if not.
|
|
69
|
+
#
|
|
70
|
+
def qualifiers
|
|
71
|
+
@qualifiers || [name]
|
|
72
|
+
end
|
|
73
|
+
# Extract qualifiers from the options.
|
|
74
|
+
#
|
|
75
|
+
def extract_qualifiers_from options
|
|
76
|
+
options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
|
|
77
|
+
end
|
|
81
78
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
79
|
+
# The category itself just yields itself.
|
|
80
|
+
#
|
|
81
|
+
def each_category
|
|
82
|
+
yield self
|
|
83
|
+
end
|
|
87
84
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
85
|
+
# Path and partial filename of the prepared index on this category.
|
|
86
|
+
#
|
|
87
|
+
def prepared_index_path
|
|
88
|
+
@prepared_index_path ||= "#{index_directory}/prepared_#{name}_index"
|
|
89
|
+
end
|
|
90
|
+
# Get an opened index file.
|
|
91
|
+
#
|
|
92
|
+
# Note: If you don't use it with the block, do not forget to close it.
|
|
93
|
+
#
|
|
94
|
+
def prepared_index_file &block
|
|
95
|
+
@prepared_index_file ||= Backend::File::Text.new prepared_index_path
|
|
96
|
+
@prepared_index_file.open &block
|
|
97
|
+
end
|
|
98
|
+
# Creates the index directory including all necessary paths above it.
|
|
99
|
+
#
|
|
100
|
+
# Note: Interface method called by any indexers.
|
|
101
|
+
#
|
|
102
|
+
def prepare_index_directory
|
|
103
|
+
FileUtils.mkdir_p index_directory
|
|
104
|
+
end
|
|
108
105
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
106
|
+
# The index directory for this category.
|
|
107
|
+
#
|
|
108
|
+
# TODO Push down into files?
|
|
109
|
+
#
|
|
110
|
+
def index_directory
|
|
111
|
+
@index_directory ||= "#{PICKY_ROOT}/index/#{PICKY_ENVIRONMENT}/#{@index.name}"
|
|
112
|
+
end
|
|
114
113
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
# * partial index
|
|
121
|
-
# * similarity index
|
|
122
|
-
#
|
|
123
|
-
def index_path bundle_name, type
|
|
124
|
-
"#{index_directory}/#{name}_#{bundle_name}_#{type}"
|
|
125
|
-
end
|
|
114
|
+
# Identifier for technical output.
|
|
115
|
+
#
|
|
116
|
+
def identifier
|
|
117
|
+
"#{@index.identifier}:#{name}"
|
|
118
|
+
end
|
|
126
119
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
120
|
+
#
|
|
121
|
+
#
|
|
122
|
+
def to_s
|
|
123
|
+
"#{self.class}(#{identifier})"
|
|
124
|
+
end
|
|
132
125
|
|
|
133
|
-
#
|
|
134
|
-
#
|
|
135
|
-
def to_s
|
|
136
|
-
"Category(#{name})"
|
|
137
126
|
end
|
|
138
127
|
|
|
139
128
|
end
|
|
@@ -1,48 +1,46 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
class Category
|
|
1
|
+
module Picky
|
|
4
2
|
|
|
5
|
-
attr_reader :indexed_exact
|
|
6
|
-
|
|
7
|
-
# Loads the index from cache.
|
|
8
|
-
#
|
|
9
|
-
def load_from_cache
|
|
10
|
-
timed_exclaim %Q{"#{identifier}": Loading index from cache.}
|
|
11
|
-
indexed_exact.load
|
|
12
|
-
indexed_partial.load
|
|
13
|
-
end
|
|
14
|
-
alias reload load_from_cache
|
|
15
|
-
|
|
16
|
-
# Gets the weight for this token's text.
|
|
17
|
-
#
|
|
18
|
-
def weight token
|
|
19
|
-
bundle_for(token).weight token.text
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
# Gets the ids for this token's text.
|
|
23
3
|
#
|
|
24
|
-
def ids token
|
|
25
|
-
bundle_for(token).ids token.text
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
# Returns the right index bundle for this token.
|
|
29
4
|
#
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
5
|
+
class Category
|
|
6
|
+
|
|
7
|
+
attr_reader :indexed_exact,
|
|
8
|
+
:indexed_partial
|
|
9
|
+
|
|
10
|
+
# Loads the index from cache.
|
|
11
|
+
#
|
|
12
|
+
def load_from_cache
|
|
13
|
+
timed_exclaim %Q{"#{identifier}": Loading index from cache.}
|
|
14
|
+
indexed_exact.load
|
|
15
|
+
indexed_partial.load
|
|
16
|
+
end
|
|
17
|
+
alias reload load_from_cache
|
|
18
|
+
|
|
19
|
+
# Gets the weight for this token's text.
|
|
20
|
+
#
|
|
21
|
+
def weight token
|
|
22
|
+
bundle_for(token).weight token.text
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Gets the ids for this token's text.
|
|
26
|
+
#
|
|
27
|
+
def ids token
|
|
28
|
+
bundle_for(token).ids token.text
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# Returns the right index bundle for this token.
|
|
32
|
+
#
|
|
33
|
+
def bundle_for token
|
|
34
|
+
token.partial? ? indexed_partial : indexed_exact
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Returns a combination for the token,
|
|
38
|
+
# or nil, if there is none.
|
|
39
|
+
#
|
|
40
|
+
def combination_for token
|
|
41
|
+
weight(token) && Query::Combination.new(token, self)
|
|
42
|
+
end
|
|
33
43
|
|
|
34
|
-
# The partial strategy defines whether to
|
|
35
|
-
# really use the partial index.
|
|
36
|
-
#
|
|
37
|
-
def indexed_partial
|
|
38
|
-
@partial_strategy.use_exact_for_partial? ? @indexed_exact : @indexed_partial
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
# Returns a combination for the token,
|
|
42
|
-
# or nil, if there is none.
|
|
43
|
-
#
|
|
44
|
-
def combination_for token
|
|
45
|
-
weight(token) && Query::Combination.new(token, self)
|
|
46
44
|
end
|
|
47
45
|
|
|
48
46
|
end
|