picky 2.7.0 → 3.0.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/adapters/rack/base.rb +20 -16
- data/lib/picky/adapters/rack/live_parameters.rb +28 -24
- data/lib/picky/adapters/rack/search.rb +67 -0
- data/lib/picky/adapters/rack.rb +27 -23
- data/lib/picky/application.rb +246 -236
- data/lib/picky/backend/base.rb +115 -119
- data/lib/picky/backend/file/basic.rb +102 -98
- data/lib/picky/backend/file/json.rb +27 -23
- data/lib/picky/backend/file/marshal.rb +32 -28
- data/lib/picky/backend/file/text.rb +45 -41
- data/lib/picky/backend/files.rb +19 -15
- data/lib/picky/backend/redis/basic.rb +76 -72
- data/lib/picky/backend/redis/list_hash.rb +40 -36
- data/lib/picky/backend/redis/string_hash.rb +30 -26
- data/lib/picky/backend/redis.rb +32 -28
- data/lib/picky/bundle.rb +82 -57
- data/lib/{bundling.rb → picky/bundling.rb} +0 -0
- data/lib/picky/calculations/location.rb +51 -47
- data/lib/picky/categories.rb +60 -56
- data/lib/picky/categories_indexed.rb +73 -82
- data/lib/picky/categories_indexing.rb +12 -8
- data/lib/picky/category.rb +109 -120
- data/lib/picky/category_indexed.rb +39 -41
- data/lib/picky/category_indexing.rb +123 -125
- data/lib/picky/character_substituters/west_european.rb +32 -26
- data/lib/{constants.rb → picky/constants.rb} +0 -0
- data/lib/picky/cores.rb +96 -92
- data/lib/{deployment.rb → picky/deployment.rb} +0 -0
- data/lib/picky/frontend_adapters/rack.rb +133 -118
- data/lib/picky/generators/aliases.rb +5 -3
- data/lib/picky/generators/base.rb +11 -7
- data/lib/picky/generators/partial/default.rb +7 -3
- data/lib/picky/generators/partial/none.rb +24 -20
- data/lib/picky/generators/partial/strategy.rb +20 -16
- data/lib/picky/generators/partial/substring.rb +94 -90
- data/lib/picky/generators/partial_generator.rb +11 -7
- data/lib/picky/generators/similarity/default.rb +9 -5
- data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
- data/lib/picky/generators/similarity/metaphone.rb +20 -16
- data/lib/picky/generators/similarity/none.rb +23 -19
- data/lib/picky/generators/similarity/phonetic.rb +49 -45
- data/lib/picky/generators/similarity/soundex.rb +20 -16
- data/lib/picky/generators/similarity/strategy.rb +10 -6
- data/lib/picky/generators/similarity_generator.rb +11 -7
- data/lib/picky/generators/strategy.rb +14 -10
- data/lib/picky/generators/weights/default.rb +9 -5
- data/lib/picky/generators/weights/logarithmic.rb +30 -26
- data/lib/picky/generators/weights/strategy.rb +10 -6
- data/lib/picky/generators/weights_generator.rb +11 -7
- data/lib/picky/helpers/measuring.rb +20 -16
- data/lib/picky/indexed/bundle/base.rb +39 -37
- data/lib/picky/indexed/bundle/memory.rb +68 -64
- data/lib/picky/indexed/bundle/redis.rb +73 -69
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
- data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
- data/lib/picky/indexed/wrappers/category/location.rb +17 -13
- data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
- data/lib/picky/indexers/base.rb +26 -22
- data/lib/picky/indexers/parallel.rb +62 -58
- data/lib/picky/indexers/serial.rb +41 -37
- data/lib/picky/indexes/index.rb +400 -0
- data/lib/picky/indexes/index_indexed.rb +24 -0
- data/lib/picky/indexes/index_indexing.rb +138 -0
- data/lib/picky/indexes/memory.rb +20 -0
- data/lib/picky/indexes/redis.rb +20 -0
- data/lib/picky/indexes.rb +68 -61
- data/lib/picky/indexes_indexed.rb +16 -12
- data/lib/picky/indexes_indexing.rb +41 -37
- data/lib/picky/indexing/bundle/base.rb +216 -205
- data/lib/picky/indexing/bundle/memory.rb +16 -11
- data/lib/picky/indexing/bundle/redis.rb +14 -12
- data/lib/picky/indexing/wrappers/category/location.rb +17 -13
- data/lib/picky/interfaces/live_parameters.rb +159 -154
- data/lib/picky/loader.rb +267 -304
- data/lib/picky/loggers/search.rb +20 -13
- data/lib/picky/no_source_specified_exception.rb +7 -3
- data/lib/picky/performant.rb +6 -2
- data/lib/picky/query/allocation.rb +71 -67
- data/lib/picky/query/allocations.rb +99 -94
- data/lib/picky/query/combination.rb +70 -66
- data/lib/picky/query/combinations/base.rb +56 -52
- data/lib/picky/query/combinations/memory.rb +36 -32
- data/lib/picky/query/combinations/redis.rb +66 -62
- data/lib/picky/query/indexes.rb +175 -160
- data/lib/picky/query/qualifier_category_mapper.rb +43 -0
- data/lib/picky/query/token.rb +165 -172
- data/lib/picky/query/tokens.rb +86 -82
- data/lib/picky/query/weights.rb +44 -48
- data/lib/picky/query.rb +5 -1
- data/lib/picky/rack/harakiri.rb +51 -47
- data/lib/picky/results.rb +81 -77
- data/lib/picky/search.rb +169 -158
- data/lib/picky/sinatra.rb +34 -0
- data/lib/picky/sources/base.rb +73 -70
- data/lib/picky/sources/couch.rb +61 -57
- data/lib/picky/sources/csv.rb +68 -64
- data/lib/picky/sources/db.rb +139 -135
- data/lib/picky/sources/delicious.rb +52 -48
- data/lib/picky/sources/mongo.rb +68 -63
- data/lib/picky/sources/wrappers/base.rb +20 -16
- data/lib/picky/sources/wrappers/location.rb +37 -33
- data/lib/picky/statistics.rb +46 -43
- data/lib/picky/tasks.rb +3 -0
- data/lib/picky/tokenizers/base.rb +192 -187
- data/lib/picky/tokenizers/index.rb +25 -21
- data/lib/picky/tokenizers/location.rb +33 -29
- data/lib/picky/tokenizers/query.rb +49 -43
- data/lib/picky.rb +21 -13
- data/lib/tasks/application.rake +1 -1
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/routes.rake +1 -1
- data/lib/tasks/server.rake +1 -1
- data/spec/lib/adapters/rack/base_spec.rb +1 -1
- data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/adapters/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +39 -32
- data/spec/lib/backend/file/basic_spec.rb +2 -2
- data/spec/lib/backend/file/json_spec.rb +2 -2
- data/spec/lib/backend/file/marshal_spec.rb +2 -2
- data/spec/lib/backend/file/text_spec.rb +1 -1
- data/spec/lib/backend/files_spec.rb +14 -24
- data/spec/lib/backend/redis/basic_spec.rb +2 -2
- data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
- data/spec/lib/backend/redis_spec.rb +20 -13
- data/spec/lib/calculations/location_spec.rb +1 -1
- data/spec/lib/categories_indexed_spec.rb +16 -34
- data/spec/lib/category_indexed_spec.rb +9 -27
- data/spec/lib/category_indexing_spec.rb +2 -3
- data/spec/lib/category_spec.rb +10 -10
- data/spec/lib/character_substituters/west_european_spec.rb +6 -5
- data/spec/lib/cores_spec.rb +17 -17
- data/spec/lib/extensions/symbol_spec.rb +15 -1
- data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
- data/spec/lib/generators/partial/default_spec.rb +3 -3
- data/spec/lib/generators/partial/none_spec.rb +2 -2
- data/spec/lib/generators/partial/substring_spec.rb +1 -1
- data/spec/lib/generators/partial_generator_spec.rb +3 -3
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/none_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
- data/spec/lib/generators/similarity_generator_spec.rb +2 -2
- data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/generators/weights_generator_spec.rb +1 -1
- data/spec/lib/helpers/measuring_spec.rb +2 -2
- data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
- data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/base_spec.rb +1 -1
- data/spec/lib/indexers/parallel_spec.rb +1 -1
- data/spec/lib/indexers/serial_spec.rb +1 -1
- data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
- data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
- data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
- data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
- data/spec/lib/indexes_class_spec.rb +2 -2
- data/spec/lib/indexes_indexed_spec.rb +1 -1
- data/spec/lib/indexes_indexing_spec.rb +1 -1
- data/spec/lib/indexes_spec.rb +1 -1
- data/spec/lib/indexing/bundle/base_spec.rb +7 -5
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
- data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
- data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
- data/spec/lib/loader_spec.rb +17 -19
- data/spec/lib/loggers/search_spec.rb +2 -2
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +4 -4
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +7 -2
- data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
- data/spec/lib/query/token_spec.rb +32 -53
- data/spec/lib/query/tokens_spec.rb +30 -35
- data/spec/lib/query/weights_spec.rb +16 -16
- data/spec/lib/rack/harakiri_spec.rb +5 -5
- data/spec/lib/results_spec.rb +1 -1
- data/spec/lib/search_spec.rb +24 -22
- data/spec/lib/sinatra_spec.rb +36 -0
- data/spec/lib/sources/base_spec.rb +1 -1
- data/spec/lib/sources/couch_spec.rb +9 -9
- data/spec/lib/sources/csv_spec.rb +7 -7
- data/spec/lib/sources/db_spec.rb +2 -2
- data/spec/lib/sources/delicious_spec.rb +5 -5
- data/spec/lib/sources/mongo_spec.rb +7 -7
- data/spec/lib/sources/wrappers/base_spec.rb +2 -2
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/statistics_spec.rb +1 -1
- data/spec/lib/tokenizers/base_spec.rb +2 -2
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/lib/tokenizers/query_spec.rb +1 -1
- metadata +30 -30
- data/lib/picky/adapters/rack/query.rb +0 -65
- data/lib/picky/index/base.rb +0 -409
- data/lib/picky/index/base_indexed.rb +0 -29
- data/lib/picky/index/base_indexing.rb +0 -127
- data/lib/picky/index/memory.rb +0 -16
- data/lib/picky/index/redis.rb +0 -16
- data/lib/picky/query/qualifiers.rb +0 -76
- data/lib/picky/query/solr.rb +0 -60
- data/lib/picky/signals.rb +0 -8
- data/lib/picky-tasks.rb +0 -6
- data/lib/tasks/spec.rake +0 -11
- data/spec/lib/query/qualifiers_spec.rb +0 -31
@@ -1,24 +1,28 @@
|
|
1
|
-
|
2
|
-
#
|
3
|
-
module Generators
|
1
|
+
module Picky
|
4
2
|
|
5
|
-
|
3
|
+
# encoding: utf-8
|
4
|
+
#
|
5
|
+
module Generators
|
6
6
|
|
7
|
-
|
8
|
-
# and Levenshtein.
|
9
|
-
#
|
10
|
-
# It uses the double metaphone to get similar words
|
11
|
-
# and ranks them using the levenshtein.
|
12
|
-
#
|
13
|
-
class DoubleMetaphone < Phonetic
|
7
|
+
module Similarity
|
14
8
|
|
15
|
-
#
|
9
|
+
# It's actually a combination of double metaphone
|
10
|
+
# and Levenshtein.
|
16
11
|
#
|
17
|
-
#
|
12
|
+
# It uses the double metaphone to get similar words
|
13
|
+
# and ranks them using the levenshtein.
|
18
14
|
#
|
19
|
-
|
20
|
-
|
21
|
-
|
15
|
+
class DoubleMetaphone < Phonetic
|
16
|
+
|
17
|
+
# Encodes the given symbol.
|
18
|
+
#
|
19
|
+
# Returns a symbol.
|
20
|
+
#
|
21
|
+
def encoded sym
|
22
|
+
codes = Text::Metaphone.double_metaphone sym.to_s
|
23
|
+
codes.first.to_sym unless codes.empty?
|
24
|
+
end
|
25
|
+
|
22
26
|
end
|
23
27
|
|
24
28
|
end
|
@@ -1,24 +1,28 @@
|
|
1
|
-
|
2
|
-
#
|
3
|
-
module Generators
|
1
|
+
module Picky
|
4
2
|
|
5
|
-
|
3
|
+
# encoding: utf-8
|
4
|
+
#
|
5
|
+
module Generators
|
6
6
|
|
7
|
-
|
8
|
-
# and Levenshtein.
|
9
|
-
#
|
10
|
-
# It uses the metaphone to get similar words
|
11
|
-
# and ranks them using the levenshtein.
|
12
|
-
#
|
13
|
-
class Metaphone < Phonetic
|
7
|
+
module Similarity
|
14
8
|
|
15
|
-
#
|
9
|
+
# It's actually a combination of metaphone
|
10
|
+
# and Levenshtein.
|
16
11
|
#
|
17
|
-
#
|
12
|
+
# It uses the metaphone to get similar words
|
13
|
+
# and ranks them using the levenshtein.
|
18
14
|
#
|
19
|
-
|
20
|
-
|
21
|
-
|
15
|
+
class Metaphone < Phonetic
|
16
|
+
|
17
|
+
# Encodes the given symbol.
|
18
|
+
#
|
19
|
+
# Returns a symbol.
|
20
|
+
#
|
21
|
+
def encoded sym
|
22
|
+
code = Text::Metaphone.metaphone sym.to_s
|
23
|
+
code.to_sym if code
|
24
|
+
end
|
25
|
+
|
22
26
|
end
|
23
27
|
|
24
28
|
end
|
@@ -1,27 +1,31 @@
|
|
1
|
-
module
|
1
|
+
module Picky
|
2
2
|
|
3
|
-
module
|
3
|
+
module Generators
|
4
4
|
|
5
|
-
|
6
|
-
#
|
7
|
-
class None < Strategy
|
5
|
+
module Similarity
|
8
6
|
|
9
|
-
#
|
7
|
+
# Similarity strategy that does nothing.
|
10
8
|
#
|
11
|
-
|
12
|
-
|
13
|
-
|
9
|
+
class None < Strategy
|
10
|
+
|
11
|
+
# Does not encode text. Just returns nil.
|
12
|
+
#
|
13
|
+
def encoded text
|
14
|
+
nil
|
15
|
+
end
|
16
|
+
|
17
|
+
# Returns an empty index.
|
18
|
+
#
|
19
|
+
def generate_from index
|
20
|
+
{}
|
21
|
+
end
|
22
|
+
|
23
|
+
# Returns if this strategy's generated file is saved.
|
24
|
+
#
|
25
|
+
def saved?
|
26
|
+
false
|
27
|
+
end
|
14
28
|
|
15
|
-
# Returns an empty index.
|
16
|
-
#
|
17
|
-
def generate_from index
|
18
|
-
{}
|
19
|
-
end
|
20
|
-
|
21
|
-
# Returns if this strategy's generated file is saved.
|
22
|
-
#
|
23
|
-
def saved?
|
24
|
-
false
|
25
29
|
end
|
26
30
|
|
27
31
|
end
|
@@ -1,62 +1,66 @@
|
|
1
|
-
|
2
|
-
#
|
3
|
-
module Generators
|
1
|
+
module Picky
|
4
2
|
|
5
|
-
|
3
|
+
# encoding: utf-8
|
4
|
+
#
|
5
|
+
module Generators
|
6
6
|
|
7
|
-
|
8
|
-
# and Levenshtein.
|
9
|
-
#
|
10
|
-
# It uses the double metaphone to get similar words
|
11
|
-
# and ranks them using the levenshtein.
|
12
|
-
#
|
13
|
-
class Phonetic < Strategy
|
14
|
-
|
15
|
-
attr_reader :amount
|
7
|
+
module Similarity
|
16
8
|
|
9
|
+
# It's actually a combination of double metaphone
|
10
|
+
# and Levenshtein.
|
17
11
|
#
|
12
|
+
# It uses the double metaphone to get similar words
|
13
|
+
# and ranks them using the levenshtein.
|
18
14
|
#
|
19
|
-
|
20
|
-
raise "In Picky 2.0+, the Similarity::Phonetic has been renamed to Similarity::DoubleMetaphone. Please use that one. Thanks!" if self.class == Phonetic
|
21
|
-
@amount = amount
|
22
|
-
end
|
15
|
+
class Phonetic < Strategy
|
23
16
|
|
24
|
-
|
25
|
-
#
|
26
|
-
# In the following form:
|
27
|
-
# [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
|
28
|
-
#
|
29
|
-
def generate_from inverted
|
30
|
-
hash = hashify inverted.keys
|
31
|
-
sort hash
|
32
|
-
end
|
17
|
+
attr_reader :amount
|
33
18
|
|
34
|
-
protected
|
35
|
-
|
36
|
-
# Sorts the index values in place.
|
37
19
|
#
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
end
|
43
|
-
hash
|
20
|
+
#
|
21
|
+
def initialize amount = 10
|
22
|
+
raise "In Picky 2.0+, the Similarity::Phonetic has been renamed to Similarity::DoubleMetaphone. Please use that one. Thanks!" if self.class == Phonetic
|
23
|
+
@amount = amount
|
44
24
|
end
|
45
25
|
|
46
|
-
#
|
26
|
+
# Generates an index for the given index (in exact index style).
|
47
27
|
#
|
48
|
-
#
|
49
|
-
#
|
28
|
+
# In the following form:
|
29
|
+
# [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
|
50
30
|
#
|
51
|
-
def
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
31
|
+
def generate_from inverted
|
32
|
+
hash = hashify inverted.keys
|
33
|
+
sort hash
|
34
|
+
end
|
35
|
+
|
36
|
+
protected
|
37
|
+
|
38
|
+
# Sorts the index values in place.
|
39
|
+
#
|
40
|
+
def sort hash
|
41
|
+
hash.each_pair.each do |code, ary|
|
42
|
+
ary.sort_by_levenshtein! code
|
43
|
+
ary.slice! amount, ary.size # size is not perfectly correct, but anyway
|
56
44
|
end
|
57
|
-
|
45
|
+
hash
|
58
46
|
end
|
59
|
-
|
47
|
+
|
48
|
+
# Hashifies a list of symbols.
|
49
|
+
#
|
50
|
+
# Where:
|
51
|
+
# { encoded_sym => [syms] }
|
52
|
+
#
|
53
|
+
def hashify list
|
54
|
+
list.inject({}) do |total, element|
|
55
|
+
if code = encoded(element)
|
56
|
+
total[code] ||= []
|
57
|
+
total[code] << element
|
58
|
+
end
|
59
|
+
total
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
60
64
|
|
61
65
|
end
|
62
66
|
|
@@ -1,24 +1,28 @@
|
|
1
|
-
|
2
|
-
#
|
3
|
-
module Generators
|
1
|
+
module Picky
|
4
2
|
|
5
|
-
|
3
|
+
# encoding: utf-8
|
4
|
+
#
|
5
|
+
module Generators
|
6
6
|
|
7
|
-
|
8
|
-
# and Levenshtein.
|
9
|
-
#
|
10
|
-
# It uses the soundex to get similar words
|
11
|
-
# and ranks them using the levenshtein.
|
12
|
-
#
|
13
|
-
class Soundex < Phonetic
|
7
|
+
module Similarity
|
14
8
|
|
15
|
-
#
|
9
|
+
# It's actually a combination of soundex
|
10
|
+
# and Levenshtein.
|
16
11
|
#
|
17
|
-
#
|
12
|
+
# It uses the soundex to get similar words
|
13
|
+
# and ranks them using the levenshtein.
|
18
14
|
#
|
19
|
-
|
20
|
-
|
21
|
-
|
15
|
+
class Soundex < Phonetic
|
16
|
+
|
17
|
+
# Encodes the given symbol.
|
18
|
+
#
|
19
|
+
# Returns a symbol.
|
20
|
+
#
|
21
|
+
def encoded sym
|
22
|
+
code = Text::Soundex.soundex sym.to_s
|
23
|
+
code.to_sym if code
|
24
|
+
end
|
25
|
+
|
22
26
|
end
|
23
27
|
|
24
28
|
end
|
@@ -1,9 +1,13 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
module Generators
|
4
|
+
module Similarity
|
5
|
+
# Base class for all similarity strategies.
|
6
|
+
#
|
7
|
+
class Strategy < Generators::Strategy
|
8
|
+
|
9
|
+
end
|
7
10
|
end
|
8
11
|
end
|
12
|
+
|
9
13
|
end
|
@@ -1,13 +1,17 @@
|
|
1
|
-
module
|
1
|
+
module Picky
|
2
2
|
|
3
|
-
|
4
|
-
#
|
5
|
-
class SimilarityGenerator < Base
|
3
|
+
module Generators
|
6
4
|
|
7
|
-
#
|
5
|
+
# Uses no similarity as default.
|
8
6
|
#
|
9
|
-
|
10
|
-
|
7
|
+
class SimilarityGenerator < Base
|
8
|
+
|
9
|
+
# Generate a similarity index based on the given inverted index.
|
10
|
+
#
|
11
|
+
def generate strategy = Similarity::None.new
|
12
|
+
strategy.generate_from self.inverted
|
13
|
+
end
|
14
|
+
|
11
15
|
end
|
12
16
|
|
13
17
|
end
|
@@ -1,14 +1,18 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
module Generators
|
4
|
+
|
5
|
+
class Strategy
|
6
|
+
|
7
|
+
# By default, all caches are saved in a
|
8
|
+
# storage (like a file).
|
9
|
+
#
|
10
|
+
def saved?
|
11
|
+
true
|
12
|
+
end
|
13
|
+
|
10
14
|
end
|
11
|
-
|
15
|
+
|
12
16
|
end
|
13
17
|
|
14
18
|
end
|
@@ -1,35 +1,39 @@
|
|
1
|
-
module
|
1
|
+
module Picky
|
2
2
|
|
3
|
-
module
|
3
|
+
module Generators
|
4
4
|
|
5
|
-
|
6
|
-
# If for a key k we have x ids, the weight is:
|
7
|
-
# w(x): log(x)
|
8
|
-
# Special case: If x < 1, then we use 0.
|
9
|
-
#
|
10
|
-
class Logarithmic < Strategy
|
5
|
+
module Weights
|
11
6
|
|
12
|
-
#
|
7
|
+
# Uses a logarithmic weight.
|
8
|
+
# If for a key k we have x ids, the weight is:
|
9
|
+
# w(x): log(x)
|
10
|
+
# Special case: If x < 1, then we use 0.
|
13
11
|
#
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
hash
|
12
|
+
class Logarithmic < Strategy
|
13
|
+
|
14
|
+
# Generates a partial index from the given inverted index.
|
15
|
+
#
|
16
|
+
def generate_from inverted
|
17
|
+
inverted.inject({}) do |hash, text_ids|
|
18
|
+
text, ids = *text_ids
|
19
|
+
weight = weight_for ids.size
|
20
|
+
hash[text] ||= weight.round(2) if weight
|
21
|
+
hash
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# Sets the weight value.
|
26
|
+
#
|
27
|
+
# If the size is 0 or one, we would get -Infinity or 0.0.
|
28
|
+
# Thus we do not set a value if there is just one. The default, dynamically, is 0.
|
29
|
+
#
|
30
|
+
# BUT: We need the value, even if 0. To designate that there is a weight!
|
31
|
+
#
|
32
|
+
def weight_for amount
|
33
|
+
return 0 if amount < 1
|
34
|
+
Math.log amount
|
20
35
|
end
|
21
|
-
end
|
22
36
|
|
23
|
-
# Sets the weight value.
|
24
|
-
#
|
25
|
-
# If the size is 0 or one, we would get -Infinity or 0.0.
|
26
|
-
# Thus we do not set a value if there is just one. The default, dynamically, is 0.
|
27
|
-
#
|
28
|
-
# BUT: We need the value, even if 0. To designate that there is a weight!
|
29
|
-
#
|
30
|
-
def weight_for amount
|
31
|
-
return 0 if amount < 1
|
32
|
-
Math.log amount
|
33
37
|
end
|
34
38
|
|
35
39
|
end
|
@@ -1,9 +1,13 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
module Generators
|
4
|
+
module Weights
|
5
|
+
# Superclass for weighing strategies.
|
6
|
+
#
|
7
|
+
class Strategy < Generators::Strategy
|
8
|
+
|
9
|
+
end
|
7
10
|
end
|
8
11
|
end
|
12
|
+
|
9
13
|
end
|
@@ -1,13 +1,17 @@
|
|
1
|
-
module
|
1
|
+
module Picky
|
2
2
|
|
3
|
-
|
4
|
-
#
|
5
|
-
class WeightsGenerator < Base
|
3
|
+
module Generators
|
6
4
|
|
7
|
-
#
|
5
|
+
# Uses a logarithmic algorithm as default.
|
8
6
|
#
|
9
|
-
|
10
|
-
|
7
|
+
class WeightsGenerator < Base
|
8
|
+
|
9
|
+
# Generate a weights index based on the given inverted index.
|
10
|
+
#
|
11
|
+
def generate strategy = Weights::Logarithmic.new
|
12
|
+
strategy.generate_from self.inverted
|
13
|
+
end
|
14
|
+
|
11
15
|
end
|
12
16
|
|
13
17
|
end
|
@@ -1,19 +1,23 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
# Helper methods for measuring, benchmarking, logging.
|
4
|
+
#
|
5
|
+
module Helpers
|
6
|
+
module Measuring
|
7
|
+
|
8
|
+
# Returns a duration in seconds.
|
9
|
+
#
|
10
|
+
def timed *args, &block
|
11
|
+
block_to_be_measured = lambda &block
|
12
|
+
|
13
|
+
time_begin = Time.now.to_f
|
14
|
+
|
15
|
+
block_to_be_measured.call *args
|
16
|
+
|
17
|
+
Time.now.to_f - time_begin
|
18
|
+
end
|
19
|
+
|
16
20
|
end
|
17
|
-
|
18
21
|
end
|
22
|
+
|
19
23
|
end
|
@@ -1,41 +1,43 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
module Indexed # :nodoc:all
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
module Indexed # :nodoc:all
|
4
|
+
|
5
|
+
# An indexed bundle is a number of memory/redis
|
6
|
+
# indexes that compose the indexes for a single category:
|
7
|
+
# * core (inverted) index
|
8
|
+
# * weights index
|
9
|
+
# * similarity index
|
10
|
+
# * index configuration
|
11
|
+
#
|
12
|
+
# Indexed refers to them being indexed.
|
13
|
+
# This class notably offers the methods:
|
14
|
+
# * load
|
15
|
+
# * clear
|
16
|
+
#
|
17
|
+
# To (re)load or clear the current indexes.
|
18
|
+
#
|
19
|
+
module Bundle
|
20
|
+
|
21
|
+
class Base < Picky::Bundle
|
22
|
+
|
23
|
+
# Loads all indexes.
|
24
|
+
#
|
25
|
+
def load
|
26
|
+
load_inverted
|
27
|
+
load_weights
|
28
|
+
load_similarity
|
29
|
+
load_configuration
|
30
|
+
end
|
31
|
+
|
32
|
+
# Clears all indexes.
|
33
|
+
#
|
34
|
+
def clear
|
35
|
+
clear_inverted
|
36
|
+
clear_weights
|
37
|
+
clear_similarity
|
38
|
+
clear_configuration
|
39
|
+
end
|
31
40
|
|
32
|
-
# Clears all indexes.
|
33
|
-
#
|
34
|
-
def clear
|
35
|
-
clear_inverted
|
36
|
-
clear_weights
|
37
|
-
clear_similarity
|
38
|
-
clear_configuration
|
39
41
|
end
|
40
42
|
|
41
43
|
end
|