picky 2.7.0 → 3.0.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/adapters/rack/base.rb +20 -16
- data/lib/picky/adapters/rack/live_parameters.rb +28 -24
- data/lib/picky/adapters/rack/search.rb +67 -0
- data/lib/picky/adapters/rack.rb +27 -23
- data/lib/picky/application.rb +246 -236
- data/lib/picky/backend/base.rb +115 -119
- data/lib/picky/backend/file/basic.rb +102 -98
- data/lib/picky/backend/file/json.rb +27 -23
- data/lib/picky/backend/file/marshal.rb +32 -28
- data/lib/picky/backend/file/text.rb +45 -41
- data/lib/picky/backend/files.rb +19 -15
- data/lib/picky/backend/redis/basic.rb +76 -72
- data/lib/picky/backend/redis/list_hash.rb +40 -36
- data/lib/picky/backend/redis/string_hash.rb +30 -26
- data/lib/picky/backend/redis.rb +32 -28
- data/lib/picky/bundle.rb +82 -57
- data/lib/{bundling.rb → picky/bundling.rb} +0 -0
- data/lib/picky/calculations/location.rb +51 -47
- data/lib/picky/categories.rb +60 -56
- data/lib/picky/categories_indexed.rb +73 -82
- data/lib/picky/categories_indexing.rb +12 -8
- data/lib/picky/category.rb +109 -120
- data/lib/picky/category_indexed.rb +39 -41
- data/lib/picky/category_indexing.rb +123 -125
- data/lib/picky/character_substituters/west_european.rb +32 -26
- data/lib/{constants.rb → picky/constants.rb} +0 -0
- data/lib/picky/cores.rb +96 -92
- data/lib/{deployment.rb → picky/deployment.rb} +0 -0
- data/lib/picky/frontend_adapters/rack.rb +133 -118
- data/lib/picky/generators/aliases.rb +5 -3
- data/lib/picky/generators/base.rb +11 -7
- data/lib/picky/generators/partial/default.rb +7 -3
- data/lib/picky/generators/partial/none.rb +24 -20
- data/lib/picky/generators/partial/strategy.rb +20 -16
- data/lib/picky/generators/partial/substring.rb +94 -90
- data/lib/picky/generators/partial_generator.rb +11 -7
- data/lib/picky/generators/similarity/default.rb +9 -5
- data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
- data/lib/picky/generators/similarity/metaphone.rb +20 -16
- data/lib/picky/generators/similarity/none.rb +23 -19
- data/lib/picky/generators/similarity/phonetic.rb +49 -45
- data/lib/picky/generators/similarity/soundex.rb +20 -16
- data/lib/picky/generators/similarity/strategy.rb +10 -6
- data/lib/picky/generators/similarity_generator.rb +11 -7
- data/lib/picky/generators/strategy.rb +14 -10
- data/lib/picky/generators/weights/default.rb +9 -5
- data/lib/picky/generators/weights/logarithmic.rb +30 -26
- data/lib/picky/generators/weights/strategy.rb +10 -6
- data/lib/picky/generators/weights_generator.rb +11 -7
- data/lib/picky/helpers/measuring.rb +20 -16
- data/lib/picky/indexed/bundle/base.rb +39 -37
- data/lib/picky/indexed/bundle/memory.rb +68 -64
- data/lib/picky/indexed/bundle/redis.rb +73 -69
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
- data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
- data/lib/picky/indexed/wrappers/category/location.rb +17 -13
- data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
- data/lib/picky/indexers/base.rb +26 -22
- data/lib/picky/indexers/parallel.rb +62 -58
- data/lib/picky/indexers/serial.rb +41 -37
- data/lib/picky/indexes/index.rb +400 -0
- data/lib/picky/indexes/index_indexed.rb +24 -0
- data/lib/picky/indexes/index_indexing.rb +138 -0
- data/lib/picky/indexes/memory.rb +20 -0
- data/lib/picky/indexes/redis.rb +20 -0
- data/lib/picky/indexes.rb +68 -61
- data/lib/picky/indexes_indexed.rb +16 -12
- data/lib/picky/indexes_indexing.rb +41 -37
- data/lib/picky/indexing/bundle/base.rb +216 -205
- data/lib/picky/indexing/bundle/memory.rb +16 -11
- data/lib/picky/indexing/bundle/redis.rb +14 -12
- data/lib/picky/indexing/wrappers/category/location.rb +17 -13
- data/lib/picky/interfaces/live_parameters.rb +159 -154
- data/lib/picky/loader.rb +267 -304
- data/lib/picky/loggers/search.rb +20 -13
- data/lib/picky/no_source_specified_exception.rb +7 -3
- data/lib/picky/performant.rb +6 -2
- data/lib/picky/query/allocation.rb +71 -67
- data/lib/picky/query/allocations.rb +99 -94
- data/lib/picky/query/combination.rb +70 -66
- data/lib/picky/query/combinations/base.rb +56 -52
- data/lib/picky/query/combinations/memory.rb +36 -32
- data/lib/picky/query/combinations/redis.rb +66 -62
- data/lib/picky/query/indexes.rb +175 -160
- data/lib/picky/query/qualifier_category_mapper.rb +43 -0
- data/lib/picky/query/token.rb +165 -172
- data/lib/picky/query/tokens.rb +86 -82
- data/lib/picky/query/weights.rb +44 -48
- data/lib/picky/query.rb +5 -1
- data/lib/picky/rack/harakiri.rb +51 -47
- data/lib/picky/results.rb +81 -77
- data/lib/picky/search.rb +169 -158
- data/lib/picky/sinatra.rb +34 -0
- data/lib/picky/sources/base.rb +73 -70
- data/lib/picky/sources/couch.rb +61 -57
- data/lib/picky/sources/csv.rb +68 -64
- data/lib/picky/sources/db.rb +139 -135
- data/lib/picky/sources/delicious.rb +52 -48
- data/lib/picky/sources/mongo.rb +68 -63
- data/lib/picky/sources/wrappers/base.rb +20 -16
- data/lib/picky/sources/wrappers/location.rb +37 -33
- data/lib/picky/statistics.rb +46 -43
- data/lib/picky/tasks.rb +3 -0
- data/lib/picky/tokenizers/base.rb +192 -187
- data/lib/picky/tokenizers/index.rb +25 -21
- data/lib/picky/tokenizers/location.rb +33 -29
- data/lib/picky/tokenizers/query.rb +49 -43
- data/lib/picky.rb +21 -13
- data/lib/tasks/application.rake +1 -1
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/routes.rake +1 -1
- data/lib/tasks/server.rake +1 -1
- data/spec/lib/adapters/rack/base_spec.rb +1 -1
- data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/adapters/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +39 -32
- data/spec/lib/backend/file/basic_spec.rb +2 -2
- data/spec/lib/backend/file/json_spec.rb +2 -2
- data/spec/lib/backend/file/marshal_spec.rb +2 -2
- data/spec/lib/backend/file/text_spec.rb +1 -1
- data/spec/lib/backend/files_spec.rb +14 -24
- data/spec/lib/backend/redis/basic_spec.rb +2 -2
- data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
- data/spec/lib/backend/redis_spec.rb +20 -13
- data/spec/lib/calculations/location_spec.rb +1 -1
- data/spec/lib/categories_indexed_spec.rb +16 -34
- data/spec/lib/category_indexed_spec.rb +9 -27
- data/spec/lib/category_indexing_spec.rb +2 -3
- data/spec/lib/category_spec.rb +10 -10
- data/spec/lib/character_substituters/west_european_spec.rb +6 -5
- data/spec/lib/cores_spec.rb +17 -17
- data/spec/lib/extensions/symbol_spec.rb +15 -1
- data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
- data/spec/lib/generators/partial/default_spec.rb +3 -3
- data/spec/lib/generators/partial/none_spec.rb +2 -2
- data/spec/lib/generators/partial/substring_spec.rb +1 -1
- data/spec/lib/generators/partial_generator_spec.rb +3 -3
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/none_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
- data/spec/lib/generators/similarity_generator_spec.rb +2 -2
- data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/generators/weights_generator_spec.rb +1 -1
- data/spec/lib/helpers/measuring_spec.rb +2 -2
- data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
- data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/base_spec.rb +1 -1
- data/spec/lib/indexers/parallel_spec.rb +1 -1
- data/spec/lib/indexers/serial_spec.rb +1 -1
- data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
- data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
- data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
- data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
- data/spec/lib/indexes_class_spec.rb +2 -2
- data/spec/lib/indexes_indexed_spec.rb +1 -1
- data/spec/lib/indexes_indexing_spec.rb +1 -1
- data/spec/lib/indexes_spec.rb +1 -1
- data/spec/lib/indexing/bundle/base_spec.rb +7 -5
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
- data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
- data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
- data/spec/lib/loader_spec.rb +17 -19
- data/spec/lib/loggers/search_spec.rb +2 -2
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +4 -4
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +7 -2
- data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
- data/spec/lib/query/token_spec.rb +32 -53
- data/spec/lib/query/tokens_spec.rb +30 -35
- data/spec/lib/query/weights_spec.rb +16 -16
- data/spec/lib/rack/harakiri_spec.rb +5 -5
- data/spec/lib/results_spec.rb +1 -1
- data/spec/lib/search_spec.rb +24 -22
- data/spec/lib/sinatra_spec.rb +36 -0
- data/spec/lib/sources/base_spec.rb +1 -1
- data/spec/lib/sources/couch_spec.rb +9 -9
- data/spec/lib/sources/csv_spec.rb +7 -7
- data/spec/lib/sources/db_spec.rb +2 -2
- data/spec/lib/sources/delicious_spec.rb +5 -5
- data/spec/lib/sources/mongo_spec.rb +7 -7
- data/spec/lib/sources/wrappers/base_spec.rb +2 -2
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/statistics_spec.rb +1 -1
- data/spec/lib/tokenizers/base_spec.rb +2 -2
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/lib/tokenizers/query_spec.rb +1 -1
- metadata +30 -30
- data/lib/picky/adapters/rack/query.rb +0 -65
- data/lib/picky/index/base.rb +0 -409
- data/lib/picky/index/base_indexed.rb +0 -29
- data/lib/picky/index/base_indexing.rb +0 -127
- data/lib/picky/index/memory.rb +0 -16
- data/lib/picky/index/redis.rb +0 -16
- data/lib/picky/query/qualifiers.rb +0 -76
- data/lib/picky/query/solr.rb +0 -60
- data/lib/picky/signals.rb +0 -8
- data/lib/picky-tasks.rb +0 -6
- data/lib/tasks/spec.rake +0 -11
- data/spec/lib/query/qualifiers_spec.rb +0 -31
|
@@ -1,24 +1,28 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
module Generators
|
|
1
|
+
module Picky
|
|
4
2
|
|
|
5
|
-
|
|
3
|
+
# encoding: utf-8
|
|
4
|
+
#
|
|
5
|
+
module Generators
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
# and Levenshtein.
|
|
9
|
-
#
|
|
10
|
-
# It uses the double metaphone to get similar words
|
|
11
|
-
# and ranks them using the levenshtein.
|
|
12
|
-
#
|
|
13
|
-
class DoubleMetaphone < Phonetic
|
|
7
|
+
module Similarity
|
|
14
8
|
|
|
15
|
-
#
|
|
9
|
+
# It's actually a combination of double metaphone
|
|
10
|
+
# and Levenshtein.
|
|
16
11
|
#
|
|
17
|
-
#
|
|
12
|
+
# It uses the double metaphone to get similar words
|
|
13
|
+
# and ranks them using the levenshtein.
|
|
18
14
|
#
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
15
|
+
class DoubleMetaphone < Phonetic
|
|
16
|
+
|
|
17
|
+
# Encodes the given symbol.
|
|
18
|
+
#
|
|
19
|
+
# Returns a symbol.
|
|
20
|
+
#
|
|
21
|
+
def encoded sym
|
|
22
|
+
codes = Text::Metaphone.double_metaphone sym.to_s
|
|
23
|
+
codes.first.to_sym unless codes.empty?
|
|
24
|
+
end
|
|
25
|
+
|
|
22
26
|
end
|
|
23
27
|
|
|
24
28
|
end
|
|
@@ -1,24 +1,28 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
module Generators
|
|
1
|
+
module Picky
|
|
4
2
|
|
|
5
|
-
|
|
3
|
+
# encoding: utf-8
|
|
4
|
+
#
|
|
5
|
+
module Generators
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
# and Levenshtein.
|
|
9
|
-
#
|
|
10
|
-
# It uses the metaphone to get similar words
|
|
11
|
-
# and ranks them using the levenshtein.
|
|
12
|
-
#
|
|
13
|
-
class Metaphone < Phonetic
|
|
7
|
+
module Similarity
|
|
14
8
|
|
|
15
|
-
#
|
|
9
|
+
# It's actually a combination of metaphone
|
|
10
|
+
# and Levenshtein.
|
|
16
11
|
#
|
|
17
|
-
#
|
|
12
|
+
# It uses the metaphone to get similar words
|
|
13
|
+
# and ranks them using the levenshtein.
|
|
18
14
|
#
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
15
|
+
class Metaphone < Phonetic
|
|
16
|
+
|
|
17
|
+
# Encodes the given symbol.
|
|
18
|
+
#
|
|
19
|
+
# Returns a symbol.
|
|
20
|
+
#
|
|
21
|
+
def encoded sym
|
|
22
|
+
code = Text::Metaphone.metaphone sym.to_s
|
|
23
|
+
code.to_sym if code
|
|
24
|
+
end
|
|
25
|
+
|
|
22
26
|
end
|
|
23
27
|
|
|
24
28
|
end
|
|
@@ -1,27 +1,31 @@
|
|
|
1
|
-
module
|
|
1
|
+
module Picky
|
|
2
2
|
|
|
3
|
-
module
|
|
3
|
+
module Generators
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
#
|
|
7
|
-
class None < Strategy
|
|
5
|
+
module Similarity
|
|
8
6
|
|
|
9
|
-
#
|
|
7
|
+
# Similarity strategy that does nothing.
|
|
10
8
|
#
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
9
|
+
class None < Strategy
|
|
10
|
+
|
|
11
|
+
# Does not encode text. Just returns nil.
|
|
12
|
+
#
|
|
13
|
+
def encoded text
|
|
14
|
+
nil
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# Returns an empty index.
|
|
18
|
+
#
|
|
19
|
+
def generate_from index
|
|
20
|
+
{}
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Returns if this strategy's generated file is saved.
|
|
24
|
+
#
|
|
25
|
+
def saved?
|
|
26
|
+
false
|
|
27
|
+
end
|
|
14
28
|
|
|
15
|
-
# Returns an empty index.
|
|
16
|
-
#
|
|
17
|
-
def generate_from index
|
|
18
|
-
{}
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
# Returns if this strategy's generated file is saved.
|
|
22
|
-
#
|
|
23
|
-
def saved?
|
|
24
|
-
false
|
|
25
29
|
end
|
|
26
30
|
|
|
27
31
|
end
|
|
@@ -1,62 +1,66 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
module Generators
|
|
1
|
+
module Picky
|
|
4
2
|
|
|
5
|
-
|
|
3
|
+
# encoding: utf-8
|
|
4
|
+
#
|
|
5
|
+
module Generators
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
# and Levenshtein.
|
|
9
|
-
#
|
|
10
|
-
# It uses the double metaphone to get similar words
|
|
11
|
-
# and ranks them using the levenshtein.
|
|
12
|
-
#
|
|
13
|
-
class Phonetic < Strategy
|
|
14
|
-
|
|
15
|
-
attr_reader :amount
|
|
7
|
+
module Similarity
|
|
16
8
|
|
|
9
|
+
# It's actually a combination of double metaphone
|
|
10
|
+
# and Levenshtein.
|
|
17
11
|
#
|
|
12
|
+
# It uses the double metaphone to get similar words
|
|
13
|
+
# and ranks them using the levenshtein.
|
|
18
14
|
#
|
|
19
|
-
|
|
20
|
-
raise "In Picky 2.0+, the Similarity::Phonetic has been renamed to Similarity::DoubleMetaphone. Please use that one. Thanks!" if self.class == Phonetic
|
|
21
|
-
@amount = amount
|
|
22
|
-
end
|
|
15
|
+
class Phonetic < Strategy
|
|
23
16
|
|
|
24
|
-
|
|
25
|
-
#
|
|
26
|
-
# In the following form:
|
|
27
|
-
# [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
|
|
28
|
-
#
|
|
29
|
-
def generate_from inverted
|
|
30
|
-
hash = hashify inverted.keys
|
|
31
|
-
sort hash
|
|
32
|
-
end
|
|
17
|
+
attr_reader :amount
|
|
33
18
|
|
|
34
|
-
protected
|
|
35
|
-
|
|
36
|
-
# Sorts the index values in place.
|
|
37
19
|
#
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
end
|
|
43
|
-
hash
|
|
20
|
+
#
|
|
21
|
+
def initialize amount = 10
|
|
22
|
+
raise "In Picky 2.0+, the Similarity::Phonetic has been renamed to Similarity::DoubleMetaphone. Please use that one. Thanks!" if self.class == Phonetic
|
|
23
|
+
@amount = amount
|
|
44
24
|
end
|
|
45
25
|
|
|
46
|
-
#
|
|
26
|
+
# Generates an index for the given index (in exact index style).
|
|
47
27
|
#
|
|
48
|
-
#
|
|
49
|
-
#
|
|
28
|
+
# In the following form:
|
|
29
|
+
# [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
|
|
50
30
|
#
|
|
51
|
-
def
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
31
|
+
def generate_from inverted
|
|
32
|
+
hash = hashify inverted.keys
|
|
33
|
+
sort hash
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
protected
|
|
37
|
+
|
|
38
|
+
# Sorts the index values in place.
|
|
39
|
+
#
|
|
40
|
+
def sort hash
|
|
41
|
+
hash.each_pair.each do |code, ary|
|
|
42
|
+
ary.sort_by_levenshtein! code
|
|
43
|
+
ary.slice! amount, ary.size # size is not perfectly correct, but anyway
|
|
56
44
|
end
|
|
57
|
-
|
|
45
|
+
hash
|
|
58
46
|
end
|
|
59
|
-
|
|
47
|
+
|
|
48
|
+
# Hashifies a list of symbols.
|
|
49
|
+
#
|
|
50
|
+
# Where:
|
|
51
|
+
# { encoded_sym => [syms] }
|
|
52
|
+
#
|
|
53
|
+
def hashify list
|
|
54
|
+
list.inject({}) do |total, element|
|
|
55
|
+
if code = encoded(element)
|
|
56
|
+
total[code] ||= []
|
|
57
|
+
total[code] << element
|
|
58
|
+
end
|
|
59
|
+
total
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
end
|
|
60
64
|
|
|
61
65
|
end
|
|
62
66
|
|
|
@@ -1,24 +1,28 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
module Generators
|
|
1
|
+
module Picky
|
|
4
2
|
|
|
5
|
-
|
|
3
|
+
# encoding: utf-8
|
|
4
|
+
#
|
|
5
|
+
module Generators
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
# and Levenshtein.
|
|
9
|
-
#
|
|
10
|
-
# It uses the soundex to get similar words
|
|
11
|
-
# and ranks them using the levenshtein.
|
|
12
|
-
#
|
|
13
|
-
class Soundex < Phonetic
|
|
7
|
+
module Similarity
|
|
14
8
|
|
|
15
|
-
#
|
|
9
|
+
# It's actually a combination of soundex
|
|
10
|
+
# and Levenshtein.
|
|
16
11
|
#
|
|
17
|
-
#
|
|
12
|
+
# It uses the soundex to get similar words
|
|
13
|
+
# and ranks them using the levenshtein.
|
|
18
14
|
#
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
15
|
+
class Soundex < Phonetic
|
|
16
|
+
|
|
17
|
+
# Encodes the given symbol.
|
|
18
|
+
#
|
|
19
|
+
# Returns a symbol.
|
|
20
|
+
#
|
|
21
|
+
def encoded sym
|
|
22
|
+
code = Text::Soundex.soundex sym.to_s
|
|
23
|
+
code.to_sym if code
|
|
24
|
+
end
|
|
25
|
+
|
|
22
26
|
end
|
|
23
27
|
|
|
24
28
|
end
|
|
@@ -1,9 +1,13 @@
|
|
|
1
|
-
module
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
1
|
+
module Picky
|
|
2
|
+
|
|
3
|
+
module Generators
|
|
4
|
+
module Similarity
|
|
5
|
+
# Base class for all similarity strategies.
|
|
6
|
+
#
|
|
7
|
+
class Strategy < Generators::Strategy
|
|
8
|
+
|
|
9
|
+
end
|
|
7
10
|
end
|
|
8
11
|
end
|
|
12
|
+
|
|
9
13
|
end
|
|
@@ -1,13 +1,17 @@
|
|
|
1
|
-
module
|
|
1
|
+
module Picky
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
class SimilarityGenerator < Base
|
|
3
|
+
module Generators
|
|
6
4
|
|
|
7
|
-
#
|
|
5
|
+
# Uses no similarity as default.
|
|
8
6
|
#
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
class SimilarityGenerator < Base
|
|
8
|
+
|
|
9
|
+
# Generate a similarity index based on the given inverted index.
|
|
10
|
+
#
|
|
11
|
+
def generate strategy = Similarity::None.new
|
|
12
|
+
strategy.generate_from self.inverted
|
|
13
|
+
end
|
|
14
|
+
|
|
11
15
|
end
|
|
12
16
|
|
|
13
17
|
end
|
|
@@ -1,14 +1,18 @@
|
|
|
1
|
-
module
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
1
|
+
module Picky
|
|
2
|
+
|
|
3
|
+
module Generators
|
|
4
|
+
|
|
5
|
+
class Strategy
|
|
6
|
+
|
|
7
|
+
# By default, all caches are saved in a
|
|
8
|
+
# storage (like a file).
|
|
9
|
+
#
|
|
10
|
+
def saved?
|
|
11
|
+
true
|
|
12
|
+
end
|
|
13
|
+
|
|
10
14
|
end
|
|
11
|
-
|
|
15
|
+
|
|
12
16
|
end
|
|
13
17
|
|
|
14
18
|
end
|
|
@@ -1,35 +1,39 @@
|
|
|
1
|
-
module
|
|
1
|
+
module Picky
|
|
2
2
|
|
|
3
|
-
module
|
|
3
|
+
module Generators
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
# If for a key k we have x ids, the weight is:
|
|
7
|
-
# w(x): log(x)
|
|
8
|
-
# Special case: If x < 1, then we use 0.
|
|
9
|
-
#
|
|
10
|
-
class Logarithmic < Strategy
|
|
5
|
+
module Weights
|
|
11
6
|
|
|
12
|
-
#
|
|
7
|
+
# Uses a logarithmic weight.
|
|
8
|
+
# If for a key k we have x ids, the weight is:
|
|
9
|
+
# w(x): log(x)
|
|
10
|
+
# Special case: If x < 1, then we use 0.
|
|
13
11
|
#
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
hash
|
|
12
|
+
class Logarithmic < Strategy
|
|
13
|
+
|
|
14
|
+
# Generates a partial index from the given inverted index.
|
|
15
|
+
#
|
|
16
|
+
def generate_from inverted
|
|
17
|
+
inverted.inject({}) do |hash, text_ids|
|
|
18
|
+
text, ids = *text_ids
|
|
19
|
+
weight = weight_for ids.size
|
|
20
|
+
hash[text] ||= weight.round(2) if weight
|
|
21
|
+
hash
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Sets the weight value.
|
|
26
|
+
#
|
|
27
|
+
# If the size is 0 or one, we would get -Infinity or 0.0.
|
|
28
|
+
# Thus we do not set a value if there is just one. The default, dynamically, is 0.
|
|
29
|
+
#
|
|
30
|
+
# BUT: We need the value, even if 0. To designate that there is a weight!
|
|
31
|
+
#
|
|
32
|
+
def weight_for amount
|
|
33
|
+
return 0 if amount < 1
|
|
34
|
+
Math.log amount
|
|
20
35
|
end
|
|
21
|
-
end
|
|
22
36
|
|
|
23
|
-
# Sets the weight value.
|
|
24
|
-
#
|
|
25
|
-
# If the size is 0 or one, we would get -Infinity or 0.0.
|
|
26
|
-
# Thus we do not set a value if there is just one. The default, dynamically, is 0.
|
|
27
|
-
#
|
|
28
|
-
# BUT: We need the value, even if 0. To designate that there is a weight!
|
|
29
|
-
#
|
|
30
|
-
def weight_for amount
|
|
31
|
-
return 0 if amount < 1
|
|
32
|
-
Math.log amount
|
|
33
37
|
end
|
|
34
38
|
|
|
35
39
|
end
|
|
@@ -1,9 +1,13 @@
|
|
|
1
|
-
module
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
1
|
+
module Picky
|
|
2
|
+
|
|
3
|
+
module Generators
|
|
4
|
+
module Weights
|
|
5
|
+
# Superclass for weighing strategies.
|
|
6
|
+
#
|
|
7
|
+
class Strategy < Generators::Strategy
|
|
8
|
+
|
|
9
|
+
end
|
|
7
10
|
end
|
|
8
11
|
end
|
|
12
|
+
|
|
9
13
|
end
|
|
@@ -1,13 +1,17 @@
|
|
|
1
|
-
module
|
|
1
|
+
module Picky
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
class WeightsGenerator < Base
|
|
3
|
+
module Generators
|
|
6
4
|
|
|
7
|
-
#
|
|
5
|
+
# Uses a logarithmic algorithm as default.
|
|
8
6
|
#
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
class WeightsGenerator < Base
|
|
8
|
+
|
|
9
|
+
# Generate a weights index based on the given inverted index.
|
|
10
|
+
#
|
|
11
|
+
def generate strategy = Weights::Logarithmic.new
|
|
12
|
+
strategy.generate_from self.inverted
|
|
13
|
+
end
|
|
14
|
+
|
|
11
15
|
end
|
|
12
16
|
|
|
13
17
|
end
|
|
@@ -1,19 +1,23 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
1
|
+
module Picky
|
|
2
|
+
|
|
3
|
+
# Helper methods for measuring, benchmarking, logging.
|
|
4
|
+
#
|
|
5
|
+
module Helpers
|
|
6
|
+
module Measuring
|
|
7
|
+
|
|
8
|
+
# Returns a duration in seconds.
|
|
9
|
+
#
|
|
10
|
+
def timed *args, &block
|
|
11
|
+
block_to_be_measured = lambda &block
|
|
12
|
+
|
|
13
|
+
time_begin = Time.now.to_f
|
|
14
|
+
|
|
15
|
+
block_to_be_measured.call *args
|
|
16
|
+
|
|
17
|
+
Time.now.to_f - time_begin
|
|
18
|
+
end
|
|
19
|
+
|
|
16
20
|
end
|
|
17
|
-
|
|
18
21
|
end
|
|
22
|
+
|
|
19
23
|
end
|
|
@@ -1,41 +1,43 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
module Indexed # :nodoc:all
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
1
|
+
module Picky
|
|
2
|
+
|
|
3
|
+
module Indexed # :nodoc:all
|
|
4
|
+
|
|
5
|
+
# An indexed bundle is a number of memory/redis
|
|
6
|
+
# indexes that compose the indexes for a single category:
|
|
7
|
+
# * core (inverted) index
|
|
8
|
+
# * weights index
|
|
9
|
+
# * similarity index
|
|
10
|
+
# * index configuration
|
|
11
|
+
#
|
|
12
|
+
# Indexed refers to them being indexed.
|
|
13
|
+
# This class notably offers the methods:
|
|
14
|
+
# * load
|
|
15
|
+
# * clear
|
|
16
|
+
#
|
|
17
|
+
# To (re)load or clear the current indexes.
|
|
18
|
+
#
|
|
19
|
+
module Bundle
|
|
20
|
+
|
|
21
|
+
class Base < Picky::Bundle
|
|
22
|
+
|
|
23
|
+
# Loads all indexes.
|
|
24
|
+
#
|
|
25
|
+
def load
|
|
26
|
+
load_inverted
|
|
27
|
+
load_weights
|
|
28
|
+
load_similarity
|
|
29
|
+
load_configuration
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# Clears all indexes.
|
|
33
|
+
#
|
|
34
|
+
def clear
|
|
35
|
+
clear_inverted
|
|
36
|
+
clear_weights
|
|
37
|
+
clear_similarity
|
|
38
|
+
clear_configuration
|
|
39
|
+
end
|
|
31
40
|
|
|
32
|
-
# Clears all indexes.
|
|
33
|
-
#
|
|
34
|
-
def clear
|
|
35
|
-
clear_inverted
|
|
36
|
-
clear_weights
|
|
37
|
-
clear_similarity
|
|
38
|
-
clear_configuration
|
|
39
41
|
end
|
|
40
42
|
|
|
41
43
|
end
|