picky 2.5.2 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/adapters/rack/base.rb +23 -0
- data/lib/picky/adapters/rack/live_parameters.rb +33 -0
- data/lib/picky/adapters/rack/query.rb +65 -0
- data/lib/picky/adapters/rack.rb +30 -0
- data/lib/picky/application.rb +5 -5
- data/lib/picky/backend/backend.rb +108 -0
- data/lib/picky/backend/file/basic.rb +101 -0
- data/lib/picky/backend/file/json.rb +34 -0
- data/lib/picky/backend/file/marshal.rb +34 -0
- data/lib/picky/backend/file/text.rb +56 -0
- data/lib/picky/backend/files.rb +30 -0
- data/lib/picky/backend/redis/basic.rb +85 -0
- data/lib/picky/backend/redis/list_hash.rb +49 -0
- data/lib/picky/backend/redis/string_hash.rb +40 -0
- data/lib/picky/backend/redis.rb +40 -0
- data/lib/picky/calculations/location.rb +57 -0
- data/lib/picky/categories.rb +62 -0
- data/lib/picky/categories_indexed.rb +93 -0
- data/lib/picky/categories_indexing.rb +12 -0
- data/lib/picky/category.rb +127 -0
- data/lib/picky/category_indexed.rb +64 -0
- data/lib/picky/category_indexing.rb +145 -0
- data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
- data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
- data/lib/picky/extensions/class.rb +11 -0
- data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
- data/lib/picky/frontend_adapters/rack.rb +146 -0
- data/lib/picky/generators/aliases.rb +3 -3
- data/lib/picky/generators/base.rb +15 -0
- data/lib/picky/generators/partial/default.rb +5 -0
- data/lib/picky/generators/partial/none.rb +31 -0
- data/lib/picky/generators/partial/strategy.rb +25 -0
- data/lib/picky/generators/partial/substring.rb +118 -0
- data/lib/picky/generators/partial_generator.rb +15 -0
- data/lib/picky/generators/similarity/default.rb +7 -0
- data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
- data/lib/picky/generators/similarity/metaphone.rb +28 -0
- data/lib/picky/generators/similarity/none.rb +31 -0
- data/lib/picky/generators/similarity/phonetic.rb +65 -0
- data/lib/picky/generators/similarity/soundex.rb +28 -0
- data/lib/picky/generators/similarity/strategy.rb +9 -0
- data/lib/picky/generators/similarity_generator.rb +15 -0
- data/lib/picky/generators/strategy.rb +14 -0
- data/lib/picky/generators/weights/default.rb +7 -0
- data/lib/picky/generators/weights/logarithmic.rb +39 -0
- data/lib/picky/generators/weights/strategy.rb +9 -0
- data/lib/picky/generators/weights_generator.rb +15 -0
- data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
- data/lib/picky/index/base.rb +119 -104
- data/lib/picky/index/base_indexed.rb +27 -0
- data/lib/picky/index/base_indexing.rb +119 -0
- data/lib/picky/index/memory.rb +6 -18
- data/lib/picky/index/redis.rb +6 -18
- data/lib/picky/indexed/bundle/base.rb +110 -0
- data/lib/picky/indexed/bundle/memory.rb +91 -0
- data/lib/picky/indexed/bundle/redis.rb +45 -0
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
- data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
- data/lib/picky/indexed/wrappers/category/location.rb +25 -0
- data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
- data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
- data/lib/picky/indexes.rb +73 -0
- data/lib/picky/indexes_indexed.rb +29 -0
- data/lib/picky/indexes_indexing.rb +49 -0
- data/lib/picky/indexing/bundle/base.rb +212 -0
- data/lib/picky/indexing/bundle/memory.rb +25 -0
- data/lib/picky/indexing/bundle/redis.rb +24 -0
- data/lib/picky/indexing/bundle/super_base.rb +61 -0
- data/lib/picky/indexing/wrappers/category/location.rb +25 -0
- data/lib/picky/interfaces/live_parameters.rb +8 -8
- data/lib/picky/loader.rb +89 -95
- data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
- data/lib/picky/query/allocation.rb +84 -0
- data/lib/picky/query/allocations.rb +114 -0
- data/lib/picky/query/combination.rb +76 -0
- data/lib/picky/query/combinations/base.rb +70 -0
- data/lib/picky/query/combinations/memory.rb +48 -0
- data/lib/picky/query/combinations/redis.rb +86 -0
- data/lib/picky/query/indexes.rb +195 -0
- data/lib/picky/query/qualifiers.rb +76 -0
- data/lib/picky/query/token.rb +198 -0
- data/lib/picky/query/tokens.rb +103 -0
- data/lib/picky/{internals/query → query}/weights.rb +0 -0
- data/lib/picky/results.rb +1 -1
- data/lib/picky/search.rb +6 -6
- data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
- data/lib/picky/sources/db.rb +7 -7
- data/lib/picky/sources/wrappers/location.rb +2 -2
- data/lib/picky/tokenizers/base.rb +224 -0
- data/lib/picky/tokenizers/index.rb +30 -0
- data/lib/picky/tokenizers/location.rb +49 -0
- data/lib/picky/tokenizers/query.rb +55 -0
- data/lib/tasks/index.rake +4 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
- data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
- data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
- data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
- data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
- data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
- data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
- data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
- data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
- data/spec/lib/index/base_spec.rb +10 -53
- data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
- data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
- data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
- data/spec/lib/indexes_class_spec.rb +30 -0
- data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
- data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
- data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
- data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +5 -5
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/query/qualifiers_spec.rb +4 -4
- data/spec/lib/query/token_spec.rb +3 -3
- data/spec/lib/query/tokens_spec.rb +32 -32
- data/spec/lib/search_spec.rb +5 -5
- data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/sources/db_spec.rb +4 -8
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
- metadata +214 -215
- data/lib/picky/aliases.rb +0 -4
- data/lib/picky/index_bundle.rb +0 -48
- data/lib/picky/indexed/indexes.rb +0 -59
- data/lib/picky/indexing/indexes.rb +0 -87
- data/lib/picky/internals/adapters/rack/base.rb +0 -27
- data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
- data/lib/picky/internals/adapters/rack/query.rb +0 -69
- data/lib/picky/internals/adapters/rack.rb +0 -34
- data/lib/picky/internals/calculations/location.rb +0 -59
- data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
- data/lib/picky/internals/generators/base.rb +0 -19
- data/lib/picky/internals/generators/partial/default.rb +0 -7
- data/lib/picky/internals/generators/partial/none.rb +0 -35
- data/lib/picky/internals/generators/partial/strategy.rb +0 -29
- data/lib/picky/internals/generators/partial/substring.rb +0 -122
- data/lib/picky/internals/generators/partial_generator.rb +0 -19
- data/lib/picky/internals/generators/similarity/default.rb +0 -9
- data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/none.rb +0 -35
- data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
- data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
- data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
- data/lib/picky/internals/generators/similarity_generator.rb +0 -19
- data/lib/picky/internals/generators/strategy.rb +0 -18
- data/lib/picky/internals/generators/weights/default.rb +0 -9
- data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
- data/lib/picky/internals/generators/weights/strategy.rb +0 -11
- data/lib/picky/internals/generators/weights_generator.rb +0 -19
- data/lib/picky/internals/index/backend.rb +0 -112
- data/lib/picky/internals/index/file/basic.rb +0 -105
- data/lib/picky/internals/index/file/json.rb +0 -38
- data/lib/picky/internals/index/file/marshal.rb +0 -38
- data/lib/picky/internals/index/file/text.rb +0 -60
- data/lib/picky/internals/index/files.rb +0 -34
- data/lib/picky/internals/index/redis/basic.rb +0 -89
- data/lib/picky/internals/index/redis/list_hash.rb +0 -53
- data/lib/picky/internals/index/redis/string_hash.rb +0 -44
- data/lib/picky/internals/index/redis.rb +0 -44
- data/lib/picky/internals/indexed/bundle/base.rb +0 -114
- data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
- data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
- data/lib/picky/internals/indexed/categories.rb +0 -140
- data/lib/picky/internals/indexed/category.rb +0 -111
- data/lib/picky/internals/indexed/index.rb +0 -63
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
- data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/internals/indexing/bundle/base.rb +0 -216
- data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
- data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
- data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
- data/lib/picky/internals/indexing/category.rb +0 -153
- data/lib/picky/internals/indexing/index.rb +0 -142
- data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/query/allocation.rb +0 -88
- data/lib/picky/internals/query/allocations.rb +0 -118
- data/lib/picky/internals/query/combination.rb +0 -80
- data/lib/picky/internals/query/combinations/base.rb +0 -74
- data/lib/picky/internals/query/combinations/memory.rb +0 -52
- data/lib/picky/internals/query/combinations/redis.rb +0 -90
- data/lib/picky/internals/query/indexes.rb +0 -199
- data/lib/picky/internals/query/qualifiers.rb +0 -82
- data/lib/picky/internals/query/token.rb +0 -202
- data/lib/picky/internals/query/tokens.rb +0 -109
- data/lib/picky/internals/shared/category.rb +0 -52
- data/lib/picky/internals/tokenizers/base.rb +0 -228
- data/lib/picky/internals/tokenizers/index.rb +0 -34
- data/lib/picky/internals/tokenizers/location.rb +0 -54
- data/lib/picky/internals/tokenizers/query.rb +0 -59
- data/lib/picky/internals.rb +0 -2
- data/spec/lib/aliases_spec.rb +0 -9
- data/spec/lib/index_bundle_spec.rb +0 -69
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
#
|
|
3
|
-
module Internals
  module Generators
    module Similarity

      # Similarity strategy based on the double metaphone encoding.
      #
      # Words that share a double metaphone code end up in the same
      # similarity bucket. Bucket building and Levenshtein ranking are
      # inherited from Phonetic; this class only supplies the encoding.
      #
      class DoubleMetaphone < Phonetic

        # Encodes the given symbol with the double metaphone algorithm.
        #
        # Returns the first of the generated codes as a symbol,
        # or nil if no codes were generated.
        #
        def encoded(sym)
          codes = Text::Metaphone.double_metaphone(sym.to_s)
          return if codes.empty?

          codes.first.to_sym
        end

      end

    end
  end
end
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
#
|
|
3
|
-
module Internals
  module Generators
    module Similarity

      # Similarity strategy based on the metaphone encoding.
      #
      # Words that share a metaphone code end up in the same
      # similarity bucket. Bucket building and Levenshtein ranking are
      # inherited from Phonetic; this class only supplies the encoding.
      #
      class Metaphone < Phonetic

        # Encodes the given symbol with the metaphone algorithm.
        #
        # Returns the code as a symbol, or nil if there is no code.
        #
        def encoded(sym)
          code = Text::Metaphone.metaphone(sym.to_s)
          code ? code.to_sym : nil
        end

      end

    end
  end
end
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
module Internals
  module Generators
    module Similarity

      # The "no similarity" strategy: it never encodes anything
      # and never produces index entries.
      #
      class None < Strategy

        # Ignores the given text; the result is always nil.
        #
        def encoded(text)
          # Deliberately empty: an empty method body evaluates to nil.
        end

        # Ignores the given index; the result is always an empty hash.
        #
        def generate_from(index)
          Hash.new
        end

        # This strategy's generated file is never saved.
        #
        def saved?
          false
        end

      end

    end
  end
end
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
#
|
|
3
|
-
module Internals
  module Generators
    module Similarity

      # Common base for the phonetic similarity strategies.
      #
      # Groups tokens by a phonetic code and ranks the tokens of each
      # group by their Levenshtein distance to that code.
      #
      # The code itself comes from #encoded, which this class calls
      # but does not define. Phonetic cannot be instantiated directly
      # (see #initialize).
      #
      class Phonetic < Strategy

        # Maximum number of similar tokens kept per code.
        #
        attr_reader :amount

        # amount - how many similar tokens to keep per code (default 10).
        #
        # Raises if called on Phonetic itself rather than on a subclass.
        #
        def initialize(amount = 10)
          if self.class == Phonetic
            raise "In Picky 2.0+, the Similarity::Phonetic has been renamed to Similarity::DoubleMetaphone. Please use that one. Thanks!"
          end

          @amount = amount
        end

        # Builds a similarity index from the keys of the given
        # (exact style) index.
        #
        # Example shape:
        #   [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
        #
        def generate_from(index)
          sort(hashify(index.keys))
        end

        protected

          # Ranks each value list in place by Levenshtein distance to
          # its code and cuts it down to at most `amount` entries.
          #
          # Returns the (mutated) index.
          #
          def sort(index)
            index.each_pair do |code, similars|
              similars.sort_by_levenshtein! code
              # The length argument is larger than necessary; it only
              # has to reach the end of the list.
              similars.slice! amount, similars.size
            end
            index
          end

          # Turns a list of symbols into
          #   { encoded_sym => [syms] }
          #
          # Symbols for which #encoded returns nothing are left out.
          #
          def hashify(list)
            list.each_with_object({}) do |element, total|
              code = encoded(element)
              (total[code] ||= []) << element if code
            end
          end

      end

    end
  end
end
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
#
|
|
3
|
-
module Internals
  module Generators
    module Similarity

      # Similarity strategy based on the soundex encoding.
      #
      # Words that share a soundex code end up in the same
      # similarity bucket. Bucket building and Levenshtein ranking are
      # inherited from Phonetic; this class only supplies the encoding.
      #
      class Soundex < Phonetic

        # Encodes the given symbol with the soundex algorithm.
        #
        # Returns the code as a symbol, or nil if there is no code.
        #
        def encoded(sym)
          code = Text::Soundex.soundex(sym.to_s)
          code ? code.to_sym : nil
        end

      end

    end
  end
end
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
module Internals
  module Generators

    # Generator for similarity indexes.
    #
    # Falls back to the "no similarity" strategy when none is given.
    #
    class SimilarityGenerator < Base

      # Hands this generator's index to the strategy and returns
      # whatever the strategy generates from it.
      #
      def generate(strategy = Similarity::None.new)
        source = self.index
        strategy.generate_from(source)
      end

    end

  end
end
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
module Internals
  module Generators
    module Weights

      # Logarithmic weighting.
      #
      # A key with x ids gets the weight
      #   w(x) = log(x)
      # rounded to two places. For x < 1 the weight is 0.
      #
      class Logarithmic < Strategy

        # Builds a weights hash { text => weight } from the
        # given index of { text => ids }.
        #
        def generate_from(index)
          index.each_with_object({}) do |(text, ids), weights|
            weight = weight_for(ids.size)
            weights[text] ||= weight.round(2) if weight
          end
        end

        # Calculates the weight for the given amount of ids.
        #
        # log(0) would be -Infinity, so anything below 1 yields 0.
        # The 0 is returned on purpose rather than nil: a value has
        # to be present to designate that there is a weight.
        #
        def weight_for(amount)
          amount < 1 ? 0 : Math.log(amount)
        end

      end

    end
  end
end
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
module Internals
  module Generators

    # Generator for weights indexes.
    #
    # Falls back to the logarithmic strategy when none is given.
    #
    class WeightsGenerator < Base

      # Hands this generator's index to the strategy and returns
      # whatever the strategy generates from it.
      #
      def generate(strategy = Weights::Logarithmic.new)
        source = self.index
        strategy.generate_from(source)
      end

    end

  end
end
|
|
@@ -1,112 +0,0 @@
|
|
|
1
|
-
module Internals
  module Index

    # Facade over the storage parts of one index bundle:
    # index, weights, similarity and configuration, plus the
    # prepared (text) data file.
    #
    # Only `prepared` is set up here. The readers for index, weights,
    # similarity and configuration are declared but never assigned in
    # this class.
    # NOTE(review): presumably subclasses assign them - confirm.
    #
    class Backend

      attr_reader :bundle_name,
                  :prepared,
                  :index,
                  :weights,
                  :similarity,
                  :configuration

      delegate :index_name, :category_name, :to => :@category

      # bundle_name - the name of the bundle this backend belongs to.
      # category    - must respond to prepared_index_path (and to the
      #               delegated index_name / category_name).
      #
      def initialize(bundle_name, category)
        @bundle_name = bundle_name
        @category    = category
        @prepared    = File::Text.new(category.prepared_index_path)
      end

      # Retrieving data.
      #
      # Passes the block on to the prepared data file.
      #
      def retrieve(&block)
        prepared.retrieve(&block)
      end

      # Dumping.
      #
      def dump_index(index_hash)
        index.dump(index_hash)
      end

      def dump_weights(weights_hash)
        weights.dump(weights_hash)
      end

      def dump_similarity(similarity_hash)
        similarity.dump(similarity_hash)
      end

      def dump_configuration(configuration_hash)
        configuration.dump(configuration_hash)
      end

      # Loading.
      #
      def load_index
        index.load
      end

      def load_similarity
        similarity.load
      end

      def load_weights
        weights.load
      end

      def load_configuration
        configuration.load
      end

      # Cache ok?
      #
      def index_cache_ok?
        index.cache_ok?
      end

      def similarity_cache_ok?
        similarity.cache_ok?
      end

      def weights_cache_ok?
        weights.cache_ok?
      end

      # Cache small?
      #
      def index_cache_small?
        index.cache_small?
      end

      def similarity_cache_small?
        similarity.cache_small?
      end

      def weights_cache_small?
        weights.cache_small?
      end

      # Copies the indexes to the "backup" directory.
      #
      # Parts are handled in the order
      # index, weights, similarity, configuration.
      #
      def backup
        [:index, :weights, :similarity, :configuration].each do |part|
          send(part).backup
        end
      end

      # Restores the indexes from the "backup" directory.
      #
      # Same order as #backup.
      #
      def restore
        [:index, :weights, :similarity, :configuration].each do |part|
          send(part).restore
        end
      end

      # Deletes all index files.
      #
      # The return value is that of the last delete (configuration).
      #
      def delete
        index.delete
        weights.delete
        similarity.delete
        configuration.delete
      end

    end

  end
end
|
|
@@ -1,105 +0,0 @@
|
|
|
1
|
-
module Internals

  module Index

    # Handles all aspects of index files, such as dumping/loading.
    #
    module File

      # Base class for all index files.
      #
      # Provides the path handling, backup/restore, deletion and
      # size checks shared by its subclasses.
      # Not directly useable, as it does not provide
      # dump/load methods.
      #
      # Note: Inside this namespace `File` refers to this module,
      # so Ruby's own File class is always addressed as `::File`.
      #
      class Basic

        # Full path of the file, including the extension.
        #
        attr_reader :cache_path

        # An index cache takes a path, without file extension.
        # The extension is appended from #extension, which
        # subclasses override.
        #
        def initialize cache_path
          @cache_path = "#{cache_path}.#{extension}"
        end

        # The string form of an index file is its path.
        #
        def to_s
          cache_path
        end

        # The default extension for index files is "index".
        #
        def extension
          :index
        end

        # Will copy the index file to a location that
        # is in a directory named "backup" right under
        # the directory the index file is in.
        #
        # The directory is created if necessary.
        #
        def backup
          prepare_backup backup_directory
          # The destination has to be spelled out here; it is the same
          # path that #restore later reads from.
          FileUtils.cp cache_path, backup_file_path_of(cache_path), verbose: true
        end
        # The backup directory of this file.
        # Equal to the file's dirname plus /backup
        #
        def backup_directory
          ::File.join ::File.dirname(cache_path), 'backup'
        end
        # Prepares the backup directory for the file.
        #
        # mkdir_p is a no-op if the directory already exists, so no
        # separate existence check is needed.
        #
        def prepare_backup target
          FileUtils.mkdir_p target
        end

        # Copies the file from its backup location back
        # to the original location.
        #
        def restore
          FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
        end
        # The backup filename: the given path with a "backup"
        # directory inserted before the file name.
        #
        def backup_file_path_of path
          dir, name = ::File.split path
          ::File.join dir, 'backup', name
        end

        # Deletes the file.
        #
        # Done in-process rather than via the shell, so paths with
        # spaces or shell metacharacters are handled literally.
        # A missing file is not an error.
        #
        def delete
          FileUtils.rm_rf cache_path
        end

        # Checks.
        #

        # Is this cache file suspiciously small?
        # (less than 8 Bytes of size)
        #
        def cache_small?
          size_of(cache_path) < 8
        end
        # Is the cache ok? (existing and larger than
        # zero Bytes in size)
        #
        # A small cache is still ok.
        #
        def cache_ok?
          size_of(cache_path) > 0
        end
        # Extracts the size of the file in Bytes.
        #
        # Returns 0 if the file does not exist (File.size? yields nil
        # for missing and for empty files).
        #
        def size_of path
          ::File.size?(path) || 0
        end

      end

    end

  end

end
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
module Internals

  module Index

    module File

      # Index files dumped in the JSON format.
      #
      class JSON < Basic

        # Uses the extension "json".
        #
        def extension
          :json
        end
        # Loads the index hash from json format.
        #
        # Keys are symbolized. The file is opened in block form so
        # the handle is closed as soon as parsing is done, also if
        # the parser raises.
        #
        def load
          ::File.open(cache_path, 'r') do |file|
            Yajl::Parser.parse file, symbolize_keys: true
          end
        end
        # Dumps the index hash in json format.
        #
        # Relies on the hash responding to dump_json (not defined
        # in this file).
        #
        def dump hash
          hash.dump_json cache_path
        end
        # A json file does not provide retrieve functionality.
        #
        # Always raises.
        #
        def retrieve
          raise "Can't retrieve from JSON file. Use text file."
        end

      end

    end

  end

end
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
module Internals

  module Index

    module File

      # Index data in the Ruby Marshal format.
      #
      # Inside this class `Marshal` is the class itself, so Ruby's
      # marshalling module is addressed as `::Marshal`.
      #
      class Marshal < Basic

        # Uses the extension "dump".
        #
        def extension
          :dump
        end
        # Loads the index hash from marshal format.
        #
        # The file is opened in block form so the handle is closed
        # as soon as loading is done, also if loading raises.
        #
        # NOTE(review): Marshal.load must only be used on trusted
        # data - this assumes the dump file was written by this
        # application itself; confirm for your deployment.
        #
        def load
          ::File.open(cache_path, 'r:binary') do |file|
            ::Marshal.load file
          end
        end
        # Dumps the index hash in marshal format.
        #
        # Relies on the hash responding to dump_marshalled (not
        # defined in this file).
        #
        def dump hash
          hash.dump_marshalled cache_path
        end
        # A marshal file does not provide retrieve functionality.
        #
        # Always raises.
        #
        def retrieve
          raise "Can't retrieve from marshalled file. Use text file."
        end

      end

    end

  end

end
|
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
module Internals
  module Index
    module File

      # Index data kept in a plain text file.
      #
      # Text files only serve as prepared data files: they can be
      # written line by line and read back via #retrieve, but they
      # can neither be loaded nor dumped as a whole.
      #
      class Text < Basic

        # Text files end in "txt".
        #
        def extension
          :txt
        end

        # Not supported for text files. Always raises.
        #
        def load
          raise "Can't load from text file. Use JSON or Marshal."
        end

        # Not supported for text files. Always raises.
        #
        def dump(hash)
          raise "Can't dump to text file. Use JSON or Marshal."
        end

        # Reads the prepared data, which has one entry per line:
        #   id,data\n
        #
        # Each line is split at its first comma only.
        #
        # Yields the id as a string and the data, without its
        # trailing newline, as a symbol.
        #
        def retrieve
          ::File.open(cache_path, 'r:binary') do |file|
            file.each_line do |line|
              id, token = line.split(',', 2)
              token.chomp!
              yield id, token.to_sym
            end
          end
        end

        # Opens the file for (binary) writing and passes the
        # block on to ::File.open.
        #
        def open_for_indexing(&block)
          ::File.open(cache_path, 'w:binary', &block)
        end

      end

    end
  end
end
|