picky 2.5.2 → 2.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/adapters/rack/base.rb +23 -0
- data/lib/picky/adapters/rack/live_parameters.rb +33 -0
- data/lib/picky/adapters/rack/query.rb +65 -0
- data/lib/picky/adapters/rack.rb +30 -0
- data/lib/picky/application.rb +5 -5
- data/lib/picky/backend/backend.rb +108 -0
- data/lib/picky/backend/file/basic.rb +101 -0
- data/lib/picky/backend/file/json.rb +34 -0
- data/lib/picky/backend/file/marshal.rb +34 -0
- data/lib/picky/backend/file/text.rb +56 -0
- data/lib/picky/backend/files.rb +30 -0
- data/lib/picky/backend/redis/basic.rb +85 -0
- data/lib/picky/backend/redis/list_hash.rb +49 -0
- data/lib/picky/backend/redis/string_hash.rb +40 -0
- data/lib/picky/backend/redis.rb +40 -0
- data/lib/picky/calculations/location.rb +57 -0
- data/lib/picky/categories.rb +62 -0
- data/lib/picky/categories_indexed.rb +93 -0
- data/lib/picky/categories_indexing.rb +12 -0
- data/lib/picky/category.rb +127 -0
- data/lib/picky/category_indexed.rb +64 -0
- data/lib/picky/category_indexing.rb +145 -0
- data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
- data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
- data/lib/picky/extensions/class.rb +11 -0
- data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
- data/lib/picky/frontend_adapters/rack.rb +146 -0
- data/lib/picky/generators/aliases.rb +3 -3
- data/lib/picky/generators/base.rb +15 -0
- data/lib/picky/generators/partial/default.rb +5 -0
- data/lib/picky/generators/partial/none.rb +31 -0
- data/lib/picky/generators/partial/strategy.rb +25 -0
- data/lib/picky/generators/partial/substring.rb +118 -0
- data/lib/picky/generators/partial_generator.rb +15 -0
- data/lib/picky/generators/similarity/default.rb +7 -0
- data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
- data/lib/picky/generators/similarity/metaphone.rb +28 -0
- data/lib/picky/generators/similarity/none.rb +31 -0
- data/lib/picky/generators/similarity/phonetic.rb +65 -0
- data/lib/picky/generators/similarity/soundex.rb +28 -0
- data/lib/picky/generators/similarity/strategy.rb +9 -0
- data/lib/picky/generators/similarity_generator.rb +15 -0
- data/lib/picky/generators/strategy.rb +14 -0
- data/lib/picky/generators/weights/default.rb +7 -0
- data/lib/picky/generators/weights/logarithmic.rb +39 -0
- data/lib/picky/generators/weights/strategy.rb +9 -0
- data/lib/picky/generators/weights_generator.rb +15 -0
- data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
- data/lib/picky/index/base.rb +119 -104
- data/lib/picky/index/base_indexed.rb +27 -0
- data/lib/picky/index/base_indexing.rb +119 -0
- data/lib/picky/index/memory.rb +6 -18
- data/lib/picky/index/redis.rb +6 -18
- data/lib/picky/indexed/bundle/base.rb +110 -0
- data/lib/picky/indexed/bundle/memory.rb +91 -0
- data/lib/picky/indexed/bundle/redis.rb +45 -0
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
- data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
- data/lib/picky/indexed/wrappers/category/location.rb +25 -0
- data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
- data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
- data/lib/picky/indexes.rb +73 -0
- data/lib/picky/indexes_indexed.rb +29 -0
- data/lib/picky/indexes_indexing.rb +49 -0
- data/lib/picky/indexing/bundle/base.rb +212 -0
- data/lib/picky/indexing/bundle/memory.rb +25 -0
- data/lib/picky/indexing/bundle/redis.rb +24 -0
- data/lib/picky/indexing/bundle/super_base.rb +61 -0
- data/lib/picky/indexing/wrappers/category/location.rb +25 -0
- data/lib/picky/interfaces/live_parameters.rb +8 -8
- data/lib/picky/loader.rb +89 -95
- data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
- data/lib/picky/query/allocation.rb +84 -0
- data/lib/picky/query/allocations.rb +114 -0
- data/lib/picky/query/combination.rb +76 -0
- data/lib/picky/query/combinations/base.rb +70 -0
- data/lib/picky/query/combinations/memory.rb +48 -0
- data/lib/picky/query/combinations/redis.rb +86 -0
- data/lib/picky/query/indexes.rb +195 -0
- data/lib/picky/query/qualifiers.rb +76 -0
- data/lib/picky/query/token.rb +198 -0
- data/lib/picky/query/tokens.rb +103 -0
- data/lib/picky/{internals/query → query}/weights.rb +0 -0
- data/lib/picky/results.rb +1 -1
- data/lib/picky/search.rb +6 -6
- data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
- data/lib/picky/sources/db.rb +7 -7
- data/lib/picky/sources/wrappers/location.rb +2 -2
- data/lib/picky/tokenizers/base.rb +224 -0
- data/lib/picky/tokenizers/index.rb +30 -0
- data/lib/picky/tokenizers/location.rb +49 -0
- data/lib/picky/tokenizers/query.rb +55 -0
- data/lib/tasks/index.rake +4 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
- data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
- data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
- data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
- data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
- data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
- data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
- data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
- data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
- data/spec/lib/index/base_spec.rb +10 -53
- data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
- data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
- data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
- data/spec/lib/indexes_class_spec.rb +30 -0
- data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
- data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
- data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
- data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +5 -5
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/query/qualifiers_spec.rb +4 -4
- data/spec/lib/query/token_spec.rb +3 -3
- data/spec/lib/query/tokens_spec.rb +32 -32
- data/spec/lib/search_spec.rb +5 -5
- data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/sources/db_spec.rb +4 -8
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
- metadata +214 -215
- data/lib/picky/aliases.rb +0 -4
- data/lib/picky/index_bundle.rb +0 -48
- data/lib/picky/indexed/indexes.rb +0 -59
- data/lib/picky/indexing/indexes.rb +0 -87
- data/lib/picky/internals/adapters/rack/base.rb +0 -27
- data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
- data/lib/picky/internals/adapters/rack/query.rb +0 -69
- data/lib/picky/internals/adapters/rack.rb +0 -34
- data/lib/picky/internals/calculations/location.rb +0 -59
- data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
- data/lib/picky/internals/generators/base.rb +0 -19
- data/lib/picky/internals/generators/partial/default.rb +0 -7
- data/lib/picky/internals/generators/partial/none.rb +0 -35
- data/lib/picky/internals/generators/partial/strategy.rb +0 -29
- data/lib/picky/internals/generators/partial/substring.rb +0 -122
- data/lib/picky/internals/generators/partial_generator.rb +0 -19
- data/lib/picky/internals/generators/similarity/default.rb +0 -9
- data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/none.rb +0 -35
- data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
- data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
- data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
- data/lib/picky/internals/generators/similarity_generator.rb +0 -19
- data/lib/picky/internals/generators/strategy.rb +0 -18
- data/lib/picky/internals/generators/weights/default.rb +0 -9
- data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
- data/lib/picky/internals/generators/weights/strategy.rb +0 -11
- data/lib/picky/internals/generators/weights_generator.rb +0 -19
- data/lib/picky/internals/index/backend.rb +0 -112
- data/lib/picky/internals/index/file/basic.rb +0 -105
- data/lib/picky/internals/index/file/json.rb +0 -38
- data/lib/picky/internals/index/file/marshal.rb +0 -38
- data/lib/picky/internals/index/file/text.rb +0 -60
- data/lib/picky/internals/index/files.rb +0 -34
- data/lib/picky/internals/index/redis/basic.rb +0 -89
- data/lib/picky/internals/index/redis/list_hash.rb +0 -53
- data/lib/picky/internals/index/redis/string_hash.rb +0 -44
- data/lib/picky/internals/index/redis.rb +0 -44
- data/lib/picky/internals/indexed/bundle/base.rb +0 -114
- data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
- data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
- data/lib/picky/internals/indexed/categories.rb +0 -140
- data/lib/picky/internals/indexed/category.rb +0 -111
- data/lib/picky/internals/indexed/index.rb +0 -63
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
- data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/internals/indexing/bundle/base.rb +0 -216
- data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
- data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
- data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
- data/lib/picky/internals/indexing/category.rb +0 -153
- data/lib/picky/internals/indexing/index.rb +0 -142
- data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/query/allocation.rb +0 -88
- data/lib/picky/internals/query/allocations.rb +0 -118
- data/lib/picky/internals/query/combination.rb +0 -80
- data/lib/picky/internals/query/combinations/base.rb +0 -74
- data/lib/picky/internals/query/combinations/memory.rb +0 -52
- data/lib/picky/internals/query/combinations/redis.rb +0 -90
- data/lib/picky/internals/query/indexes.rb +0 -199
- data/lib/picky/internals/query/qualifiers.rb +0 -82
- data/lib/picky/internals/query/token.rb +0 -202
- data/lib/picky/internals/query/tokens.rb +0 -109
- data/lib/picky/internals/shared/category.rb +0 -52
- data/lib/picky/internals/tokenizers/base.rb +0 -228
- data/lib/picky/internals/tokenizers/index.rb +0 -34
- data/lib/picky/internals/tokenizers/location.rb +0 -54
- data/lib/picky/internals/tokenizers/query.rb +0 -59
- data/lib/picky/internals.rb +0 -2
- data/spec/lib/aliases_spec.rb +0 -9
- data/spec/lib/index_bundle_spec.rb +0 -69
@@ -1,32 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
#
|
3
|
-
module Internals
|
4
|
-
|
5
|
-
module Generators
|
6
|
-
|
7
|
-
module Similarity
|
8
|
-
|
9
|
-
# It's actually a combination of double metaphone
|
10
|
-
# and Levenshtein.
|
11
|
-
#
|
12
|
-
# It uses the double metaphone to get similar words
|
13
|
-
# and ranks them using the levenshtein.
|
14
|
-
#
|
15
|
-
class DoubleMetaphone < Phonetic
|
16
|
-
|
17
|
-
# Encodes the given symbol.
|
18
|
-
#
|
19
|
-
# Returns a symbol.
|
20
|
-
#
|
21
|
-
def encoded sym
|
22
|
-
codes = Text::Metaphone.double_metaphone sym.to_s
|
23
|
-
codes.first.to_sym unless codes.empty?
|
24
|
-
end
|
25
|
-
|
26
|
-
end
|
27
|
-
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
|
32
|
-
end
|
@@ -1,32 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
#
|
3
|
-
module Internals
|
4
|
-
|
5
|
-
module Generators
|
6
|
-
|
7
|
-
module Similarity
|
8
|
-
|
9
|
-
# It's actually a combination of metaphone
|
10
|
-
# and Levenshtein.
|
11
|
-
#
|
12
|
-
# It uses the metaphone to get similar words
|
13
|
-
# and ranks them using the levenshtein.
|
14
|
-
#
|
15
|
-
class Metaphone < Phonetic
|
16
|
-
|
17
|
-
# Encodes the given symbol.
|
18
|
-
#
|
19
|
-
# Returns a symbol.
|
20
|
-
#
|
21
|
-
def encoded sym
|
22
|
-
code = Text::Metaphone.metaphone sym.to_s
|
23
|
-
code.to_sym if code
|
24
|
-
end
|
25
|
-
|
26
|
-
end
|
27
|
-
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
|
32
|
-
end
|
@@ -1,35 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Generators
|
4
|
-
|
5
|
-
module Similarity
|
6
|
-
|
7
|
-
# Similarity strategy that does nothing.
|
8
|
-
#
|
9
|
-
class None < Strategy
|
10
|
-
|
11
|
-
# Does not encode text. Just returns nil.
|
12
|
-
#
|
13
|
-
def encoded text
|
14
|
-
nil
|
15
|
-
end
|
16
|
-
|
17
|
-
# Returns an empty index.
|
18
|
-
#
|
19
|
-
def generate_from index
|
20
|
-
{}
|
21
|
-
end
|
22
|
-
|
23
|
-
# Returns if this strategy's generated file is saved.
|
24
|
-
#
|
25
|
-
def saved?
|
26
|
-
false
|
27
|
-
end
|
28
|
-
|
29
|
-
end
|
30
|
-
|
31
|
-
end
|
32
|
-
|
33
|
-
end
|
34
|
-
|
35
|
-
end
|
@@ -1,69 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
#
|
3
|
-
module Internals
|
4
|
-
|
5
|
-
module Generators
|
6
|
-
|
7
|
-
module Similarity
|
8
|
-
|
9
|
-
# It's actually a combination of double metaphone
|
10
|
-
# and Levenshtein.
|
11
|
-
#
|
12
|
-
# It uses the double metaphone to get similar words
|
13
|
-
# and ranks them using the levenshtein.
|
14
|
-
#
|
15
|
-
class Phonetic < Strategy
|
16
|
-
|
17
|
-
attr_reader :amount
|
18
|
-
|
19
|
-
#
|
20
|
-
#
|
21
|
-
def initialize amount = 10
|
22
|
-
raise "In Picky 2.0+, the Similarity::Phonetic has been renamed to Similarity::DoubleMetaphone. Please use that one. Thanks!" if self.class == Phonetic
|
23
|
-
@amount = amount
|
24
|
-
end
|
25
|
-
|
26
|
-
# Generates an index for the given index (in exact index style).
|
27
|
-
#
|
28
|
-
# In the following form:
|
29
|
-
# [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
|
30
|
-
#
|
31
|
-
def generate_from index
|
32
|
-
hash = hashify index.keys
|
33
|
-
sort hash
|
34
|
-
end
|
35
|
-
|
36
|
-
protected
|
37
|
-
|
38
|
-
# Sorts the index values in place.
|
39
|
-
#
|
40
|
-
def sort index
|
41
|
-
index.each_pair.each do |code, ary|
|
42
|
-
ary.sort_by_levenshtein! code
|
43
|
-
ary.slice! amount, ary.size # size is not perfectly correct, but anyway
|
44
|
-
end
|
45
|
-
index
|
46
|
-
end
|
47
|
-
|
48
|
-
# Hashifies a list of symbols.
|
49
|
-
#
|
50
|
-
# Where:
|
51
|
-
# { encoded_sym => [syms] }
|
52
|
-
#
|
53
|
-
def hashify list
|
54
|
-
list.inject({}) do |total, element|
|
55
|
-
if code = encoded(element)
|
56
|
-
total[code] ||= []
|
57
|
-
total[code] << element
|
58
|
-
end
|
59
|
-
total
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
end
|
64
|
-
|
65
|
-
end
|
66
|
-
|
67
|
-
end
|
68
|
-
|
69
|
-
end
|
@@ -1,32 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
#
|
3
|
-
module Internals
|
4
|
-
|
5
|
-
module Generators
|
6
|
-
|
7
|
-
module Similarity
|
8
|
-
|
9
|
-
# It's actually a combination of soundex
|
10
|
-
# and Levenshtein.
|
11
|
-
#
|
12
|
-
# It uses the soundex to get similar words
|
13
|
-
# and ranks them using the levenshtein.
|
14
|
-
#
|
15
|
-
class Soundex < Phonetic
|
16
|
-
|
17
|
-
# Encodes the given symbol.
|
18
|
-
#
|
19
|
-
# Returns a symbol.
|
20
|
-
#
|
21
|
-
def encoded sym
|
22
|
-
code = Text::Soundex.soundex sym.to_s
|
23
|
-
code.to_sym if code
|
24
|
-
end
|
25
|
-
|
26
|
-
end
|
27
|
-
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
|
32
|
-
end
|
@@ -1,19 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Generators
|
4
|
-
|
5
|
-
# Uses no similarity as default.
|
6
|
-
#
|
7
|
-
class SimilarityGenerator < Base
|
8
|
-
|
9
|
-
# Generate a similarity index based on the given index.
|
10
|
-
#
|
11
|
-
def generate strategy = Similarity::None.new
|
12
|
-
strategy.generate_from self.index
|
13
|
-
end
|
14
|
-
|
15
|
-
end
|
16
|
-
|
17
|
-
end
|
18
|
-
|
19
|
-
end
|
@@ -1,43 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Generators
|
4
|
-
|
5
|
-
module Weights
|
6
|
-
|
7
|
-
# Uses a logarithmic weight.
|
8
|
-
# If for a key k we have x ids, the weight is:
|
9
|
-
# w(x): log(x)
|
10
|
-
# Special case: If x < 1, then we use 0.
|
11
|
-
#
|
12
|
-
class Logarithmic < Strategy
|
13
|
-
|
14
|
-
# Generates a partial index from the given index.
|
15
|
-
#
|
16
|
-
def generate_from index
|
17
|
-
index.inject({}) do |hash, text_ids|
|
18
|
-
text, ids = *text_ids
|
19
|
-
weight = weight_for ids.size
|
20
|
-
hash[text] ||= weight.round(2) if weight
|
21
|
-
hash
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
# Sets the weight value.
|
26
|
-
#
|
27
|
-
# If the size is 0 or one, we would get -Infinity or 0.0.
|
28
|
-
# Thus we do not set a value if there is just one. The default, dynamically, is 0.
|
29
|
-
#
|
30
|
-
# BUT: We need the value, even if 0. To designate that there is a weight!
|
31
|
-
#
|
32
|
-
def weight_for amount
|
33
|
-
return 0 if amount < 1
|
34
|
-
Math.log amount
|
35
|
-
end
|
36
|
-
|
37
|
-
end
|
38
|
-
|
39
|
-
end
|
40
|
-
|
41
|
-
end
|
42
|
-
|
43
|
-
end
|
@@ -1,19 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Generators
|
4
|
-
|
5
|
-
# Uses a logarithmic algorithm as default.
|
6
|
-
#
|
7
|
-
class WeightsGenerator < Base
|
8
|
-
|
9
|
-
# Generate a weights index based on the given index.
|
10
|
-
#
|
11
|
-
def generate strategy = Weights::Logarithmic.new
|
12
|
-
strategy.generate_from self.index
|
13
|
-
end
|
14
|
-
|
15
|
-
end
|
16
|
-
|
17
|
-
end
|
18
|
-
|
19
|
-
end
|
@@ -1,112 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Index
|
4
|
-
|
5
|
-
class Backend
|
6
|
-
|
7
|
-
attr_reader :bundle_name
|
8
|
-
attr_reader :prepared, :index, :weights, :similarity, :configuration
|
9
|
-
|
10
|
-
delegate :index_name, :category_name, :to => :@category
|
11
|
-
|
12
|
-
def initialize bundle_name, category
|
13
|
-
@bundle_name = bundle_name
|
14
|
-
@category = category
|
15
|
-
@prepared = File::Text.new category.prepared_index_path
|
16
|
-
end
|
17
|
-
|
18
|
-
# Delegators.
|
19
|
-
#
|
20
|
-
|
21
|
-
# Retrieving data.
|
22
|
-
#
|
23
|
-
def retrieve &block
|
24
|
-
prepared.retrieve &block
|
25
|
-
end
|
26
|
-
|
27
|
-
# Dumping.
|
28
|
-
#
|
29
|
-
def dump_index index_hash
|
30
|
-
index.dump index_hash
|
31
|
-
end
|
32
|
-
def dump_weights weights_hash
|
33
|
-
weights.dump weights_hash
|
34
|
-
end
|
35
|
-
def dump_similarity similarity_hash
|
36
|
-
similarity.dump similarity_hash
|
37
|
-
end
|
38
|
-
def dump_configuration configuration_hash
|
39
|
-
configuration.dump configuration_hash
|
40
|
-
end
|
41
|
-
|
42
|
-
# Loading.
|
43
|
-
#
|
44
|
-
def load_index
|
45
|
-
index.load
|
46
|
-
end
|
47
|
-
def load_similarity
|
48
|
-
similarity.load
|
49
|
-
end
|
50
|
-
def load_weights
|
51
|
-
weights.load
|
52
|
-
end
|
53
|
-
def load_configuration
|
54
|
-
configuration.load
|
55
|
-
end
|
56
|
-
|
57
|
-
# Cache ok?
|
58
|
-
#
|
59
|
-
def index_cache_ok?
|
60
|
-
index.cache_ok?
|
61
|
-
end
|
62
|
-
def similarity_cache_ok?
|
63
|
-
similarity.cache_ok?
|
64
|
-
end
|
65
|
-
def weights_cache_ok?
|
66
|
-
weights.cache_ok?
|
67
|
-
end
|
68
|
-
|
69
|
-
# Cache small?
|
70
|
-
#
|
71
|
-
def index_cache_small?
|
72
|
-
index.cache_small?
|
73
|
-
end
|
74
|
-
def similarity_cache_small?
|
75
|
-
similarity.cache_small?
|
76
|
-
end
|
77
|
-
def weights_cache_small?
|
78
|
-
weights.cache_small?
|
79
|
-
end
|
80
|
-
|
81
|
-
# Copies the indexes to the "backup" directory.
|
82
|
-
#
|
83
|
-
def backup
|
84
|
-
index.backup
|
85
|
-
weights.backup
|
86
|
-
similarity.backup
|
87
|
-
configuration.backup
|
88
|
-
end
|
89
|
-
|
90
|
-
# Restores the indexes from the "backup" directory.
|
91
|
-
#
|
92
|
-
def restore
|
93
|
-
index.restore
|
94
|
-
weights.restore
|
95
|
-
similarity.restore
|
96
|
-
configuration.restore
|
97
|
-
end
|
98
|
-
|
99
|
-
# Delete all index files.
|
100
|
-
#
|
101
|
-
def delete
|
102
|
-
index.delete
|
103
|
-
weights.delete
|
104
|
-
similarity.delete
|
105
|
-
configuration.delete
|
106
|
-
end
|
107
|
-
|
108
|
-
end
|
109
|
-
|
110
|
-
end
|
111
|
-
|
112
|
-
end
|
@@ -1,105 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Index
|
4
|
-
|
5
|
-
# Handles all aspects of index files, such as dumping/loading.
|
6
|
-
#
|
7
|
-
module File
|
8
|
-
|
9
|
-
# Base class for all index files.
|
10
|
-
#
|
11
|
-
# Provides necessary helper methods for its
|
12
|
-
# subclasses.
|
13
|
-
# Not directly useable, as it does not provide
|
14
|
-
# dump/load methods.
|
15
|
-
#
|
16
|
-
class Basic
|
17
|
-
|
18
|
-
attr_reader :cache_path
|
19
|
-
|
20
|
-
# An index cache takes a path, without file extension,
|
21
|
-
# which will be provided by the subclasses.
|
22
|
-
#
|
23
|
-
def initialize cache_path
|
24
|
-
@cache_path = "#{cache_path}.#{extension}"
|
25
|
-
end
|
26
|
-
|
27
|
-
def to_s
|
28
|
-
cache_path
|
29
|
-
end
|
30
|
-
|
31
|
-
# The default extension for index files is "index".
|
32
|
-
#
|
33
|
-
def extension
|
34
|
-
:index
|
35
|
-
end
|
36
|
-
|
37
|
-
# Will copy the index file to a location that
|
38
|
-
# is in a directory named "backup" right under
|
39
|
-
# the directory the index file is in.
|
40
|
-
#
|
41
|
-
def backup
|
42
|
-
prepare_backup backup_directory
|
43
|
-
FileUtils.cp cache_path, target, verbose: true
|
44
|
-
end
|
45
|
-
# The backup directory of this file.
|
46
|
-
# Equal to the file's dirname plus /backup
|
47
|
-
#
|
48
|
-
def backup_directory
|
49
|
-
::File.join ::File.dirname(cache_path), 'backup'
|
50
|
-
end
|
51
|
-
# Prepares the backup directory for the file.
|
52
|
-
#
|
53
|
-
def prepare_backup target
|
54
|
-
FileUtils.mkdir target unless Dir.exists?(target)
|
55
|
-
end
|
56
|
-
|
57
|
-
# Copies the file from its backup location back
|
58
|
-
# to the original location.
|
59
|
-
#
|
60
|
-
def restore
|
61
|
-
FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
|
62
|
-
end
|
63
|
-
# The backup filename.
|
64
|
-
#
|
65
|
-
def backup_file_path_of path
|
66
|
-
dir, name = ::File.split path
|
67
|
-
::File.join dir, 'backup', name
|
68
|
-
end
|
69
|
-
|
70
|
-
# Deletes the file.
|
71
|
-
#
|
72
|
-
def delete
|
73
|
-
`rm -Rf #{cache_path}`
|
74
|
-
end
|
75
|
-
|
76
|
-
# Checks.
|
77
|
-
#
|
78
|
-
|
79
|
-
# Is this cache file suspiciously small?
|
80
|
-
# (less than 8 Bytes of size)
|
81
|
-
#
|
82
|
-
def cache_small?
|
83
|
-
size_of(cache_path) < 8
|
84
|
-
end
|
85
|
-
# Is the cache ok? (existing and larger than
|
86
|
-
# zero Bytes in size)
|
87
|
-
#
|
88
|
-
# A small cache is still ok.
|
89
|
-
#
|
90
|
-
def cache_ok?
|
91
|
-
size_of(cache_path) > 0
|
92
|
-
end
|
93
|
-
# Extracts the size of the file in Bytes.
|
94
|
-
#
|
95
|
-
def size_of path
|
96
|
-
`ls -l #{path} | awk '{print $5}'`.to_i
|
97
|
-
end
|
98
|
-
|
99
|
-
end
|
100
|
-
|
101
|
-
end
|
102
|
-
|
103
|
-
end
|
104
|
-
|
105
|
-
end
|
@@ -1,38 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Index
|
4
|
-
|
5
|
-
module File
|
6
|
-
|
7
|
-
# Index files dumped in the JSON format.
|
8
|
-
#
|
9
|
-
class JSON < Basic
|
10
|
-
|
11
|
-
# Uses the extension "json".
|
12
|
-
#
|
13
|
-
def extension
|
14
|
-
:json
|
15
|
-
end
|
16
|
-
# Loads the index hash from json format.
|
17
|
-
#
|
18
|
-
def load
|
19
|
-
Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true
|
20
|
-
end
|
21
|
-
# Dumps the index hash in json format.
|
22
|
-
#
|
23
|
-
def dump hash
|
24
|
-
hash.dump_json cache_path
|
25
|
-
end
|
26
|
-
# A json file does not provide retrieve functionality.
|
27
|
-
#
|
28
|
-
def retrieve
|
29
|
-
raise "Can't retrieve from JSON file. Use text file."
|
30
|
-
end
|
31
|
-
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
end
|
37
|
-
|
38
|
-
end
|
@@ -1,38 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Index
|
4
|
-
|
5
|
-
module File
|
6
|
-
|
7
|
-
# Index data in the Ruby Marshal format.
|
8
|
-
#
|
9
|
-
class Marshal < Basic
|
10
|
-
|
11
|
-
# Uses the extension "dump".
|
12
|
-
#
|
13
|
-
def extension
|
14
|
-
:dump
|
15
|
-
end
|
16
|
-
# Loads the index hash from marshal format.
|
17
|
-
#
|
18
|
-
def load
|
19
|
-
::Marshal.load ::File.open(cache_path, 'r:binary')
|
20
|
-
end
|
21
|
-
# Dumps the index hash in marshal format.
|
22
|
-
#
|
23
|
-
def dump hash
|
24
|
-
hash.dump_marshalled cache_path
|
25
|
-
end
|
26
|
-
# A marshal file does not provide retrieve functionality.
|
27
|
-
#
|
28
|
-
def retrieve
|
29
|
-
raise "Can't retrieve from marshalled file. Use text file."
|
30
|
-
end
|
31
|
-
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
end
|
37
|
-
|
38
|
-
end
|
@@ -1,60 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Index
|
4
|
-
|
5
|
-
module File
|
6
|
-
|
7
|
-
# Index data dumped in the text format.
|
8
|
-
#
|
9
|
-
class Text < Basic
|
10
|
-
|
11
|
-
# Uses the extension "txt".
|
12
|
-
#
|
13
|
-
def extension
|
14
|
-
:txt
|
15
|
-
end
|
16
|
-
# Text files are used exclusively for
|
17
|
-
# prepared data files.
|
18
|
-
#
|
19
|
-
def load
|
20
|
-
raise "Can't load from text file. Use JSON or Marshal."
|
21
|
-
end
|
22
|
-
# Text files are used exclusively for
|
23
|
-
# prepared data files.
|
24
|
-
#
|
25
|
-
def dump hash
|
26
|
-
raise "Can't dump to text file. Use JSON or Marshal."
|
27
|
-
end
|
28
|
-
|
29
|
-
# Retrieves prepared index data in the form
|
30
|
-
# * id,data\n
|
31
|
-
# * id,data\n
|
32
|
-
# * id,data\n
|
33
|
-
#
|
34
|
-
# Yields an id string and a symbol token.
|
35
|
-
#
|
36
|
-
def retrieve
|
37
|
-
id = nil
|
38
|
-
token = nil
|
39
|
-
::File.open(cache_path, 'r:binary') do |file|
|
40
|
-
file.each_line do |line|
|
41
|
-
id, token = line.split ?,, 2
|
42
|
-
yield id, (token.chomp! || token).to_sym
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
#
|
48
|
-
#
|
49
|
-
def open_for_indexing &block
|
50
|
-
::File.open cache_path, 'w:binary', &block
|
51
|
-
end
|
52
|
-
|
53
|
-
|
54
|
-
end
|
55
|
-
|
56
|
-
end
|
57
|
-
|
58
|
-
end
|
59
|
-
|
60
|
-
end
|