picky 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
@@ -1,97 +0,0 @@
|
|
1
|
-
module Index
|
2
|
-
|
3
|
-
# Handles all aspects of index files, such as dumping/loading.
|
4
|
-
#
|
5
|
-
module File
|
6
|
-
|
7
|
-
# Base class for all index files.
|
8
|
-
#
|
9
|
-
# Provides necessary helper methods for its
|
10
|
-
# subclasses.
|
11
|
-
# Not directly useable, as it does not provide
|
12
|
-
# dump/load methods.
|
13
|
-
#
|
14
|
-
class Basic
|
15
|
-
|
16
|
-
attr_reader :cache_path
|
17
|
-
|
18
|
-
# An index cache takes a path, without file extension,
|
19
|
-
# which will be provided by the subclasses.
|
20
|
-
#
|
21
|
-
def initialize cache_path
|
22
|
-
@cache_path = "#{cache_path}.#{extension}"
|
23
|
-
end
|
24
|
-
|
25
|
-
# The default extension for index files is "index".
|
26
|
-
#
|
27
|
-
def extension
|
28
|
-
:index
|
29
|
-
end
|
30
|
-
|
31
|
-
# Will copy the index file to a location that
|
32
|
-
# is in a directory named "backup" right under
|
33
|
-
# the directory the index file is in.
|
34
|
-
#
|
35
|
-
def backup
|
36
|
-
prepare_backup backup_directory
|
37
|
-
FileUtils.cp cache_path, target, verbose: true
|
38
|
-
end
|
39
|
-
# The backup directory of this file.
|
40
|
-
# Equal to the file's dirname plus /backup
|
41
|
-
#
|
42
|
-
def backup_directory
|
43
|
-
::File.join ::File.dirname(cache_path), 'backup'
|
44
|
-
end
|
45
|
-
# Prepares the backup directory for the file.
|
46
|
-
#
|
47
|
-
def prepare_backup target
|
48
|
-
FileUtils.mkdir target unless Dir.exists?(target)
|
49
|
-
end
|
50
|
-
|
51
|
-
# Copies the file from its backup location back
|
52
|
-
# to the original location.
|
53
|
-
#
|
54
|
-
def restore
|
55
|
-
FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
|
56
|
-
end
|
57
|
-
# The backup filename.
|
58
|
-
#
|
59
|
-
def backup_file_path_of path
|
60
|
-
dir, name = ::File.split path
|
61
|
-
::File.join dir, 'backup', name
|
62
|
-
end
|
63
|
-
|
64
|
-
# Deletes the file.
|
65
|
-
#
|
66
|
-
def delete
|
67
|
-
`rm -Rf #{cache_path}`
|
68
|
-
end
|
69
|
-
|
70
|
-
# Checks.
|
71
|
-
#
|
72
|
-
|
73
|
-
# Is this cache file suspiciously small?
|
74
|
-
# (less than 8 Bytes of size)
|
75
|
-
#
|
76
|
-
def cache_small?
|
77
|
-
size_of(cache_path) < 8
|
78
|
-
end
|
79
|
-
# Is the cache ok? (existing and larger than
|
80
|
-
# zero Bytes in size)
|
81
|
-
#
|
82
|
-
# A small cache is still ok.
|
83
|
-
#
|
84
|
-
def cache_ok?
|
85
|
-
size_of(cache_path) > 0
|
86
|
-
end
|
87
|
-
# Extracts the size of the file in Bytes.
|
88
|
-
#
|
89
|
-
def size_of path
|
90
|
-
`ls -l #{path} | awk '{print $5}'`.to_i
|
91
|
-
end
|
92
|
-
|
93
|
-
end
|
94
|
-
|
95
|
-
end
|
96
|
-
|
97
|
-
end
|
@@ -1,34 +0,0 @@
|
|
1
|
-
module Index
|
2
|
-
|
3
|
-
module File
|
4
|
-
|
5
|
-
# Index files dumped in the JSON format.
|
6
|
-
#
|
7
|
-
class JSON < Basic
|
8
|
-
|
9
|
-
# Uses the extension "json".
|
10
|
-
#
|
11
|
-
def extension
|
12
|
-
:json
|
13
|
-
end
|
14
|
-
# Loads the index hash from json format.
|
15
|
-
#
|
16
|
-
def load
|
17
|
-
Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true
|
18
|
-
end
|
19
|
-
# Dumps the index hash in json format.
|
20
|
-
#
|
21
|
-
def dump hash
|
22
|
-
hash.dump_json cache_path
|
23
|
-
end
|
24
|
-
# A json file does not provide retrieve functionality.
|
25
|
-
#
|
26
|
-
def retrieve
|
27
|
-
raise "Can't retrieve from JSON file. Use text file."
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
@@ -1,34 +0,0 @@
|
|
1
|
-
module Index
|
2
|
-
|
3
|
-
module File
|
4
|
-
|
5
|
-
# Index data in the Ruby Marshal format.
|
6
|
-
#
|
7
|
-
class Marshal < Basic
|
8
|
-
|
9
|
-
# Uses the extension "dump".
|
10
|
-
#
|
11
|
-
def extension
|
12
|
-
:dump
|
13
|
-
end
|
14
|
-
# Loads the index hash from marshal format.
|
15
|
-
#
|
16
|
-
def load
|
17
|
-
::Marshal.load ::File.open(cache_path, 'r:binary')
|
18
|
-
end
|
19
|
-
# Dumps the index hash in marshal format.
|
20
|
-
#
|
21
|
-
def dump hash
|
22
|
-
hash.dump_marshalled cache_path
|
23
|
-
end
|
24
|
-
# A marshal file does not provide retrieve functionality.
|
25
|
-
#
|
26
|
-
def retrieve
|
27
|
-
raise "Can't retrieve from marshalled file. Use text file."
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
@@ -1,56 +0,0 @@
|
|
1
|
-
module Index
|
2
|
-
|
3
|
-
module File
|
4
|
-
|
5
|
-
# Index data dumped in the text format.
|
6
|
-
#
|
7
|
-
class Text < Basic
|
8
|
-
|
9
|
-
# Uses the extension "txt".
|
10
|
-
#
|
11
|
-
def extension
|
12
|
-
:txt
|
13
|
-
end
|
14
|
-
# Text files are used exclusively for
|
15
|
-
# prepared data files.
|
16
|
-
#
|
17
|
-
def load
|
18
|
-
raise "Can't load from text file. Use JSON or Marshal."
|
19
|
-
end
|
20
|
-
# Text files are used exclusively for
|
21
|
-
# prepared data files.
|
22
|
-
#
|
23
|
-
def dump hash
|
24
|
-
raise "Can't dump to text file. Use JSON or Marshal."
|
25
|
-
end
|
26
|
-
|
27
|
-
# Retrieves prepared index data in the form
|
28
|
-
# * id,data\n
|
29
|
-
# * id,data\n
|
30
|
-
# * id,data\n
|
31
|
-
#
|
32
|
-
# Yields an id string and a symbol token.
|
33
|
-
#
|
34
|
-
def retrieve
|
35
|
-
id = nil
|
36
|
-
token = nil
|
37
|
-
::File.open(cache_path, 'r:binary') do |file|
|
38
|
-
file.each_line do |line|
|
39
|
-
id, token = line.split ?,, 2
|
40
|
-
yield id, (token.chomp! || token).to_sym
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
#
|
46
|
-
#
|
47
|
-
def open_for_indexing &block
|
48
|
-
::File.open cache_path, 'w:binary', &block
|
49
|
-
end
|
50
|
-
|
51
|
-
|
52
|
-
end
|
53
|
-
|
54
|
-
end
|
55
|
-
|
56
|
-
end
|
data/lib/picky/index/files.rb
DELETED
@@ -1,118 +0,0 @@
|
|
1
|
-
module Index
|
2
|
-
|
3
|
-
class Files
|
4
|
-
|
5
|
-
attr_reader :bundle_name
|
6
|
-
attr_reader :prepared, :index, :weights, :similarity, :configuration
|
7
|
-
|
8
|
-
delegate :index_name, :category_name, :to => :@config
|
9
|
-
|
10
|
-
def initialize bundle_name, config
|
11
|
-
@bundle_name = bundle_name
|
12
|
-
@config = config
|
13
|
-
|
14
|
-
# Note: We marshal the similarity, as the
|
15
|
-
# Yajl json lib cannot load symbolized
|
16
|
-
# values, just keys.
|
17
|
-
#
|
18
|
-
@prepared = File::Text.new config.prepared_index_path
|
19
|
-
@index = File::JSON.new config.index_path(bundle_name, :index)
|
20
|
-
@weights = File::JSON.new config.index_path(bundle_name, :weights)
|
21
|
-
@similarity = File::Marshal.new config.index_path(bundle_name, :similarity)
|
22
|
-
@configuration = File::JSON.new config.index_path(bundle_name, :configuration)
|
23
|
-
end
|
24
|
-
|
25
|
-
# Delegators.
|
26
|
-
#
|
27
|
-
|
28
|
-
# Retrieving data.
|
29
|
-
#
|
30
|
-
def retrieve &block
|
31
|
-
prepared.retrieve &block
|
32
|
-
end
|
33
|
-
|
34
|
-
# Dumping.
|
35
|
-
#
|
36
|
-
def dump_index index_hash
|
37
|
-
index.dump index_hash
|
38
|
-
end
|
39
|
-
def dump_weights weights_hash
|
40
|
-
weights.dump weights_hash
|
41
|
-
end
|
42
|
-
def dump_similarity similarity_hash
|
43
|
-
similarity.dump similarity_hash
|
44
|
-
end
|
45
|
-
def dump_configuration configuration_hash
|
46
|
-
configuration.dump configuration_hash
|
47
|
-
end
|
48
|
-
|
49
|
-
# Loading.
|
50
|
-
#
|
51
|
-
def load_index
|
52
|
-
index.load
|
53
|
-
end
|
54
|
-
def load_similarity
|
55
|
-
similarity.load
|
56
|
-
end
|
57
|
-
def load_weights
|
58
|
-
weights.load
|
59
|
-
end
|
60
|
-
def load_configuration
|
61
|
-
configuration.load
|
62
|
-
end
|
63
|
-
|
64
|
-
# Cache ok?
|
65
|
-
#
|
66
|
-
def index_cache_ok?
|
67
|
-
index.cache_ok?
|
68
|
-
end
|
69
|
-
def similarity_cache_ok?
|
70
|
-
similarity.cache_ok?
|
71
|
-
end
|
72
|
-
def weights_cache_ok?
|
73
|
-
weights.cache_ok?
|
74
|
-
end
|
75
|
-
|
76
|
-
# Cache small?
|
77
|
-
#
|
78
|
-
def index_cache_small?
|
79
|
-
index.cache_small?
|
80
|
-
end
|
81
|
-
def similarity_cache_small?
|
82
|
-
similarity.cache_small?
|
83
|
-
end
|
84
|
-
def weights_cache_small?
|
85
|
-
weights.cache_small?
|
86
|
-
end
|
87
|
-
|
88
|
-
# Copies the indexes to the "backup" directory.
|
89
|
-
#
|
90
|
-
def backup
|
91
|
-
index.backup
|
92
|
-
weights.backup
|
93
|
-
similarity.backup
|
94
|
-
configuration.backup
|
95
|
-
end
|
96
|
-
|
97
|
-
# Restores the indexes from the "backup" directory.
|
98
|
-
#
|
99
|
-
def restore
|
100
|
-
index.restore
|
101
|
-
weights.restore
|
102
|
-
similarity.restore
|
103
|
-
configuration.restore
|
104
|
-
end
|
105
|
-
|
106
|
-
|
107
|
-
# Delete all index files.
|
108
|
-
#
|
109
|
-
def delete
|
110
|
-
index.delete
|
111
|
-
weights.delete
|
112
|
-
similarity.delete
|
113
|
-
configuration.delete
|
114
|
-
end
|
115
|
-
|
116
|
-
end
|
117
|
-
|
118
|
-
end
|
data/lib/picky/index_api.rb
DELETED
@@ -1,175 +0,0 @@
|
|
1
|
-
# This class defines the indexing and index API that is exposed to the user
|
2
|
-
# as the #index method inside the Application class.
|
3
|
-
#
|
4
|
-
# It provides a single front for both indexing and index options. We suggest to always use the index API.
|
5
|
-
#
|
6
|
-
# Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
|
7
|
-
#
|
8
|
-
class IndexAPI
|
9
|
-
|
10
|
-
attr_reader :name, :indexing, :indexed
|
11
|
-
|
12
|
-
# Create a new index with a given source.
|
13
|
-
#
|
14
|
-
# === Parameters
|
15
|
-
# * name: A name that will be used for the index directory and in the Picky front end.
|
16
|
-
# * source: Where the data comes from, e.g. Sources::CSV.new(...)
|
17
|
-
#
|
18
|
-
# === Options
|
19
|
-
# * result_identifier: Use if you'd like a different identifier/name in the results than the name of the index.
|
20
|
-
# * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
|
21
|
-
#
|
22
|
-
def initialize name, source, options = {}
|
23
|
-
@name = name
|
24
|
-
@indexing = Indexing::Index.new name, source, options
|
25
|
-
@indexed = Indexed::Index.new name, options
|
26
|
-
|
27
|
-
# Centralized registry.
|
28
|
-
#
|
29
|
-
Indexes.register self
|
30
|
-
end
|
31
|
-
|
32
|
-
# Defines a searchable category on the index.
|
33
|
-
#
|
34
|
-
# === Parameters
|
35
|
-
# * category_name: This identifier is used in the front end, but also to categorize query text. For example, “title:hobbit” will narrow the hobbit query on categories with the identifier :title.
|
36
|
-
#
|
37
|
-
# === Options
|
38
|
-
# * partial: Partial::None.new or Partial::Substring.new(from: starting_char, to: ending_char). Default is Partial::Substring.new(from: -3, to: -1).
|
39
|
-
# * similarity: Similarity::None.new or Similarity::Phonetic.new(similar_words_searched). Default is Similarity::None.new.
|
40
|
-
# * qualifiers: An array of qualifiers with which you can define which category you’d like to search, for example “title:hobbit” will search for hobbit in just title categories. Example: qualifiers: [:t, :titre, :title] (use it for example with multiple languages). Default is the name of the category.
|
41
|
-
# * qualifier: Convenience option if you just need a single qualifier, see above. Example: qualifier: :title. Default is the name of the category.
|
42
|
-
# * source: Use a different source than the index uses. If you think you need that, there might be a better solution to your problem. Please post to the mailing list first with your application.rb :)
|
43
|
-
# * from: Take the data from the data category with this name. Example: You have a source Sources::CSV.new(:title, file:'some_file.csv') but you want the category to be called differently. Then you use from: define_category(:similar_title, :from => :title).
|
44
|
-
#
|
45
|
-
def define_category category_name, options = {}
|
46
|
-
category_name = category_name.to_sym
|
47
|
-
|
48
|
-
indexing_category = indexing.define_category category_name, options
|
49
|
-
indexed_category = indexed.define_category category_name, options
|
50
|
-
|
51
|
-
yield indexing_category, indexed_category if block_given?
|
52
|
-
|
53
|
-
self
|
54
|
-
end
|
55
|
-
alias category define_category
|
56
|
-
|
57
|
-
# HIGHLY EXPERIMENTAL Try if you feel "beta" ;)
|
58
|
-
#
|
59
|
-
# Make this category range searchable with a fixed range. If you need other ranges, define another category with a different range value.
|
60
|
-
#
|
61
|
-
# Example:
|
62
|
-
# You have data values inside 1..100, and you want to have Picky return
|
63
|
-
# not only the results for 47 if you search for 47, but also results for
|
64
|
-
# 45, 46, or 47.2, 48.9, in a range of 2 around 47, so (45..49).
|
65
|
-
#
|
66
|
-
# Then you use:
|
67
|
-
# my_index.define_ranged_category :values_inside_1_100, 2
|
68
|
-
#
|
69
|
-
# Optionally, you give it a precision value to reduce the error margin
|
70
|
-
# around 47 (Picky is a bit liberal).
|
71
|
-
# my_index.define_ranged_category :values_inside_1_100, 2, precision: 5
|
72
|
-
#
|
73
|
-
# This will force Picky to be wrong by at most 5% of the given range value
|
74
|
-
# (5% of 2 = 0.1) instead of the default 20% (20% of 2 = 0.4).
|
75
|
-
#
|
76
|
-
# We suggest not to use much more than 5 as a higher precision is more performance intensive for less and less precision gain.
|
77
|
-
#
|
78
|
-
# == Protip 1
|
79
|
-
#
|
80
|
-
# Create two ranged categories to make an area search:
|
81
|
-
# index.define_ranged_category :x, 1
|
82
|
-
# index.define_ranged_category :y, 1
|
83
|
-
#
|
84
|
-
# Search for it using for example:
|
85
|
-
# x:133, y:120
|
86
|
-
#
|
87
|
-
# This will search this square area (* = 133, 120: The "search" point entered):
|
88
|
-
#
|
89
|
-
# 132 134
|
90
|
-
# | |
|
91
|
-
# --|---------|-- 121
|
92
|
-
# | |
|
93
|
-
# | * |
|
94
|
-
# | |
|
95
|
-
# --|---------|-- 119
|
96
|
-
# | |
|
97
|
-
#
|
98
|
-
# Note: The area does not need to be square, but can be rectangular.
|
99
|
-
#
|
100
|
-
# == Protip 2
|
101
|
-
#
|
102
|
-
# Create three ranged categories to make a volume search.
|
103
|
-
#
|
104
|
-
# Or go crazy and use 4 ranged categories for a space/time search! ;)
|
105
|
-
#
|
106
|
-
# === Parameters
|
107
|
-
# * category_name: The category_name as used in #define_category.
|
108
|
-
# * range: The range (in the units of your data values) around the query point where we search for results.
|
109
|
-
#
|
110
|
-
# -----|<- range ->*------------|-----
|
111
|
-
#
|
112
|
-
# === Options
|
113
|
-
# * precision: Default is 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
|
114
|
-
# * ... all options of #define_category.
|
115
|
-
#
|
116
|
-
def define_ranged_category category_name, range, options = {}
|
117
|
-
precision = options[:precision]
|
118
|
-
|
119
|
-
options = { partial: Partial::None.new }.merge options
|
120
|
-
|
121
|
-
define_category category_name, options do |indexing, indexed|
|
122
|
-
indexing.source = Sources::Wrappers::Location.new indexing, grid: range, precision: precision
|
123
|
-
indexing.tokenizer = Tokenizers::Index.new
|
124
|
-
|
125
|
-
exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: range, precision: precision
|
126
|
-
indexed.exact = exact_bundle
|
127
|
-
indexed.partial = exact_bundle # A partial token also uses the exact index.
|
128
|
-
end
|
129
|
-
end
|
130
|
-
alias ranged_category define_ranged_category
|
131
|
-
|
132
|
-
# HIGHLY EXPERIMENTAL Not correctly working yet. Try it if you feel "beta".
|
133
|
-
#
|
134
|
-
# Also a range search see #define_ranged_category, but on the earth's surface.
|
135
|
-
#
|
136
|
-
# Parameters:
|
137
|
-
# * name: The name as used in #define_category.
|
138
|
-
# * radius: The distance (in km) around the query point which we search for results.
|
139
|
-
#
|
140
|
-
# Note: Picky uses a square, not a circle. We hope that's ok for most usages.
|
141
|
-
#
|
142
|
-
# -----------------------------
|
143
|
-
# | |
|
144
|
-
# | |
|
145
|
-
# | |
|
146
|
-
# | |
|
147
|
-
# | |
|
148
|
-
# | *<- radius ->|
|
149
|
-
# | |
|
150
|
-
# | |
|
151
|
-
# | |
|
152
|
-
# | |
|
153
|
-
# | |
|
154
|
-
# -----------------------------
|
155
|
-
#
|
156
|
-
# Options
|
157
|
-
# * precision: Default 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
|
158
|
-
# * from: The data category to take the data for this category from.
|
159
|
-
#
|
160
|
-
# TODO Redo. Will have to write a wrapper that combines two categories that are indexed simultaneously.
|
161
|
-
#
|
162
|
-
def define_map_location name, radius, options = {} # :nodoc:
|
163
|
-
# The radius is given as if all the locations were on the equator.
|
164
|
-
#
|
165
|
-
# TODO Need to recalculate since not many locations are on the equator ;) This is just a prototype.
|
166
|
-
#
|
167
|
-
# This calculates km -> longitude (degrees).
|
168
|
-
#
|
169
|
-
# A degree on the equator is equal to ~111,319.9 meters.
|
170
|
-
# So a km on the equator is equal to 0.00898312 degrees.
|
171
|
-
#
|
172
|
-
define_ranged_category name, radius * 0.00898312, options
|
173
|
-
end
|
174
|
-
alias map_location define_map_location
|
175
|
-
end
|
data/lib/picky/indexed/bundle.rb
DELETED
@@ -1,54 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
#
|
3
|
-
module Indexed # :nodoc:all
|
4
|
-
|
5
|
-
# This is the _actual_ index.
|
6
|
-
#
|
7
|
-
# Handles exact/partial index, weights index, and similarity index.
|
8
|
-
#
|
9
|
-
# Delegates file handling and checking to an *Index*::*Files* object.
|
10
|
-
#
|
11
|
-
class Bundle < Index::Bundle
|
12
|
-
|
13
|
-
# Get the ids for the given symbol.
|
14
|
-
#
|
15
|
-
def ids sym
|
16
|
-
@index[sym] || []
|
17
|
-
end
|
18
|
-
# Get a weight for the given symbol.
|
19
|
-
#
|
20
|
-
def weight sym
|
21
|
-
@weights[sym]
|
22
|
-
end
|
23
|
-
|
24
|
-
# Loads all indexes.
|
25
|
-
#
|
26
|
-
def load
|
27
|
-
load_index
|
28
|
-
load_weights
|
29
|
-
load_similarity
|
30
|
-
load_configuration
|
31
|
-
end
|
32
|
-
# Loads the core index.
|
33
|
-
#
|
34
|
-
def load_index
|
35
|
-
self.index = files.load_index
|
36
|
-
end
|
37
|
-
# Loads the weights index.
|
38
|
-
#
|
39
|
-
def load_weights
|
40
|
-
self.weights = files.load_weights
|
41
|
-
end
|
42
|
-
# Loads the similarity index.
|
43
|
-
#
|
44
|
-
def load_similarity
|
45
|
-
self.similarity = files.load_similarity
|
46
|
-
end
|
47
|
-
# Loads the configuration.
|
48
|
-
#
|
49
|
-
def load_configuration
|
50
|
-
self.configuration = files.load_configuration
|
51
|
-
end
|
52
|
-
|
53
|
-
end
|
54
|
-
end
|
@@ -1,131 +0,0 @@
|
|
1
|
-
module Indexed
|
2
|
-
|
3
|
-
class Categories
|
4
|
-
|
5
|
-
attr_reader :categories, :category_hash, :ignore_unassigned_tokens
|
6
|
-
|
7
|
-
each_delegate :load_from_cache,
|
8
|
-
:to => :categories
|
9
|
-
|
10
|
-
# A list of indexed categories.
|
11
|
-
#
|
12
|
-
# Options:
|
13
|
-
# * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
|
14
|
-
# The default behaviour is that if a token does not match to
|
15
|
-
# any category, the query will not return anything (since a
|
16
|
-
# single token cannot be matched). If you set this option to
|
17
|
-
# true, any token that cannot be matched to a category will be
|
18
|
-
# simply ignored.
|
19
|
-
# Use this if only a few matched words are important, like for
|
20
|
-
# example of the query "Jonathan Myers 86455 Las Cucarachas"
|
21
|
-
# you only want to match the zipcode, to have the search engine
|
22
|
-
# display advertisements on the side for the zipcode.
|
23
|
-
# Nifty! :)
|
24
|
-
#
|
25
|
-
def initialize options = {}
|
26
|
-
clear
|
27
|
-
|
28
|
-
@ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
|
29
|
-
end
|
30
|
-
|
31
|
-
# Clears both the array of categories and the hash of categories.
|
32
|
-
#
|
33
|
-
def clear
|
34
|
-
@categories = []
|
35
|
-
@category_hash = {}
|
36
|
-
end
|
37
|
-
|
38
|
-
# Add the given category to the list of categories.
|
39
|
-
#
|
40
|
-
def << category
|
41
|
-
categories << category
|
42
|
-
# Note: [category] is an optimization, since I need an array
|
43
|
-
# of categories.
|
44
|
-
# It's faster to just package it in an array on loading
|
45
|
-
# Picky than doing it over and over with each query.
|
46
|
-
#
|
47
|
-
category_hash[category.name] = [category]
|
48
|
-
end
|
49
|
-
|
50
|
-
# Return all possible combinations for the given token.
|
51
|
-
#
|
52
|
-
# This checks if it needs to also search through similar
|
53
|
-
# tokens, if for example, the token is one with ~.
|
54
|
-
# If yes, it puts together all solutions.
|
55
|
-
#
|
56
|
-
def possible_combinations_for token
|
57
|
-
token.similar? ? similar_possible_for(token) : possible_for(token)
|
58
|
-
end
|
59
|
-
# Gets all similar tokens and puts together the possible combinations
|
60
|
-
# for each found similar token.
|
61
|
-
#
|
62
|
-
def similar_possible_for token
|
63
|
-
# Get as many similar tokens as necessary
|
64
|
-
#
|
65
|
-
tokens = similar_tokens_for token
|
66
|
-
# possible combinations
|
67
|
-
#
|
68
|
-
inject_possible_for tokens
|
69
|
-
end
|
70
|
-
def similar_tokens_for token
|
71
|
-
text = token.text
|
72
|
-
categories.inject([]) do |result, category|
|
73
|
-
next_token = token
|
74
|
-
# Note: We could also break off here if not all the available
|
75
|
-
# similars are needed.
|
76
|
-
# Wait for a concrete case that needs this before taking
|
77
|
-
# action.
|
78
|
-
#
|
79
|
-
while next_token = next_token.next_similar_token(category)
|
80
|
-
result << next_token if next_token && next_token.text != text
|
81
|
-
end
|
82
|
-
result
|
83
|
-
end
|
84
|
-
end
|
85
|
-
def inject_possible_for tokens
|
86
|
-
tokens.inject([]) do |result, token|
|
87
|
-
possible = possible_categories token
|
88
|
-
result + possible_for(token, possible)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
# Returns possible Combinations for the token.
|
93
|
-
#
|
94
|
-
# Note: The preselected_categories param is an optimization.
|
95
|
-
#
|
96
|
-
# Note: Returns [] if no categories matched (will produce no result).
|
97
|
-
# Returns nil if this token needs to be removed from the query.
|
98
|
-
# (Also none of the categories matched, but the ignore unassigned
|
99
|
-
# tokens option is true)
|
100
|
-
#
|
101
|
-
def possible_for token, preselected_categories = nil
|
102
|
-
possible = (preselected_categories || possible_categories(token)).map { |category| category.combination_for(token) }
|
103
|
-
possible.compact!
|
104
|
-
# This is an optimization to mark tokens that are ignored.
|
105
|
-
#
|
106
|
-
return if ignore_unassigned_tokens && possible.empty?
|
107
|
-
possible # wrap in combinations
|
108
|
-
end
|
109
|
-
# This returns the possible categories for this token.
|
110
|
-
# If the user has already preselected a category for this token,
|
111
|
-
# like "artist:moby", if not just return all for the given token,
|
112
|
-
# since all are possible.
|
113
|
-
#
|
114
|
-
# Note: Once I thought this was called too often. But it is not (18.01.2011).
|
115
|
-
#
|
116
|
-
def possible_categories token
|
117
|
-
user_defined_categories(token) || categories
|
118
|
-
end
|
119
|
-
# This returns the array of categories if the user has defined
|
120
|
-
# an existing category.
|
121
|
-
#
|
122
|
-
# Note: Returns nil if the user did not define one
|
123
|
-
# or if he/she has defined a non-existing one.
|
124
|
-
#
|
125
|
-
def user_defined_categories token
|
126
|
-
category_hash[token.user_defined_category_name]
|
127
|
-
end
|
128
|
-
|
129
|
-
end
|
130
|
-
|
131
|
-
end
|