picky 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
|
@@ -1,97 +0,0 @@
|
|
|
1
|
-
module Index
|
|
2
|
-
|
|
3
|
-
# Handles all aspects of index files, such as dumping/loading.
|
|
4
|
-
#
|
|
5
|
-
module File
|
|
6
|
-
|
|
7
|
-
# Base class for all index files.
|
|
8
|
-
#
|
|
9
|
-
# Provides necessary helper methods for its
|
|
10
|
-
# subclasses.
|
|
11
|
-
# Not directly useable, as it does not provide
|
|
12
|
-
# dump/load methods.
|
|
13
|
-
#
|
|
14
|
-
class Basic
|
|
15
|
-
|
|
16
|
-
attr_reader :cache_path
|
|
17
|
-
|
|
18
|
-
# An index cache takes a path, without file extension,
|
|
19
|
-
# which will be provided by the subclasses.
|
|
20
|
-
#
|
|
21
|
-
def initialize cache_path
|
|
22
|
-
@cache_path = "#{cache_path}.#{extension}"
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
# The default extension for index files is "index".
|
|
26
|
-
#
|
|
27
|
-
def extension
|
|
28
|
-
:index
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
# Will copy the index file to a location that
|
|
32
|
-
# is in a directory named "backup" right under
|
|
33
|
-
# the directory the index file is in.
|
|
34
|
-
#
|
|
35
|
-
def backup
|
|
36
|
-
prepare_backup backup_directory
|
|
37
|
-
FileUtils.cp cache_path, target, verbose: true
|
|
38
|
-
end
|
|
39
|
-
# The backup directory of this file.
|
|
40
|
-
# Equal to the file's dirname plus /backup
|
|
41
|
-
#
|
|
42
|
-
def backup_directory
|
|
43
|
-
::File.join ::File.dirname(cache_path), 'backup'
|
|
44
|
-
end
|
|
45
|
-
# Prepares the backup directory for the file.
|
|
46
|
-
#
|
|
47
|
-
def prepare_backup target
|
|
48
|
-
FileUtils.mkdir target unless Dir.exists?(target)
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
# Copies the file from its backup location back
|
|
52
|
-
# to the original location.
|
|
53
|
-
#
|
|
54
|
-
def restore
|
|
55
|
-
FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
|
|
56
|
-
end
|
|
57
|
-
# The backup filename.
|
|
58
|
-
#
|
|
59
|
-
def backup_file_path_of path
|
|
60
|
-
dir, name = ::File.split path
|
|
61
|
-
::File.join dir, 'backup', name
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
# Deletes the file.
|
|
65
|
-
#
|
|
66
|
-
def delete
|
|
67
|
-
`rm -Rf #{cache_path}`
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
# Checks.
|
|
71
|
-
#
|
|
72
|
-
|
|
73
|
-
# Is this cache file suspiciously small?
|
|
74
|
-
# (less than 8 Bytes of size)
|
|
75
|
-
#
|
|
76
|
-
def cache_small?
|
|
77
|
-
size_of(cache_path) < 8
|
|
78
|
-
end
|
|
79
|
-
# Is the cache ok? (existing and larger than
|
|
80
|
-
# zero Bytes in size)
|
|
81
|
-
#
|
|
82
|
-
# A small cache is still ok.
|
|
83
|
-
#
|
|
84
|
-
def cache_ok?
|
|
85
|
-
size_of(cache_path) > 0
|
|
86
|
-
end
|
|
87
|
-
# Extracts the size of the file in Bytes.
|
|
88
|
-
#
|
|
89
|
-
def size_of path
|
|
90
|
-
`ls -l #{path} | awk '{print $5}'`.to_i
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
end
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
module Index
|
|
2
|
-
|
|
3
|
-
module File
|
|
4
|
-
|
|
5
|
-
# Index files dumped in the JSON format.
|
|
6
|
-
#
|
|
7
|
-
class JSON < Basic
|
|
8
|
-
|
|
9
|
-
# Uses the extension "json".
|
|
10
|
-
#
|
|
11
|
-
def extension
|
|
12
|
-
:json
|
|
13
|
-
end
|
|
14
|
-
# Loads the index hash from json format.
|
|
15
|
-
#
|
|
16
|
-
def load
|
|
17
|
-
Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true
|
|
18
|
-
end
|
|
19
|
-
# Dumps the index hash in json format.
|
|
20
|
-
#
|
|
21
|
-
def dump hash
|
|
22
|
-
hash.dump_json cache_path
|
|
23
|
-
end
|
|
24
|
-
# A json file does not provide retrieve functionality.
|
|
25
|
-
#
|
|
26
|
-
def retrieve
|
|
27
|
-
raise "Can't retrieve from JSON file. Use text file."
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
end
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
module Index
|
|
2
|
-
|
|
3
|
-
module File
|
|
4
|
-
|
|
5
|
-
# Index data in the Ruby Marshal format.
|
|
6
|
-
#
|
|
7
|
-
class Marshal < Basic
|
|
8
|
-
|
|
9
|
-
# Uses the extension "dump".
|
|
10
|
-
#
|
|
11
|
-
def extension
|
|
12
|
-
:dump
|
|
13
|
-
end
|
|
14
|
-
# Loads the index hash from marshal format.
|
|
15
|
-
#
|
|
16
|
-
def load
|
|
17
|
-
::Marshal.load ::File.open(cache_path, 'r:binary')
|
|
18
|
-
end
|
|
19
|
-
# Dumps the index hash in marshal format.
|
|
20
|
-
#
|
|
21
|
-
def dump hash
|
|
22
|
-
hash.dump_marshalled cache_path
|
|
23
|
-
end
|
|
24
|
-
# A marshal file does not provide retrieve functionality.
|
|
25
|
-
#
|
|
26
|
-
def retrieve
|
|
27
|
-
raise "Can't retrieve from marshalled file. Use text file."
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
end
|
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
module Index
|
|
2
|
-
|
|
3
|
-
module File
|
|
4
|
-
|
|
5
|
-
# Index data dumped in the text format.
|
|
6
|
-
#
|
|
7
|
-
class Text < Basic
|
|
8
|
-
|
|
9
|
-
# Uses the extension "txt".
|
|
10
|
-
#
|
|
11
|
-
def extension
|
|
12
|
-
:txt
|
|
13
|
-
end
|
|
14
|
-
# Text files are used exclusively for
|
|
15
|
-
# prepared data files.
|
|
16
|
-
#
|
|
17
|
-
def load
|
|
18
|
-
raise "Can't load from text file. Use JSON or Marshal."
|
|
19
|
-
end
|
|
20
|
-
# Text files are used exclusively for
|
|
21
|
-
# prepared data files.
|
|
22
|
-
#
|
|
23
|
-
def dump hash
|
|
24
|
-
raise "Can't dump to text file. Use JSON or Marshal."
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
# Retrieves prepared index data in the form
|
|
28
|
-
# * id,data\n
|
|
29
|
-
# * id,data\n
|
|
30
|
-
# * id,data\n
|
|
31
|
-
#
|
|
32
|
-
# Yields an id string and a symbol token.
|
|
33
|
-
#
|
|
34
|
-
def retrieve
|
|
35
|
-
id = nil
|
|
36
|
-
token = nil
|
|
37
|
-
::File.open(cache_path, 'r:binary') do |file|
|
|
38
|
-
file.each_line do |line|
|
|
39
|
-
id, token = line.split ?,, 2
|
|
40
|
-
yield id, (token.chomp! || token).to_sym
|
|
41
|
-
end
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
#
|
|
46
|
-
#
|
|
47
|
-
def open_for_indexing &block
|
|
48
|
-
::File.open cache_path, 'w:binary', &block
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
end
|
data/lib/picky/index/files.rb
DELETED
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
module Index
|
|
2
|
-
|
|
3
|
-
class Files
|
|
4
|
-
|
|
5
|
-
attr_reader :bundle_name
|
|
6
|
-
attr_reader :prepared, :index, :weights, :similarity, :configuration
|
|
7
|
-
|
|
8
|
-
delegate :index_name, :category_name, :to => :@config
|
|
9
|
-
|
|
10
|
-
def initialize bundle_name, config
|
|
11
|
-
@bundle_name = bundle_name
|
|
12
|
-
@config = config
|
|
13
|
-
|
|
14
|
-
# Note: We marshal the similarity, as the
|
|
15
|
-
# Yajl json lib cannot load symbolized
|
|
16
|
-
# values, just keys.
|
|
17
|
-
#
|
|
18
|
-
@prepared = File::Text.new config.prepared_index_path
|
|
19
|
-
@index = File::JSON.new config.index_path(bundle_name, :index)
|
|
20
|
-
@weights = File::JSON.new config.index_path(bundle_name, :weights)
|
|
21
|
-
@similarity = File::Marshal.new config.index_path(bundle_name, :similarity)
|
|
22
|
-
@configuration = File::JSON.new config.index_path(bundle_name, :configuration)
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
# Delegators.
|
|
26
|
-
#
|
|
27
|
-
|
|
28
|
-
# Retrieving data.
|
|
29
|
-
#
|
|
30
|
-
def retrieve &block
|
|
31
|
-
prepared.retrieve &block
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
# Dumping.
|
|
35
|
-
#
|
|
36
|
-
def dump_index index_hash
|
|
37
|
-
index.dump index_hash
|
|
38
|
-
end
|
|
39
|
-
def dump_weights weights_hash
|
|
40
|
-
weights.dump weights_hash
|
|
41
|
-
end
|
|
42
|
-
def dump_similarity similarity_hash
|
|
43
|
-
similarity.dump similarity_hash
|
|
44
|
-
end
|
|
45
|
-
def dump_configuration configuration_hash
|
|
46
|
-
configuration.dump configuration_hash
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
# Loading.
|
|
50
|
-
#
|
|
51
|
-
def load_index
|
|
52
|
-
index.load
|
|
53
|
-
end
|
|
54
|
-
def load_similarity
|
|
55
|
-
similarity.load
|
|
56
|
-
end
|
|
57
|
-
def load_weights
|
|
58
|
-
weights.load
|
|
59
|
-
end
|
|
60
|
-
def load_configuration
|
|
61
|
-
configuration.load
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
# Cache ok?
|
|
65
|
-
#
|
|
66
|
-
def index_cache_ok?
|
|
67
|
-
index.cache_ok?
|
|
68
|
-
end
|
|
69
|
-
def similarity_cache_ok?
|
|
70
|
-
similarity.cache_ok?
|
|
71
|
-
end
|
|
72
|
-
def weights_cache_ok?
|
|
73
|
-
weights.cache_ok?
|
|
74
|
-
end
|
|
75
|
-
|
|
76
|
-
# Cache small?
|
|
77
|
-
#
|
|
78
|
-
def index_cache_small?
|
|
79
|
-
index.cache_small?
|
|
80
|
-
end
|
|
81
|
-
def similarity_cache_small?
|
|
82
|
-
similarity.cache_small?
|
|
83
|
-
end
|
|
84
|
-
def weights_cache_small?
|
|
85
|
-
weights.cache_small?
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
# Copies the indexes to the "backup" directory.
|
|
89
|
-
#
|
|
90
|
-
def backup
|
|
91
|
-
index.backup
|
|
92
|
-
weights.backup
|
|
93
|
-
similarity.backup
|
|
94
|
-
configuration.backup
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
# Restores the indexes from the "backup" directory.
|
|
98
|
-
#
|
|
99
|
-
def restore
|
|
100
|
-
index.restore
|
|
101
|
-
weights.restore
|
|
102
|
-
similarity.restore
|
|
103
|
-
configuration.restore
|
|
104
|
-
end
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
# Delete all index files.
|
|
108
|
-
#
|
|
109
|
-
def delete
|
|
110
|
-
index.delete
|
|
111
|
-
weights.delete
|
|
112
|
-
similarity.delete
|
|
113
|
-
configuration.delete
|
|
114
|
-
end
|
|
115
|
-
|
|
116
|
-
end
|
|
117
|
-
|
|
118
|
-
end
|
data/lib/picky/index_api.rb
DELETED
|
@@ -1,175 +0,0 @@
|
|
|
1
|
-
# This class defines the indexing and index API that is exposed to the user
|
|
2
|
-
# as the #index method inside the Application class.
|
|
3
|
-
#
|
|
4
|
-
# It provides a single front for both indexing and index options. We suggest to always use the index API.
|
|
5
|
-
#
|
|
6
|
-
# Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
|
|
7
|
-
#
|
|
8
|
-
class IndexAPI
|
|
9
|
-
|
|
10
|
-
attr_reader :name, :indexing, :indexed
|
|
11
|
-
|
|
12
|
-
# Create a new index with a given source.
|
|
13
|
-
#
|
|
14
|
-
# === Parameters
|
|
15
|
-
# * name: A name that will be used for the index directory and in the Picky front end.
|
|
16
|
-
# * source: Where the data comes from, e.g. Sources::CSV.new(...)
|
|
17
|
-
#
|
|
18
|
-
# === Options
|
|
19
|
-
# * result_identifier: Use if you'd like a different identifier/name in the results than the name of the index.
|
|
20
|
-
# * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
|
|
21
|
-
#
|
|
22
|
-
def initialize name, source, options = {}
|
|
23
|
-
@name = name
|
|
24
|
-
@indexing = Indexing::Index.new name, source, options
|
|
25
|
-
@indexed = Indexed::Index.new name, options
|
|
26
|
-
|
|
27
|
-
# Centralized registry.
|
|
28
|
-
#
|
|
29
|
-
Indexes.register self
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
# Defines a searchable category on the index.
|
|
33
|
-
#
|
|
34
|
-
# === Parameters
|
|
35
|
-
# * category_name: This identifier is used in the front end, but also to categorize query text. For example, “title:hobbit” will narrow the hobbit query on categories with the identifier :title.
|
|
36
|
-
#
|
|
37
|
-
# === Options
|
|
38
|
-
# * partial: Partial::None.new or Partial::Substring.new(from: starting_char, to: ending_char). Default is Partial::Substring.new(from: -3, to: -1).
|
|
39
|
-
# * similarity: Similarity::None.new or Similarity::Phonetic.new(similar_words_searched). Default is Similarity::None.new.
|
|
40
|
-
# * qualifiers: An array of qualifiers with which you can define which category you’d like to search, for example “title:hobbit” will search for hobbit in just title categories. Example: qualifiers: [:t, :titre, :title] (use it for example with multiple languages). Default is the name of the category.
|
|
41
|
-
# * qualifier: Convenience option if you just need a single qualifier, see above. Example: qualifier: :title. Default is the name of the category.
|
|
42
|
-
# * source: Use a different source than the index uses. If you think you need that, there might be a better solution to your problem. Please post to the mailing list first with your application.rb :)
|
|
43
|
-
# * from: Take the data from the data category with this name. Example: You have a source Sources::CSV.new(:title, file:'some_file.csv') but you want the category to be called differently. Then you use from: define_category(:similar_title, :from => :title).
|
|
44
|
-
#
|
|
45
|
-
def define_category category_name, options = {}
|
|
46
|
-
category_name = category_name.to_sym
|
|
47
|
-
|
|
48
|
-
indexing_category = indexing.define_category category_name, options
|
|
49
|
-
indexed_category = indexed.define_category category_name, options
|
|
50
|
-
|
|
51
|
-
yield indexing_category, indexed_category if block_given?
|
|
52
|
-
|
|
53
|
-
self
|
|
54
|
-
end
|
|
55
|
-
alias category define_category
|
|
56
|
-
|
|
57
|
-
# HIGHLY EXPERIMENTAL Try if you feel "beta" ;)
|
|
58
|
-
#
|
|
59
|
-
# Make this category range searchable with a fixed range. If you need other ranges, define another category with a different range value.
|
|
60
|
-
#
|
|
61
|
-
# Example:
|
|
62
|
-
# You have data values inside 1..100, and you want to have Picky return
|
|
63
|
-
# not only the results for 47 if you search for 47, but also results for
|
|
64
|
-
# 45, 46, or 47.2, 48.9, in a range of 2 around 47, so (45..49).
|
|
65
|
-
#
|
|
66
|
-
# Then you use:
|
|
67
|
-
# my_index.define_ranged_category :values_inside_1_100, 2
|
|
68
|
-
#
|
|
69
|
-
# Optionally, you give it a precision value to reduce the error margin
|
|
70
|
-
# around 47 (Picky is a bit liberal).
|
|
71
|
-
# my_index.define_ranged_category :values_inside_1_100, 2, precision: 5
|
|
72
|
-
#
|
|
73
|
-
# This will force Picky to maximally be wrong 5% of the given range value
|
|
74
|
-
# (5% of 2 = 0.1) instead of the default 20% (20% of 2 = 0.4).
|
|
75
|
-
#
|
|
76
|
-
# We suggest not to use much more than 5 as a higher precision is more performance intensive for less and less precision gain.
|
|
77
|
-
#
|
|
78
|
-
# == Protip 1
|
|
79
|
-
#
|
|
80
|
-
# Create two ranged categories to make an area search:
|
|
81
|
-
# index.define_ranged_category :x, 1
|
|
82
|
-
# index.define_ranged_category :y, 1
|
|
83
|
-
#
|
|
84
|
-
# Search for it using for example:
|
|
85
|
-
# x:133, y:120
|
|
86
|
-
#
|
|
87
|
-
# This will search this square area (* = 133, 120: The "search" point entered):
|
|
88
|
-
#
|
|
89
|
-
# 132 134
|
|
90
|
-
# | |
|
|
91
|
-
# --|---------|-- 121
|
|
92
|
-
# | |
|
|
93
|
-
# | * |
|
|
94
|
-
# | |
|
|
95
|
-
# --|---------|-- 119
|
|
96
|
-
# | |
|
|
97
|
-
#
|
|
98
|
-
# Note: The area does not need to be square, but can be rectangular.
|
|
99
|
-
#
|
|
100
|
-
# == Protip 2
|
|
101
|
-
#
|
|
102
|
-
# Create three ranged categories to make a volume search.
|
|
103
|
-
#
|
|
104
|
-
# Or go crazy and use 4 ranged categories for a space/time search! ;)
|
|
105
|
-
#
|
|
106
|
-
# === Parameters
|
|
107
|
-
# * category_name: The category_name as used in #define_category.
|
|
108
|
-
# * range: The range (in the units of your data values) around the query point where we search for results.
|
|
109
|
-
#
|
|
110
|
-
# -----|<- range ->*------------|-----
|
|
111
|
-
#
|
|
112
|
-
# === Options
|
|
113
|
-
# * precision: Default is 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
|
|
114
|
-
# * ... all options of #define_category.
|
|
115
|
-
#
|
|
116
|
-
def define_ranged_category category_name, range, options = {}
|
|
117
|
-
precision = options[:precision]
|
|
118
|
-
|
|
119
|
-
options = { partial: Partial::None.new }.merge options
|
|
120
|
-
|
|
121
|
-
define_category category_name, options do |indexing, indexed|
|
|
122
|
-
indexing.source = Sources::Wrappers::Location.new indexing, grid: range, precision: precision
|
|
123
|
-
indexing.tokenizer = Tokenizers::Index.new
|
|
124
|
-
|
|
125
|
-
exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: range, precision: precision
|
|
126
|
-
indexed.exact = exact_bundle
|
|
127
|
-
indexed.partial = exact_bundle # A partial token also uses the exact index.
|
|
128
|
-
end
|
|
129
|
-
end
|
|
130
|
-
alias ranged_category define_ranged_category
|
|
131
|
-
|
|
132
|
-
# HIGHLY EXPERIMENTAL Not correctly working yet. Try it if you feel "beta".
|
|
133
|
-
#
|
|
134
|
-
# Also a range search see #define_ranged_category, but on the earth's surface.
|
|
135
|
-
#
|
|
136
|
-
# Parameters:
|
|
137
|
-
# * name: The name as used in #define_category.
|
|
138
|
-
# * radius: The distance (in km) around the query point which we search for results.
|
|
139
|
-
#
|
|
140
|
-
# Note: Picky uses a square, not a circle. We hope that's ok for most usages.
|
|
141
|
-
#
|
|
142
|
-
# -----------------------------
|
|
143
|
-
# | |
|
|
144
|
-
# | |
|
|
145
|
-
# | |
|
|
146
|
-
# | |
|
|
147
|
-
# | |
|
|
148
|
-
# | *<- radius ->|
|
|
149
|
-
# | |
|
|
150
|
-
# | |
|
|
151
|
-
# | |
|
|
152
|
-
# | |
|
|
153
|
-
# | |
|
|
154
|
-
# -----------------------------
|
|
155
|
-
#
|
|
156
|
-
# Options
|
|
157
|
-
# * precision: Default 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
|
|
158
|
-
# * from: The data category to take the data for this category from.
|
|
159
|
-
#
|
|
160
|
-
# TODO Redo. Will have to write a wrapper that combines two categories that are indexed simultaneously.
|
|
161
|
-
#
|
|
162
|
-
def define_map_location name, radius, options = {} # :nodoc:
|
|
163
|
-
# The radius is given as if all the locations were on the equator.
|
|
164
|
-
#
|
|
165
|
-
# TODO Need to recalculate since not many locations are on the equator ;) This is just a prototype.
|
|
166
|
-
#
|
|
167
|
-
# This calculates km -> longitude (degrees).
|
|
168
|
-
#
|
|
169
|
-
# A degree on the equator is equal to ~111,319.9 meters.
|
|
170
|
-
# So a km on the equator is equal to 0.00898312 degrees.
|
|
171
|
-
#
|
|
172
|
-
define_ranged_category name, radius * 0.00898312, options
|
|
173
|
-
end
|
|
174
|
-
alias map_location define_map_location
|
|
175
|
-
end
|
data/lib/picky/indexed/bundle.rb
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
#
|
|
3
|
-
module Indexed # :nodoc:all
|
|
4
|
-
|
|
5
|
-
# This is the _actual_ index.
|
|
6
|
-
#
|
|
7
|
-
# Handles exact/partial index, weights index, and similarity index.
|
|
8
|
-
#
|
|
9
|
-
# Delegates file handling and checking to an *Index*::*Files* object.
|
|
10
|
-
#
|
|
11
|
-
class Bundle < Index::Bundle
|
|
12
|
-
|
|
13
|
-
# Get the ids for the given symbol.
|
|
14
|
-
#
|
|
15
|
-
def ids sym
|
|
16
|
-
@index[sym] || []
|
|
17
|
-
end
|
|
18
|
-
# Get a weight for the given symbol.
|
|
19
|
-
#
|
|
20
|
-
def weight sym
|
|
21
|
-
@weights[sym]
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
# Loads all indexes.
|
|
25
|
-
#
|
|
26
|
-
def load
|
|
27
|
-
load_index
|
|
28
|
-
load_weights
|
|
29
|
-
load_similarity
|
|
30
|
-
load_configuration
|
|
31
|
-
end
|
|
32
|
-
# Loads the core index.
|
|
33
|
-
#
|
|
34
|
-
def load_index
|
|
35
|
-
self.index = files.load_index
|
|
36
|
-
end
|
|
37
|
-
# Loads the weights index.
|
|
38
|
-
#
|
|
39
|
-
def load_weights
|
|
40
|
-
self.weights = files.load_weights
|
|
41
|
-
end
|
|
42
|
-
# Loads the similarity index.
|
|
43
|
-
#
|
|
44
|
-
def load_similarity
|
|
45
|
-
self.similarity = files.load_similarity
|
|
46
|
-
end
|
|
47
|
-
# Loads the configuration.
|
|
48
|
-
#
|
|
49
|
-
def load_configuration
|
|
50
|
-
self.configuration = files.load_configuration
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
end
|
|
54
|
-
end
|
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
module Indexed

  # A list of indexed categories that turns query tokens into the
  # combinations its categories offer for them.
  #
  class Categories

    attr_reader :categories, :category_hash, :ignore_unassigned_tokens

    each_delegate :load_from_cache,
                  :to => :categories

    # A list of indexed categories.
    #
    # Options:
    #  * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
    #                              The default behaviour is that if a token does not match to
    #                              any category, the query will not return anything (since a
    #                              single token cannot be matched). If you set this option to
    #                              true, any token that cannot be matched to a category will be
    #                              simply ignored.
    #                              Use this if only a few matched words are important, like for
    #                              example of the query "Jonathan Myers 86455 Las Cucarachas"
    #                              you only want to match the zipcode, to have the search engine
    #                              display advertisements on the side for the zipcode.
    #
    def initialize options = {}
      clear

      @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
    end

    # Clears both the array of categories and the hash of categories.
    #
    def clear
      @categories    = []
      @category_hash = {}
    end

    # Adds the given category to the list of categories and registers it
    # under its name.
    #
    # Note: The category is stored wrapped in an array ([category]) as an
    #       optimization: an array of categories is what is needed at
    #       query time, so it is packaged once here instead of on every query.
    #
    def << category
      categories << category
      category_hash[category.name] = [category]
    end

    # Returns all possible combinations for the given token.
    #
    # If the token is marked as similar (for example, one with ~),
    # the combinations of all its similar tokens are put together.
    # Otherwise only the token itself is looked at.
    #
    def possible_combinations_for token
      token.similar? ? similar_possible_for(token) : possible_for(token)
    end

    # Gets all similar tokens and puts together the possible combinations
    # for each found similar token.
    #
    def similar_possible_for token
      inject_possible_for similar_tokens_for(token)
    end

    # Collects, over all categories, the chain of similar tokens that
    # #next_similar_token yields, leaving out those with the same text
    # as the given token.
    #
    # Note: We could also break off early if not all the available
    #       similars are needed. Wait for a concrete case that needs
    #       this before taking action.
    #
    def similar_tokens_for token
      text = token.text
      categories.each_with_object([]) do |category, result|
        next_token = token
        # The loop condition already guarantees a non-nil next_token in the body.
        #
        while next_token = next_token.next_similar_token(category)
          result << next_token if next_token.text != text
        end
      end
    end

    # Returns the concatenated possible combinations of all given tokens.
    #
    # Note: #possible_for answers nil for a token that is to be ignored
    #       (ignore_unassigned_tokens is set and nothing matched).
    #       Such a token contributes nothing here. Adding the nil to the
    #       result array would raise a TypeError.
    #
    def inject_possible_for tokens
      tokens.each_with_object([]) do |token, result|
        possible = possible_for token, possible_categories(token)
        result.concat possible if possible
      end
    end

    # Returns possible combinations for the token.
    #
    # Note: The preselected_categories param is an optimization.
    #
    # Note: Returns [] if no categories matched (will produce no result).
    #       Returns nil if this token needs to be removed from the query.
    #       (Also none of the categories matched, but the ignore unassigned
    #       tokens option is true)
    #
    def possible_for token, preselected_categories = nil
      candidates = preselected_categories || possible_categories(token)
      possible   = candidates.map { |category| category.combination_for(token) }
      possible.compact!
      # This is an optimization to mark tokens that are ignored.
      #
      return if ignore_unassigned_tokens && possible.empty?
      possible
    end

    # Returns the possible categories for this token:
    # the category the user has preselected for it (like "artist:moby"),
    # or, if there is none, all categories, since all are possible.
    #
    # Note: Once I thought this was called too often. But it is not (18.01.2011).
    #
    def possible_categories token
      user_defined_categories(token) || categories
    end

    # Returns the array of categories if the user has defined
    # an existing category.
    #
    # Note: Returns nil if the user did not define one
    #       or if he/she has defined a non-existing one.
    #
    def user_defined_categories token
      category_hash[token.user_defined_category_name]
    end

  end

end
|