picky 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229)
  1. data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
  2. data/lib/picky/application.rb +18 -19
  3. data/lib/picky/cores.rb +1 -1
  4. data/lib/picky/generators/aliases.rb +3 -0
  5. data/lib/picky/index/base.rb +179 -0
  6. data/lib/picky/index/memory.rb +28 -0
  7. data/lib/picky/index/redis.rb +28 -0
  8. data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
  9. data/lib/picky/indexed/indexes.rb +11 -7
  10. data/lib/picky/indexing/indexes.rb +14 -8
  11. data/lib/picky/internals/adapters/rack/base.rb +27 -0
  12. data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
  13. data/lib/picky/internals/adapters/rack/query.rb +63 -0
  14. data/lib/picky/internals/adapters/rack.rb +34 -0
  15. data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
  16. data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
  17. data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
  18. data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
  19. data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
  20. data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
  21. data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
  22. data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
  23. data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
  24. data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
  25. data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
  26. data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
  27. data/lib/picky/internals/generators/base.rb +19 -0
  28. data/lib/picky/internals/generators/partial/default.rb +7 -0
  29. data/lib/picky/internals/generators/partial/none.rb +35 -0
  30. data/lib/picky/internals/generators/partial/strategy.rb +29 -0
  31. data/lib/picky/internals/generators/partial/substring.rb +122 -0
  32. data/lib/picky/internals/generators/partial_generator.rb +19 -0
  33. data/lib/picky/internals/generators/similarity/default.rb +9 -0
  34. data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
  35. data/lib/picky/internals/generators/similarity/none.rb +35 -0
  36. data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
  37. data/lib/picky/internals/generators/similarity_generator.rb +19 -0
  38. data/lib/picky/internals/generators/strategy.rb +18 -0
  39. data/lib/picky/internals/generators/weights/default.rb +9 -0
  40. data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
  41. data/lib/picky/internals/generators/weights/strategy.rb +11 -0
  42. data/lib/picky/internals/generators/weights_generator.rb +19 -0
  43. data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
  44. data/lib/picky/internals/index/backend.rb +113 -0
  45. data/lib/picky/internals/index/file/basic.rb +101 -0
  46. data/lib/picky/internals/index/file/json.rb +38 -0
  47. data/lib/picky/internals/index/file/marshal.rb +38 -0
  48. data/lib/picky/internals/index/file/text.rb +60 -0
  49. data/lib/picky/internals/index/files.rb +24 -0
  50. data/lib/picky/internals/index/redis/basic.rb +77 -0
  51. data/lib/picky/internals/index/redis/list_hash.rb +46 -0
  52. data/lib/picky/internals/index/redis/string_hash.rb +35 -0
  53. data/lib/picky/internals/index/redis.rb +44 -0
  54. data/lib/picky/internals/indexed/bundle/base.rb +72 -0
  55. data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
  56. data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
  57. data/lib/picky/internals/indexed/categories.rb +135 -0
  58. data/lib/picky/internals/indexed/category.rb +90 -0
  59. data/lib/picky/internals/indexed/index.rb +57 -0
  60. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
  61. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
  62. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
  63. data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
  64. data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
  65. data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
  66. data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
  67. data/lib/picky/internals/indexing/bundle/base.rb +219 -0
  68. data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
  69. data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
  70. data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
  71. data/lib/picky/internals/indexing/categories.rb +42 -0
  72. data/lib/picky/internals/indexing/category.rb +120 -0
  73. data/lib/picky/internals/indexing/index.rb +67 -0
  74. data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
  75. data/lib/picky/internals/query/allocation.rb +88 -0
  76. data/lib/picky/internals/query/allocations.rb +137 -0
  77. data/lib/picky/internals/query/combination.rb +80 -0
  78. data/lib/picky/internals/query/combinations/base.rb +84 -0
  79. data/lib/picky/internals/query/combinations/memory.rb +58 -0
  80. data/lib/picky/internals/query/combinations/redis.rb +59 -0
  81. data/lib/picky/internals/query/indexes.rb +180 -0
  82. data/lib/picky/internals/query/qualifiers.rb +81 -0
  83. data/lib/picky/internals/query/token.rb +215 -0
  84. data/lib/picky/internals/query/tokens.rb +89 -0
  85. data/lib/picky/{query → internals/query}/weights.rb +0 -0
  86. data/lib/picky/internals/results/base.rb +106 -0
  87. data/lib/picky/internals/results/full.rb +17 -0
  88. data/lib/picky/internals/results/live.rb +17 -0
  89. data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
  90. data/lib/picky/internals/tokenizers/base.rb +166 -0
  91. data/lib/picky/internals/tokenizers/index.rb +63 -0
  92. data/lib/picky/internals/tokenizers/query.rb +79 -0
  93. data/lib/picky/loader.rb +148 -112
  94. data/lib/picky/query/base.rb +57 -26
  95. data/lib/picky/query/full.rb +1 -1
  96. data/lib/picky/query/live.rb +1 -1
  97. data/lib/picky/sources/db.rb +27 -6
  98. data/lib/tasks/index.rake +3 -3
  99. data/lib/tasks/try.rake +2 -2
  100. data/spec/lib/aliases_spec.rb +9 -0
  101. data/spec/lib/application_spec.rb +3 -3
  102. data/spec/lib/generators/aliases_spec.rb +1 -0
  103. data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
  104. data/spec/lib/index_bundle_spec.rb +71 -0
  105. data/spec/lib/indexed/indexes_spec.rb +61 -0
  106. data/spec/lib/indexing/indexes_spec.rb +94 -24
  107. data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
  108. data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
  109. data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
  110. data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
  111. data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
  112. data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
  113. data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
  114. data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
  115. data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
  116. data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
  117. data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
  118. data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
  119. data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
  120. data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
  121. data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
  122. data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
  123. data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
  124. data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
  125. data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
  126. data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
  127. data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
  128. data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
  129. data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
  130. data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
  131. data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
  132. data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
  133. data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
  134. data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
  135. data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
  136. data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
  137. data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
  138. data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
  139. data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
  140. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
  141. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
  142. data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
  143. data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
  144. data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
  145. data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
  146. data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
  147. data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
  148. data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
  149. data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
  150. data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
  151. data/spec/lib/internals/results/base_spec.rb +105 -0
  152. data/spec/lib/internals/results/full_spec.rb +78 -0
  153. data/spec/lib/internals/results/live_spec.rb +88 -0
  154. data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
  155. data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
  156. data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
  157. data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
  158. data/spec/lib/query/allocation_spec.rb +12 -12
  159. data/spec/lib/query/allocations_spec.rb +19 -19
  160. data/spec/lib/query/base_spec.rb +28 -4
  161. data/spec/lib/query/combination_spec.rb +8 -9
  162. data/spec/lib/query/combinations/base_spec.rb +116 -0
  163. data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
  164. data/spec/lib/query/combinations/redis_spec.rb +132 -0
  165. data/spec/lib/query/full_spec.rb +2 -2
  166. data/spec/lib/query/indexes_spec.rb +81 -0
  167. data/spec/lib/query/live_spec.rb +3 -3
  168. data/spec/lib/query/qualifiers_spec.rb +6 -6
  169. data/spec/lib/query/token_spec.rb +38 -38
  170. data/spec/lib/query/tokens_spec.rb +35 -35
  171. data/spec/lib/sources/db_spec.rb +23 -18
  172. metadata +212 -181
  173. data/lib/picky/adapters/rack/base.rb +0 -23
  174. data/lib/picky/adapters/rack/live_parameters.rb +0 -33
  175. data/lib/picky/adapters/rack/query.rb +0 -59
  176. data/lib/picky/adapters/rack.rb +0 -28
  177. data/lib/picky/cacher/convenience.rb +0 -3
  178. data/lib/picky/cacher/generator.rb +0 -15
  179. data/lib/picky/cacher/partial/default.rb +0 -5
  180. data/lib/picky/cacher/partial/none.rb +0 -31
  181. data/lib/picky/cacher/partial/strategy.rb +0 -21
  182. data/lib/picky/cacher/partial/substring.rb +0 -118
  183. data/lib/picky/cacher/partial_generator.rb +0 -15
  184. data/lib/picky/cacher/similarity/default.rb +0 -7
  185. data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
  186. data/lib/picky/cacher/similarity/none.rb +0 -31
  187. data/lib/picky/cacher/similarity/strategy.rb +0 -9
  188. data/lib/picky/cacher/similarity_generator.rb +0 -15
  189. data/lib/picky/cacher/strategy.rb +0 -12
  190. data/lib/picky/cacher/weights/default.rb +0 -7
  191. data/lib/picky/cacher/weights/logarithmic.rb +0 -39
  192. data/lib/picky/cacher/weights/strategy.rb +0 -9
  193. data/lib/picky/cacher/weights_generator.rb +0 -15
  194. data/lib/picky/frontend_adapters/rack.rb +0 -150
  195. data/lib/picky/index/bundle.rb +0 -54
  196. data/lib/picky/index/file/basic.rb +0 -97
  197. data/lib/picky/index/file/json.rb +0 -34
  198. data/lib/picky/index/file/marshal.rb +0 -34
  199. data/lib/picky/index/file/text.rb +0 -56
  200. data/lib/picky/index/files.rb +0 -118
  201. data/lib/picky/index_api.rb +0 -175
  202. data/lib/picky/indexed/bundle.rb +0 -54
  203. data/lib/picky/indexed/categories.rb +0 -131
  204. data/lib/picky/indexed/category.rb +0 -85
  205. data/lib/picky/indexed/index.rb +0 -39
  206. data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
  207. data/lib/picky/indexing/bundle.rb +0 -213
  208. data/lib/picky/indexing/categories.rb +0 -38
  209. data/lib/picky/indexing/category.rb +0 -117
  210. data/lib/picky/indexing/index.rb +0 -55
  211. data/lib/picky/query/allocation.rb +0 -82
  212. data/lib/picky/query/allocations.rb +0 -130
  213. data/lib/picky/query/combination.rb +0 -74
  214. data/lib/picky/query/combinations.rb +0 -105
  215. data/lib/picky/query/qualifiers.rb +0 -77
  216. data/lib/picky/query/token.rb +0 -202
  217. data/lib/picky/query/tokens.rb +0 -86
  218. data/lib/picky/query/weigher.rb +0 -165
  219. data/lib/picky/results/base.rb +0 -102
  220. data/lib/picky/results/full.rb +0 -13
  221. data/lib/picky/results/live.rb +0 -13
  222. data/lib/picky/tokenizers/base.rb +0 -161
  223. data/lib/picky/tokenizers/index.rb +0 -58
  224. data/lib/picky/tokenizers/query.rb +0 -74
  225. data/spec/lib/cacher/partial/default_spec.rb +0 -15
  226. data/spec/lib/cacher/partial/none_spec.rb +0 -17
  227. data/spec/lib/cacher/weights_generator_spec.rb +0 -21
  228. data/spec/lib/results/base_spec.rb +0 -257
  229. data/spec/lib/results/live_spec.rb +0 -15
@@ -1,97 +0,0 @@
1
- module Index
2
-
3
- # Handles all aspects of index files, such as dumping/loading.
4
- #
5
- module File
6
-
7
- # Base class for all index files.
8
- #
9
- # Provides necessary helper methods for its
10
- # subclasses.
11
- # Not directly useable, as it does not provide
12
- # dump/load methods.
13
- #
14
- class Basic
15
-
16
- attr_reader :cache_path
17
-
18
- # An index cache takes a path, without file extension,
19
- # which will be provided by the subclasses.
20
- #
21
- def initialize cache_path
22
- @cache_path = "#{cache_path}.#{extension}"
23
- end
24
-
25
- # The default extension for index files is "index".
26
- #
27
- def extension
28
- :index
29
- end
30
-
31
- # Will copy the index file to a location that
32
- # is in a directory named "backup" right under
33
- # the directory the index file is in.
34
- #
35
- def backup
36
- prepare_backup backup_directory
37
- FileUtils.cp cache_path, target, verbose: true
38
- end
39
- # The backup directory of this file.
40
- # Equal to the file's dirname plus /backup
41
- #
42
- def backup_directory
43
- ::File.join ::File.dirname(cache_path), 'backup'
44
- end
45
- # Prepares the backup directory for the file.
46
- #
47
- def prepare_backup target
48
- FileUtils.mkdir target unless Dir.exists?(target)
49
- end
50
-
51
- # Copies the file from its backup location back
52
- # to the original location.
53
- #
54
- def restore
55
- FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
56
- end
57
- # The backup filename.
58
- #
59
- def backup_file_path_of path
60
- dir, name = ::File.split path
61
- ::File.join dir, 'backup', name
62
- end
63
-
64
- # Deletes the file.
65
- #
66
- def delete
67
- `rm -Rf #{cache_path}`
68
- end
69
-
70
- # Checks.
71
- #
72
-
73
- # Is this cache file suspiciously small?
74
- # (less than 8 Bytes of size)
75
- #
76
- def cache_small?
77
- size_of(cache_path) < 8
78
- end
79
- # Is the cache ok? (existing and larger than
80
- # zero Bytes in size)
81
- #
82
- # A small cache is still ok.
83
- #
84
- def cache_ok?
85
- size_of(cache_path) > 0
86
- end
87
- # Extracts the size of the file in Bytes.
88
- #
89
- def size_of path
90
- `ls -l #{path} | awk '{print $5}'`.to_i
91
- end
92
-
93
- end
94
-
95
- end
96
-
97
- end
@@ -1,34 +0,0 @@
1
- module Index
2
-
3
- module File
4
-
5
- # Index files dumped in the JSON format.
6
- #
7
- class JSON < Basic
8
-
9
- # Uses the extension "json".
10
- #
11
- def extension
12
- :json
13
- end
14
- # Loads the index hash from json format.
15
- #
16
- def load
17
- Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true
18
- end
19
- # Dumps the index hash in json format.
20
- #
21
- def dump hash
22
- hash.dump_json cache_path
23
- end
24
- # A json file does not provide retrieve functionality.
25
- #
26
- def retrieve
27
- raise "Can't retrieve from JSON file. Use text file."
28
- end
29
-
30
- end
31
-
32
- end
33
-
34
- end
@@ -1,34 +0,0 @@
1
- module Index
2
-
3
- module File
4
-
5
- # Index data in the Ruby Marshal format.
6
- #
7
- class Marshal < Basic
8
-
9
- # Uses the extension "dump".
10
- #
11
- def extension
12
- :dump
13
- end
14
- # Loads the index hash from marshal format.
15
- #
16
- def load
17
- ::Marshal.load ::File.open(cache_path, 'r:binary')
18
- end
19
- # Dumps the index hash in marshal format.
20
- #
21
- def dump hash
22
- hash.dump_marshalled cache_path
23
- end
24
- # A marshal file does not provide retrieve functionality.
25
- #
26
- def retrieve
27
- raise "Can't retrieve from marshalled file. Use text file."
28
- end
29
-
30
- end
31
-
32
- end
33
-
34
- end
@@ -1,56 +0,0 @@
1
- module Index
2
-
3
- module File
4
-
5
- # Index data dumped in the text format.
6
- #
7
- class Text < Basic
8
-
9
- # Uses the extension "txt".
10
- #
11
- def extension
12
- :txt
13
- end
14
- # Text files are used exclusively for
15
- # prepared data files.
16
- #
17
- def load
18
- raise "Can't load from text file. Use JSON or Marshal."
19
- end
20
- # Text files are used exclusively for
21
- # prepared data files.
22
- #
23
- def dump hash
24
- raise "Can't dump to text file. Use JSON or Marshal."
25
- end
26
-
27
- # Retrieves prepared index data in the form
28
- # * id,data\n
29
- # * id,data\n
30
- # * id,data\n
31
- #
32
- # Yields an id string and a symbol token.
33
- #
34
- def retrieve
35
- id = nil
36
- token = nil
37
- ::File.open(cache_path, 'r:binary') do |file|
38
- file.each_line do |line|
39
- id, token = line.split ?,, 2
40
- yield id, (token.chomp! || token).to_sym
41
- end
42
- end
43
- end
44
-
45
- #
46
- #
47
- def open_for_indexing &block
48
- ::File.open cache_path, 'w:binary', &block
49
- end
50
-
51
-
52
- end
53
-
54
- end
55
-
56
- end
@@ -1,118 +0,0 @@
1
- module Index
2
-
3
- class Files
4
-
5
- attr_reader :bundle_name
6
- attr_reader :prepared, :index, :weights, :similarity, :configuration
7
-
8
- delegate :index_name, :category_name, :to => :@config
9
-
10
- def initialize bundle_name, config
11
- @bundle_name = bundle_name
12
- @config = config
13
-
14
- # Note: We marshal the similarity, as the
15
- # Yajl json lib cannot load symbolized
16
- # values, just keys.
17
- #
18
- @prepared = File::Text.new config.prepared_index_path
19
- @index = File::JSON.new config.index_path(bundle_name, :index)
20
- @weights = File::JSON.new config.index_path(bundle_name, :weights)
21
- @similarity = File::Marshal.new config.index_path(bundle_name, :similarity)
22
- @configuration = File::JSON.new config.index_path(bundle_name, :configuration)
23
- end
24
-
25
- # Delegators.
26
- #
27
-
28
- # Retrieving data.
29
- #
30
- def retrieve &block
31
- prepared.retrieve &block
32
- end
33
-
34
- # Dumping.
35
- #
36
- def dump_index index_hash
37
- index.dump index_hash
38
- end
39
- def dump_weights weights_hash
40
- weights.dump weights_hash
41
- end
42
- def dump_similarity similarity_hash
43
- similarity.dump similarity_hash
44
- end
45
- def dump_configuration configuration_hash
46
- configuration.dump configuration_hash
47
- end
48
-
49
- # Loading.
50
- #
51
- def load_index
52
- index.load
53
- end
54
- def load_similarity
55
- similarity.load
56
- end
57
- def load_weights
58
- weights.load
59
- end
60
- def load_configuration
61
- configuration.load
62
- end
63
-
64
- # Cache ok?
65
- #
66
- def index_cache_ok?
67
- index.cache_ok?
68
- end
69
- def similarity_cache_ok?
70
- similarity.cache_ok?
71
- end
72
- def weights_cache_ok?
73
- weights.cache_ok?
74
- end
75
-
76
- # Cache small?
77
- #
78
- def index_cache_small?
79
- index.cache_small?
80
- end
81
- def similarity_cache_small?
82
- similarity.cache_small?
83
- end
84
- def weights_cache_small?
85
- weights.cache_small?
86
- end
87
-
88
- # Copies the indexes to the "backup" directory.
89
- #
90
- def backup
91
- index.backup
92
- weights.backup
93
- similarity.backup
94
- configuration.backup
95
- end
96
-
97
- # Restores the indexes from the "backup" directory.
98
- #
99
- def restore
100
- index.restore
101
- weights.restore
102
- similarity.restore
103
- configuration.restore
104
- end
105
-
106
-
107
- # Delete all index files.
108
- #
109
- def delete
110
- index.delete
111
- weights.delete
112
- similarity.delete
113
- configuration.delete
114
- end
115
-
116
- end
117
-
118
- end
@@ -1,175 +0,0 @@
1
- # This class defines the indexing and index API that is exposed to the user
2
- # as the #index method inside the Application class.
3
- #
4
- # It provides a single front for both indexing and index options. We suggest to always use the index API.
5
- #
6
- # Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
7
- #
8
- class IndexAPI
9
-
10
- attr_reader :name, :indexing, :indexed
11
-
12
- # Create a new index with a given source.
13
- #
14
- # === Parameters
15
- # * name: A name that will be used for the index directory and in the Picky front end.
16
- # * source: Where the data comes from, e.g. Sources::CSV.new(...)
17
- #
18
- # === Options
19
- # * result_identifier: Use if you'd like a different identifier/name in the results than the name of the index.
20
- # * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
21
- #
22
- def initialize name, source, options = {}
23
- @name = name
24
- @indexing = Indexing::Index.new name, source, options
25
- @indexed = Indexed::Index.new name, options
26
-
27
- # Centralized registry.
28
- #
29
- Indexes.register self
30
- end
31
-
32
- # Defines a searchable category on the index.
33
- #
34
- # === Parameters
35
- # * category_name: This identifier is used in the front end, but also to categorize query text. For example, “title:hobbit” will narrow the hobbit query on categories with the identifier :title.
36
- #
37
- # === Options
38
- # * partial: Partial::None.new or Partial::Substring.new(from: starting_char, to: ending_char). Default is Partial::Substring.new(from: -3, to: -1).
39
- # * similarity: Similarity::None.new or Similarity::Phonetic.new(similar_words_searched). Default is Similarity::None.new.
40
- # * qualifiers: An array of qualifiers with which you can define which category you’d like to search, for example “title:hobbit” will search for hobbit in just title categories. Example: qualifiers: [:t, :titre, :title] (use it for example with multiple languages). Default is the name of the category.
41
- # * qualifier: Convenience option if you just need a single qualifier, see above. Example: qualifier: :title. Default is the name of the category.
42
- # * source: Use a different source than the index uses. If you think you need that, there might be a better solution to your problem. Please post to the mailing list first with your application.rb :)
43
- # * from: Take the data from the data category with this name. Example: You have a source Sources::CSV.new(:title, file:'some_file.csv') but you want the category to be called differently. Then you use from: define_category(:similar_title, :from => :title).
44
- #
45
- def define_category category_name, options = {}
46
- category_name = category_name.to_sym
47
-
48
- indexing_category = indexing.define_category category_name, options
49
- indexed_category = indexed.define_category category_name, options
50
-
51
- yield indexing_category, indexed_category if block_given?
52
-
53
- self
54
- end
55
- alias category define_category
56
-
57
- # HIGHLY EXPERIMENTAL Try if you feel "beta" ;)
58
- #
59
- # Make this category range searchable with a fixed range. If you need other ranges, define another category with a different range value.
60
- #
61
- # Example:
62
- # You have data values inside 1..100, and you want to have Picky return
63
- # not only the results for 47 if you search for 47, but also results for
64
- # 45, 46, or 47.2, 48.9, in a range of 2 around 47, so (45..49).
65
- #
66
- # Then you use:
67
- # my_index.define_ranged_category :values_inside_1_100, 2
68
- #
69
- # Optionally, you give it a precision value to reduce the error margin
70
- # around 47 (Picky is a bit liberal).
71
- # my_index.define_ranged_category :values_inside_1_100, 2, precision: 5
72
- #
73
- # This will force Picky to maximally be wrong 5% of the given range value
74
- # (5% of 2 = 0.1) instead of the default 20% (20% of 2 = 0.4).
75
- #
76
- # We suggest not to use much more than 5 as a higher precision is more performance intensive for less and less precision gain.
77
- #
78
- # == Protip 1
79
- #
80
- # Create two ranged categories to make an area search:
81
- # index.define_ranged_category :x, 1
82
- # index.define_ranged_category :y, 1
83
- #
84
- # Search for it using for example:
85
- # x:133, y:120
86
- #
87
- # This will search this square area (* = 133, 120: The "search" point entered):
88
- #
89
- # 132 134
90
- # | |
91
- # --|---------|-- 121
92
- # | |
93
- # | * |
94
- # | |
95
- # --|---------|-- 119
96
- # | |
97
- #
98
- # Note: The area does not need to be square, but can be rectangular.
99
- #
100
- # == Protip 2
101
- #
102
- # Create three ranged categories to make a volume search.
103
- #
104
- # Or go crazy and use 4 ranged categories for a space/time search! ;)
105
- #
106
- # === Parameters
107
- # * category_name: The category_name as used in #define_category.
108
- # * range: The range (in the units of your data values) around the query point where we search for results.
109
- #
110
- # -----|<- range ->*------------|-----
111
- #
112
- # === Options
113
- # * precision: Default is 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
114
- # * ... all options of #define_category.
115
- #
116
- def define_ranged_category category_name, range, options = {}
117
- precision = options[:precision]
118
-
119
- options = { partial: Partial::None.new }.merge options
120
-
121
- define_category category_name, options do |indexing, indexed|
122
- indexing.source = Sources::Wrappers::Location.new indexing, grid: range, precision: precision
123
- indexing.tokenizer = Tokenizers::Index.new
124
-
125
- exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: range, precision: precision
126
- indexed.exact = exact_bundle
127
- indexed.partial = exact_bundle # A partial token also uses the exact index.
128
- end
129
- end
130
- alias ranged_category define_ranged_category
131
-
132
- # HIGHLY EXPERIMENTAL Not correctly working yet. Try it if you feel "beta".
133
- #
134
- # Also a range search see #define_ranged_category, but on the earth's surface.
135
- #
136
- # Parameters:
137
- # * name: The name as used in #define_category.
138
- # * radius: The distance (in km) around the query point which we search for results.
139
- #
140
- # Note: Picky uses a square, not a circle. We hope that's ok for most usages.
141
- #
142
- # -----------------------------
143
- # | |
144
- # | |
145
- # | |
146
- # | |
147
- # | |
148
- # | *<- radius ->|
149
- # | |
150
- # | |
151
- # | |
152
- # | |
153
- # | |
154
- # -----------------------------
155
- #
156
- # Options
157
- # * precision: Default 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
158
- # * from: The data category to take the data for this category from.
159
- #
160
- # TODO Redo. Will have to write a wrapper that combines two categories that are indexed simultaneously.
161
- #
162
- def define_map_location name, radius, options = {} # :nodoc:
163
- # The radius is given as if all the locations were on the equator.
164
- #
165
- # TODO Need to recalculate since not many locations are on the equator ;) This is just a prototype.
166
- #
167
- # This calculates km -> longitude (degrees).
168
- #
169
- # A degree on the equator is equal to ~111,319.9 meters.
170
- # So a km on the equator is equal to 0.00898312 degrees.
171
- #
172
- define_ranged_category name, radius * 0.00898312, options
173
- end
174
- alias map_location define_map_location
175
- end
@@ -1,54 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- module Indexed # :nodoc:all
4
-
5
- # This is the _actual_ index.
6
- #
7
- # Handles exact/partial index, weights index, and similarity index.
8
- #
9
- # Delegates file handling and checking to an *Index*::*Files* object.
10
- #
11
- class Bundle < Index::Bundle
12
-
13
- # Get the ids for the given symbol.
14
- #
15
- def ids sym
16
- @index[sym] || []
17
- end
18
- # Get a weight for the given symbol.
19
- #
20
- def weight sym
21
- @weights[sym]
22
- end
23
-
24
- # Loads all indexes.
25
- #
26
- def load
27
- load_index
28
- load_weights
29
- load_similarity
30
- load_configuration
31
- end
32
- # Loads the core index.
33
- #
34
- def load_index
35
- self.index = files.load_index
36
- end
37
- # Loads the weights index.
38
- #
39
- def load_weights
40
- self.weights = files.load_weights
41
- end
42
- # Loads the similarity index.
43
- #
44
- def load_similarity
45
- self.similarity = files.load_similarity
46
- end
47
- # Loads the configuration.
48
- #
49
- def load_configuration
50
- self.configuration = files.load_configuration
51
- end
52
-
53
- end
54
- end
@@ -1,131 +0,0 @@
1
- module Indexed
2
-
3
- class Categories
4
-
5
- attr_reader :categories, :category_hash, :ignore_unassigned_tokens
6
-
7
- each_delegate :load_from_cache,
8
- :to => :categories
9
-
10
- # A list of indexed categories.
11
- #
12
- # Options:
13
- # * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
14
- # The default behaviour is that if a token does not match to
15
- # any category, the query will not return anything (since a
16
- # single token cannot be matched). If you set this option to
17
- # true, any token that cannot be matched to a category will be
18
- # simply ignored.
19
- # Use this if only a few matched words are important, like for
20
- # example of the query "Jonathan Myers 86455 Las Cucarachas"
21
- # you only want to match the zipcode, to have the search engine
22
- # display advertisements on the side for the zipcode.
23
- # Nifty! :)
24
- #
25
- def initialize options = {}
26
- clear
27
-
28
- @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
29
- end
30
-
31
- # Clears both the array of categories and the hash of categories.
32
- #
33
- def clear
34
- @categories = []
35
- @category_hash = {}
36
- end
37
-
38
- # Add the given category to the list of categories.
39
- #
40
- def << category
41
- categories << category
42
- # Note: [category] is an optimization, since I need an array
43
- # of categories.
44
- # It's faster to just package it in an array on loading
45
- # Picky than doing it over and over with each query.
46
- #
47
- category_hash[category.name] = [category]
48
- end
49
-
50
- # Return all possible combinations for the given token.
51
- #
52
- # This checks if it needs to also search through similar
53
- # tokens, if for example, the token is one with ~.
54
- # If yes, it puts together all solutions.
55
- #
56
- def possible_combinations_for token
57
- token.similar? ? similar_possible_for(token) : possible_for(token)
58
- end
59
- # Gets all similar tokens and puts together the possible combinations
60
- # for each found similar token.
61
- #
62
- def similar_possible_for token
63
- # Get as many similar tokens as necessary
64
- #
65
- tokens = similar_tokens_for token
66
- # possible combinations
67
- #
68
- inject_possible_for tokens
69
- end
70
- def similar_tokens_for token
71
- text = token.text
72
- categories.inject([]) do |result, category|
73
- next_token = token
74
- # Note: We could also break off here if not all the available
75
- # similars are needed.
76
- # Wait for a concrete case that needs this before taking
77
- # action.
78
- #
79
- while next_token = next_token.next_similar_token(category)
80
- result << next_token if next_token && next_token.text != text
81
- end
82
- result
83
- end
84
- end
85
- def inject_possible_for tokens
86
- tokens.inject([]) do |result, token|
87
- possible = possible_categories token
88
- result + possible_for(token, possible)
89
- end
90
- end
91
-
92
- # Returns possible Combinations for the token.
93
- #
94
- # Note: The preselected_categories param is an optimization.
95
- #
96
- # Note: Returns [] if no categories matched (will produce no result).
97
- # Returns nil if this token needs to be removed from the query.
98
- # (Also none of the categories matched, but the ignore unassigned
99
- # tokens option is true)
100
- #
101
- def possible_for token, preselected_categories = nil
102
- possible = (preselected_categories || possible_categories(token)).map { |category| category.combination_for(token) }
103
- possible.compact!
104
- # This is an optimization to mark tokens that are ignored.
105
- #
106
- return if ignore_unassigned_tokens && possible.empty?
107
- possible # wrap in combinations
108
- end
109
- # This returns the possible categories for this token.
110
- # If the user has already preselected a category for this token,
111
- # like "artist:moby", if not just return all for the given token,
112
- # since all are possible.
113
- #
114
- # Note: Once I thought this was called too often. But it is not (18.01.2011).
115
- #
116
- def possible_categories token
117
- user_defined_categories(token) || categories
118
- end
119
- # This returns the array of categories if the user has defined
120
- # an existing category.
121
- #
122
- # Note: Returns nil if the user did not define one
123
- # or if he/she has defined a non-existing one.
124
- #
125
- def user_defined_categories token
126
- category_hash[token.user_defined_category_name]
127
- end
128
-
129
- end
130
-
131
- end