picky 1.4.1 → 1.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (229) hide show
  1. data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
  2. data/lib/picky/application.rb +18 -19
  3. data/lib/picky/cores.rb +1 -1
  4. data/lib/picky/generators/aliases.rb +3 -0
  5. data/lib/picky/index/base.rb +179 -0
  6. data/lib/picky/index/memory.rb +28 -0
  7. data/lib/picky/index/redis.rb +28 -0
  8. data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
  9. data/lib/picky/indexed/indexes.rb +11 -7
  10. data/lib/picky/indexing/indexes.rb +14 -8
  11. data/lib/picky/internals/adapters/rack/base.rb +27 -0
  12. data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
  13. data/lib/picky/internals/adapters/rack/query.rb +63 -0
  14. data/lib/picky/internals/adapters/rack.rb +34 -0
  15. data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
  16. data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
  17. data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
  18. data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
  19. data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
  20. data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
  21. data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
  22. data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
  23. data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
  24. data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
  25. data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
  26. data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
  27. data/lib/picky/internals/generators/base.rb +19 -0
  28. data/lib/picky/internals/generators/partial/default.rb +7 -0
  29. data/lib/picky/internals/generators/partial/none.rb +35 -0
  30. data/lib/picky/internals/generators/partial/strategy.rb +29 -0
  31. data/lib/picky/internals/generators/partial/substring.rb +122 -0
  32. data/lib/picky/internals/generators/partial_generator.rb +19 -0
  33. data/lib/picky/internals/generators/similarity/default.rb +9 -0
  34. data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
  35. data/lib/picky/internals/generators/similarity/none.rb +35 -0
  36. data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
  37. data/lib/picky/internals/generators/similarity_generator.rb +19 -0
  38. data/lib/picky/internals/generators/strategy.rb +18 -0
  39. data/lib/picky/internals/generators/weights/default.rb +9 -0
  40. data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
  41. data/lib/picky/internals/generators/weights/strategy.rb +11 -0
  42. data/lib/picky/internals/generators/weights_generator.rb +19 -0
  43. data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
  44. data/lib/picky/internals/index/backend.rb +113 -0
  45. data/lib/picky/internals/index/file/basic.rb +101 -0
  46. data/lib/picky/internals/index/file/json.rb +38 -0
  47. data/lib/picky/internals/index/file/marshal.rb +38 -0
  48. data/lib/picky/internals/index/file/text.rb +60 -0
  49. data/lib/picky/internals/index/files.rb +24 -0
  50. data/lib/picky/internals/index/redis/basic.rb +77 -0
  51. data/lib/picky/internals/index/redis/list_hash.rb +46 -0
  52. data/lib/picky/internals/index/redis/string_hash.rb +35 -0
  53. data/lib/picky/internals/index/redis.rb +44 -0
  54. data/lib/picky/internals/indexed/bundle/base.rb +72 -0
  55. data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
  56. data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
  57. data/lib/picky/internals/indexed/categories.rb +135 -0
  58. data/lib/picky/internals/indexed/category.rb +90 -0
  59. data/lib/picky/internals/indexed/index.rb +57 -0
  60. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
  61. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
  62. data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
  63. data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
  64. data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
  65. data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
  66. data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
  67. data/lib/picky/internals/indexing/bundle/base.rb +219 -0
  68. data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
  69. data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
  70. data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
  71. data/lib/picky/internals/indexing/categories.rb +42 -0
  72. data/lib/picky/internals/indexing/category.rb +120 -0
  73. data/lib/picky/internals/indexing/index.rb +67 -0
  74. data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
  75. data/lib/picky/internals/query/allocation.rb +88 -0
  76. data/lib/picky/internals/query/allocations.rb +137 -0
  77. data/lib/picky/internals/query/combination.rb +80 -0
  78. data/lib/picky/internals/query/combinations/base.rb +84 -0
  79. data/lib/picky/internals/query/combinations/memory.rb +58 -0
  80. data/lib/picky/internals/query/combinations/redis.rb +59 -0
  81. data/lib/picky/internals/query/indexes.rb +180 -0
  82. data/lib/picky/internals/query/qualifiers.rb +81 -0
  83. data/lib/picky/internals/query/token.rb +215 -0
  84. data/lib/picky/internals/query/tokens.rb +89 -0
  85. data/lib/picky/{query → internals/query}/weights.rb +0 -0
  86. data/lib/picky/internals/results/base.rb +106 -0
  87. data/lib/picky/internals/results/full.rb +17 -0
  88. data/lib/picky/internals/results/live.rb +17 -0
  89. data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
  90. data/lib/picky/internals/tokenizers/base.rb +166 -0
  91. data/lib/picky/internals/tokenizers/index.rb +63 -0
  92. data/lib/picky/internals/tokenizers/query.rb +79 -0
  93. data/lib/picky/loader.rb +148 -112
  94. data/lib/picky/query/base.rb +57 -26
  95. data/lib/picky/query/full.rb +1 -1
  96. data/lib/picky/query/live.rb +1 -1
  97. data/lib/picky/sources/db.rb +27 -6
  98. data/lib/tasks/index.rake +3 -3
  99. data/lib/tasks/try.rake +2 -2
  100. data/spec/lib/aliases_spec.rb +9 -0
  101. data/spec/lib/application_spec.rb +3 -3
  102. data/spec/lib/generators/aliases_spec.rb +1 -0
  103. data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
  104. data/spec/lib/index_bundle_spec.rb +71 -0
  105. data/spec/lib/indexed/indexes_spec.rb +61 -0
  106. data/spec/lib/indexing/indexes_spec.rb +94 -24
  107. data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
  108. data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
  109. data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
  110. data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
  111. data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
  112. data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
  113. data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
  114. data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
  115. data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
  116. data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
  117. data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
  118. data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
  119. data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
  120. data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
  121. data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
  122. data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
  123. data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
  124. data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
  125. data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
  126. data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
  127. data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
  128. data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
  129. data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
  130. data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
  131. data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
  132. data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
  133. data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
  134. data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
  135. data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
  136. data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
  137. data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
  138. data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
  139. data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
  140. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
  141. data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
  142. data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
  143. data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
  144. data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
  145. data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
  146. data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
  147. data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
  148. data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
  149. data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
  150. data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
  151. data/spec/lib/internals/results/base_spec.rb +105 -0
  152. data/spec/lib/internals/results/full_spec.rb +78 -0
  153. data/spec/lib/internals/results/live_spec.rb +88 -0
  154. data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
  155. data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
  156. data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
  157. data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
  158. data/spec/lib/query/allocation_spec.rb +12 -12
  159. data/spec/lib/query/allocations_spec.rb +19 -19
  160. data/spec/lib/query/base_spec.rb +28 -4
  161. data/spec/lib/query/combination_spec.rb +8 -9
  162. data/spec/lib/query/combinations/base_spec.rb +116 -0
  163. data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
  164. data/spec/lib/query/combinations/redis_spec.rb +132 -0
  165. data/spec/lib/query/full_spec.rb +2 -2
  166. data/spec/lib/query/indexes_spec.rb +81 -0
  167. data/spec/lib/query/live_spec.rb +3 -3
  168. data/spec/lib/query/qualifiers_spec.rb +6 -6
  169. data/spec/lib/query/token_spec.rb +38 -38
  170. data/spec/lib/query/tokens_spec.rb +35 -35
  171. data/spec/lib/sources/db_spec.rb +23 -18
  172. metadata +212 -181
  173. data/lib/picky/adapters/rack/base.rb +0 -23
  174. data/lib/picky/adapters/rack/live_parameters.rb +0 -33
  175. data/lib/picky/adapters/rack/query.rb +0 -59
  176. data/lib/picky/adapters/rack.rb +0 -28
  177. data/lib/picky/cacher/convenience.rb +0 -3
  178. data/lib/picky/cacher/generator.rb +0 -15
  179. data/lib/picky/cacher/partial/default.rb +0 -5
  180. data/lib/picky/cacher/partial/none.rb +0 -31
  181. data/lib/picky/cacher/partial/strategy.rb +0 -21
  182. data/lib/picky/cacher/partial/substring.rb +0 -118
  183. data/lib/picky/cacher/partial_generator.rb +0 -15
  184. data/lib/picky/cacher/similarity/default.rb +0 -7
  185. data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
  186. data/lib/picky/cacher/similarity/none.rb +0 -31
  187. data/lib/picky/cacher/similarity/strategy.rb +0 -9
  188. data/lib/picky/cacher/similarity_generator.rb +0 -15
  189. data/lib/picky/cacher/strategy.rb +0 -12
  190. data/lib/picky/cacher/weights/default.rb +0 -7
  191. data/lib/picky/cacher/weights/logarithmic.rb +0 -39
  192. data/lib/picky/cacher/weights/strategy.rb +0 -9
  193. data/lib/picky/cacher/weights_generator.rb +0 -15
  194. data/lib/picky/frontend_adapters/rack.rb +0 -150
  195. data/lib/picky/index/bundle.rb +0 -54
  196. data/lib/picky/index/file/basic.rb +0 -97
  197. data/lib/picky/index/file/json.rb +0 -34
  198. data/lib/picky/index/file/marshal.rb +0 -34
  199. data/lib/picky/index/file/text.rb +0 -56
  200. data/lib/picky/index/files.rb +0 -118
  201. data/lib/picky/index_api.rb +0 -175
  202. data/lib/picky/indexed/bundle.rb +0 -54
  203. data/lib/picky/indexed/categories.rb +0 -131
  204. data/lib/picky/indexed/category.rb +0 -85
  205. data/lib/picky/indexed/index.rb +0 -39
  206. data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
  207. data/lib/picky/indexing/bundle.rb +0 -213
  208. data/lib/picky/indexing/categories.rb +0 -38
  209. data/lib/picky/indexing/category.rb +0 -117
  210. data/lib/picky/indexing/index.rb +0 -55
  211. data/lib/picky/query/allocation.rb +0 -82
  212. data/lib/picky/query/allocations.rb +0 -130
  213. data/lib/picky/query/combination.rb +0 -74
  214. data/lib/picky/query/combinations.rb +0 -105
  215. data/lib/picky/query/qualifiers.rb +0 -77
  216. data/lib/picky/query/token.rb +0 -202
  217. data/lib/picky/query/tokens.rb +0 -86
  218. data/lib/picky/query/weigher.rb +0 -165
  219. data/lib/picky/results/base.rb +0 -102
  220. data/lib/picky/results/full.rb +0 -13
  221. data/lib/picky/results/live.rb +0 -13
  222. data/lib/picky/tokenizers/base.rb +0 -161
  223. data/lib/picky/tokenizers/index.rb +0 -58
  224. data/lib/picky/tokenizers/query.rb +0 -74
  225. data/spec/lib/cacher/partial/default_spec.rb +0 -15
  226. data/spec/lib/cacher/partial/none_spec.rb +0 -17
  227. data/spec/lib/cacher/weights_generator_spec.rb +0 -21
  228. data/spec/lib/results/base_spec.rb +0 -257
  229. data/spec/lib/results/live_spec.rb +0 -15
@@ -1,97 +0,0 @@
1
- module Index
2
-
3
- # Handles all aspects of index files, such as dumping/loading.
4
- #
5
- module File
6
-
7
- # Base class for all index files.
8
- #
9
- # Provides necessary helper methods for its
10
- # subclasses.
11
- # Not directly useable, as it does not provide
12
- # dump/load methods.
13
- #
14
- class Basic
15
-
16
- attr_reader :cache_path
17
-
18
- # An index cache takes a path, without file extension,
19
- # which will be provided by the subclasses.
20
- #
21
- def initialize cache_path
22
- @cache_path = "#{cache_path}.#{extension}"
23
- end
24
-
25
- # The default extension for index files is "index".
26
- #
27
- def extension
28
- :index
29
- end
30
-
31
- # Will copy the index file to a location that
32
- # is in a directory named "backup" right under
33
- # the directory the index file is in.
34
- #
35
- def backup
36
- prepare_backup backup_directory
37
- FileUtils.cp cache_path, target, verbose: true
38
- end
39
- # The backup directory of this file.
40
- # Equal to the file's dirname plus /backup
41
- #
42
- def backup_directory
43
- ::File.join ::File.dirname(cache_path), 'backup'
44
- end
45
- # Prepares the backup directory for the file.
46
- #
47
- def prepare_backup target
48
- FileUtils.mkdir target unless Dir.exists?(target)
49
- end
50
-
51
- # Copies the file from its backup location back
52
- # to the original location.
53
- #
54
- def restore
55
- FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
56
- end
57
- # The backup filename.
58
- #
59
- def backup_file_path_of path
60
- dir, name = ::File.split path
61
- ::File.join dir, 'backup', name
62
- end
63
-
64
- # Deletes the file.
65
- #
66
- def delete
67
- `rm -Rf #{cache_path}`
68
- end
69
-
70
- # Checks.
71
- #
72
-
73
- # Is this cache file suspiciously small?
74
- # (less than 8 Bytes of size)
75
- #
76
- def cache_small?
77
- size_of(cache_path) < 8
78
- end
79
- # Is the cache ok? (existing and larger than
80
- # zero Bytes in size)
81
- #
82
- # A small cache is still ok.
83
- #
84
- def cache_ok?
85
- size_of(cache_path) > 0
86
- end
87
- # Extracts the size of the file in Bytes.
88
- #
89
- def size_of path
90
- `ls -l #{path} | awk '{print $5}'`.to_i
91
- end
92
-
93
- end
94
-
95
- end
96
-
97
- end
@@ -1,34 +0,0 @@
1
- module Index
2
-
3
- module File
4
-
5
- # Index files dumped in the JSON format.
6
- #
7
- class JSON < Basic
8
-
9
- # Uses the extension "json".
10
- #
11
- def extension
12
- :json
13
- end
14
- # Loads the index hash from json format.
15
- #
16
- def load
17
- Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true
18
- end
19
- # Dumps the index hash in json format.
20
- #
21
- def dump hash
22
- hash.dump_json cache_path
23
- end
24
- # A json file does not provide retrieve functionality.
25
- #
26
- def retrieve
27
- raise "Can't retrieve from JSON file. Use text file."
28
- end
29
-
30
- end
31
-
32
- end
33
-
34
- end
@@ -1,34 +0,0 @@
1
- module Index
2
-
3
- module File
4
-
5
- # Index data in the Ruby Marshal format.
6
- #
7
- class Marshal < Basic
8
-
9
- # Uses the extension "dump".
10
- #
11
- def extension
12
- :dump
13
- end
14
- # Loads the index hash from marshal format.
15
- #
16
- def load
17
- ::Marshal.load ::File.open(cache_path, 'r:binary')
18
- end
19
- # Dumps the index hash in marshal format.
20
- #
21
- def dump hash
22
- hash.dump_marshalled cache_path
23
- end
24
- # A marshal file does not provide retrieve functionality.
25
- #
26
- def retrieve
27
- raise "Can't retrieve from marshalled file. Use text file."
28
- end
29
-
30
- end
31
-
32
- end
33
-
34
- end
@@ -1,56 +0,0 @@
1
- module Index
2
-
3
- module File
4
-
5
- # Index data dumped in the text format.
6
- #
7
- class Text < Basic
8
-
9
- # Uses the extension "txt".
10
- #
11
- def extension
12
- :txt
13
- end
14
- # Text files are used exclusively for
15
- # prepared data files.
16
- #
17
- def load
18
- raise "Can't load from text file. Use JSON or Marshal."
19
- end
20
- # Text files are used exclusively for
21
- # prepared data files.
22
- #
23
- def dump hash
24
- raise "Can't dump to text file. Use JSON or Marshal."
25
- end
26
-
27
- # Retrieves prepared index data in the form
28
- # * id,data\n
29
- # * id,data\n
30
- # * id,data\n
31
- #
32
- # Yields an id string and a symbol token.
33
- #
34
- def retrieve
35
- id = nil
36
- token = nil
37
- ::File.open(cache_path, 'r:binary') do |file|
38
- file.each_line do |line|
39
- id, token = line.split ?,, 2
40
- yield id, (token.chomp! || token).to_sym
41
- end
42
- end
43
- end
44
-
45
- #
46
- #
47
- def open_for_indexing &block
48
- ::File.open cache_path, 'w:binary', &block
49
- end
50
-
51
-
52
- end
53
-
54
- end
55
-
56
- end
@@ -1,118 +0,0 @@
1
- module Index
2
-
3
- class Files
4
-
5
- attr_reader :bundle_name
6
- attr_reader :prepared, :index, :weights, :similarity, :configuration
7
-
8
- delegate :index_name, :category_name, :to => :@config
9
-
10
- def initialize bundle_name, config
11
- @bundle_name = bundle_name
12
- @config = config
13
-
14
- # Note: We marshal the similarity, as the
15
- # Yajl json lib cannot load symbolized
16
- # values, just keys.
17
- #
18
- @prepared = File::Text.new config.prepared_index_path
19
- @index = File::JSON.new config.index_path(bundle_name, :index)
20
- @weights = File::JSON.new config.index_path(bundle_name, :weights)
21
- @similarity = File::Marshal.new config.index_path(bundle_name, :similarity)
22
- @configuration = File::JSON.new config.index_path(bundle_name, :configuration)
23
- end
24
-
25
- # Delegators.
26
- #
27
-
28
- # Retrieving data.
29
- #
30
- def retrieve &block
31
- prepared.retrieve &block
32
- end
33
-
34
- # Dumping.
35
- #
36
- def dump_index index_hash
37
- index.dump index_hash
38
- end
39
- def dump_weights weights_hash
40
- weights.dump weights_hash
41
- end
42
- def dump_similarity similarity_hash
43
- similarity.dump similarity_hash
44
- end
45
- def dump_configuration configuration_hash
46
- configuration.dump configuration_hash
47
- end
48
-
49
- # Loading.
50
- #
51
- def load_index
52
- index.load
53
- end
54
- def load_similarity
55
- similarity.load
56
- end
57
- def load_weights
58
- weights.load
59
- end
60
- def load_configuration
61
- configuration.load
62
- end
63
-
64
- # Cache ok?
65
- #
66
- def index_cache_ok?
67
- index.cache_ok?
68
- end
69
- def similarity_cache_ok?
70
- similarity.cache_ok?
71
- end
72
- def weights_cache_ok?
73
- weights.cache_ok?
74
- end
75
-
76
- # Cache small?
77
- #
78
- def index_cache_small?
79
- index.cache_small?
80
- end
81
- def similarity_cache_small?
82
- similarity.cache_small?
83
- end
84
- def weights_cache_small?
85
- weights.cache_small?
86
- end
87
-
88
- # Copies the indexes to the "backup" directory.
89
- #
90
- def backup
91
- index.backup
92
- weights.backup
93
- similarity.backup
94
- configuration.backup
95
- end
96
-
97
- # Restores the indexes from the "backup" directory.
98
- #
99
- def restore
100
- index.restore
101
- weights.restore
102
- similarity.restore
103
- configuration.restore
104
- end
105
-
106
-
107
- # Delete all index files.
108
- #
109
- def delete
110
- index.delete
111
- weights.delete
112
- similarity.delete
113
- configuration.delete
114
- end
115
-
116
- end
117
-
118
- end
@@ -1,175 +0,0 @@
1
- # This class defines the indexing and index API that is exposed to the user
2
- # as the #index method inside the Application class.
3
- #
4
- # It provides a single front for both indexing and index options. We suggest to always use the index API.
5
- #
6
- # Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
7
- #
8
- class IndexAPI
9
-
10
- attr_reader :name, :indexing, :indexed
11
-
12
- # Create a new index with a given source.
13
- #
14
- # === Parameters
15
- # * name: A name that will be used for the index directory and in the Picky front end.
16
- # * source: Where the data comes from, e.g. Sources::CSV.new(...)
17
- #
18
- # === Options
19
- # * result_identifier: Use if you'd like a different identifier/name in the results than the name of the index.
20
- # * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
21
- #
22
- def initialize name, source, options = {}
23
- @name = name
24
- @indexing = Indexing::Index.new name, source, options
25
- @indexed = Indexed::Index.new name, options
26
-
27
- # Centralized registry.
28
- #
29
- Indexes.register self
30
- end
31
-
32
- # Defines a searchable category on the index.
33
- #
34
- # === Parameters
35
- # * category_name: This identifier is used in the front end, but also to categorize query text. For example, “title:hobbit” will narrow the hobbit query on categories with the identifier :title.
36
- #
37
- # === Options
38
- # * partial: Partial::None.new or Partial::Substring.new(from: starting_char, to: ending_char). Default is Partial::Substring.new(from: -3, to: -1).
39
- # * similarity: Similarity::None.new or Similarity::Phonetic.new(similar_words_searched). Default is Similarity::None.new.
40
- # * qualifiers: An array of qualifiers with which you can define which category you’d like to search, for example “title:hobbit” will search for hobbit in just title categories. Example: qualifiers: [:t, :titre, :title] (use it for example with multiple languages). Default is the name of the category.
41
- # * qualifier: Convenience option if you just need a single qualifier, see above. Example: qualifier: :title. Default is the name of the category.
42
- # * source: Use a different source than the index uses. If you think you need that, there might be a better solution to your problem. Please post to the mailing list first with your application.rb :)
43
- # * from: Take the data from the data category with this name. Example: You have a source Sources::CSV.new(:title, file:'some_file.csv') but you want the category to be called differently. Then you use from: define_category(:similar_title, :from => :title).
44
- #
45
- def define_category category_name, options = {}
46
- category_name = category_name.to_sym
47
-
48
- indexing_category = indexing.define_category category_name, options
49
- indexed_category = indexed.define_category category_name, options
50
-
51
- yield indexing_category, indexed_category if block_given?
52
-
53
- self
54
- end
55
- alias category define_category
56
-
57
- # HIGHLY EXPERIMENTAL Try if you feel "beta" ;)
58
- #
59
- # Make this category range searchable with a fixed range. If you need other ranges, define another category with a different range value.
60
- #
61
- # Example:
62
- # You have data values inside 1..100, and you want to have Picky return
63
- # not only the results for 47 if you search for 47, but also results for
64
- # 45, 46, or 47.2, 48.9, in a range of 2 around 47, so (45..49).
65
- #
66
- # Then you use:
67
- # my_index.define_ranged_category :values_inside_1_100, 2
68
- #
69
- # Optionally, you give it a precision value to reduce the error margin
70
- # around 47 (Picky is a bit liberal).
71
- # my_index.define_ranged_category :values_inside_1_100, 2, precision: 5
72
- #
73
- # This will force Picky to maximally be wrong 5% of the given range value
74
- # (5% of 2 = 0.1) instead of the default 20% (20% of 2 = 0.4).
75
- #
76
- # We suggest not to use much more than 5 as a higher precision is more performance intensive for less and less precision gain.
77
- #
78
- # == Protip 1
79
- #
80
- # Create two ranged categories to make an area search:
81
- # index.define_ranged_category :x, 1
82
- # index.define_ranged_category :y, 1
83
- #
84
- # Search for it using for example:
85
- # x:133, y:120
86
- #
87
- # This will search this square area (* = 133, 120: The "search" point entered):
88
- #
89
- # 132 134
90
- # | |
91
- # --|---------|-- 121
92
- # | |
93
- # | * |
94
- # | |
95
- # --|---------|-- 119
96
- # | |
97
- #
98
- # Note: The area does not need to be square, but can be rectangular.
99
- #
100
- # == Protip 2
101
- #
102
- # Create three ranged categories to make a volume search.
103
- #
104
- # Or go crazy and use 4 ranged categories for a space/time search! ;)
105
- #
106
- # === Parameters
107
- # * category_name: The category_name as used in #define_category.
108
- # * range: The range (in the units of your data values) around the query point where we search for results.
109
- #
110
- # -----|<- range ->*------------|-----
111
- #
112
- # === Options
113
- # * precision: Default is 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
114
- # * ... all options of #define_category.
115
- #
116
- def define_ranged_category category_name, range, options = {}
117
- precision = options[:precision]
118
-
119
- options = { partial: Partial::None.new }.merge options
120
-
121
- define_category category_name, options do |indexing, indexed|
122
- indexing.source = Sources::Wrappers::Location.new indexing, grid: range, precision: precision
123
- indexing.tokenizer = Tokenizers::Index.new
124
-
125
- exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: range, precision: precision
126
- indexed.exact = exact_bundle
127
- indexed.partial = exact_bundle # A partial token also uses the exact index.
128
- end
129
- end
130
- alias ranged_category define_ranged_category
131
-
132
- # HIGHLY EXPERIMENTAL Not correctly working yet. Try it if you feel "beta".
133
- #
134
- # Also a range search (see #define_ranged_category), but on the earth's surface.
135
- #
136
- # Parameters:
137
- # * name: The name as used in #define_category.
138
- # * radius: The distance (in km) around the query point which we search for results.
139
- #
140
- # Note: Picky uses a square, not a circle. We hope that's ok for most usages.
141
- #
142
- # -----------------------------
143
- # | |
144
- # | |
145
- # | |
146
- # | |
147
- # | |
148
- # | *<- radius ->|
149
- # | |
150
- # | |
151
- # | |
152
- # | |
153
- # | |
154
- # -----------------------------
155
- #
156
- # Options
157
- # * precision: Default 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
158
- # * from: The data category to take the data for this category from.
159
- #
160
- # TODO Redo. Will have to write a wrapper that combines two categories that are indexed simultaneously.
161
- #
162
- def define_map_location name, radius, options = {} # :nodoc:
163
- # The radius is given as if all the locations were on the equator.
164
- #
165
- # TODO Need to recalculate since not many locations are on the equator ;) This is just a prototype.
166
- #
167
- # This calculates km -> longitude (degrees).
168
- #
169
- # A degree on the equator is equal to ~111,319.9 meters.
170
- # So a km on the equator is equal to 0.00898312 degrees.
171
- #
172
- define_ranged_category name, radius * 0.00898312, options
173
- end
174
- alias map_location define_map_location
175
- end
@@ -1,54 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- module Indexed # :nodoc:all
4
-
5
- # This is the _actual_ index.
6
- #
7
- # Handles exact/partial index, weights index, and similarity index.
8
- #
9
- # Delegates file handling and checking to an *Indexed*::*Files* object.
10
- #
11
- class Bundle < Index::Bundle
12
-
13
- # Get the ids for the given symbol.
14
- #
15
- def ids sym
16
- @index[sym] || []
17
- end
18
- # Get a weight for the given symbol.
19
- #
20
- def weight sym
21
- @weights[sym]
22
- end
23
-
24
- # Loads all indexes.
25
- #
26
- def load
27
- load_index
28
- load_weights
29
- load_similarity
30
- load_configuration
31
- end
32
- # Loads the core index.
33
- #
34
- def load_index
35
- self.index = files.load_index
36
- end
37
- # Loads the weights index.
38
- #
39
- def load_weights
40
- self.weights = files.load_weights
41
- end
42
- # Loads the similarity index.
43
- #
44
- def load_similarity
45
- self.similarity = files.load_similarity
46
- end
47
- # Loads the configuration.
48
- #
49
- def load_configuration
50
- self.configuration = files.load_configuration
51
- end
52
-
53
- end
54
- end
@@ -1,131 +0,0 @@
1
- module Indexed
2
-
3
- class Categories
4
-
5
- attr_reader :categories, :category_hash, :ignore_unassigned_tokens
6
-
7
- each_delegate :load_from_cache,
8
- :to => :categories
9
-
10
- # A list of indexed categories.
11
- #
12
- # Options:
13
- # * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
14
- # The default behaviour is that if a token does not match to
15
- # any category, the query will not return anything (since a
16
- # single token cannot be matched). If you set this option to
17
- # true, any token that cannot be matched to a category will be
18
- # simply ignored.
19
- # Use this if only a few matched words are important, like for
20
- # example of the query "Jonathan Myers 86455 Las Cucarachas"
21
- # you only want to match the zipcode, to have the search engine
22
- # display advertisements on the side for the zipcode.
23
- # Nifty! :)
24
- #
25
- def initialize options = {}
26
- clear
27
-
28
- @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
29
- end
30
-
31
- # Clears both the array of categories and the hash of categories.
32
- #
33
- def clear
34
- @categories = []
35
- @category_hash = {}
36
- end
37
-
38
- # Add the given category to the list of categories.
39
- #
40
- def << category
41
- categories << category
42
- # Note: [category] is an optimization, since I need an array
43
- # of categories.
44
- # It's faster to just package it in an array on loading
45
- # Picky than doing it over and over with each query.
46
- #
47
- category_hash[category.name] = [category]
48
- end
49
-
50
- # Return all possible combinations for the given token.
51
- #
52
- # This checks if it needs to also search through similar
53
- # tokens, if for example, the token is one with ~.
54
- # If yes, it puts together all solutions.
55
- #
56
- def possible_combinations_for token
57
- token.similar? ? similar_possible_for(token) : possible_for(token)
58
- end
59
- # Gets all similar tokens and puts together the possible combinations
60
- # for each found similar token.
61
- #
62
- def similar_possible_for token
63
- # Get as many similar tokens as necessary
64
- #
65
- tokens = similar_tokens_for token
66
- # possible combinations
67
- #
68
- inject_possible_for tokens
69
- end
70
- def similar_tokens_for token
71
- text = token.text
72
- categories.inject([]) do |result, category|
73
- next_token = token
74
- # Note: We could also break off here if not all the available
75
- # similars are needed.
76
- # Wait for a concrete case that needs this before taking
77
- # action.
78
- #
79
- while next_token = next_token.next_similar_token(category)
80
- result << next_token if next_token && next_token.text != text
81
- end
82
- result
83
- end
84
- end
85
- def inject_possible_for tokens
86
- tokens.inject([]) do |result, token|
87
- possible = possible_categories token
88
- result + possible_for(token, possible)
89
- end
90
- end
91
-
92
- # Returns possible Combinations for the token.
93
- #
94
- # Note: The preselected_categories param is an optimization.
95
- #
96
- # Note: Returns [] if no categories matched (will produce no result).
97
- # Returns nil if this token needs to be removed from the query.
98
- # (Also none of the categories matched, but the ignore unassigned
99
- # tokens option is true)
100
- #
101
- def possible_for token, preselected_categories = nil
102
- possible = (preselected_categories || possible_categories(token)).map { |category| category.combination_for(token) }
103
- possible.compact!
104
- # This is an optimization to mark tokens that are ignored.
105
- #
106
- return if ignore_unassigned_tokens && possible.empty?
107
- possible # wrap in combinations
108
- end
109
- # This returns the possible categories for this token.
110
- # If the user has already preselected a category for this token,
111
- # like "artist:moby", return just that category. If not, return all
112
- # categories, since all are possible.
113
- #
114
- # Note: Once I thought this was called too often. But it is not (18.01.2011).
115
- #
116
- def possible_categories token
117
- user_defined_categories(token) || categories
118
- end
119
- # This returns the array of categories if the user has defined
120
- # an existing category.
121
- #
122
- # Note: Returns nil if the user did not define one
123
- # or if he/she has defined a non-existing one.
124
- #
125
- def user_defined_categories token
126
- category_hash[token.user_defined_category_name]
127
- end
128
-
129
- end
130
-
131
- end