picky 0.0.0 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. data/bin/picky +14 -0
  2. data/lib/bundling.rb +10 -0
  3. data/lib/constants.rb +9 -0
  4. data/lib/deployment.rb +212 -0
  5. data/lib/picky/application.rb +40 -0
  6. data/lib/picky/cacher/convenience.rb +3 -0
  7. data/lib/picky/cacher/generator.rb +17 -0
  8. data/lib/picky/cacher/partial/default.rb +7 -0
  9. data/lib/picky/cacher/partial/none.rb +19 -0
  10. data/lib/picky/cacher/partial/strategy.rb +7 -0
  11. data/lib/picky/cacher/partial/subtoken.rb +91 -0
  12. data/lib/picky/cacher/partial_generator.rb +15 -0
  13. data/lib/picky/cacher/similarity/default.rb +7 -0
  14. data/lib/picky/cacher/similarity/double_levenshtone.rb +73 -0
  15. data/lib/picky/cacher/similarity/none.rb +25 -0
  16. data/lib/picky/cacher/similarity/strategy.rb +7 -0
  17. data/lib/picky/cacher/similarity_generator.rb +15 -0
  18. data/lib/picky/cacher/weights/default.rb +7 -0
  19. data/lib/picky/cacher/weights/logarithmic.rb +39 -0
  20. data/lib/picky/cacher/weights/strategy.rb +7 -0
  21. data/lib/picky/cacher/weights_generator.rb +15 -0
  22. data/lib/picky/configuration/configuration.rb +13 -0
  23. data/lib/picky/configuration/field.rb +68 -0
  24. data/lib/picky/configuration/indexes.rb +60 -0
  25. data/lib/picky/configuration/queries.rb +32 -0
  26. data/lib/picky/configuration/type.rb +52 -0
  27. data/lib/picky/cores.rb +101 -0
  28. data/lib/picky/db/configuration.rb +23 -0
  29. data/lib/picky/ext/ruby19/extconf.rb +7 -0
  30. data/lib/picky/ext/ruby19/performant.c +339 -0
  31. data/lib/picky/extensions/array.rb +45 -0
  32. data/lib/picky/extensions/hash.rb +11 -0
  33. data/lib/picky/extensions/module.rb +15 -0
  34. data/lib/picky/extensions/symbol.rb +18 -0
  35. data/lib/picky/generator.rb +156 -0
  36. data/lib/picky/helpers/cache.rb +23 -0
  37. data/lib/picky/helpers/gc.rb +11 -0
  38. data/lib/picky/helpers/measuring.rb +45 -0
  39. data/lib/picky/helpers/search.rb +27 -0
  40. data/lib/picky/index/bundle.rb +328 -0
  41. data/lib/picky/index/category.rb +109 -0
  42. data/lib/picky/index/combined.rb +38 -0
  43. data/lib/picky/index/type.rb +30 -0
  44. data/lib/picky/indexers/base.rb +77 -0
  45. data/lib/picky/indexers/default.rb +3 -0
  46. data/lib/picky/indexers/field.rb +13 -0
  47. data/lib/picky/indexers/no_source_specified_error.rb +5 -0
  48. data/lib/picky/indexers/solr.rb +60 -0
  49. data/lib/picky/indexes.rb +180 -0
  50. data/lib/picky/initializers/ext.rb +6 -0
  51. data/lib/picky/initializers/mysql.rb +22 -0
  52. data/lib/picky/loader.rb +287 -0
  53. data/lib/picky/loggers/search.rb +19 -0
  54. data/lib/picky/performant/array.rb +23 -0
  55. data/lib/picky/query/allocation.rb +82 -0
  56. data/lib/picky/query/allocations.rb +131 -0
  57. data/lib/picky/query/base.rb +124 -0
  58. data/lib/picky/query/combination.rb +69 -0
  59. data/lib/picky/query/combinations.rb +106 -0
  60. data/lib/picky/query/combinator.rb +92 -0
  61. data/lib/picky/query/full.rb +15 -0
  62. data/lib/picky/query/live.rb +22 -0
  63. data/lib/picky/query/qualifiers.rb +73 -0
  64. data/lib/picky/query/solr.rb +77 -0
  65. data/lib/picky/query/token.rb +215 -0
  66. data/lib/picky/query/tokens.rb +102 -0
  67. data/lib/picky/query/weigher.rb +159 -0
  68. data/lib/picky/query/weights.rb +55 -0
  69. data/lib/picky/rack/harakiri.rb +37 -0
  70. data/lib/picky/results/base.rb +103 -0
  71. data/lib/picky/results/full.rb +19 -0
  72. data/lib/picky/results/live.rb +19 -0
  73. data/lib/picky/routing.rb +165 -0
  74. data/lib/picky/signals.rb +11 -0
  75. data/lib/picky/solr/schema_generator.rb +73 -0
  76. data/lib/picky/sources/base.rb +19 -0
  77. data/lib/picky/sources/csv.rb +30 -0
  78. data/lib/picky/sources/db.rb +77 -0
  79. data/lib/picky/tokenizers/base.rb +130 -0
  80. data/lib/picky/tokenizers/default.rb +3 -0
  81. data/lib/picky/tokenizers/index.rb +73 -0
  82. data/lib/picky/tokenizers/query.rb +70 -0
  83. data/lib/picky/umlaut_substituter.rb +21 -0
  84. data/lib/picky-tasks.rb +6 -0
  85. data/lib/picky.rb +18 -0
  86. data/lib/tasks/application.rake +5 -0
  87. data/lib/tasks/cache.rake +53 -0
  88. data/lib/tasks/framework.rake +4 -0
  89. data/lib/tasks/index.rake +29 -0
  90. data/lib/tasks/server.rake +48 -0
  91. data/lib/tasks/shortcuts.rake +13 -0
  92. data/lib/tasks/solr.rake +36 -0
  93. data/lib/tasks/spec.rake +11 -0
  94. data/lib/tasks/statistics.rake +13 -0
  95. data/lib/tasks/try.rake +29 -0
  96. data/prototype_project/Gemfile +23 -0
  97. data/prototype_project/Rakefile +1 -0
  98. data/prototype_project/app/README +6 -0
  99. data/prototype_project/app/application.rb +50 -0
  100. data/prototype_project/app/application.ru +29 -0
  101. data/prototype_project/app/db.yml +10 -0
  102. data/prototype_project/app/logging.rb +20 -0
  103. data/prototype_project/app/unicorn.ru +10 -0
  104. data/prototype_project/log/README +1 -0
  105. data/prototype_project/script/console +34 -0
  106. data/prototype_project/tmp/README +0 -0
  107. data/prototype_project/tmp/pids/README +0 -0
  108. data/spec/ext/performant_spec.rb +64 -0
  109. data/spec/lib/application_spec.rb +61 -0
  110. data/spec/lib/cacher/partial/subtoken_spec.rb +89 -0
  111. data/spec/lib/cacher/partial_generator_spec.rb +35 -0
  112. data/spec/lib/cacher/similarity/double_levenshtone_spec.rb +60 -0
  113. data/spec/lib/cacher/similarity/none_spec.rb +23 -0
  114. data/spec/lib/cacher/similarity_generator_spec.rb +22 -0
  115. data/spec/lib/cacher/weights/logarithmic_spec.rb +30 -0
  116. data/spec/lib/cacher/weights_generator_spec.rb +21 -0
  117. data/spec/lib/configuration/configuration_spec.rb +38 -0
  118. data/spec/lib/configuration/type_spec.rb +49 -0
  119. data/spec/lib/configuration_spec.rb +8 -0
  120. data/spec/lib/cores_spec.rb +65 -0
  121. data/spec/lib/extensions/array_spec.rb +37 -0
  122. data/spec/lib/extensions/hash_spec.rb +11 -0
  123. data/spec/lib/extensions/module_spec.rb +27 -0
  124. data/spec/lib/extensions/symbol_spec.rb +85 -0
  125. data/spec/lib/generator_spec.rb +135 -0
  126. data/spec/lib/helpers/cache_spec.rb +35 -0
  127. data/spec/lib/helpers/gc_spec.rb +71 -0
  128. data/spec/lib/helpers/measuring_spec.rb +18 -0
  129. data/spec/lib/helpers/search_spec.rb +50 -0
  130. data/spec/lib/index/bundle_partial_generation_speed_spec.rb +47 -0
  131. data/spec/lib/index/bundle_spec.rb +260 -0
  132. data/spec/lib/index/category_spec.rb +203 -0
  133. data/spec/lib/indexers/base_spec.rb +73 -0
  134. data/spec/lib/indexers/field_spec.rb +20 -0
  135. data/spec/lib/loader_spec.rb +48 -0
  136. data/spec/lib/loggers/search_spec.rb +19 -0
  137. data/spec/lib/performant/array_spec.rb +13 -0
  138. data/spec/lib/query/allocation_spec.rb +194 -0
  139. data/spec/lib/query/allocations_spec.rb +336 -0
  140. data/spec/lib/query/base_spec.rb +104 -0
  141. data/spec/lib/query/combination_spec.rb +90 -0
  142. data/spec/lib/query/combinations_spec.rb +83 -0
  143. data/spec/lib/query/combinator_spec.rb +112 -0
  144. data/spec/lib/query/full_spec.rb +22 -0
  145. data/spec/lib/query/live_spec.rb +61 -0
  146. data/spec/lib/query/qualifiers_spec.rb +31 -0
  147. data/spec/lib/query/solr_spec.rb +51 -0
  148. data/spec/lib/query/token_spec.rb +297 -0
  149. data/spec/lib/query/tokens_spec.rb +189 -0
  150. data/spec/lib/query/weights_spec.rb +47 -0
  151. data/spec/lib/results/base_spec.rb +233 -0
  152. data/spec/lib/routing_spec.rb +318 -0
  153. data/spec/lib/solr/schema_generator_spec.rb +42 -0
  154. data/spec/lib/sources/db_spec.rb +91 -0
  155. data/spec/lib/tokenizers/base_spec.rb +61 -0
  156. data/spec/lib/tokenizers/index_spec.rb +51 -0
  157. data/spec/lib/tokenizers/query_spec.rb +105 -0
  158. data/spec/lib/umlaut_substituter_spec.rb +84 -0
  159. data/spec/specific/speed_spec.rb +55 -0
  160. metadata +371 -15
  161. data/README.textile +0 -9
data/bin/picky ADDED
@@ -0,0 +1,14 @@
#!/usr/bin/env ruby
#
# Command line entry point: passes the command line arguments (ARGV)
# to a new Picky::Generator.

begin
  require 'picky/generator'
rescue LoadError
  # The library is not on the load path: add the lib directory that sits
  # two levels above this file (if it exists and is not yet listed) and retry.
  require 'rubygems'
  lib_directory = File.expand_path('../../lib', __FILE__)
  if File.directory?(lib_directory) && !$:.include?(lib_directory)
    $:.unshift(lib_directory)
  end
  require 'picky/generator'
end

Picky::Generator.new.generate ARGV
data/lib/bundling.rb ADDED
@@ -0,0 +1,10 @@
# TODO Remove?
#
# Loads Bundler (retrying through RubyGems when the first require fails),
# then calls Bundler.setup with SEARCH_ENVIRONMENT and Bundler.require.
#
begin
  require 'bundler'
rescue LoadError
  require 'rubygems'
  require 'bundler'
end

Bundler.setup(SEARCH_ENVIRONMENT)
Bundler.require
data/lib/constants.rb ADDED
@@ -0,0 +1,9 @@
# Set constants.
#

# Use rack's environment for the search engine, unless SEARCH_ENV
# has already been set explicitly.
#
ENV['SEARCH_ENV'] = ENV['RACK_ENV'] unless ENV['SEARCH_ENV']

# Neither constant is overwritten if something defined it beforehand.
#
unless defined? SEARCH_ENVIRONMENT
  SEARCH_ENVIRONMENT = ENV.fetch('SEARCH_ENV', 'development')
end
unless defined? SEARCH_ROOT
  SEARCH_ROOT = Dir.pwd
end
data/lib/deployment.rb ADDED
@@ -0,0 +1,212 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'constants'))
2
+
module Picky
  # Capistrano recipes for a Picky server.
  #
  # Each nested module registers its tasks on a Capistrano configuration
  # object from its +extended+ hook, i.e. when the configuration is
  # extended with the module.
  #
  module Capistrano

    # Include all
    #
    # Defines +execute_rake_task+ on the configuration and then extends it
    # with Standard, Deploy, Caching and Overrides (in that order).
    # Statistics is not included here.
    #
    module All
      def self.extended cap_config

        cap_config.instance_eval do

          # Executes a rake task on the server.
          #
          # Options:
          #  * env: The SEARCH_ENV. Will not set if set explicitly to false. Default: production.
          #  * All other options get passed on to the Capistrano run task.
          #
          def execute_rake_task name, options = {}, &block
            options = options.dup # Leave the caller's hash untouched.
            env = options.delete :env
            env = env == false ? '' : "SEARCH_ENV=#{env || 'production'}"
            run "cd #{current_path}; rake #{name} #{env}", options, &block
          end

        end

        cap_config.extend Standard
        cap_config.extend Deploy
        cap_config.extend Caching
        cap_config.extend Overrides

      end
    end

    # Removes unneeded Rails defaults.
    #
    # Deletes the listed entries from the +tasks+ collections of the
    # deploy and deploy:web namespaces.
    #
    module Overrides
      def self.extended cap_config
        cap_config.instance_eval do

          namespace :deploy do
            tasks.delete :check
            tasks.delete :cold
            tasks.delete :migrations
            tasks.delete :migrate
            tasks.delete :upload

            namespace :web do
              tasks.delete :enable
              tasks.delete :disable
            end
          end

        end
      end
    end

    # Loads the 'standard' and 'deploy' recipes into the configuration.
    #
    module Standard
      def self.extended cap_config
        cap_config.load 'standard'
        cap_config.load 'deploy'
      end
    end

    # Git based deployment tasks (deploy namespace).
    #
    module Deploy

      def self.extended cap_config
        cap_config.instance_eval do

          namespace :deploy do
            %w(start stop).each do |action|
              desc "#{action} the Servers"
              task action.to_sym, :roles => :app do
                execute_rake_task "server:#{action}"
              end
            end
            desc "Restart the Servers sequentially"
            task :restart, :roles => :app do
              # One run call per host, so the servers restart one after another.
              find_servers(:roles => :app).each do |server|
                execute_rake_task "server:restart", :hosts => server.host
              end
            end

            desc 'Hot deploy the code'
            task 'hot', :roles => :app do
              update
              execute_rake_task 'server:usr1', :env => false # No env needed.
            end

            desc "Setup a GitHub-style deployment."
            task :setup, :roles => :app do
              cmd = "git clone #{repository} #{current_path}-clone-cache &&" +
                    "rm #{current_path} &&" +
                    "mv #{current_path}-clone-cache #{current_path}"
              run cmd
            end

            desc "Deploy"
            task :default, :roles => :app do
              update
              restart
            end

            desc "Update the deployed code."
            task :update_code do # code needs to be updated with all servers
              puts "updating code to branch #{branch}"
              cmd = "cd #{current_path} &&" +
                    "git fetch origin &&" +
                    "(git checkout -f #{branch} || git checkout -b #{branch} origin/#{branch}) &&" +
                    "git pull;" +
                    "git branch"
              run cmd
              symlink
            end

            desc "Cleans up the git checkout"
            task :cleanup, :roles => :app do
              run "cd #{current_path} && git gc --aggressive"
            end

            desc "create the symlinks to the shared dirs"
            task :symlink do
              set :user, 'deploy'
              run "rm -rf #{current_path}/log; ln -sf #{shared_path}/log #{current_path}/log"
              run "rm -rf #{current_path}/index; ln -sf #{shared_path}/index #{current_path}/index"
              # link database-config files
              run "ln -sf #{shared_path}/config/base.yml #{current_path}/config/db/base.yml"
              run "ln -sf #{shared_path}/config/source.yml #{current_path}/config/db/source.yml"
              # link unicorn.ru
              run "ln -sf #{shared_path}/config/unicorn.ru #{current_path}/config/unicorn.ru" # TODO change path
            end

            namespace :rollback do
              desc "Rollback to last release."
              task :default, :roles => :app do
                # NOTE(review): +branches+ is not defined in this file; it is
                # presumably provided by the project's deploy configuration.
                set :branch, branches[-2]
                puts "rolling back to branch #{branch}"
                deploy.update_code
              end

              task :code, :roles => :app do
                # implicit
              end
            end
          end

        end
      end

    end

    # Index cache and solr tasks (cache and solr namespaces).
    #
    module Caching

      def self.extended cap_config
        cap_config.instance_eval do
          namespace :cache do
            desc "check the index files if they are ready to be used"
            task :check, :roles => :cache do
              execute_rake_task 'cache:check'
            end

            namespace :structure do
              desc "create the index cache structure"
              task :create, :roles => :app do
                execute_rake_task 'cache:structure:create'
              end
            end
          end
          namespace :solr do
            # NOTE(review): this description is identical to the one of
            # cache:structure:create and looks copy-pasted - confirm.
            desc "create the index cache structure"
            task :index, :roles => :cache do
              execute_rake_task 'solr:index'
            end
            %w|start stop restart|.collect(&:to_sym).each do |action|
              desc "#{action} the solr server"
              task action, :roles => :app do
                # Run the rake task matching this action
                # (solr:start, solr:stop or solr:restart).
                execute_rake_task "solr:#{action}"
              end
            end
          end
        end
      end

    end

    # Tasks to start, stop and restart the statistics server
    # (statistics namespace). Not part of All.
    #
    module Statistics

      def self.extended cap_config
        # Evaluate in the context of the configuration, like the other
        # recipe modules do, so namespace/desc/task are sent to it.
        cap_config.instance_eval do
          namespace :statistics do
            desc 'Start the statistics server'
            task :start, :roles => :statistics do
              set :user, 'root'
              run "daemonize -c #{current_path} -u deploy -v #{current_path}/script/statistics/start production"
            end
            desc 'Stop the statistics server'
            task :stop, :roles => :statistics do
              run "#{current_path}/script/statistics/stop production"
            end
            desc 'Restart the statistics server'
            task :restart, :roles => :statistics do
              stop
              sleep 2
              start
            end
          end
        end
      end

    end

  end
end
@@ -0,0 +1,40 @@
# Rack-style entry point, used purely through class methods.
#
# Holds one routing, one indexes configuration and one queries
# configuration, each memoized in a class-level instance variable.
#
class Application

  # The routing all requests are delegated to (created on first use).
  #
  def self.routing
    @routing ||= Routing.new
  end

  # An application simply delegates to the routing to handle a request.
  #
  def self.call env
    routing.call env
  end

  # Evaluates the block in the context of the indexes configuration,
  # then hands that configuration to ::Indexes and calls its setup.
  #
  # TODO Multiple indexes?
  #
  def self.indexes &block
    configuration = indexes_configuration
    configuration.instance_eval &block
    # TODO Uglyyyyyy.
    ::Indexes.configuration = indexes_configuration
    ::Indexes.setup # TODO Think about setup/reload.
  end

  # Returns the current indexes configuration, creating one if needed.
  #
  def self.indexes_configuration
    return @indexes if @indexes
    reset_indexes
  end

  # Replaces the indexes configuration with a fresh one and returns it.
  #
  def self.reset_indexes
    @indexes = Configuration::Indexes.new # Is instance a problem?
  end

  # Evaluates the block in the context of the queries configuration,
  # then freezes the routing.
  #
  # TODO Multiple Queries?
  #
  def self.queries &block
    configuration = queries_configuration
    configuration.instance_eval &block
    routing.freeze
  end

  # Returns the current queries configuration, creating one if needed.
  #
  def self.queries_configuration
    return @queries if @queries
    reset_queries
  end

  # Replaces the queries configuration with a fresh one (built around
  # the routing) and returns it.
  #
  def self.reset_queries
    @queries = Configuration::Queries.new routing # Is instance a problem?
  end

end
@@ -0,0 +1,3 @@
# Top-level shortcuts for the three Cacher strategy namespaces.
#
Partial    = Cacher::Partial
Similarity = Cacher::Similarity
Weights    = Cacher::Weights
@@ -0,0 +1,17 @@
module Cacher

  # Base class of the cache generators: it only stores the index
  # it is given and exposes it through #index.
  #
  # TODO Rename to index_type.
  #
  class Generator

    # index - the object the generator works on.
    #
    def initialize index
      @index = index
    end

    # Returns the index given on creation.
    #
    def index
      @index
    end

  end

end
@@ -0,0 +1,7 @@
module Cacher
  module Partial
    # The default partial strategy: a Subtoken instance
    # created with :down_to => 1.
    #
    Default = Subtoken.new(:down_to => 1)
  end
end
@@ -0,0 +1,19 @@
module Cacher
  module Partial

    # Partial strategy that does not generate a partial index.
    #
    class None < Strategy

      # Ignores the given index and returns a new, empty hash.
      #
      def generate_from index
        Hash.new
      end

    end

  end
end
@@ -0,0 +1,7 @@
module Cacher
  module Partial
    # Common (empty) superclass of the partial strategies.
    #
    class Strategy
    end
  end
end
@@ -0,0 +1,91 @@
module Cacher

  module Partial

    # The subtoken partial strategy.
    #
    # If given
    #   "florian"
    # will index
    #   "floria"
    #   "flori"
    #   "flor"
    #   "flo"
    #   "fl"
    #   "f"
    # Depending on what the given down_to value is. (Example with down_to == 1)
    #
    class Subtoken < Strategy

      # Note: starting_at returns the value as stored by #initialize,
      # i.e. the given option minus one (or 0 if the option was 0).
      #
      attr_reader :down_to, :starting_at

      # Down to is how far it will go down in generating the subtokens.
      #
      # Examples:
      # With :hello, and starting_at 0
      # * down to == 1: [:hello, :hell, :hel, :he, :h]
      # * down to == 4: [:hello, :hell]
      #
      # With :hello, and starting_at -1
      # * down to == 1: [:hell, :hel, :he, :h]
      # * down to == 4: [:hell]
      #
      # Options:
      #  * down_to:     Default 1.
      #  * starting_at: Default 0.
      #
      def initialize options = {}
        @down_to = options[:down_to] || 1
        starting_at = options[:starting_at] || 0
        # Stored as the end of the range used to clip tokens (see generate_for).
        @starting_at = starting_at.zero? ? 0 : starting_at - 1
      end

      # Generates a partial index from the given index.
      #
      # index - responds to each_key and [], mapping each token to its ids.
      #
      # Returns a new hash of subtoken => ids. The given index and its
      # id arrays are not modified.
      #
      def generate_from index
        result = {}

        # Generate for each key token the subtokens.
        # A progress message is printed every 5000 tokens.
        #
        i = 5000
        index.each_key do |token|
          i -= 1
          if i == 0
            puts "#{Time.now}: Generating partial tokens for token #{token}. This appears every 5000 tokens."
            i = 5000
          end
          generate_for token, index, result
        end

        # Remove duplicate ids.
        #
        # TODO If it is unique for a subtoken, it is
        #      unique for all derived longer tokens.
        #
        result.each_value(&:uniq!)

        result
      end

      private

        # To each shortened token of :test
        # :test, :tes, :te, :t
        # add all ids of :test
        #
        # "token" here means just text.
        #
        # TODO Could be improved by appending the aforegoing ids?
        #
        def generate_for token, index, result
          ids = index[token]
          clipped_token = starting_at.zero? ? token : token[0..starting_at].to_sym
          clipped_token.subtokens(down_to).each do |subtoken|
            if result[subtoken]
              # The stored array is our own copy (see below), so it can be
              # extended in place instead of building a new array each time.
              result[subtoken].concat ids
            else
              result[subtoken] = ids.dup # Copy: never touch the index's array.
            end
          end
        end

    end

  end

end
@@ -0,0 +1,15 @@
module Cacher

  # Generator for partial indexes.
  # Uses a Subtoken strategy (down to 1) unless told otherwise.
  #
  class PartialGenerator < Generator

    # Generate a partial index based on the index of this generator.
    #
    # strategy - responds to generate_from; its result is returned.
    #
    def generate strategy = Partial::Subtoken.new(:down_to => 1)
      strategy.generate_from(index)
    end

  end

end
@@ -0,0 +1,7 @@
module Cacher
  module Similarity
    # The default similarity strategy: a None instance,
    # i.e. no similarity.
    #
    Default = None.new()
  end
end
@@ -0,0 +1,73 @@
# encoding: utf-8
#
module Cacher

  module Similarity

    # DoubleLevenshtone means that it's a combination of
    # * DoubleMetaphone
    # and
    # * Levenshtein
    # :)
    #
    class DoubleLevenshtone < Strategy

      attr_reader :amount

      # amount - how many entries are kept per code (default 10).
      #
      def initialize amount = 10
        @amount = amount
      end

      # Encodes the given symbol using Text::Metaphone.double_metaphone.
      #
      # Returns the first resulting code as a symbol,
      # or nil if no codes were returned.
      #
      def encoded sym
        codes = Text::Metaphone.double_metaphone sym.to_s
        return if codes.empty?
        codes.first.to_sym
      end

      # Generates an index for the given index (in full index style).
      #
      # In the following form:
      # [:meier, :mueller, :peter, :pater] => { :MR => [:meier], :MLR => [:mueller], :PTR => [:peter, :pater] }
      #
      def generate_from index
        sort hashify(index.keys)
      end

      private

        # Sorts the index values in place (by levenshtein, relative to
        # their code) and cuts each of them off after amount entries.
        #
        # Returns the index.
        #
        def sort index
          index.each_pair do |code, similar|
            similar.sort_by_levenshtein! code
            similar.slice! amount, similar.size # size is not perfectly correct, but anyway
          end
          index
        end

        # Hashifies a list of symbols.
        #
        # Where:
        # { encoded_sym => [syms] }
        #
        # Symbols for which encoded returns nil are left out.
        #
        def hashify list
          list.each_with_object({}) do |element, grouped|
            code = encoded element
            (grouped[code] ||= []) << element if code
          end
        end

    end

  end

end
@@ -0,0 +1,25 @@
module Cacher
  module Similarity

    # Similarity strategy that does nothing.
    #
    class None < Strategy

      # Does not encode the given text: always returns nil.
      #
      def encoded text
      end

      # Ignores the given index and returns a new, empty hash.
      #
      def generate_from index
        Hash.new
      end

    end

  end
end
@@ -0,0 +1,7 @@
module Cacher
  module Similarity
    # Common (empty) base class of all similarity strategies.
    #
    class Strategy
    end
  end
end
@@ -0,0 +1,15 @@
module Cacher

  # Generator for similarity indexes.
  # Uses the None strategy (no similarity) unless told otherwise.
  #
  class SimilarityGenerator < Generator

    # Generate a similarity index based on the index of this generator.
    #
    # strategy - responds to generate_from; its result is returned.
    #
    def generate strategy = Similarity::None.new
      strategy.generate_from(index)
    end

  end

end
@@ -0,0 +1,7 @@
module Cacher
  module Weights
    # The default weights strategy: a Logarithmic instance.
    #
    Default = Logarithmic.new()
  end
end
@@ -0,0 +1,39 @@
module Cacher

  module Weights

    # Uses a logarithmic weight.
    # If for a key k we have x ids, the weight is:
    #   w(x): log(x)
    # Special case: If x < 1, then we use 0.
    #
    class Logarithmic < Strategy

      # Generates a weights index from the given index.
      #
      # index - enumerable of [text, ids] pairs.
      #
      # Returns a hash of text => weight, the weight being rounded to
      # two decimals. If a text occurs more than once, its first weight
      # is kept.
      #
      def generate_from index
        index.each_with_object({}) do |(text, ids), weights|
          weight = weight_for ids.size
          next unless weight
          weights[text] ||= weight.round(2)
        end
      end

      # Calculates the weight for the given amount (of ids).
      #
      # The logarithm of 0 would be -Infinity, so for amounts below 1
      # the weight is 0. For an amount of exactly one it is 0.0.
      #
      # The value is needed even if it is 0: it designates that there
      # is a weight.
      #
      def weight_for amount
        amount < 1 ? 0 : Math.log(amount)
      end

    end

  end

end
@@ -0,0 +1,7 @@
module Cacher
  module Weights
    # Common (empty) superclass of the weighing strategies.
    #
    class Strategy
    end
  end
end
@@ -0,0 +1,15 @@
module Cacher

  # Generator for weights indexes.
  # Uses the Logarithmic strategy unless told otherwise.
  #
  class WeightsGenerator < Generator

    # Generate a weights index based on the index of this generator.
    #
    # strategy - responds to generate_from; its result is returned.
    #
    def generate strategy = Weights::Logarithmic.new
      strategy.generate_from(index)
    end

  end

end
@@ -0,0 +1,13 @@
1
+ # module Configuration
2
+ #
3
+ # def self.indexes *types
4
+ # Indexes.new(*types).save
5
+ # end
6
+ # def self.type name, *fields
7
+ # Type.new name, *fields
8
+ # end
9
+ # def self.field name, options = {}
10
+ # Field.new name, options
11
+ # end
12
+ #
13
+ # end