picky 0.0.0 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (161) hide show
  1. data/bin/picky +14 -0
  2. data/lib/bundling.rb +10 -0
  3. data/lib/constants.rb +9 -0
  4. data/lib/deployment.rb +212 -0
  5. data/lib/picky/application.rb +40 -0
  6. data/lib/picky/cacher/convenience.rb +3 -0
  7. data/lib/picky/cacher/generator.rb +17 -0
  8. data/lib/picky/cacher/partial/default.rb +7 -0
  9. data/lib/picky/cacher/partial/none.rb +19 -0
  10. data/lib/picky/cacher/partial/strategy.rb +7 -0
  11. data/lib/picky/cacher/partial/subtoken.rb +91 -0
  12. data/lib/picky/cacher/partial_generator.rb +15 -0
  13. data/lib/picky/cacher/similarity/default.rb +7 -0
  14. data/lib/picky/cacher/similarity/double_levenshtone.rb +73 -0
  15. data/lib/picky/cacher/similarity/none.rb +25 -0
  16. data/lib/picky/cacher/similarity/strategy.rb +7 -0
  17. data/lib/picky/cacher/similarity_generator.rb +15 -0
  18. data/lib/picky/cacher/weights/default.rb +7 -0
  19. data/lib/picky/cacher/weights/logarithmic.rb +39 -0
  20. data/lib/picky/cacher/weights/strategy.rb +7 -0
  21. data/lib/picky/cacher/weights_generator.rb +15 -0
  22. data/lib/picky/configuration/configuration.rb +13 -0
  23. data/lib/picky/configuration/field.rb +68 -0
  24. data/lib/picky/configuration/indexes.rb +60 -0
  25. data/lib/picky/configuration/queries.rb +32 -0
  26. data/lib/picky/configuration/type.rb +52 -0
  27. data/lib/picky/cores.rb +101 -0
  28. data/lib/picky/db/configuration.rb +23 -0
  29. data/lib/picky/ext/ruby19/extconf.rb +7 -0
  30. data/lib/picky/ext/ruby19/performant.c +339 -0
  31. data/lib/picky/extensions/array.rb +45 -0
  32. data/lib/picky/extensions/hash.rb +11 -0
  33. data/lib/picky/extensions/module.rb +15 -0
  34. data/lib/picky/extensions/symbol.rb +18 -0
  35. data/lib/picky/generator.rb +156 -0
  36. data/lib/picky/helpers/cache.rb +23 -0
  37. data/lib/picky/helpers/gc.rb +11 -0
  38. data/lib/picky/helpers/measuring.rb +45 -0
  39. data/lib/picky/helpers/search.rb +27 -0
  40. data/lib/picky/index/bundle.rb +328 -0
  41. data/lib/picky/index/category.rb +109 -0
  42. data/lib/picky/index/combined.rb +38 -0
  43. data/lib/picky/index/type.rb +30 -0
  44. data/lib/picky/indexers/base.rb +77 -0
  45. data/lib/picky/indexers/default.rb +3 -0
  46. data/lib/picky/indexers/field.rb +13 -0
  47. data/lib/picky/indexers/no_source_specified_error.rb +5 -0
  48. data/lib/picky/indexers/solr.rb +60 -0
  49. data/lib/picky/indexes.rb +180 -0
  50. data/lib/picky/initializers/ext.rb +6 -0
  51. data/lib/picky/initializers/mysql.rb +22 -0
  52. data/lib/picky/loader.rb +287 -0
  53. data/lib/picky/loggers/search.rb +19 -0
  54. data/lib/picky/performant/array.rb +23 -0
  55. data/lib/picky/query/allocation.rb +82 -0
  56. data/lib/picky/query/allocations.rb +131 -0
  57. data/lib/picky/query/base.rb +124 -0
  58. data/lib/picky/query/combination.rb +69 -0
  59. data/lib/picky/query/combinations.rb +106 -0
  60. data/lib/picky/query/combinator.rb +92 -0
  61. data/lib/picky/query/full.rb +15 -0
  62. data/lib/picky/query/live.rb +22 -0
  63. data/lib/picky/query/qualifiers.rb +73 -0
  64. data/lib/picky/query/solr.rb +77 -0
  65. data/lib/picky/query/token.rb +215 -0
  66. data/lib/picky/query/tokens.rb +102 -0
  67. data/lib/picky/query/weigher.rb +159 -0
  68. data/lib/picky/query/weights.rb +55 -0
  69. data/lib/picky/rack/harakiri.rb +37 -0
  70. data/lib/picky/results/base.rb +103 -0
  71. data/lib/picky/results/full.rb +19 -0
  72. data/lib/picky/results/live.rb +19 -0
  73. data/lib/picky/routing.rb +165 -0
  74. data/lib/picky/signals.rb +11 -0
  75. data/lib/picky/solr/schema_generator.rb +73 -0
  76. data/lib/picky/sources/base.rb +19 -0
  77. data/lib/picky/sources/csv.rb +30 -0
  78. data/lib/picky/sources/db.rb +77 -0
  79. data/lib/picky/tokenizers/base.rb +130 -0
  80. data/lib/picky/tokenizers/default.rb +3 -0
  81. data/lib/picky/tokenizers/index.rb +73 -0
  82. data/lib/picky/tokenizers/query.rb +70 -0
  83. data/lib/picky/umlaut_substituter.rb +21 -0
  84. data/lib/picky-tasks.rb +6 -0
  85. data/lib/picky.rb +18 -0
  86. data/lib/tasks/application.rake +5 -0
  87. data/lib/tasks/cache.rake +53 -0
  88. data/lib/tasks/framework.rake +4 -0
  89. data/lib/tasks/index.rake +29 -0
  90. data/lib/tasks/server.rake +48 -0
  91. data/lib/tasks/shortcuts.rake +13 -0
  92. data/lib/tasks/solr.rake +36 -0
  93. data/lib/tasks/spec.rake +11 -0
  94. data/lib/tasks/statistics.rake +13 -0
  95. data/lib/tasks/try.rake +29 -0
  96. data/prototype_project/Gemfile +23 -0
  97. data/prototype_project/Rakefile +1 -0
  98. data/prototype_project/app/README +6 -0
  99. data/prototype_project/app/application.rb +50 -0
  100. data/prototype_project/app/application.ru +29 -0
  101. data/prototype_project/app/db.yml +10 -0
  102. data/prototype_project/app/logging.rb +20 -0
  103. data/prototype_project/app/unicorn.ru +10 -0
  104. data/prototype_project/log/README +1 -0
  105. data/prototype_project/script/console +34 -0
  106. data/prototype_project/tmp/README +0 -0
  107. data/prototype_project/tmp/pids/README +0 -0
  108. data/spec/ext/performant_spec.rb +64 -0
  109. data/spec/lib/application_spec.rb +61 -0
  110. data/spec/lib/cacher/partial/subtoken_spec.rb +89 -0
  111. data/spec/lib/cacher/partial_generator_spec.rb +35 -0
  112. data/spec/lib/cacher/similarity/double_levenshtone_spec.rb +60 -0
  113. data/spec/lib/cacher/similarity/none_spec.rb +23 -0
  114. data/spec/lib/cacher/similarity_generator_spec.rb +22 -0
  115. data/spec/lib/cacher/weights/logarithmic_spec.rb +30 -0
  116. data/spec/lib/cacher/weights_generator_spec.rb +21 -0
  117. data/spec/lib/configuration/configuration_spec.rb +38 -0
  118. data/spec/lib/configuration/type_spec.rb +49 -0
  119. data/spec/lib/configuration_spec.rb +8 -0
  120. data/spec/lib/cores_spec.rb +65 -0
  121. data/spec/lib/extensions/array_spec.rb +37 -0
  122. data/spec/lib/extensions/hash_spec.rb +11 -0
  123. data/spec/lib/extensions/module_spec.rb +27 -0
  124. data/spec/lib/extensions/symbol_spec.rb +85 -0
  125. data/spec/lib/generator_spec.rb +135 -0
  126. data/spec/lib/helpers/cache_spec.rb +35 -0
  127. data/spec/lib/helpers/gc_spec.rb +71 -0
  128. data/spec/lib/helpers/measuring_spec.rb +18 -0
  129. data/spec/lib/helpers/search_spec.rb +50 -0
  130. data/spec/lib/index/bundle_partial_generation_speed_spec.rb +47 -0
  131. data/spec/lib/index/bundle_spec.rb +260 -0
  132. data/spec/lib/index/category_spec.rb +203 -0
  133. data/spec/lib/indexers/base_spec.rb +73 -0
  134. data/spec/lib/indexers/field_spec.rb +20 -0
  135. data/spec/lib/loader_spec.rb +48 -0
  136. data/spec/lib/loggers/search_spec.rb +19 -0
  137. data/spec/lib/performant/array_spec.rb +13 -0
  138. data/spec/lib/query/allocation_spec.rb +194 -0
  139. data/spec/lib/query/allocations_spec.rb +336 -0
  140. data/spec/lib/query/base_spec.rb +104 -0
  141. data/spec/lib/query/combination_spec.rb +90 -0
  142. data/spec/lib/query/combinations_spec.rb +83 -0
  143. data/spec/lib/query/combinator_spec.rb +112 -0
  144. data/spec/lib/query/full_spec.rb +22 -0
  145. data/spec/lib/query/live_spec.rb +61 -0
  146. data/spec/lib/query/qualifiers_spec.rb +31 -0
  147. data/spec/lib/query/solr_spec.rb +51 -0
  148. data/spec/lib/query/token_spec.rb +297 -0
  149. data/spec/lib/query/tokens_spec.rb +189 -0
  150. data/spec/lib/query/weights_spec.rb +47 -0
  151. data/spec/lib/results/base_spec.rb +233 -0
  152. data/spec/lib/routing_spec.rb +318 -0
  153. data/spec/lib/solr/schema_generator_spec.rb +42 -0
  154. data/spec/lib/sources/db_spec.rb +91 -0
  155. data/spec/lib/tokenizers/base_spec.rb +61 -0
  156. data/spec/lib/tokenizers/index_spec.rb +51 -0
  157. data/spec/lib/tokenizers/query_spec.rb +105 -0
  158. data/spec/lib/umlaut_substituter_spec.rb +84 -0
  159. data/spec/specific/speed_spec.rb +55 -0
  160. metadata +371 -15
  161. data/README.textile +0 -9
data/bin/picky ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+
4
+ begin
5
+ require 'picky/generator'
6
+ rescue LoadError => e
7
+ require 'rubygems'
8
+ picky_path = File.expand_path('../../lib', __FILE__)
9
+ $:.unshift(picky_path) if File.directory?(picky_path) && !$:.include?(picky_path)
10
+ require 'picky/generator'
11
+ end
12
+
13
+ generator = Picky::Generator.new
14
+ generator.generate ARGV
data/lib/bundling.rb ADDED
@@ -0,0 +1,10 @@
1
+ # TODO Remove?
2
+ #
3
+ begin
4
+ require 'bundler'
5
+ rescue LoadError => e
6
+ require 'rubygems'
7
+ require 'bundler'
8
+ end
9
+ Bundler.setup SEARCH_ENVIRONMENT
10
+ Bundler.require
data/lib/constants.rb ADDED
@@ -0,0 +1,9 @@
1
+ # Set constants.
2
+ #
3
+
4
+ # Use rack's environment for the search engine.
5
+ #
6
+ ENV['SEARCH_ENV'] ||= ENV['RACK_ENV']
7
+
8
+ SEARCH_ENVIRONMENT = ENV['SEARCH_ENV'] || 'development' unless defined? SEARCH_ENVIRONMENT
9
+ SEARCH_ROOT = Dir.pwd unless defined? SEARCH_ROOT
data/lib/deployment.rb ADDED
@@ -0,0 +1,212 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'constants'))
2
+
3
+ module Picky
4
+ module Capistrano
5
+
6
+ # Include all
7
+ #
8
+ module All
9
+ def self.extended cap_config
10
+
11
+ cap_config.instance_eval do
12
+
13
+ # Executes a rake task on the server.
14
+ #
15
+ # Options:
16
+ # * env: The SEARCH_ENV. Is not passed to rake if explicitly set to false. Default: production.
17
+ # * All other options get passed on to the Capistrano run task.
18
+ #
19
+ def execute_rake_task name, options = {}, &block
20
+ env = options.delete :env
21
+ env = env == false ? '' : "SEARCH_ENV=#{env || 'production'}"
22
+ run "cd #{current_path}; rake #{name} #{env}", options, &block
23
+ end
24
+
25
+ end
26
+
27
+ cap_config.extend Standard
28
+ cap_config.extend Deploy
29
+ cap_config.extend Caching
30
+ cap_config.extend Overrides
31
+
32
+ end
33
+ end
34
+
35
+ # Removes unneeded Rails defaults.
36
+ #
37
+ module Overrides
38
+ def self.extended cap_config
39
+ cap_config.instance_eval do
40
+
41
+ namespace :deploy do
42
+ tasks.delete :check
43
+ tasks.delete :cold
44
+ tasks.delete :migrations
45
+ tasks.delete :migrate
46
+ tasks.delete :upload
47
+
48
+ namespace :web do
49
+ tasks.delete :enable
50
+ tasks.delete :disable
51
+ end
52
+ end
53
+
54
+ end
55
+ end
56
+ end
57
+
58
+ module Standard
59
+ def self.extended cap_config
60
+ cap_config.load 'standard'
61
+ cap_config.load 'deploy'
62
+ end
63
+ end
64
+
65
+ module Deploy
66
+
67
+ def self.extended cap_config
68
+ cap_config.instance_eval do
69
+
70
+ namespace :deploy do
71
+ %w(start stop).each do |action|
72
+ desc "#{action} the Servers"
73
+ task action.to_sym, :roles => :app do
74
+ execute_rake_task "server:#{action}"
75
+ end
76
+ end
77
+ desc "Restart the Servers sequentially"
78
+ task :restart, :roles => :app do
79
+ find_servers(:roles => :app).each do |server|
80
+ execute_rake_task "server:restart", :hosts => server.host
81
+ end
82
+ end
83
+
84
+ desc 'Hot deploy the code'
85
+ task 'hot', :roles => :app do
86
+ update
87
+ execute_rake_task 'server:usr1', :env => false # No env needed.
88
+ end
89
+
90
+ desc "Setup a GitHub-style deployment."
91
+ task :setup, :roles => :app do
92
+ cmd = "git clone #{repository} #{current_path}-clone-cache &&" +
93
+ "rm #{current_path} &&" +
94
+ "mv #{current_path}-clone-cache #{current_path}"
95
+ run cmd
96
+ end
97
+
98
+ desc "Deploy"
99
+ task :default, :roles => :app do
100
+ update
101
+ restart
102
+ end
103
+
104
+ desc "Update the deployed code."
105
+ task :update_code do # code needs to be updated with all servers
106
+ puts "updating code to branch #{branch}"
107
+ cmd = "cd #{current_path} &&" +
108
+ "git fetch origin &&" +
109
+ "(git checkout -f #{branch} || git checkout -b #{branch} origin/#{branch}) &&" +
110
+ "git pull;" +
111
+ "git branch"
112
+ run cmd
113
+ symlink
114
+ end
115
+
116
+ desc "Cleans up the git checkout"
117
+ task :cleanup, :roles => :app do
118
+ run "cd #{current_path} && git gc --aggressive"
119
+ end
120
+
121
+ desc "create the symlinks to the shared dirs"
122
+ task :symlink do
123
+ set :user, 'deploy'
124
+ run "rm -rf #{current_path}/log; ln -sf #{shared_path}/log #{current_path}/log"
125
+ run "rm -rf #{current_path}/index; ln -sf #{shared_path}/index #{current_path}/index"
126
+ # link database-config files
127
+ run "ln -sf #{shared_path}/config/base.yml #{current_path}/config/db/base.yml"
128
+ run "ln -sf #{shared_path}/config/source.yml #{current_path}/config/db/source.yml"
129
+ # link unicorn.ru
130
+ run "ln -sf #{shared_path}/config/unicorn.ru #{current_path}/config/unicorn.ru" # TODO change path
131
+ end
132
+
133
+ namespace :rollback do
134
+ desc "Rollback to last release."
135
+ task :default, :roles => :app do
136
+ set :branch, branches[-2]
137
+ puts "rolling back to branch #{branch}"
138
+ deploy.update_code
139
+ end
140
+
141
+ task :code, :roles => :app do
142
+ # implicit
143
+ end
144
+ end
145
+ end
146
+
147
+ end
148
+ end
149
+
150
+ end
151
+
152
+ module Caching
153
+
154
+ def self.extended cap_config
155
+ cap_config.instance_eval do
156
+ namespace :cache do
157
+ desc "check the index files if they are ready to be used"
158
+ task :check, :roles => :cache do
159
+ execute_rake_task 'cache:check'
160
+ end
161
+ end
162
+ namespace :cache do
163
+ namespace :structure do
164
+ desc "create the index cache structure"
165
+ task :create, :roles => :app do
166
+ execute_rake_task 'cache:structure:create'
167
+ end
168
+ end
169
+ end
170
+ namespace :solr do
171
+ desc "create the index cache structure"
172
+ task :index, :roles => :cache do
173
+ execute_rake_task 'solr:index'
174
+ end
175
+ %w|start stop restart|.collect(&:to_sym).each do |action|
176
+ desc "#{action} the solr server"
177
+ task action, :roles => :app do
178
+ execute_rake_task 'solr:start'
179
+ end
180
+ end
181
+ end
182
+ end
183
+ end
184
+
185
+ end
186
+
187
+ module Statistics
188
+
189
+ def self.extended cap_config
190
+ namespace :statistics do
191
+ desc 'Start the statistics server'
192
+ task :start, :roles => :statistics do
193
+ set :user, 'root'
194
+ run "daemonize -c #{current_path} -u deploy -v #{current_path}/script/statistics/start production"
195
+ end
196
+ desc 'Stop the statistics server'
197
+ task :stop, :roles => :statistics do
198
+ run "#{current_path}/script/statistics/stop production"
199
+ end
200
+ desc 'Restart the statistics server'
201
+ task :restart, :roles => :statistics do
202
+ stop
203
+ sleep 2
204
+ start
205
+ end
206
+ end
207
+ end
208
+
209
+ end
210
+
211
+ end
212
+ end
@@ -0,0 +1,40 @@
1
+ class Application
2
+
3
+ # An application simply delegates to the routing to handle a request.
4
+ #
5
+ def self.routing
6
+ @routing ||= Routing.new
7
+ end
8
+ def self.call env
9
+ routing.call env
10
+ end
11
+
12
+ # TODO Multiple indexes?
13
+ #
14
+ def self.indexes &block
15
+ indexes_configuration.instance_eval &block
16
+ # TODO Uglyyyyyy.
17
+ ::Indexes.configuration = indexes_configuration
18
+ ::Indexes.setup # TODO Think about setup/reload.
19
+ end
20
+ def self.indexes_configuration
21
+ @indexes || reset_indexes
22
+ end
23
+ def self.reset_indexes
24
+ @indexes = Configuration::Indexes.new # Is instance a problem?
25
+ end
26
+
27
+ # TODO Multiple Queries?
28
+ #
29
+ def self.queries &block
30
+ queries_configuration.instance_eval &block
31
+ routing.freeze
32
+ end
33
+ def self.queries_configuration
34
+ @queries || reset_queries
35
+ end
36
+ def self.reset_queries
37
+ @queries = Configuration::Queries.new routing # Is instance a problem?
38
+ end
39
+
40
+ end
@@ -0,0 +1,3 @@
1
+ Partial = Cacher::Partial
2
+ Similarity = Cacher::Similarity
3
+ Weights = Cacher::Weights
@@ -0,0 +1,17 @@
1
+ module Cacher
2
+
3
+ # A cache generator holds an index type.
4
+ #
5
+ # TODO Rename to index_type.
6
+ #
7
+ class Generator
8
+
9
+ attr_reader :index
10
+
11
+ def initialize index
12
+ @index = index
13
+ end
14
+
15
+ end
16
+
17
+ end
@@ -0,0 +1,7 @@
1
+ module Cacher
2
+ module Partial
3
+ # Default is Subtoken, down to 1.
4
+ #
5
+ Default = Subtoken.new :down_to => 1
6
+ end
7
+ end
@@ -0,0 +1,19 @@
1
+ module Cacher
2
+
3
+ module Partial
4
+
5
+ # Does not generate a partial index.
6
+ #
7
+ class None < Strategy
8
+
9
+ # Returns an empty index.
10
+ #
11
+ def generate_from index
12
+ {}
13
+ end
14
+
15
+ end
16
+
17
+ end
18
+
19
+ end
@@ -0,0 +1,7 @@
1
+ module Cacher
2
+ module Partial
3
+ # Superclass for partial strategies.
4
+ #
5
+ class Strategy; end
6
+ end
7
+ end
@@ -0,0 +1,91 @@
1
+ module Cacher
2
+
3
+ module Partial
4
+
5
+ # The subtoken partial strategy.
6
+ #
7
+ # If given
8
+ # "florian"
9
+ # will index
10
+ # "floria"
11
+ # "flori"
12
+ # "flor"
13
+ # "flo"
14
+ # "fl"
15
+ # "f"
16
+ # Depending on what the given down_to value is. (Example with down_to == 1)
17
+ #
18
+ class Subtoken < Strategy
19
+
20
+ attr_reader :down_to, :starting_at
21
+
22
+ # Down to is how far it will go down in generating the subtokens.
23
+ #
24
+ # Examples:
25
+ # With :hello, and starting_at 0
26
+ # * down to == 1: [:hello, :hell, :hel, :he, :h]
27
+ # * down to == 4: [:hello, :hell]
28
+ #
29
+ # With :hello, and starting_at -1
30
+ # * down to == 1: [:hell, :hel, :he, :h]
31
+ # * down to == 4: [:hell]
32
+ #
33
+ def initialize options = {}
34
+ @down_to = options[:down_to] || 1
35
+ starting_at = options[:starting_at] || 0
36
+ @starting_at = starting_at.zero? ? 0 : starting_at - 1
37
+ end
38
+
39
+ # Generates a partial index (subtoken => ids) from the given index.
40
+ #
41
+ def generate_from index
42
+ result = {}
43
+
44
+ # Generate the subtokens for each key token, printing a progress line every 5000 tokens.
45
+ #
46
+ i = 5000
47
+ index.each_key do |token|
48
+ i -= 1
49
+ if i == 0
50
+ puts "#{Time.now}: Generating partial tokens for token #{token}. This appears every 5000 tokens."
51
+ i = 5000
52
+ end
53
+ generate_for token, index, result
54
+ end
55
+
56
+ # Remove duplicate ids.
57
+ #
58
+ # TODO If it is unique for a subtoken, it is
59
+ # unique for all derived longer tokens.
60
+ #
61
+ result.each_value &:uniq! # dedupes each id list in place
62
+
63
+ result
64
+ end
65
+
66
+ private
67
+
68
+ # To each shortened token of :test
69
+ # :test, :tes, :te, :t
70
+ # add all ids of :test
71
+ #
72
+ # "token" here means just text.
73
+ #
74
+ # TODO Could be improved by appending the aforegoing ids?
75
+ #
76
+ def generate_for token, index, result
77
+ clipped_token = starting_at.zero? ? token : token[0..starting_at].to_sym
78
+ clipped_token.subtokens(down_to).each do |subtoken|
79
+ if result[subtoken]
80
+ result[subtoken] += index[token] # unique
81
+ else
82
+ result[subtoken] = index[token].dup
83
+ end
84
+ end
85
+ end
86
+
87
+ end
88
+
89
+ end
90
+
91
+ end
@@ -0,0 +1,15 @@
1
+ module Cacher
2
+
3
+ # The partial generator uses a subtoken(downto:1) generator as default.
4
+ #
5
+ class PartialGenerator < Generator
6
+
7
+ # Generate a partial index based on the given index.
8
+ #
9
+ def generate strategy = Partial::Subtoken.new(:down_to => 1)
10
+ strategy.generate_from self.index
11
+ end
12
+
13
+ end
14
+
15
+ end
@@ -0,0 +1,7 @@
1
+ module Cacher
2
+ module Similarity
3
+ # Default is no similarity.
4
+ #
5
+ Default = None.new
6
+ end
7
+ end
@@ -0,0 +1,73 @@
1
+ # encoding: utf-8
2
+ #
3
+ module Cacher
4
+
5
+ module Similarity
6
+
7
+ # DoubleLevenshtone means that it's a combination of
8
+ # * DoubleMetaphone
9
+ # and
10
+ # * Levenshtein
11
+ # :)
12
+ #
13
+ class DoubleLevenshtone < Strategy
14
+
15
+ attr_reader :amount
16
+
17
+ # Takes the maximum amount of similar tokens that is kept per code (default 10).
18
+ #
19
+ def initialize amount = 10
20
+ @amount = amount
21
+ end
22
+
23
+ # Encodes the given symbol.
24
+ #
25
+ # Returns a symbol, or nil if no codes are generated.
26
+ #
27
+ def encoded sym
28
+ codes = Text::Metaphone.double_metaphone sym.to_s
29
+ codes.first.to_sym unless codes.empty?
30
+ end
31
+
32
+ # Generates an index for the given index (in full index style).
33
+ #
34
+ # In the following form:
35
+ # [:meier, :mueller, :peter, :pater] => { :MR => [:meier], :MLR => [:mueller], :PTR => [:peter, :pater] }
36
+ #
37
+ def generate_from index
38
+ hash = hashify index.keys
39
+ sort hash
40
+ end
41
+
42
+ private
43
+
44
+ # Sorts the index values in place.
45
+ #
46
+ def sort index
47
+ index.each_pair.each do |code, ary|
48
+ ary.sort_by_levenshtein! code
49
+ ary.slice! amount, ary.size # size is not perfectly correct, but anyway
50
+ end
51
+ index
52
+ end
53
+
54
+ # Hashifies a list of symbols.
55
+ #
56
+ # Where:
57
+ # { encoded_sym => [syms] }
58
+ #
59
+ def hashify list
60
+ list.inject({}) do |total, element|
61
+ if code = encoded(element)
62
+ total[code] ||= []
63
+ total[code] << element
64
+ end
65
+ total
66
+ end
67
+ end
68
+
69
+ end
70
+
71
+ end
72
+
73
+ end
@@ -0,0 +1,25 @@
1
+ module Cacher
2
+
3
+ module Similarity
4
+
5
+ # Similarity strategy that does nothing.
6
+ #
7
+ class None < Strategy
8
+
9
+ # Does not encode text. Just returns nil.
10
+ #
11
+ def encoded text
12
+ nil
13
+ end
14
+
15
+ # Returns an empty index.
16
+ #
17
+ def generate_from index
18
+ {}
19
+ end
20
+
21
+ end
22
+
23
+ end
24
+
25
+ end
@@ -0,0 +1,7 @@
1
+ module Cacher
2
+ module Similarity
3
+ # Base class for all similarity strategies.
4
+ #
5
+ class Strategy; end
6
+ end
7
+ end
@@ -0,0 +1,15 @@
1
+ module Cacher
2
+
3
+ # Uses no similarity as default.
4
+ #
5
+ class SimilarityGenerator < Generator
6
+
7
+ # Generate a similarity index based on the given index.
8
+ #
9
+ def generate strategy = Similarity::None.new
10
+ strategy.generate_from self.index
11
+ end
12
+
13
+ end
14
+
15
+ end
@@ -0,0 +1,7 @@
1
+ module Cacher
2
+ module Weights
3
+ # Default is Logarithmic.
4
+ #
5
+ Default = Logarithmic.new
6
+ end
7
+ end
@@ -0,0 +1,39 @@
1
+ module Cacher
2
+
3
+ module Weights
4
+
5
+ # Uses a logarithmic weight.
6
+ # If for a key k we have x ids, the weight is:
7
+ # w(x): log(x)
8
+ # Special case: If x < 1, then we use 0.
9
+ #
10
+ class Logarithmic < Strategy
11
+
12
+ # Generates a weights index from the given index.
13
+ #
14
+ def generate_from index
15
+ index.inject({}) do |hash, text_ids|
16
+ text, ids = *text_ids
17
+ weight = weight_for ids.size
18
+ hash[text] ||= weight.round(2) if weight
19
+ hash
20
+ end
21
+ end
22
+
23
+ # Calculates the weight for the given amount of ids.
24
+ #
25
+ # For an amount of 0, Math.log would return -Infinity, so 0 is returned instead.
26
+ # For an amount of 1, the weight is 0.0.
27
+ #
28
+ # Note: The value is needed even if it is 0, to designate that there is a weight!
29
+ #
30
+ def weight_for amount
31
+ return 0 if amount < 1
32
+ Math.log amount
33
+ end
34
+
35
+ end
36
+
37
+ end
38
+
39
+ end
@@ -0,0 +1,7 @@
1
+ module Cacher
2
+ module Weights
3
+ # Superclass for weighing strategies.
4
+ #
5
+ class Strategy; end
6
+ end
7
+ end
@@ -0,0 +1,15 @@
1
+ module Cacher
2
+
3
+ # Uses a logarithmic algorithm as default.
4
+ #
5
+ class WeightsGenerator < Generator
6
+
7
+ # Generate a weights index based on the given index.
8
+ #
9
+ def generate strategy = Weights::Logarithmic.new
10
+ strategy.generate_from self.index
11
+ end
12
+
13
+ end
14
+
15
+ end
@@ -0,0 +1,13 @@
1
+ # module Configuration
2
+ #
3
+ # def self.indexes *types
4
+ # Indexes.new(*types).save
5
+ # end
6
+ # def self.type name, *fields
7
+ # Type.new name, *fields
8
+ # end
9
+ # def self.field name, options = {}
10
+ # Field.new name, options
11
+ # end
12
+ #
13
+ # end