ultrasphinx 1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,41 @@
1
+
2
+
3
+ module Ultrasphinx
4
+
5
+ =begin rdoc
6
+ == Spelling support
7
+
8
+ In order to spellcheck your user's query, Ultrasphinx bundles a small spelling module. First, make sure Aspell 0.6, an appropriate Aspell dictionary, and the Rubygem 'raspell' are all installed.
9
+
10
+ Then, copy <tt>examples/app.multi</tt> into your Aspell dictionary folder. It allows you to use Sphinx to generate a custom wordlist for your app. Modify it if you don't want to also use the default American English dictionary.
11
+
12
+ Then, to build the custom wordlist, run:
13
+ rake ultrasphinx:spelling:build
14
+
15
+ Now you can check whether a query is correctly spelled like so:
16
+ @correction = Ultrasphinx::Spell.correct(@search.query)
17
+
18
+ If @correction is not nil, go ahead and suggest it to the user. Otherwise, the query was already correct.
19
+
20
+ =end
21
+
22
+ module Spell
23
+ SP = Aspell.new("app")
24
+ SP.suggestion_mode = Aspell::NORMAL
25
+ SP.set_option("ignore-case", "true")
26
+
27
# Spellcheck every word of +string+ against the shared Aspell handle SP.
#
# Words Aspell accepts are left untouched. A rejected word is replaced by
# Aspell's first suggestion; when Aspell has no suggestion at all the word is
# kept as typed instead of being erased from the query.
#
# Returns the corrected string, or nil when +string+ is nil or needed no
# correction.
def self.correct(string)
  return nil if string.nil?

  correction = string.gsub(/[\w\']+/) do |word|
    # gsub stringifies the block result, so a nil suggestion would delete the word
    SP.check(word) ? word : (SP.suggest(word).first || word)
  end

  correction unless correction == string
end
38
+
39
+ end
40
+ end
41
+
@@ -0,0 +1,276 @@
1
+
2
+ module Ultrasphinx
3
+
4
# Root of every error raised by the plugin. It derives from StandardError so
# that an ordinary `rescue` / `rescue => e` in application code catches it;
# code that rescues ::Exception keeps working because StandardError is one of
# its subclasses.
class Exception < ::StandardError #:nodoc:
end
# Raised when the plugin's configuration is missing or cannot be used.
class ConfigurationError < Exception #:nodoc:
end
# Raised by the daemon tasks when searchd is not in the expected state.
class DaemonError < Exception #:nodoc:
end
10
+
11
+ # internal file paths
12
+
13
+ SUBDIR = "config/ultrasphinx"
14
+
15
+ DIR = "#{RAILS_ROOT}/#{SUBDIR}"
16
+
17
+ CONF_PATH = "#{DIR}/#{RAILS_ENV}.conf"
18
+
19
+ ENV_BASE_PATH = "#{DIR}/#{RAILS_ENV}.base"
20
+
21
+ GENERIC_BASE_PATH = "#{DIR}/default.base"
22
+
23
+ BASE_PATH = (File.exist?(ENV_BASE_PATH) ? ENV_BASE_PATH : GENERIC_BASE_PATH)
24
+
25
+ raise ConfigurationError, "Please create a '#{SUBDIR}/#{RAILS_ENV}.base' or '#{SUBDIR}/default.base' file in order to use Ultrasphinx in your #{RAILS_ENV} environment." unless File.exist? BASE_PATH # XXX lame
26
+
27
+ # some miscellaneous constants
28
+
29
+ MAX_INT = 2**32-1
30
+
31
+ MAX_WORDS = 2**16 # maximum number of stopwords built
32
+
33
+ UNIFIED_INDEX_NAME = "complete"
34
+
35
+ COLUMN_TYPES = {:string => 'text', :text => 'text', :integer => 'numeric', :date => 'date', :datetime => 'date' }
36
+
37
+ CONFIG_MAP = {:username => 'sql_user',
38
+ :password => 'sql_pass',
39
+ :host => 'sql_host',
40
+ :database => 'sql_db',
41
+ :port => 'sql_port',
42
+ :socket => 'sql_sock'}
43
+
44
+ OPTIONAL_SPHINX_KEYS = ['morphology', 'stopwords', 'min_word_len', 'charset_type', 'charset_table', 'docinfo']
45
+
46
+ # some default settings for the sphinx conf files
47
+
48
+ SOURCE_DEFAULTS = %(
49
+ strip_html = 0
50
+ index_html_attrs =
51
+ sql_query_post =
52
+ sql_range_step = 20000
53
+ )
54
+
55
+ ADAPTER_DEFAULTS = {
56
+ "mysql" => %(
57
+ type = mysql
58
+ sql_query_pre = SET SESSION group_concat_max_len = 65535
59
+ sql_query_pre = SET NAMES utf8
60
+ ),
61
+ "postgresql" => %(
62
+ type = pgsql
63
+ )}
64
+
65
+
66
# Configuration file parser.
#
# Reads the conf file at +path+, takes the first section whose header line
# starts with +heading+ and returns its "key = value" lines as a Hash of
# String => String. Anything after a "#" in a value is dropped and the value
# is stripped; when a key repeats, the last occurrence wins.
#
# Raises ConfigurationError when no such section exists.
def self.options_for(heading, path)
  # File.read closes the handle again; the former open(path).read never did
  section = File.read(path)[/^#{heading}.*?\{(.*?)\}/m, 1]
  unless section
    Ultrasphinx.say "#{path} appears to be corrupted; please delete file"
    raise ConfigurationError, "Missing heading #{heading.inspect}"
  end

  section.split("\n").each_with_object({}) do |line, options|
    # lines without an "=" carry no setting
    next unless line =~ /\s*(.*?)\s*=\s*([^\#]*)/
    options[$1] = $2.strip
  end
end
82
+
83
# introspect on the existing generated conf files

# Settings from the base file's "ultrasphinx" section.
PLUGIN_SETTINGS = options_for('ultrasphinx', BASE_PATH)

# Settings from the base file's "searchd" section.
DAEMON_SETTINGS = options_for('searchd', BASE_PATH)

# Word-frequency file kept under the plugin's "path" setting. The literal used
# to end in a stray "}", which became part of the file name.
STOPWORDS_PATH = "#{Ultrasphinx::PLUGIN_SETTINGS['path']}/stopwords.txt"

# Indexing options per model, keyed by model class name; starts empty.
MODEL_CONFIGURATION = {}
92
+
93
+
94
+
95
+ class << self
96
+
97
# Logger. Prints +msg+ on standard error behind the plugin's banner.
def say(msg)
  banner = "** ultrasphinx: "
  $stderr.puts("#{banner}#{msg}")
end
101
+
102
# Force all the indexed models to load and fill the MODEL_CONFIGURATION hash.
#
# Every Ruby file under app/models whose text mentions "is_indexed" is
# loaded; files inside .svn, CVS or .bzr directories are skipped. A model
# that fails to load is reported through +say+ and the scan carries on, but a
# request to exit or an interrupt signal is passed on to the caller. Once the
# scan is done, Fields is configured from MODEL_CONFIGURATION.
def load_constants
  Dir["#{RAILS_ROOT}/app/models/**/*.rb"].each do |filename|
    next if filename =~ /\/(\.svn|CVS|\.bzr)\//
    begin
      # File.open never treats the name as a "|command", unlike Kernel#open
      indexed = File.open(filename) { |file| file.grep(/is_indexed/).any? }
      load filename if indexed
    rescue ::Exception => e
      # best effort for broken models only; never swallow exit or Ctrl-C
      raise if e.is_a?(SystemExit) || e.is_a?(SignalException)
      say "warning; possibly critical autoload error on #{filename}"
      say e.inspect
    end
  end

  # build the field-to-type mappings
  Fields.instance.configure(MODEL_CONFIGURATION)
end
118
+
119
# Complain if the database names go out of sync.
#
# Does nothing when CONF_PATH does not exist yet. Otherwise compares the
# "sql_db" entry of the generated conf's "source" section with the database
# name in the ActiveRecord connection's @config and warns on a mismatch. Any
# StandardError raised during the comparison is swallowed.
def verify_database_name
  return unless File.exist?(CONF_PATH)

  begin
    indexed_db = options_for("source", CONF_PATH)['sql_db']
    live_db = ActiveRecord::Base.connection.instance_variable_get("@config")[:database]
    if indexed_db != live_db
      say "warning; configured database name is out-of-date"
      say "please run 'rake ultrasphinx:configure'"
    end
  rescue StandardError
    nil
  end
end
128
+
129
+
130
# Main SQL builder.
#
# Loads every indexed model, then rewrites CONF_PATH from scratch with:
# * the base file, minus its "ultrasphinx" section,
# * one Sphinx "source" block per entry of MODEL_CONFIGURATION, and
# * a single unified index over all of those sources.
#
# Raises ConfigurationError for a database adapter without an entry in
# ADAPTER_DEFAULTS and for an include or concat whose association cannot be
# resolved.
def configure
  load_constants

  puts "Rebuilding Ultrasphinx configurations for #{ENV['RAILS_ENV']} environment"
  puts "Available models are #{MODEL_CONFIGURATION.keys.to_sentence}"
  File.open(CONF_PATH, "w") do |conf|
    conf.puts "\n# Auto-generated at #{Time.now}.\n# Hand modifications will be overwritten.\n"

    conf.puts "\n# #{BASE_PATH}"
    # File.read closes the base file again; the "ultrasphinx" section is cut out of the copy
    conf.puts File.read(BASE_PATH).sub(/^ultrasphinx.*?\{.*?\}/m, '') + "\n"

    sphinx_source_list = []

    conf.puts "\n# Source configuration\n\n"

    puts "Generating SQL"
    MODEL_CONFIGURATION.each_with_index do |model_options, class_id|
      model, options = model_options
      klass, source = model.constantize, model.tableize

      # puts "SQL for #{model}"

      sphinx_source_list << source

      conf.puts "source #{source}\n{"
      conf.puts SOURCE_DEFAULTS

      # apparently we're supporting postgres now
      connection_settings = klass.connection.instance_variable_get("@config")

      adapter_defaults = ADAPTER_DEFAULTS[connection_settings[:adapter]]
      raise ConfigurationError, "Unsupported database adapter" unless adapter_defaults
      conf.puts adapter_defaults

      # only the connection keys listed in CONFIG_MAP are written out
      connection_settings.each do |key, value|
        conf.puts "#{CONFIG_MAP[key]} = #{value}" if CONFIG_MAP[key]
      end

      table, pkey = klass.table_name, klass.primary_key
      condition_strings, join_strings = Array(options[:conditions]).map{|condition| "(#{condition})"}, []
      # document id = primary key * model count + class_id; sql_query_info below inverts it
      column_strings = ["(#{table}.#{pkey} * #{MODEL_CONFIGURATION.size} + #{class_id}) AS id",
                        "#{class_id} AS class_id", "'#{klass.name}' AS class"]
      # fields this model does not supply itself are emitted as nulls further down
      remaining_columns = Fields.instance.keys - ["class", "class_id"]

      conf.puts "\nsql_query_range = SELECT MIN(#{pkey}), MAX(#{pkey}) FROM #{table}"

      options[:fields].to_a.each do |f|
        column, as = f.is_a?(Hash) ? [f[:field], f[:as]] : [f, f]
        column_strings << Fields.instance.cast("#{table}.#{column}", as)
        remaining_columns.delete(as)
      end

      options[:includes].to_a.each do |join|
        join_klass = join[:model].constantize
        association = klass.reflect_on_association(join[:model].underscore.to_sym)
        if not association
          if not join[:association_sql]
            raise ConfigurationError, "Unknown association from #{klass} to #{join[:model]}"
          else
            join_strings << join[:association_sql]
          end
        else
          join_strings << "LEFT OUTER JOIN #{join_klass.table_name} ON " +
            if (macro = association.macro) == :belongs_to
              "#{join_klass.table_name}.#{join_klass.primary_key} = #{table}.#{association.primary_key_name}"
            elsif macro == :has_one
              "#{table}.#{klass.primary_key} = #{join_klass.table_name}.#{association.instance_variable_get('@foreign_key_name')}"
            else
              raise ConfigurationError, "Unidentified association macro #{macro.inspect}"
            end
        end
        column_strings << "#{join_klass.table_name}.#{join[:field]} AS #{join[:as] or join[:field]}"
        remaining_columns.delete(join[:as] || join[:field])
      end

      options[:concats].to_a.select{|concat| concat[:model] and concat[:field]}.each do |group|
        # only has_many's or explicit sql right now
        join_klass = group[:model].constantize
        if group[:association_sql]
          join_strings << group[:association_sql]
        else
          association_name = group[:association_name] ? group[:association_name].to_sym : group[:model].underscore.pluralize.to_sym
          association = klass.reflect_on_association(association_name)
          # fail with a configuration message rather than a NoMethodError on nil
          raise ConfigurationError, "Unknown association from #{klass} to #{group[:model]}" unless association
          join_strings << "LEFT OUTER JOIN #{join_klass.table_name} ON #{table}.#{klass.primary_key} = #{join_klass.table_name}.#{association.primary_key_name}" + (" AND (#{group[:conditions]})" if group[:conditions]).to_s # XXX make sure foreign key is right for polymorphic relationships
        end
        column_strings << Fields.instance.cast("GROUP_CONCAT(#{join_klass.table_name}.#{group[:field]} SEPARATOR ' ')", group[:as])
        remaining_columns.delete(group[:as])
      end

      options[:concats].to_a.select{|concat| concat[:fields]}.each do |concat|
        column_strings << Fields.instance.cast("CONCAT_WS(' ', #{concat[:fields].map{|field| "#{table}.#{field}"}.join(', ')})", concat[:as])
        remaining_columns.delete(concat[:as])
      end

      # puts "#{model} has #{remaining_columns.inspect} remaining"
      remaining_columns.each do |field|
        column_strings << Fields.instance.null(field)
      end

      # sphinx wants them always in the same order, but "id" must be first
      ordered_columns = column_strings.sort_by do |string|
        (field = string[/.*AS (.*)/, 1]) == "id" ? "*" : field
      end
      query_strings = ["SELECT", ordered_columns.join(", ")]
      query_strings << "FROM #{table}"
      query_strings += join_strings.uniq
      query_strings << "WHERE #{table}.#{pkey} >= $start AND #{table}.#{pkey} <= $end"
      query_strings += condition_strings.uniq.map{|s| "AND #{s}"}
      query_strings << "GROUP BY id"

      conf.puts "sql_query = #{query_strings.join(" ")}"

      groups = []
      # group and date sorting params... this really only would have to be run once
      Fields.instance.each do |field, type|
        case type
        when 'numeric'
          groups << "sql_group_column = #{field}"
        when 'date'
          groups << "sql_date_column = #{field}"
        end
      end
      conf.puts "\n" + groups.sort_by{|s| s[/= (.*)/, 1]}.join("\n")
      conf.puts "\nsql_query_info = SELECT * FROM #{table} WHERE #{table}.#{pkey} = (($id - #{class_id}) / #{MODEL_CONFIGURATION.size})"
      conf.puts "}\n\n"
    end

    conf.puts "\n# Index configuration\n\n"

    # only output the unified index; no one uses the individual ones anyway

    conf.puts "index #{UNIFIED_INDEX_NAME}"
    conf.puts "{"
    conf.puts sphinx_source_list.map {|s| "source = #{s}" }

    OPTIONAL_SPHINX_KEYS.each do |key|
      conf.puts "#{key} = #{PLUGIN_SETTINGS[key]}" if PLUGIN_SETTINGS[key]
    end

    conf.puts "path = #{PLUGIN_SETTINGS["path"]}/sphinx_index_#{UNIFIED_INDEX_NAME}"
    conf.puts "}\n\n"
  end

end
274
+
275
+ end
276
+ end
@@ -0,0 +1,125 @@
1
+
2
+ ENV['RAILS_ENV'] ||= "development"
3
+
4
+ namespace :ultrasphinx do
5
+
6
desc "Bootstrap a full Sphinx environment"
# The start task is defined inside the nested :daemon namespace, so it has to
# be named "daemon:start" here; a bare :start matches no task from this scope.
task :bootstrap => [:environment, :configure, :index, "daemon:start"]
9
+
10
desc "Rebuild the configuration file for this particular environment."
# Runs the plugin's conf generator once the environment task has run.
task :configure => [:environment] do
  Ultrasphinx.configure
end
14
+
15
desc "Reindex the database and send an update signal to the search daemon."
# Builds the indexer command line, echoes it, then runs it. Extra indexer
# switches come from ENV['OPTS']; --rotate is added only while the daemon runs.
task :index => [:environment] do
  # every fragment carries its own spacing, so a plain join rebuilds the command
  fragments = ["indexer --config #{Ultrasphinx::CONF_PATH}"]
  fragments << " #{ENV['OPTS']} " if ENV['OPTS']
  fragments << " --rotate" if ultrasphinx_daemon_running?
  fragments << " #{Ultrasphinx::UNIFIED_INDEX_NAME}"
  command = fragments.join
  puts command
  system command
end
24
+
25
+ namespace :daemon do
26
desc "Start the search daemon"
# Refuses to start when a daemon is already running. Otherwise clears stale
# lockfiles, launches searchd with the generated conf and reports whether a
# live pid showed up afterwards.
task :start => :environment do
  raise Ultrasphinx::DaemonError, "Already running" if ultrasphinx_daemon_running?
  # remove lockfiles; File.join supplies the separator the old path + "*spl" glob lacked
  # NOTE(review): assumes the lockfiles use the ".spl" extension and sit in the index directory
  Dir[File.join(Ultrasphinx::PLUGIN_SETTINGS["path"], "*.spl")].each {|file| File.delete(file)}
  system "searchd --config #{Ultrasphinx::CONF_PATH}"
  sleep(2) # give daemon a chance to write the pid file
  if ultrasphinx_daemon_running?
    puts "Started successfully"
  else
    puts "Failed to start"
  end
end
39
+
40
desc "Stop the search daemon"
# Sends the default kill signal to the recorded daemon pid; raises
# DaemonError when no daemon appears to be running.
task :stop => [:environment] do
  unless ultrasphinx_daemon_running?
    raise Ultrasphinx::DaemonError, "Doesn't seem to be running"
  end

  pid = ultrasphinx_daemon_pid
  system "kill #{pid}"
  puts "Stopped #{pid}."
end
46
+
47
desc "Restart the search daemon"
# Pure composition: the stop task runs first, then the start task.
task :restart => [:environment, :stop, :start]
50
+
51
desc "Tail queries in the log"
# Follows the daemon's query log and prints the text after each "[*]"
# marker, skipping a query identical to the one seen just before it.
task :tail => :environment do
  require 'file/tail'
  filename = Ultrasphinx::DAEMON_SETTINGS['query_log']
  puts "Tailing #{filename}"
  File.open(filename) do |log|
    log.extend(File::Tail)
    log.interval = 1
    log.backward(10)
    previous = nil
    log.tail do |line|
      query = line[/\[\*\](.*)$/, 1]
      # a line without a marker resets the memory but prints nothing
      unless query == previous
        previous = query
        puts query if query
      end
    end
  end
end
66
+
67
desc "Check if the search daemon is running"
# Prints a one-line verdict based on ultrasphinx_daemon_running?.
task :status => :environment do
  puts(ultrasphinx_daemon_running? ? "Daemon is running." : "Daemon is stopped.")
end
75
+ end
76
+
77
+
78
+
79
+ namespace :spelling do
80
desc "Rebuild custom spelling dictionary"
# Runs the index task with the --buildstops/--buildfreqs options so that a
# word-frequency list lands in STOPWORDS_PATH, keeps the words of four or
# more characters (no whitespace, digits or underscores) that occur more than
# 40 times, and feeds them to aspell as the custom.rws word list.
task :build => :environment do
  ENV['OPTS'] = "--buildstops #{Ultrasphinx::STOPWORDS_PATH} #{Ultrasphinx::MAX_WORDS} --buildfreqs"
  Rake::Task["ultrasphinx:index"].invoke
  tmpfile = "/tmp/custom_words.txt"
  words = []
  puts "Filtering"
  # foreach closes the stopwords file once the last line has been read
  File.foreach(Ultrasphinx::STOPWORDS_PATH) do |line|
    if line =~ /^([^\s\d_]{4,}) (\d+)/
      # XXX should be configurable
      words << $1 if $2.to_i > 40
      # ideally we would also skip words within X edit distance of a correction
      # by aspell-en, in order to not add typos to the dictionary
    end
  end
  puts "Writing #{words.size} words"
  # the block form flushes and closes the list before aspell reads it;
  # the bare File.open(...).write could leave it empty at that point
  File.open(tmpfile, 'w') {|file| file.write(words.join("\n")) }
  puts "Loading into aspell"
  system("aspell --lang=en create master custom.rws < #{tmpfile}")
end
100
+ end
101
+
102
+ end
103
+
104
# task shortcuts: each short name under "us" simply depends on the full task
namespace :us do
  {
    :start   => "ultrasphinx:daemon:start",
    :restart => "ultrasphinx:daemon:restart",
    :stop    => "ultrasphinx:daemon:stop",
    :in      => "ultrasphinx:index",
    :spell   => "ultrasphinx:spelling:build",
    :conf    => "ultrasphinx:configure",
    :boot    => "ultrasphinx:bootstrap"
  }.each do |shortcut, target|
    task shortcut => [target]
  end
end
114
+
115
+ # support methods
116
+
117
# Returns the daemon's pid as a String: the first line of the file named by
# the first "pid_file = ..." setting in Ultrasphinx::BASE_PATH.
#
# Returns nil when there is no such setting, when the pid file is missing or
# empty, or when any other StandardError occurs while reading.
def ultrasphinx_daemon_pid
  # readlines opens and closes the base file in one call
  pid_file = File.readlines(Ultrasphinx::BASE_PATH).map do |line|
    line[/^\s*pid_file\s*=\s*([^\s\#]*)/, 1]
  end.compact.first
  return nil if pid_file.nil? || pid_file.empty?

  # the block form closes the pid file as soon as its first line is read
  File.open(pid_file) {|file| file.readline.chomp }
rescue StandardError
  nil # no readable pid simply means "no daemon known"
end
122
+
123
# True when the pid reported by ultrasphinx_daemon_pid belongs to a live
# process, false otherwise.
#
# The pid is read once and must be a positive decimal number. It is probed
# with signal 0 instead of being interpolated into a `ps` shell command, so
# an empty or malformed pid file can neither report a phantom daemon nor
# inject shell text.
def ultrasphinx_daemon_running?
  pid = ultrasphinx_daemon_pid.to_s.strip
  return false unless pid =~ /\A\d+\z/ && pid.to_i > 0

  Process.kill(0, pid.to_i) # signal 0 checks for existence without delivering anything
  true
rescue Errno::EPERM
  true # the process exists but belongs to another user
rescue Errno::ESRCH, RangeError
  false # no such process, or a number too large to be a pid
end
@@ -0,0 +1,40 @@
1
+ =Sphinx Client API 0.3.0
2
+
3
+ Patched for Ultrasphinx.
4
+
5
+ This document gives an overview of what Sphinx is and how to use it
6
+ within Ruby on Rails. For more information or documentation,
7
+ please go to http://www.sphinxsearch.com
8
+
9
+ ==Sphinx
10
+
11
+ Sphinx is a standalone full-text search engine, meant to provide fast,
12
+ size-efficient and relevant fulltext search functions to other applications.
13
+ Sphinx was specially designed to integrate well with SQL databases and
14
+ scripting languages. Currently built-in data sources support fetching data
15
+ either via direct connection to MySQL, or from an XML pipe.
16
+
17
+ The simplest way to communicate with Sphinx is to use <tt>searchd</tt> -
18
+ a daemon to search through fulltext indices from external software.
19
+
20
+ ==Documentation
21
+
22
+ You can create the documentation by running:
23
+
24
+ rake rdoc
25
+
26
+ ==Latest version
27
+
28
+ You can always get the latest version from
29
+ http://kpumuk.info/projects/ror-plugins/sphinx
30
+
31
+ ==Credits
32
+
33
+ Dmytro Shteflyuk <kpumuk@kpumuk.info> http://kpumuk.info
34
+
35
+ Special thanks to Alexey Kovyrin <alexey@kovyrin.net> http://blog.kovyrin.net
36
+
37
+ ==License
38
+
39
+ This library is distributed under the terms of the Ruby license.
40
+ You can freely distribute/modify this library.