pager-ultrasphinx 1.0.20080510

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,227 @@
1
+
2
+ require 'ultrasphinx'
3
+
4
+ module ActiveRecord
5
+ class Base
6
+
7
+ =begin rdoc
8
+
9
+ The is_indexed method configures a model for indexing. Its parameters help generate SQL queries for Sphinx.
10
+
11
+ = Options
12
+
13
+ == Including regular fields
14
+
15
+ Use the <tt>:fields</tt> key.
16
+
17
+ Accepts an array of field names or field hashes.
18
+ :fields => [
19
+ 'created_at',
20
+ 'title',
21
+ {:field => 'body', :as => 'description'},
22
+ {:field => 'user_category', :facet => true, :as => 'category' }
23
+ ]
24
+
25
+ To alias a field, pass a hash instead of a string and set the <tt>:as</tt> key.
26
+
27
+ To allow faceting support on a text field, also pass a hash and set the <tt>:facet</tt> key to <tt>true</tt>. Faceting is off by default for text fields because there is some indexing overhead associated with it. Faceting is always on for numeric or date fields.
28
+
29
+ To allow sorting by a text field, also pass a hash and set the <tt>:sortable</tt> key to true. This is turned off by default for the same reason as above. Sorting is always on for numeric or date fields.
30
+
31
+ To apply an SQL function to a field before it is indexed, use the key <tt>:function_sql</tt>. Pass a string such as <tt>"REPLACE(?, '_', ' ')"</tt>. The table and column name for your field will be interpolated into the first <tt>?</tt> in the string.
32
+
33
+ Note that <tt>float</tt> fields are supported, but require Sphinx 0.98.
34
+
35
+ == Requiring conditions
36
+
37
+ Use the <tt>:conditions</tt> key.
38
+
39
+ SQL conditions, to scope which records are selected for indexing. Accepts a string.
40
+
41
+ :conditions => "created_at < NOW() AND deleted IS NULL"
42
+
43
+ The <tt>:conditions</tt> key is especially useful if you delete records by marking them deleted rather than removing them from the database.
44
+
45
+ == Ordering subgroups
46
+
47
+ Use the <tt>:order</tt> key.
48
+
49
+ An SQL order string.
50
+
51
+ :order => 'posts.id ASC'
52
+
53
+
54
+
55
+ == Including a field from an association
56
+
57
+ Use the <tt>:include</tt> key.
58
+
59
+ Accepts an array of hashes.
60
+
61
+ :include => [{:association_name => 'category', :field => 'name', :as => 'category_name'}]
62
+
63
+ Each should contain an <tt>:association_name</tt> key (the association name for the included model), a <tt>:field</tt> key (the name of the field to include), and an optional <tt>:as</tt> key (what to name the field in the parent).
64
+
65
+ <tt>:include</tt> hashes also accept their own <tt>:conditions</tt> key. You can use this if you need custom WHERE conditions for this particular association (e.g., for this JOIN).
66
+
67
+ The keys <tt>:facet</tt>, <tt>:sortable</tt>, <tt>:class_name</tt>, <tt>:association_sql</tt>, and <tt>:function_sql</tt> are also recognized.
68
+
69
+ == Concatenating several fields within one record
70
+
71
+ Use the <tt>:concatenate</tt> key.
72
+
73
+ Accepts an array of option hashes.
74
+
75
+ To concatenate several fields within one record as a combined field, use a regular (or lateral) concatenation. Regular concatenations contain a <tt>:fields</tt> key (again, an array of field names), and a mandatory <tt>:as</tt> key (the name of the result of the concatenation). For example, to concatenate the <tt>title</tt> and <tt>body</tt> into one field called <tt>text</tt>:
76
+ :concatenate => [{:fields => ['title', 'body'], :as => 'text'}]
77
+
78
+ The keys <tt>:facet</tt>, <tt>:sortable</tt>, <tt>:conditions</tt>, <tt>:function_sql</tt>, <tt>:class_name</tt>, and <tt>:association_sql</tt> are also recognized.
79
+
80
+ Lateral concatenations are implemented with CONCAT_WS on MySQL and with a stored procedure on PostgreSQL.
81
+
82
+ == Concatenating the same field from a set of associated records
83
+
84
+ Also use the <tt>:concatenate</tt> key.
85
+
86
+ To concatenate one field from a set of associated records as a combined field in the parent record, use a group (or vertical) concatenation. A group concatenation should contain an <tt>:association_name</tt> key (the association name for the included model), a <tt>:field</tt> key (the field on the included model to concatenate), and an optional <tt>:as</tt> key (also the name of the result of the concatenation). For example, to concatenate all <tt>Post#body</tt> contents into the parent's <tt>responses</tt> field:
87
+ :concatenate => [{:association_name => 'posts', :field => 'body', :as => 'responses'}]
88
+
89
+ The keys <tt>:facet</tt>, <tt>:sortable</tt>, <tt>:order</tt>, <tt>:conditions</tt>, <tt>:function_sql</tt>, <tt>:class_name</tt>, and <tt>:association_sql</tt> are also recognized.
90
+
91
+ Vertical concatenations are implemented with GROUP_CONCAT on MySQL and with an aggregate and a stored procedure on PostgreSQL. Note that <tt>:order</tt> is useful if you need to order the grouping so that proximity search works correctly, and <tt>:conditions</tt> are currently ignored if you have <tt>:association_sql</tt> defined.
92
+
93
+ == Custom joins
94
+
95
+ <tt>:include</tt> and <tt>:concatenate</tt> accept an <tt>:association_sql</tt> key. You can use this if you need to pass a custom JOIN string (for example, a double JOIN for a <tt>has_many :through</tt>). If <tt>:association_sql</tt> is present, the default JOIN for <tt>belongs_to</tt> will not be generated.
96
+
97
+ Also, if you want to include a model that you don't have an actual ActiveRecord association for, you can use <tt>:association_sql</tt> combined with <tt>:class_name</tt> instead of <tt>:association_name</tt>. <tt>:class_name</tt> should be camelcase.
98
+
99
+ Ultrasphinx is not an object-relational mapper, and the association generation is intended to stay minimal--don't be afraid of <tt>:association_sql</tt>.
100
+
101
+ == Enabling delta indexing
102
+
103
+ Use the <tt>:delta</tt> key.
104
+
105
+ Accepts <tt>true</tt>, a column name string, or a hash with a <tt>:field</tt> key.
106
+
107
+ If you pass <tt>true</tt>, the <tt>updated_at</tt> column will be used for choosing the delta records, if it exists. If it doesn't exist, the entire table will be reindexed at every delta. Example:
108
+
109
+ :delta => true
110
+
111
+ If you need to use a non-default column name, use a hash:
112
+
113
+ :delta => {:field => 'created_at'}
114
+
115
+ Note that the column type must be time-comparable in the DB. Also note that faceting may return higher counts than actually exist on delta-indexed tables, and that sorting by string columns will not work well. These are both limitations of Sphinx's index merge scheme. You can perhaps mitigate the issues by only searching the main index for facets or sorts:
116
+
117
+ Ultrasphinx::Search.new(:query => "query", :indexes => Ultrasphinx::MAIN_INDEX)
118
+
119
+ The date range of the delta include is set in the <tt>.base</tt> file.
120
+
121
+ = Examples
122
+
123
+ == Complex configuration
124
+
125
+ Here's an example configuration using most of the options, taken from production code:
126
+
127
+ class Story < ActiveRecord::Base
128
+ is_indexed :fields => [
129
+ 'title',
130
+ 'published_at',
131
+ {:field => 'author', :facet => true}
132
+ ],
133
+ :include => [
134
+ {:association_name => 'category', :field => 'name', :as => 'category_name'}
135
+ ],
136
+ :concatenate => [
137
+ {:fields => ['title', 'long_description', 'short_description'],
138
+ :as => 'editorial'},
139
+ {:association_name => 'pages', :field => 'body', :as => 'body'},
140
+ {:association_name => 'comments', :field => 'body', :as => 'comments',
141
+ :conditions => "comments.item_type = '#{base_class}'"}
142
+ ],
143
+ :delta => {:field => 'published_at'},
144
+ :conditions => self.live_condition_string
145
+ end
146
+
147
+ Note how setting the <tt>:conditions</tt> on Comment is enough to configure a polymorphic <tt>has_many</tt>.
148
+
149
+ == Association scoping
150
+
151
+ A common use case is to only search records that belong to a particular parent model. Ultrasphinx configures Sphinx to support a <tt>:filters</tt> element on any date or numeric field, so any <tt>*_id</tt> fields you have will be filterable.
152
+
153
+ For example, say a Company <tt>has_many :users</tt> and each User <tt>has_many :articles</tt>. If you want to filter Articles by Company, add <tt>company_id</tt> to the Article's <tt>is_indexed</tt> method. The best way is to grab it from the User association:
154
+
155
+ class Article < ActiveRecord::Base
156
+ is_indexed :include => [{:association_name => 'users', :field => 'company_id'}]
157
+ end
158
+
159
+ Now you can run:
160
+
161
+ @search = Ultrasphinx::Search.new(:query => 'something',
162
+ :filters => {'company_id' => 493})
163
+
164
+ If the associations weren't just <tt>has_many</tt> and <tt>belongs_to</tt>, you would need to use the <tt>:association_sql</tt> key to set up a custom JOIN.
165
+
166
+ =end
167
+
168
def self.is_indexed opts = {}
  # Validates and normalizes the indexing options for this model, then stores
  # them in Ultrasphinx::MODEL_CONFIGURATION under the model's class name.
  # (These notes live inside the method body so that RDoc keeps attaching the
  # =begin rdoc block that precedes the method.)
  #
  # opts - Hash of options; the keys are stringified in place and must be one
  #        of: fields, concatenate, conditions, include, delta, order.
  #
  # Raises Ultrasphinx::ConfigurationError for a malformed :concatenate entry
  # or an unsupported :delta value. Unknown keys are rejected by
  # assert_valid_keys.
  opts.stringify_keys!
  opts.assert_valid_keys ['fields', 'concatenate', 'conditions', 'include', 'delta', 'order']

  # Kernel#Array calls to_a on a Hash, which would explode a lone option hash
  # such as :include => {:association_name => 'category', ...} into
  # [key, value] pairs. Wrap a single hash instead so it is treated as a
  # one-element list; nil still becomes [] and arrays pass through untouched.
  listify = lambda { |value| value.is_a?(Hash) ? [value] : Array(value) }

  # Single options
  #
  # 'conditions' and 'order' need no normalization; they are stored as given.

  if opts['delta']
    case opts['delta']
    when true
      # Default delta column.
      opts['delta'] = {'field' => 'updated_at'}
    when String, Symbol
      # Bare column name shorthand.
      opts['delta'] = {'field' => opts['delta'].to_s}
    end

    unless opts['delta'].is_a? Hash
      raise Ultrasphinx::ConfigurationError, "The :delta key must be true, a field name, or a hash with a :field key"
    end

    # _stringify_all! is a Hash extension defined outside this file;
    # presumably it stringifies keys and values, skipping the values of any
    # keys passed as arguments -- TODO confirm against the Ultrasphinx core
    # extensions.
    opts['delta']._stringify_all!
    opts['delta'].assert_valid_keys ['field']
  end

  # Enumerable options

  opts['fields'] = listify.call(opts['fields'])
  opts['concatenate'] = listify.call(opts['concatenate'])
  opts['include'] = listify.call(opts['include'])

  opts['fields'].map! do |entry|
    if entry.is_a? Hash
      entry._stringify_all!('sortable', 'facet')
      entry.assert_valid_keys ['field', 'as', 'facet', 'function_sql', 'sortable', 'table_alias']
      entry
    else
      # Single strings (or symbols) become a minimal field hash
      {'field' => entry.to_s}
    end
  end

  opts['concatenate'].each do |entry|
    entry._stringify_all!('fields', 'sortable', 'facet')

    entry.assert_valid_keys ['class_name', 'association_name', 'conditions', 'field', 'as', 'fields', 'association_sql', 'facet', 'function_sql', 'sortable', 'order', 'table_alias']
    # A regular (lateral) concatenation uses 'fields'; a group (vertical)
    # concatenation uses 'field' plus an association or class name.
    raise Ultrasphinx::ConfigurationError, "You can't mix regular concat and group concats" if entry['fields'] and (entry['field'] or entry['class_name'] or entry['association_name'])
    raise Ultrasphinx::ConfigurationError, "Concatenations must specify an :as key" unless entry['as']
    raise Ultrasphinx::ConfigurationError, "Group concatenations must not have multiple fields" if entry['field'].is_a? Array
    raise Ultrasphinx::ConfigurationError, "Regular concatenations should have multiple fields" if entry['fields'] and !entry['fields'].is_a?(Array)
    raise Ultrasphinx::ConfigurationError, "Regular concatenations can't specify an order" if entry['fields'] and entry['order']

    entry['fields'].map!(&:to_s) if entry['fields'] # Stringify fields array
  end

  opts['include'].each do |entry|
    entry._stringify_all!('sortable', 'facet')
    entry.assert_valid_keys ['class_name', 'association_name', 'field', 'as', 'association_sql', 'facet', 'function_sql', 'sortable', 'table_alias']
  end

  Ultrasphinx::MODEL_CONFIGURATION[self.name] = opts
end
226
+ end
227
+ end
@@ -0,0 +1,35 @@
1
+
2
+ /* http://osdir.com/ml/db.postgresql.admIN/2003-08/msg00057.html */
3
+
4
-- MAKE_CONCAT_WS() generates a family of CONCAT_WS(separator, value, ...)
-- overloads taking 3 to v_args (32) text arguments in total.
--
-- The 3-argument base case is written out by hand. Every wider overload is
-- generated as
--   CONCAT_WS($1, CONCAT_WS($1, $2, ..., $(n-1)), $n)
-- so it delegates to the next narrower overload.
--
-- Base case semantics: a NULL separator yields NULL; a NULL value on either
-- side is skipped and the other value is returned unchanged (previously a
-- NULL first value made the whole result NULL and discarded the second
-- value); two NULL values yield NULL.
--
-- The function body below is a single-quoted string, so comments inside it
-- must not contain quote characters.
CREATE FUNCTION MAKE_CONCAT_WS() RETURNS text AS '
declare
  v_args int := 32;
  v_first text := ''CREATE FUNCTION CONCAT_WS(text,text,text) RETURNS text AS ''''SELECT CASE WHEN $1 IS NULL THEN NULL WHEN $2 IS NULL THEN $3 WHEN $3 IS NULL THEN $2 ELSE $2 || $1 || $3 END'''' LANGUAGE sql IMMUTABLE'';
  v_part1 text := ''CREATE FUNCTION CONCAT_WS(text,text'';
  v_part2 text := '') RETURNS text AS ''''SELECT CONCAT_WS($1,CONCAT_WS($1,$2'';
  v_part3 text := '')'''' LANGUAGE sql IMMUTABLE'';
  v_sql text;

BEGIN
  -- Hand-written 3-argument base case.
  EXECUTE v_first;

  -- One generated overload per total argument count i.
  FOR i IN 4 .. v_args loop
    -- Signature: two text arguments from v_part1 plus one per j.
    v_sql := v_part1;
    FOR j IN 3 .. i loop
      v_sql := v_sql || '',text'';
    END loop;

    v_sql := v_sql || v_part2;

    -- Inner call receives every value except the last one.
    FOR j IN 3 .. i - 1 loop
      v_sql := v_sql || '',$'' || j::text;
    END loop;
    -- Close the inner call and append the last value to the outer call.
    v_sql := v_sql || ''),$'' || i::text;

    v_sql := v_sql || v_part3;
    EXECUTE v_sql;
  END loop;
  RETURN ''OK'';
END;
' LANGUAGE 'plpgsql';

-- Run the generator once so the CONCAT_WS overloads exist.
SELECT MAKE_CONCAT_WS();
@@ -0,0 +1,15 @@
1
+
2
+ /* Fake CRC32 */
3
+
4
-- Stand-in for CRC32: returns the first 32 bits of MD5($1) as an unsigned
-- value held in a bigint. Relies on hex_to_int(), which is created by a
-- separate script.
CREATE FUNCTION crc32(text)
RETURNS bigint AS $$
DECLARE
  digest_word bigint;
BEGIN
  -- The first 8 hex digits of the MD5 digest are 32 bits. hex_to_int returns
  -- a signed int4, so values with the top bit set come back negative.
  digest_word := hex_to_int(SUBSTRING(MD5($1) FROM 1 FOR 8))::bigint;

  IF digest_word >= 0 THEN
    RETURN digest_word;
  END IF;

  -- Shift negative values up by 2^32 into the unsigned 32-bit range.
  RETURN 4294967296 + digest_word;
END
$$ IMMUTABLE STRICT LANGUAGE plpgsql;
@@ -0,0 +1,23 @@
1
+
2
+ /*
3
+ mysqlcompat-1.0b3
4
+ public domain
5
+ GROUP_CONCAT()
6
+ Note: For DISTINCT and ORDER BY a subquery is required
7
+ */
8
+
9
-- State transition function for group_concat: appends the next value ($2)
-- to the accumulated text ($1), separated by a single space.
-- If either side is NULL the concatenation is NULL, so COALESCE falls back
-- to whichever argument is present (or NULL when both are NULL).
CREATE FUNCTION _group_concat(text, text)
RETURNS text
LANGUAGE SQL
IMMUTABLE
AS $$
  SELECT COALESCE(
    $1 operator(pg_catalog.||) ' ' operator(pg_catalog.||) $2,
    $1,
    $2
  )
$$;

-- Aggregate that folds text values together with _group_concat.
-- No initial condition is given, so the state starts out NULL.
CREATE AGGREGATE group_concat (
  SFUNC = _group_concat,
  STYPE = text,
  BASETYPE = text
);
23
+
@@ -0,0 +1,15 @@
1
+
2
-- hex_to_int(hex) converts a string of hexadecimal digits to int4 by
-- evaluating the bit-string literal x<hex> cast to int4 through dynamic SQL.
-- The result is a signed 32-bit integer.
--
-- The argument is passed through quote_literal() before it is spliced into
-- the dynamic statement, so a value containing quote characters can no longer
-- terminate the literal and inject additional SQL.
--
-- The function body below is a single-quoted string, so comments inside it
-- must not contain quote characters.
CREATE FUNCTION hex_to_int(varchar) RETURNS int4 AS '
DECLARE
  h alias for $1;
  exec varchar;
  curs refcursor;
  res int;
BEGIN
  -- Builds: SELECT x<quoted hex digits>::int4
  exec := ''SELECT x'' || quote_literal(h) || ''::int4'';
  OPEN curs FOR EXECUTE exec;
  FETCH curs INTO res;
  CLOSE curs;
  return res;
END;'
LANGUAGE 'plpgsql' IMMUTABLE STRICT;
@@ -0,0 +1 @@
1
+ CREATE LANGUAGE 'plpgsql';
@@ -0,0 +1,12 @@
1
+
2
+ /*
3
+ mysqlcompat-1.0b3
4
+ public domain
5
+ modified
6
+ UNIX_TIMESTAMP(date)
7
+ */
8
+
9
-- MySQL-compatible UNIX_TIMESTAMP(date): returns the epoch seconds of a
-- timestamp without time zone, cast to bigint.
-- NOTE(review): declared VOLATILE as in the original script; whether a
-- stricter volatility class is safe depends on the PostgreSQL version in
-- use -- confirm before changing.
CREATE FUNCTION unix_timestamp(timestamp without time zone)
RETURNS bigint
LANGUAGE SQL
VOLATILE
AS $$
  SELECT CAST(EXTRACT(EPOCH FROM $1) AS bigint)
$$;