centostrano 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +215 -0
- data/COPYING +19 -0
- data/LICENSE +339 -0
- data/README +166 -0
- data/THANKS +5 -0
- data/bin/centify +137 -0
- data/docs/ANNOUNCE.deprec2 +47 -0
- data/docs/README.install +88 -0
- data/docs/README.nagios +28 -0
- data/docs/README.rails +20 -0
- data/docs/README.svn +31 -0
- data/docs/ROADMAP.txt +74 -0
- data/docs/deprec-1.x/deprec-1.x.quickstart +50 -0
- data/docs/deprec-1.x/notes.txt +12 -0
- data/docs/deprec_banner.gif +0 -0
- data/lib/centostrano.rb +9 -0
- data/lib/deprec/capistrano_extensions.rb +391 -0
- data/lib/deprec/centostrano.rb +46 -0
- data/lib/deprec/recipes/apache.rb +188 -0
- data/lib/deprec/recipes/canonical.rb +57 -0
- data/lib/deprec/recipes/deprec.rb +188 -0
- data/lib/deprec/recipes/deprecated.rb +71 -0
- data/lib/deprec/recipes/example.rb +115 -0
- data/lib/deprec/recipes/git.rb +106 -0
- data/lib/deprec/recipes/gitosis.rb +134 -0
- data/lib/deprec/recipes/logrotate.rb +54 -0
- data/lib/deprec/recipes/memcache.rb +53 -0
- data/lib/deprec/recipes/merb.rb +57 -0
- data/lib/deprec/recipes/mongrel.rb +220 -0
- data/lib/deprec/recipes/monit.rb +139 -0
- data/lib/deprec/recipes/mysql.rb +147 -0
- data/lib/deprec/recipes/nginx.rb +172 -0
- data/lib/deprec/recipes/postgresql.rb +132 -0
- data/lib/deprec/recipes/rails.rb +297 -0
- data/lib/deprec/recipes/ruby.rb +71 -0
- data/lib/deprec/recipes/sphinx.rb +89 -0
- data/lib/deprec/recipes/ssh.rb +93 -0
- data/lib/deprec/recipes/svn.rb +167 -0
- data/lib/deprec/recipes/users.rb +90 -0
- data/lib/deprec/recipes.rb +33 -0
- data/lib/deprec/templates/aoe/aoe-init +55 -0
- data/lib/deprec/templates/aoe/fence_aoemask +351 -0
- data/lib/deprec/templates/apache/httpd-vhost-app.conf.erb +144 -0
- data/lib/deprec/templates/apache/httpd.conf +465 -0
- data/lib/deprec/templates/apache/index.html.erb +37 -0
- data/lib/deprec/templates/apache/master.css +72 -0
- data/lib/deprec/templates/centos/repository.erb +6 -0
- data/lib/deprec/templates/coraid/aoe-init +55 -0
- data/lib/deprec/templates/deprec/caprc.erb +14 -0
- data/lib/deprec/templates/heartbeat/authkeys.erb +2 -0
- data/lib/deprec/templates/heartbeat/ha.cf.erb +15 -0
- data/lib/deprec/templates/heartbeat/haresources.erb +1 -0
- data/lib/deprec/templates/logrotate/logrotate.conf.erb +32 -0
- data/lib/deprec/templates/mongrel/logrotate.conf.erb +11 -0
- data/lib/deprec/templates/mongrel/logrotate.erb +0 -0
- data/lib/deprec/templates/mongrel/mongrel_cluster-init-script +54 -0
- data/lib/deprec/templates/mongrel/mongrel_cluster.logrotate.d +14 -0
- data/lib/deprec/templates/mongrel/mongrel_cluster.yml.erb +10 -0
- data/lib/deprec/templates/mongrel/monit.conf.erb +17 -0
- data/lib/deprec/templates/monit/monit-init-script +104 -0
- data/lib/deprec/templates/monit/monitrc.erb +227 -0
- data/lib/deprec/templates/monit/nothing +0 -0
- data/lib/deprec/templates/mysql/create_databases.sql +20 -0
- data/lib/deprec/templates/mysql/database.yml.prod +6 -0
- data/lib/deprec/templates/mysql/database.yml.stage +6 -0
- data/lib/deprec/templates/mysql/my.cnf.erb +140 -0
- data/lib/deprec/templates/mysql/sphinx.conf.prod +542 -0
- data/lib/deprec/templates/mysql/sphinx.conf.stage +542 -0
- data/lib/deprec/templates/nagios/cgi.cfg.erb +321 -0
- data/lib/deprec/templates/nagios/commands.cfg.erb +240 -0
- data/lib/deprec/templates/nagios/contacts.cfg.erb +57 -0
- data/lib/deprec/templates/nagios/hosts.cfg.erb +143 -0
- data/lib/deprec/templates/nagios/htpasswd.users +1 -0
- data/lib/deprec/templates/nagios/localhost.cfg.erb +157 -0
- data/lib/deprec/templates/nagios/nagios.cfg.erb +1274 -0
- data/lib/deprec/templates/nagios/nagios_apache_vhost.conf.erb +45 -0
- data/lib/deprec/templates/nagios/nrpe.cfg.erb +210 -0
- data/lib/deprec/templates/nagios/nrpe.xinetd.erb +16 -0
- data/lib/deprec/templates/nagios/resource.cfg.erb +34 -0
- data/lib/deprec/templates/nagios/services.cfg.erb +79 -0
- data/lib/deprec/templates/nagios/templates.cfg.erb +9 -0
- data/lib/deprec/templates/nagios/timeperiods.cfg.erb +94 -0
- data/lib/deprec/templates/network/hostname.erb +1 -0
- data/lib/deprec/templates/network/hosts.erb +2 -0
- data/lib/deprec/templates/network/interfaces.erb +22 -0
- data/lib/deprec/templates/nginx/logrotate.conf.erb +13 -0
- data/lib/deprec/templates/nginx/logrotate.erb +0 -0
- data/lib/deprec/templates/nginx/mime.types.erb +70 -0
- data/lib/deprec/templates/nginx/nginx-init-script +109 -0
- data/lib/deprec/templates/nginx/nginx.conf.erb +120 -0
- data/lib/deprec/templates/nginx/nginx.logrotate.d +12 -0
- data/lib/deprec/templates/nginx/nothing.conf +1 -0
- data/lib/deprec/templates/nginx/rails_nginx_vhost.conf.erb +41 -0
- data/lib/deprec/templates/ntp/ntp.conf.erb +42 -0
- data/lib/deprec/templates/postfix/aliases.erb +3 -0
- data/lib/deprec/templates/postfix/dynamicmaps.cf.erb +8 -0
- data/lib/deprec/templates/postfix/main.cf.erb +41 -0
- data/lib/deprec/templates/postfix/master.cf.erb +77 -0
- data/lib/deprec/templates/postgresql/pg_hba.conf.erb +76 -0
- data/lib/deprec/templates/sphinx/monit.conf.erb +5 -0
- data/lib/deprec/templates/ssh/ssh_config.erb +50 -0
- data/lib/deprec/templates/ssh/sshd_config.erb +78 -0
- data/lib/deprec/templates/subversion/svn.apache.vhost.erb +43 -0
- data/lib/deprec/templates/trac/apache_vhost.conf.erb +24 -0
- data/lib/deprec/templates/trac/trac.ini.erb +106 -0
- data/lib/deprec/templates/trac/trac_deprec.png +0 -0
- data/lib/deprec/templates/trac/tracd-init.erb +43 -0
- data/lib/deprec/templates/xen/15-disable-hwclock +40 -0
- data/lib/deprec/templates/xen/network-bridge-wrapper +3 -0
- data/lib/deprec/templates/xen/xen-tools.conf.erb +220 -0
- data/lib/deprec/templates/xen/xend-config.sxp.erb +195 -0
- data/lib/deprec/templates/xen/xend-init.erb +69 -0
- data/lib/deprec/templates/xen/xendomains.erb +137 -0
- data/lib/deprec/templates/xen/xm.tmpl.erb +85 -0
- data/lib/deprec_cmd_completion.sh +26 -0
- data/lib/vmbuilder_plugins/all.rb +20 -0
- data/lib/vmbuilder_plugins/apt.rb +93 -0
- data/lib/vmbuilder_plugins/emerge.rb +76 -0
- data/lib/vmbuilder_plugins/gem.rb +90 -0
- data/lib/vmbuilder_plugins/std.rb +203 -0
- metadata +207 -0
@@ -0,0 +1,542 @@
|
|
1
|
+
#
|
2
|
+
# Sphinx configuration file sample
|
3
|
+
#
|
4
|
+
|
5
|
+
#############################################################################
|
6
|
+
## data source definition
|
7
|
+
#############################################################################
|
8
|
+
|
9
|
+
source src1
|
10
|
+
{
|
11
|
+
# data source type
|
12
|
+
# for now, known types are 'mysql', 'pgsql' and 'xmlpipe'
|
13
|
+
# MUST be defined
|
14
|
+
type = mysql
|
15
|
+
|
16
|
+
# whether to strip HTML
|
17
|
+
# values can be 0 (don't strip) or 1 (do strip)
|
18
|
+
# WARNING, only works with mysql source for now
|
19
|
+
# WARNING, should work ok for PERFECTLY formed XHTML for now
|
20
|
+
# WARNING, MAY MISBEHAVE on malformed everyday HTML
|
21
|
+
# optional, default is 0
|
22
|
+
strip_html = 0
|
23
|
+
|
24
|
+
# what HTML attributes to index if stripping HTML
|
25
|
+
# format is as follows:
|
26
|
+
#
|
27
|
+
# index_html_attrs = img=alt,title; a=title;
|
28
|
+
#
|
29
|
+
# optional, default is to not index anything
|
30
|
+
index_html_attrs =
|
31
|
+
|
32
|
+
#####################################################################
|
33
|
+
|
34
|
+
# some straightforward parameters for 'mysql' source type
|
35
|
+
sql_host = <%= @mysql_host %>
|
36
|
+
sql_user = <%= @username %>_db
|
37
|
+
sql_pass = <%= @mysqlpass %>
|
38
|
+
sql_db = <%= @username %>_stage
|
39
|
+
sql_port = 3306 # optional, default is 3306
|
40
|
+
|
41
|
+
# sql_sock = /tmp/mysql.sock
|
42
|
+
#
|
43
|
+
# optional
|
44
|
+
# usually '/var/lib/mysql/mysql.sock' on Linux
|
45
|
+
# usually '/tmp/mysql.sock' on FreeBSD
|
46
|
+
|
47
|
+
# pre-query, executed before the main fetch query
|
48
|
+
# useful eg. to setup encoding or mark records
|
49
|
+
# optional, default is empty
|
50
|
+
#
|
51
|
+
# sql_query_pre = SET CHARACTER_SET_RESULTS=cp1251
|
52
|
+
sql_query_pre =
|
53
|
+
|
54
|
+
# main document fetch query
|
55
|
+
#
|
56
|
+
# you can specify up to 32 (formally SPH_MAX_FIELDS in sphinx.h) fields;
|
57
|
+
# all of the fields which are not document_id or attributes (see below)
|
58
|
+
# will be full-text indexed
|
59
|
+
#
|
60
|
+
# document_id MUST be the very first field
|
61
|
+
# document_id MUST be positive (non-zero, non-negative)
|
62
|
+
# document_id MUST fit into 32 bits
|
63
|
+
# document_id MUST be unique
|
64
|
+
#
|
65
|
+
# mandatory
|
66
|
+
sql_query = \
|
67
|
+
SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
|
68
|
+
FROM documents
|
69
|
+
|
70
|
+
# query range setup
|
71
|
+
#
|
72
|
+
# useful to avoid MyISAM table locks and big result sets
|
73
|
+
# when indexing lots of data
|
74
|
+
#
|
75
|
+
# to use query ranges, you should
|
76
|
+
# 1) provide a query to fetch min/max id (ie. id range) from data set;
|
77
|
+
# 2) configure step size in which this range will be walked;
|
78
|
+
# 3) use $start and $end macros somewhere in the main fetch query.
|
79
|
+
#
|
80
|
+
# 'sql_query_range' must return exactly two integer fields
|
81
|
+
# in exactly min_id, max_id order
|
82
|
+
#
|
83
|
+
# 'sql_range_step' must be a positive integer
|
84
|
+
# optional, default is 1024
|
85
|
+
#
|
86
|
+
# 'sql_query' must contain both '$start' and '$end' macros
|
87
|
+
# if you are using query ranges (because it obviously would be an
|
88
|
+
# error to index the whole table many times)
|
89
|
+
#
|
90
|
+
# note that the intervals specified by $start/$end do not
|
91
|
+
# overlap, so you should NOT remove document ids which are exactly
|
92
|
+
# equal to $start or $end in your query
|
93
|
+
#
|
94
|
+
# here's an example which will index 'documents' table
|
95
|
+
# fetching (at most) one thousand entries at a time:
|
96
|
+
#
|
97
|
+
# sql_query_range = SELECT MIN(id),MAX(id) FROM documents
|
98
|
+
# sql_range_step = 1000
|
99
|
+
# sql_query = \
|
100
|
+
# SELECT doc.id, doc.id AS group, doc.title, doc.data \
|
101
|
+
# FROM documents doc \
|
102
|
+
# WHERE id>=$start AND id<=$end
|
103
|
+
|
104
|
+
|
105
|
+
# attribute columns
|
106
|
+
#
|
107
|
+
# attribute values MUST be positive (non-zero, non-negative) integers
|
108
|
+
# attribute values MUST fit into 32 bits
|
109
|
+
#
|
110
|
+
# attributes are additional values associated with each document which
|
111
|
+
# may be used to perform additional filtering and sorting during search.
|
112
|
+
# attributes are NOT full-text indexed; they are stored in the full text
|
113
|
+
# index as is.
|
114
|
+
#
|
115
|
+
# a good example would be a forum posts table. one might need to search
|
116
|
+
# through 'title' and 'content' fields but to limit search to specific
|
117
|
+
# values of 'author_id', or 'forum_id', or to sort by 'post_date', or to
|
118
|
+
# group matches by 'thread_id', or to group posts by month of the
|
119
|
+
# 'post_date' and provide statistics.
|
120
|
+
#
|
121
|
+
# this all can be achieved by specifying all the mentioned columns
|
122
|
+
# (excluding 'title' and 'content' which are full-text fields) as
|
123
|
+
# attributes and then using API calls to setup filtering, sorting,
|
124
|
+
# and grouping.
|
125
|
+
#
|
126
|
+
# sql_group_column is used to declare integer attributes.
|
127
|
+
#
|
128
|
+
# sql_date_column is used to declare UNIX timestamp attributes.
|
129
|
+
#
|
130
|
+
# sql_str2ordinal_column is used to declare integer attributes whose
|
131
|
+
# values are computed as ordinal numbers of corresponding column value
|
132
|
+
# in sorted list of column values. WARNING, all such string values
|
133
|
+
# are going to be stored in RAM while indexing, and "C" locale will
|
134
|
+
# be used when sorting!
|
135
|
+
#
|
136
|
+
# starting with 0.9.7, there may be multiple attribute columns specified.
|
137
|
+
# here's an example for that mentioned posts table:
|
138
|
+
#
|
139
|
+
# sql_group_column = author_id
|
140
|
+
# sql_group_column = forum_id
|
141
|
+
# sql_group_column = thread_id
|
142
|
+
# sql_date_column = post_unix_timestamp
|
143
|
+
# sql_date_column = last_edit_unix_timestamp
|
144
|
+
#
|
145
|
+
# optional, default is empty
|
146
|
+
sql_group_column = group_id
|
147
|
+
sql_date_column = date_added
|
148
|
+
# sql_str2ordinal_column = author_name
|
149
|
+
|
150
|
+
# post-query, executed at the end of the main fetch query
|
151
|
+
#
|
152
|
+
# note that indexing is NOT completed at the point when post-query
|
153
|
+
# gets executed and might very well fail
|
154
|
+
#
|
155
|
+
# optional, default is empty
|
156
|
+
sql_query_post =
|
157
|
+
|
158
|
+
# post-index-query, executed on successfully completed indexing
|
159
|
+
#
|
160
|
+
# $maxid macro is the max document ID which was actually
|
161
|
+
# fetched from the database
|
162
|
+
#
|
163
|
+
# optional, default is empty
|
164
|
+
#
|
165
|
+
# sql_query_post_index = REPLACE INTO counters ( id, val ) \
|
166
|
+
# VALUES ( 'max_indexed_id', $maxid )
|
167
|
+
|
168
|
+
|
169
|
+
# document info query
|
170
|
+
#
|
171
|
+
# ONLY used by search utility to display document information
|
172
|
+
# MUST be able to fetch document info by its id, therefore
|
173
|
+
# MUST contain '$id' macro
|
174
|
+
#
|
175
|
+
# optional, default is empty
|
176
|
+
sql_query_info = SELECT * FROM documents WHERE id=$id
|
177
|
+
|
178
|
+
#####################################################################
|
179
|
+
|
180
|
+
# demo config for 'xmlpipe' source type is a little below
|
181
|
+
#
|
182
|
+
# with xmlpipe, indexer opens a pipe to a given command,
|
183
|
+
# and then reads documents from stdin
|
184
|
+
#
|
185
|
+
# indexer expects one or more documents from xmlpipe stdin
|
186
|
+
# each document must be formatted exactly as follows:
|
187
|
+
#
|
188
|
+
# <document>
|
189
|
+
# <id>123</id>
|
190
|
+
# <group>45</group>
|
191
|
+
# <timestamp>1132223498</timestamp>
|
192
|
+
# <title>test title</title>
|
193
|
+
# <body>
|
194
|
+
# this is my document body
|
195
|
+
# </body>
|
196
|
+
# </document>
|
197
|
+
#
|
198
|
+
# timestamp element is optional, its default value is 1
|
199
|
+
# all the other elements are mandatory
|
200
|
+
|
201
|
+
# type = xmlpipe
|
202
|
+
# xmlpipe_command = cat /usr/local/var/test.xml
|
203
|
+
}
|
204
|
+
|
205
|
+
|
206
|
+
# inherited source example
|
207
|
+
#
|
208
|
+
# all the parameters are copied from the parent source,
|
209
|
+
# and may then be overridden in this source definition
|
210
|
+
source src1stripped : src1
|
211
|
+
{
|
212
|
+
strip_html = 1
|
213
|
+
}
|
214
|
+
|
215
|
+
#############################################################################
|
216
|
+
## index definition
|
217
|
+
#############################################################################
|
218
|
+
|
219
|
+
# local index example
|
220
|
+
#
|
221
|
+
# this is an index which is stored locally in the filesystem
|
222
|
+
#
|
223
|
+
# all indexing-time options (such as morphology and charsets)
|
224
|
+
# are configured per local index
|
225
|
+
index test1
|
226
|
+
{
|
227
|
+
# which document source to index
|
228
|
+
# at least one MUST be defined
|
229
|
+
#
|
230
|
+
# multiple sources MAY be specified; to do so, just add more
|
231
|
+
# "source = NAME" lines. in this case, ALL the document IDs
|
232
|
+
# in ALL the specified sources MUST be unique
|
233
|
+
source = src1
|
234
|
+
|
235
|
+
# this is path and index file name without extension
|
236
|
+
#
|
237
|
+
# indexer will append different extensions to this path to
|
238
|
+
# generate names for both permanent and temporary index files
|
239
|
+
#
|
240
|
+
# .tmp* files are temporary and can be safely removed
|
241
|
+
# if indexer fails to remove them automatically
|
242
|
+
#
|
243
|
+
# .sp* files are fulltext index data files. specifically,
|
244
|
+
# .spa contains attribute values attached to each document id
|
245
|
+
# .spd contains doclists and hitlists
|
246
|
+
# .sph contains index header (schema and other settings)
|
247
|
+
# .spi contains wordlists
|
248
|
+
#
|
249
|
+
# MUST be defined
|
250
|
+
path = /data/<%= @username %>/shared/log/indexes/test1
|
251
|
+
|
252
|
+
# docinfo (ie. per-document attribute values) storage strategy
|
253
|
+
# defines how docinfo will be stored
|
254
|
+
#
|
255
|
+
# available values are "none", "inline" and "extern"
|
256
|
+
#
|
257
|
+
# "none" means there'll be no docinfo at all (no groups/dates)
|
258
|
+
#
|
259
|
+
# "inline" means that the docinfo will be stored in the .spd
|
260
|
+
# file along with the document ID lists (doclists)
|
261
|
+
#
|
262
|
+
# "extern" means that the docinfo will be stored in the .spa
|
263
|
+
# file separately
|
264
|
+
#
|
265
|
+
# externally stored docinfo should (basically) be kept in RAM
|
266
|
+
# when querying; therefore, "inline" may be the only viable option
|
267
|
+
# for really huge (50-100+ million docs) datasets. however, for
|
268
|
+
# smaller datasets "extern" storage makes both indexing and
|
269
|
+
# searching MUCH more efficient.
|
270
|
+
#
|
271
|
+
# additional search-time memory requirements for extern storage are
|
272
|
+
#
|
273
|
+
# ( 1 + number_of_attrs )*number_of_docs*4 bytes
|
274
|
+
#
|
275
|
+
# so 10 million docs with 2 groups and 1 timestamp will take
|
276
|
+
# (1+2+1)*10M*4 = 160 MB of RAM. this is PER DAEMON, ie. searchd
|
277
|
+
# will alloc 160 MB on startup, read the data and keep it shared
|
278
|
+
# between queries; the children will NOT allocate additional
|
279
|
+
# copies of this data.
|
280
|
+
#
|
281
|
+
# default is "extern" (as most collections are smaller than 100M docs)
|
282
|
+
docinfo = extern
|
283
|
+
|
284
|
+
# morphology
|
285
|
+
#
|
286
|
+
# currently supported morphology preprocessors are Porter stemmers
|
287
|
+
# for English and Russian, and Soundex. more stemmers could be added
|
288
|
+
# at users' request.
|
289
|
+
#
|
290
|
+
# available values are "none", "stem_en", "stem_ru", "stem_enru",
|
291
|
+
# and "soundex"
|
292
|
+
#
|
293
|
+
# optional, default is "none"
|
294
|
+
#
|
295
|
+
# morphology = none
|
296
|
+
# morphology = stem_en
|
297
|
+
# morphology = stem_ru
|
298
|
+
# morphology = stem_enru
|
299
|
+
# morphology = soundex
|
300
|
+
morphology = none
|
301
|
+
|
302
|
+
# stopwords file
|
303
|
+
#
|
304
|
+
# format is plain text in whatever encoding you use
|
305
|
+
# optional, default is empty
|
306
|
+
#
|
307
|
+
# stopwords = /usr/local/var/data/stopwords.txt
|
308
|
+
stopwords =
|
309
|
+
|
310
|
+
# minimum word length
|
311
|
+
#
|
312
|
+
# only the words that are of this length and above will be indexed;
|
313
|
+
# for example, if min_word_len is 4, "the" won't be indexed,
|
314
|
+
# but "they" will be.
|
315
|
+
#
|
316
|
+
# default is 1, which (obviously) means to index everything
|
317
|
+
min_word_len = 1
|
318
|
+
|
319
|
+
# charset encoding type
|
320
|
+
#
|
321
|
+
# known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
|
322
|
+
#
|
323
|
+
# optional, default is sbcs
|
324
|
+
charset_type = sbcs
|
325
|
+
|
326
|
+
# charset definition and case folding rules "table"
|
327
|
+
#
|
328
|
+
# optional, default value depends on charset_type
|
329
|
+
#
|
330
|
+
# for now, defaults are configured to support English and Russian
|
331
|
+
# this behavior MAY change in future versions
|
332
|
+
#
|
333
|
+
# 'sbcs' default value is
|
334
|
+
# charset_table = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
|
335
|
+
#
|
336
|
+
# 'utf-8' default value is
|
337
|
+
# charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
|
338
|
+
|
339
|
+
# minimum prefix length
|
340
|
+
#
|
341
|
+
# if prefix length is positive, indexer will not only index all words,
|
342
|
+
# but all the possible prefixes (ie. word beginnings) as well
|
343
|
+
#
|
344
|
+
# for instance, "exam" query against such index will match documents
|
345
|
+
# which contain "example" word, even if they do not contain "exam"
|
346
|
+
#
|
347
|
+
# indexing prefixes will make the index grow significantly
|
348
|
+
# and could degrade search times
|
349
|
+
#
|
350
|
+
# currently there's no way to rank perfect word matches higher
|
351
|
+
# than prefix matches using only one index; you could setup two
|
352
|
+
# indexes for that
|
353
|
+
#
|
354
|
+
# default is 0, which means NOT to index prefixes
|
355
|
+
min_prefix_len = 0
|
356
|
+
|
357
|
+
# minimum infix length
|
358
|
+
#
|
359
|
+
# if infix length is positive, indexer will not only index all words,
|
360
|
+
# but all the possible infixes (ie. character subsequences starting
|
361
|
+
# anywhere inside the word) as well
|
362
|
+
#
|
363
|
+
# for instance, "amp" query against such index will match documents
|
364
|
+
# which contain "example" word, even if they do not contain "amp"
|
365
|
+
#
|
366
|
+
# indexing infixes will make the index grow significantly
|
367
|
+
# and could degrade search times
|
368
|
+
#
|
369
|
+
# currently there's no way to rank perfect word matches higher
|
370
|
+
# than infix matches using only one index; you could setup two
|
371
|
+
# indexes for that
|
372
|
+
#
|
373
|
+
# default is 0, which means NOT to index infixes
|
374
|
+
min_infix_len = 0
|
375
|
+
|
376
|
+
# n-grams length
|
377
|
+
#
|
378
|
+
# n-grams provide basic CJK support for unsegmented texts. if using
|
379
|
+
# n-grams, streams of CJK characters are indexed as n-grams. for example,
|
380
|
+
# if incoming stream is ABCDEF and n is 2, this text would be indexed
|
381
|
+
# as if it was AB BC CD DE EF.
|
382
|
+
#
|
383
|
+
# this feature is in alpha version state and only n=1 is currently
|
384
|
+
# supported; this is going to be improved.
|
385
|
+
#
|
386
|
+
# note that if search query is segmented (ie. words are separated with
|
387
|
+
# whitespace), words are in quotes and extended matching mode is used,
|
388
|
+
# then all matching documents will be returned even if their text was
|
389
|
+
# *not* segmented. in the example above, ABCDEF text will be indexed as
|
390
|
+
# A B C D E F, and "BCD" query will be transformed to "B C D" (where
|
391
|
+
# quotes is phrase matching operator), so the document will match.
|
392
|
+
#
|
393
|
+
# optional, default is 0, which means NOT to use n-grams
|
394
|
+
#
|
395
|
+
# ngram_len = 1
|
396
|
+
|
397
|
+
# n-gram characters table
|
398
|
+
#
|
399
|
+
# specifies what specific characters are subject to n-gram
|
400
|
+
# extraction. format is similar to charset_table.
|
401
|
+
#
|
402
|
+
# optional, default is empty
|
403
|
+
#
|
404
|
+
# ngram_chars = U+3000..U+2FA1F
|
405
|
+
}
|
406
|
+
|
407
|
+
|
408
|
+
# inherited index example
|
409
|
+
#
|
410
|
+
# all the parameters are copied from the parent index,
|
411
|
+
# and may then be overridden in this index definition
|
412
|
+
index test1stemmed : test1
|
413
|
+
{
|
414
|
+
path = /data/<%= @username %>/shared/log/indexes/test1stemmed
|
415
|
+
morphology = stem_en
|
416
|
+
}
|
417
|
+
|
418
|
+
|
419
|
+
# distributed index example
|
420
|
+
#
|
421
|
+
# this is a virtual index which can NOT be directly indexed,
|
422
|
+
# and only contains references to other local and/or remote indexes
|
423
|
+
#
|
424
|
+
# if searchd receives a query against this index,
|
425
|
+
# it does the following:
|
426
|
+
#
|
427
|
+
# 1) connects to all the specified remote agents,
|
428
|
+
# 2) issues the query,
|
429
|
+
# 3) searches local indexes (while the remote agents are searching),
|
430
|
+
# 4) collects remote search results,
|
431
|
+
# 5) merges all the results together (removing the duplicates),
|
432
|
+
# 6) sends the merged results to client.
|
433
|
+
#
|
434
|
+
# this index type is primarily intended to be able to split huge (100GB+)
|
435
|
+
# datasets into chunks placed on different physical servers and then search
|
436
|
+
# through those chunks in parallel, reducing response times and server load;
|
437
|
+
# it seems, however, that it could also be used to take advantage of
|
438
|
+
# multi-processor systems or to implement HA (high-availability) search.
|
439
|
+
index dist1
|
440
|
+
{
|
441
|
+
# 'distributed' index type MUST be specified
|
442
|
+
type = distributed
|
443
|
+
|
444
|
+
# local index to be searched
|
445
|
+
# there can be many local indexes configured
|
446
|
+
local = test1
|
447
|
+
local = test1stemmed
|
448
|
+
|
449
|
+
# remote agent
|
450
|
+
# multiple remote agents may be specified
|
451
|
+
# syntax is 'hostname:port:index1[,index2[,...]]'
|
452
|
+
agent = localhost:3313:remote1
|
453
|
+
agent = localhost:3314:remote2,remote3
|
454
|
+
|
455
|
+
# remote agent connection timeout, milliseconds
|
456
|
+
# optional, default is 1000 ms, ie. 1 sec
|
457
|
+
agent_connect_timeout = 1000
|
458
|
+
|
459
|
+
# remote agent query timeout, milliseconds
|
460
|
+
# optional, default is 3000 ms, ie. 3 sec
|
461
|
+
agent_query_timeout = 3000
|
462
|
+
}
|
463
|
+
|
464
|
+
#############################################################################
|
465
|
+
## indexer settings
|
466
|
+
#############################################################################
|
467
|
+
|
468
|
+
indexer
|
469
|
+
{
|
470
|
+
# memory limit
|
471
|
+
#
|
472
|
+
# may be specified in bytes (no postfix), kilobytes (mem_limit=1000K)
|
473
|
+
# or megabytes (mem_limit=10M)
|
474
|
+
#
|
475
|
+
# will grow if set unacceptably low
|
476
|
+
# will warn if set too low and potentially hurting the performance
|
477
|
+
#
|
478
|
+
# optional, default is 32M
|
479
|
+
mem_limit = 32M
|
480
|
+
}
|
481
|
+
|
482
|
+
#############################################################################
|
483
|
+
## searchd settings
|
484
|
+
#############################################################################
|
485
|
+
|
486
|
+
searchd
|
487
|
+
{
|
488
|
+
# IP address on which search daemon will bind and accept
|
489
|
+
# incoming network requests
|
490
|
+
#
|
491
|
+
# optional, default is to listen on all addresses,
|
492
|
+
# ie. address = 0.0.0.0
|
493
|
+
#
|
494
|
+
# address = 127.0.0.1
|
495
|
+
# address = 192.168.0.1
|
496
|
+
|
497
|
+
|
498
|
+
# port on which search daemon will listen
|
499
|
+
port = 3312
|
500
|
+
|
501
|
+
|
502
|
+
# log file
|
503
|
+
# searchd run info is logged here
|
504
|
+
log = /var/log/engineyard/sphinx/<%= @username %>/searchd.log
|
505
|
+
|
506
|
+
|
507
|
+
# query log file
|
508
|
+
# all the search queries are logged here
|
509
|
+
query_log = /var/log/engineyard/sphinx/<%= @username %>/searchd_query.log
|
510
|
+
|
511
|
+
|
512
|
+
# client read timeout, seconds
|
513
|
+
read_timeout = 5
|
514
|
+
|
515
|
+
|
516
|
+
# maximum amount of children to fork
|
517
|
+
# useful to control server load
|
518
|
+
max_children = 30
|
519
|
+
|
520
|
+
|
521
|
+
# a file which will contain searchd process ID
|
522
|
+
# used for different external automation scripts
|
523
|
+
# MUST be present
|
524
|
+
pid_file = /data/<%= @username %>/shared/log/searchd.pid
|
525
|
+
|
526
|
+
|
527
|
+
# maximum amount of matches this daemon would ever retrieve
|
528
|
+
# from each index and serve to client
|
529
|
+
#
|
530
|
+
# this parameter affects per-client memory and CPU usage
|
531
|
+
# (16+ bytes per match) in match sorting phase; so blindly raising
|
532
|
+
# it to 1 million is definitely NOT recommended
|
533
|
+
#
|
534
|
+
# starting from 0.9.7, it can be decreased on the fly through
|
535
|
+
# the corresponding API call; increasing is prohibited to protect
|
536
|
+
# against malicious and/or malformed requests
|
537
|
+
#
|
538
|
+
# default is 1000 (just like with Google)
|
539
|
+
max_matches = 1000
|
540
|
+
}
|
541
|
+
|
542
|
+
# --eof--
|