centostrano 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +215 -0
- data/COPYING +19 -0
- data/LICENSE +339 -0
- data/README +166 -0
- data/THANKS +5 -0
- data/bin/centify +137 -0
- data/docs/ANNOUNCE.deprec2 +47 -0
- data/docs/README.install +88 -0
- data/docs/README.nagios +28 -0
- data/docs/README.rails +20 -0
- data/docs/README.svn +31 -0
- data/docs/ROADMAP.txt +74 -0
- data/docs/deprec-1.x/deprec-1.x.quickstart +50 -0
- data/docs/deprec-1.x/notes.txt +12 -0
- data/docs/deprec_banner.gif +0 -0
- data/lib/centostrano.rb +9 -0
- data/lib/deprec/capistrano_extensions.rb +391 -0
- data/lib/deprec/centostrano.rb +46 -0
- data/lib/deprec/recipes/apache.rb +188 -0
- data/lib/deprec/recipes/canonical.rb +57 -0
- data/lib/deprec/recipes/deprec.rb +188 -0
- data/lib/deprec/recipes/deprecated.rb +71 -0
- data/lib/deprec/recipes/example.rb +115 -0
- data/lib/deprec/recipes/git.rb +106 -0
- data/lib/deprec/recipes/gitosis.rb +134 -0
- data/lib/deprec/recipes/logrotate.rb +54 -0
- data/lib/deprec/recipes/memcache.rb +53 -0
- data/lib/deprec/recipes/merb.rb +57 -0
- data/lib/deprec/recipes/mongrel.rb +220 -0
- data/lib/deprec/recipes/monit.rb +139 -0
- data/lib/deprec/recipes/mysql.rb +147 -0
- data/lib/deprec/recipes/nginx.rb +172 -0
- data/lib/deprec/recipes/postgresql.rb +132 -0
- data/lib/deprec/recipes/rails.rb +297 -0
- data/lib/deprec/recipes/ruby.rb +71 -0
- data/lib/deprec/recipes/sphinx.rb +89 -0
- data/lib/deprec/recipes/ssh.rb +93 -0
- data/lib/deprec/recipes/svn.rb +167 -0
- data/lib/deprec/recipes/users.rb +90 -0
- data/lib/deprec/recipes.rb +33 -0
- data/lib/deprec/templates/aoe/aoe-init +55 -0
- data/lib/deprec/templates/aoe/fence_aoemask +351 -0
- data/lib/deprec/templates/apache/httpd-vhost-app.conf.erb +144 -0
- data/lib/deprec/templates/apache/httpd.conf +465 -0
- data/lib/deprec/templates/apache/index.html.erb +37 -0
- data/lib/deprec/templates/apache/master.css +72 -0
- data/lib/deprec/templates/centos/repository.erb +6 -0
- data/lib/deprec/templates/coraid/aoe-init +55 -0
- data/lib/deprec/templates/deprec/caprc.erb +14 -0
- data/lib/deprec/templates/heartbeat/authkeys.erb +2 -0
- data/lib/deprec/templates/heartbeat/ha.cf.erb +15 -0
- data/lib/deprec/templates/heartbeat/haresources.erb +1 -0
- data/lib/deprec/templates/logrotate/logrotate.conf.erb +32 -0
- data/lib/deprec/templates/mongrel/logrotate.conf.erb +11 -0
- data/lib/deprec/templates/mongrel/logrotate.erb +0 -0
- data/lib/deprec/templates/mongrel/mongrel_cluster-init-script +54 -0
- data/lib/deprec/templates/mongrel/mongrel_cluster.logrotate.d +14 -0
- data/lib/deprec/templates/mongrel/mongrel_cluster.yml.erb +10 -0
- data/lib/deprec/templates/mongrel/monit.conf.erb +17 -0
- data/lib/deprec/templates/monit/monit-init-script +104 -0
- data/lib/deprec/templates/monit/monitrc.erb +227 -0
- data/lib/deprec/templates/monit/nothing +0 -0
- data/lib/deprec/templates/mysql/create_databases.sql +20 -0
- data/lib/deprec/templates/mysql/database.yml.prod +6 -0
- data/lib/deprec/templates/mysql/database.yml.stage +6 -0
- data/lib/deprec/templates/mysql/my.cnf.erb +140 -0
- data/lib/deprec/templates/mysql/sphinx.conf.prod +542 -0
- data/lib/deprec/templates/mysql/sphinx.conf.stage +542 -0
- data/lib/deprec/templates/nagios/cgi.cfg.erb +321 -0
- data/lib/deprec/templates/nagios/commands.cfg.erb +240 -0
- data/lib/deprec/templates/nagios/contacts.cfg.erb +57 -0
- data/lib/deprec/templates/nagios/hosts.cfg.erb +143 -0
- data/lib/deprec/templates/nagios/htpasswd.users +1 -0
- data/lib/deprec/templates/nagios/localhost.cfg.erb +157 -0
- data/lib/deprec/templates/nagios/nagios.cfg.erb +1274 -0
- data/lib/deprec/templates/nagios/nagios_apache_vhost.conf.erb +45 -0
- data/lib/deprec/templates/nagios/nrpe.cfg.erb +210 -0
- data/lib/deprec/templates/nagios/nrpe.xinetd.erb +16 -0
- data/lib/deprec/templates/nagios/resource.cfg.erb +34 -0
- data/lib/deprec/templates/nagios/services.cfg.erb +79 -0
- data/lib/deprec/templates/nagios/templates.cfg.erb +9 -0
- data/lib/deprec/templates/nagios/timeperiods.cfg.erb +94 -0
- data/lib/deprec/templates/network/hostname.erb +1 -0
- data/lib/deprec/templates/network/hosts.erb +2 -0
- data/lib/deprec/templates/network/interfaces.erb +22 -0
- data/lib/deprec/templates/nginx/logrotate.conf.erb +13 -0
- data/lib/deprec/templates/nginx/logrotate.erb +0 -0
- data/lib/deprec/templates/nginx/mime.types.erb +70 -0
- data/lib/deprec/templates/nginx/nginx-init-script +109 -0
- data/lib/deprec/templates/nginx/nginx.conf.erb +120 -0
- data/lib/deprec/templates/nginx/nginx.logrotate.d +12 -0
- data/lib/deprec/templates/nginx/nothing.conf +1 -0
- data/lib/deprec/templates/nginx/rails_nginx_vhost.conf.erb +41 -0
- data/lib/deprec/templates/ntp/ntp.conf.erb +42 -0
- data/lib/deprec/templates/postfix/aliases.erb +3 -0
- data/lib/deprec/templates/postfix/dynamicmaps.cf.erb +8 -0
- data/lib/deprec/templates/postfix/main.cf.erb +41 -0
- data/lib/deprec/templates/postfix/master.cf.erb +77 -0
- data/lib/deprec/templates/postgresql/pg_hba.conf.erb +76 -0
- data/lib/deprec/templates/sphinx/monit.conf.erb +5 -0
- data/lib/deprec/templates/ssh/ssh_config.erb +50 -0
- data/lib/deprec/templates/ssh/sshd_config.erb +78 -0
- data/lib/deprec/templates/subversion/svn.apache.vhost.erb +43 -0
- data/lib/deprec/templates/trac/apache_vhost.conf.erb +24 -0
- data/lib/deprec/templates/trac/trac.ini.erb +106 -0
- data/lib/deprec/templates/trac/trac_deprec.png +0 -0
- data/lib/deprec/templates/trac/tracd-init.erb +43 -0
- data/lib/deprec/templates/xen/15-disable-hwclock +40 -0
- data/lib/deprec/templates/xen/network-bridge-wrapper +3 -0
- data/lib/deprec/templates/xen/xen-tools.conf.erb +220 -0
- data/lib/deprec/templates/xen/xend-config.sxp.erb +195 -0
- data/lib/deprec/templates/xen/xend-init.erb +69 -0
- data/lib/deprec/templates/xen/xendomains.erb +137 -0
- data/lib/deprec/templates/xen/xm.tmpl.erb +85 -0
- data/lib/deprec_cmd_completion.sh +26 -0
- data/lib/vmbuilder_plugins/all.rb +20 -0
- data/lib/vmbuilder_plugins/apt.rb +93 -0
- data/lib/vmbuilder_plugins/emerge.rb +76 -0
- data/lib/vmbuilder_plugins/gem.rb +90 -0
- data/lib/vmbuilder_plugins/std.rb +203 -0
- metadata +207 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
## Monit control file
|
|
3
|
+
###############################################################################
|
|
4
|
+
##
|
|
5
|
+
## Comments begin with a '#' and extend through the end of the line. Keywords
|
|
6
|
+
## are case insensitive. All paths MUST BE FULLY QUALIFIED, starting with '/'.
|
|
7
|
+
##
|
|
8
|
+
## Below you will find examples of some frequently used statements. For
|
|
9
|
+
## information about the control file, a complete list of statements and
|
|
10
|
+
## options please have a look in the monit manual.
|
|
11
|
+
##
|
|
12
|
+
##
|
|
13
|
+
###############################################################################
|
|
14
|
+
## Global section
|
|
15
|
+
###############################################################################
|
|
16
|
+
##
|
|
17
|
+
## Start monit in the background (run as a daemon) and check services at
|
|
18
|
+
## 2-minute intervals.
|
|
19
|
+
#
|
|
20
|
+
set daemon <%= monit_check_interval %>
|
|
21
|
+
#
|
|
22
|
+
#
|
|
23
|
+
## Set syslog logging with the 'daemon' facility. If the FACILITY option is
|
|
24
|
+
## omitted, monit will use 'user' facility by default. If you want to log to
|
|
25
|
+
## a stand alone log file instead, specify the path to a log file
|
|
26
|
+
#
|
|
27
|
+
set logfile <%= monit_log %>
|
|
28
|
+
#
|
|
29
|
+
#
|
|
30
|
+
## Set the list of mail servers for alert delivery. Multiple servers may be
|
|
31
|
+
## specified using comma separator. By default monit uses port 25 - this
|
|
32
|
+
## is possible to override with the PORT option.
|
|
33
|
+
#
|
|
34
|
+
<%= '# ' unless monit_mailserver %>set mailserver <%= monit_mailserver || 'localhost' %> # primary mailserver (append a comma
|
|
35
|
+
# # to this line if you add more.)
|
|
36
|
+
# backup.bar.baz port 10025, # backup mailserver on port 10025
|
|
37
|
+
# localhost # fallback relay
|
|
38
|
+
#
|
|
39
|
+
#
|
|
40
|
+
## By default monit will drop alert events if no mail servers are available.
|
|
41
|
+
## If you want to keep the alerts for a later delivery retry, you can use the
|
|
42
|
+
## EVENTQUEUE statement. The base directory where undelivered alerts will be
|
|
43
|
+
## stored is specified by the BASEDIR option. You can limit the maximal queue
|
|
44
|
+
## size using the SLOTS option (if omitted, the queue is limited by space
|
|
45
|
+
## available in the back end filesystem).
|
|
46
|
+
#
|
|
47
|
+
# set eventqueue
|
|
48
|
+
# basedir /var/monit # set the base directory where events will be stored
|
|
49
|
+
# slots 100 # optionally limit the queue size
|
|
50
|
+
#
|
|
51
|
+
#
|
|
52
|
+
## Monit by default uses the following alert mail format:
|
|
53
|
+
##
|
|
54
|
+
## --8<--
|
|
55
|
+
## From: monit@$HOST # sender
|
|
56
|
+
## Subject: monit alert -- $EVENT $SERVICE # subject
|
|
57
|
+
##
|
|
58
|
+
## $EVENT Service $SERVICE #
|
|
59
|
+
## #
|
|
60
|
+
## Date: $DATE #
|
|
61
|
+
## Action: $ACTION #
|
|
62
|
+
## Host: $HOST # body
|
|
63
|
+
## Description: $DESCRIPTION #
|
|
64
|
+
## #
|
|
65
|
+
## Your faithful employee, #
|
|
66
|
+
## monit #
|
|
67
|
+
## --8<--
|
|
68
|
+
##
|
|
69
|
+
## You can override this message format or parts of it, such as subject
|
|
70
|
+
## or sender using the MAIL-FORMAT statement. Macros such as $DATE, etc.
|
|
71
|
+
## are expanded at runtime. For example, to override the sender:
|
|
72
|
+
#
|
|
73
|
+
<%= '# ' unless monit_mailserver %>set mail-format { from: <%= monit_mail_from %> }
|
|
74
|
+
#
|
|
75
|
+
#
|
|
76
|
+
## You can set alert recipients here who will receive alerts if/when a
|
|
77
|
+
## service defined in this file has errors. Alerts may be restricted on
|
|
78
|
+
## events by using a filter as in the second example below.
|
|
79
|
+
#
|
|
80
|
+
<% monit_alert_recipients.each do |recipient| %>
|
|
81
|
+
<%= '# ' unless monit_mailserver %>set alert <%= recipient %> # receive all alerts
|
|
82
|
+
<% end %>
|
|
83
|
+
<% monit_timeout_recipients.each do |recipient| %>
|
|
84
|
+
<%= '# ' unless monit_mailserver %>set alert <%= recipient %> only on { timeout } # receive just service-
|
|
85
|
+
# timeout alert
|
|
86
|
+
<% end %>
|
|
87
|
+
|
|
88
|
+
#
|
|
89
|
+
#
|
|
90
|
+
## Monit has an embedded web server which can be used to view status of
|
|
91
|
+
## services monitored, the current configuration, actual services parameters
|
|
92
|
+
## and manage services from a web interface.
|
|
93
|
+
#
|
|
94
|
+
<% if monit_webserver_enabled %>
|
|
95
|
+
set httpd port <%= monit_webserver_port %> and
|
|
96
|
+
use address <%= monit_webserver_address %> # listen on <%= monit_webserver_address %>
|
|
97
|
+
<% monit_webserver_allowed_hosts_and_networks.each do |host| %>
|
|
98
|
+
allow <%= host %> # allow <%= host %> to connect to the server
|
|
99
|
+
<% end %>
|
|
100
|
+
allow <%= monit_webserver_auth_user %>:<%= monit_webserver_auth_pass %> # require user <%= monit_webserver_auth_user %> with password <%= monit_webserver_auth_pass %>
|
|
101
|
+
<% end %>
|
|
102
|
+
#
|
|
103
|
+
#
|
|
104
|
+
###############################################################################
|
|
105
|
+
## Services
|
|
106
|
+
###############################################################################
|
|
107
|
+
##
|
|
108
|
+
## Check general system resources such as load average, cpu and memory
|
|
109
|
+
## usage. Each test specifies a resource, conditions and the action to be
|
|
110
|
+
## performed should a test fail.
|
|
111
|
+
#
|
|
112
|
+
# check system myhost.mydomain.tld
|
|
113
|
+
# if loadavg (1min) > 4 then alert
|
|
114
|
+
# if loadavg (5min) > 2 then alert
|
|
115
|
+
# if memory usage > 75% then alert
|
|
116
|
+
# if cpu usage (user) > 70% then alert
|
|
117
|
+
# if cpu usage (system) > 30% then alert
|
|
118
|
+
# if cpu usage (wait) > 20% then alert
|
|
119
|
+
#
|
|
120
|
+
#
|
|
121
|
+
## Check a file for existence, checksum, permissions, uid and gid. In addition
|
|
122
|
+
## to alert recipients in the global section, customized alert will be sent to
|
|
123
|
+
## additional recipients by specifying a local alert handler. The service may
|
|
124
|
+
## be grouped using the GROUP option.
|
|
125
|
+
#
|
|
126
|
+
# check file apache_bin with path /usr/local/apache/bin/httpd
|
|
127
|
+
# if failed checksum and
|
|
128
|
+
# expect the sum 8f7f419955cefa0b33a2ba316cba3659 then unmonitor
|
|
129
|
+
# if failed permission 755 then unmonitor
|
|
130
|
+
# if failed uid root then unmonitor
|
|
131
|
+
# if failed gid root then unmonitor
|
|
132
|
+
# alert security@foo.bar on {
|
|
133
|
+
# checksum, permission, uid, gid, unmonitor
|
|
134
|
+
# } with the mail-format { subject: Alarm! }
|
|
135
|
+
# group server
|
|
136
|
+
#
|
|
137
|
+
#
|
|
138
|
+
## Check that a process is running, in this case Apache, and that it responds
|
|
139
|
+
## to HTTP and HTTPS requests. Check its resource usage such as cpu and memory,
|
|
140
|
+
## and number of children. If the process is not running, monit will restart
|
|
141
|
+
## it by default. In case the service was restarted very often and the
|
|
142
|
+
## problem remains, it is possible to disable monitoring using the TIMEOUT
|
|
143
|
+
## statement. This service depends on another service (apache_bin) which
|
|
144
|
+
## is defined above.
|
|
145
|
+
#
|
|
146
|
+
# check process apache with pidfile /usr/local/apache/logs/httpd.pid
|
|
147
|
+
# start program = "/etc/init.d/httpd start"
|
|
148
|
+
# stop program = "/etc/init.d/httpd stop"
|
|
149
|
+
# if cpu > 60% for 2 cycles then alert
|
|
150
|
+
# if cpu > 80% for 5 cycles then restart
|
|
151
|
+
# if totalmem > 200.0 MB for 5 cycles then restart
|
|
152
|
+
# if children > 250 then restart
|
|
153
|
+
# if loadavg(5min) greater than 10 for 8 cycles then stop
|
|
154
|
+
# if failed host www.tildeslash.com port 80 protocol http
|
|
155
|
+
# and request "/monit/doc/next.php"
|
|
156
|
+
# then restart
|
|
157
|
+
# if failed port 443 type tcpssl protocol http
|
|
158
|
+
# with timeout 15 seconds
|
|
159
|
+
# then restart
|
|
160
|
+
# if 3 restarts within 5 cycles then timeout
|
|
161
|
+
# depends on apache_bin
|
|
162
|
+
# group server
|
|
163
|
+
#
|
|
164
|
+
#
|
|
165
|
+
## Check device permissions, uid, gid, space and inode usage. Other services,
|
|
166
|
+
## such as databases, may depend on this resource and an automatic graceful
|
|
167
|
+
## stop may be cascaded to them before the filesystem will become full and data
|
|
168
|
+
## lost.
|
|
169
|
+
#
|
|
170
|
+
# check device datafs with path /dev/sdb1
|
|
171
|
+
# start program = "/bin/mount /data"
|
|
172
|
+
# stop program = "/bin/umount /data"
|
|
173
|
+
# if failed permission 660 then unmonitor
|
|
174
|
+
# if failed uid root then unmonitor
|
|
175
|
+
# if failed gid disk then unmonitor
|
|
176
|
+
# if space usage > 80% for 5 times within 15 cycles then alert
|
|
177
|
+
# if space usage > 99% then stop
|
|
178
|
+
# if inode usage > 30000 then alert
|
|
179
|
+
# if inode usage > 99% then stop
|
|
180
|
+
# group server
|
|
181
|
+
#
|
|
182
|
+
#
|
|
183
|
+
## Check a file's timestamp. In this example, we test if a file is older
|
|
184
|
+
## than 15 minutes and assume something is wrong if it's not updated. Also,
|
|
185
|
+
## if the file size exceeds a given limit, execute a script
|
|
186
|
+
#
|
|
187
|
+
# check file database with path /data/mydatabase.db
|
|
188
|
+
# if failed permission 700 then alert
|
|
189
|
+
# if failed uid data then alert
|
|
190
|
+
# if failed gid data then alert
|
|
191
|
+
# if timestamp > 15 minutes then alert
|
|
192
|
+
# if size > 100 MB then exec "/my/cleanup/script"
|
|
193
|
+
#
|
|
194
|
+
#
|
|
195
|
+
## Check directory permission, uid and gid. An event is triggered if the
|
|
196
|
+
## directory does not belong to the user with uid 0 and gid 0. In addition,
|
|
197
|
+
## the permissions have to match the octal description of 755 (see chmod(1)).
|
|
198
|
+
#
|
|
199
|
+
# check directory bin with path /bin
|
|
200
|
+
# if failed permission 755 then unmonitor
|
|
201
|
+
# if failed uid 0 then unmonitor
|
|
202
|
+
# if failed gid 0 then unmonitor
|
|
203
|
+
#
|
|
204
|
+
#
|
|
205
|
+
## Check a remote host network services availability using a ping test and
|
|
206
|
+
## check response content from a web server. Up to three pings are sent and
|
|
207
|
+
## connection to a port and an application-level network check is performed.
|
|
208
|
+
#
|
|
209
|
+
# check host myserver with address 192.168.1.1
|
|
210
|
+
# if failed icmp type echo count 3 with timeout 3 seconds then alert
|
|
211
|
+
# if failed port 3306 protocol mysql with timeout 15 seconds then alert
|
|
212
|
+
# if failed url
|
|
213
|
+
# http://user:password@www.foo.bar:8080/?querystring
|
|
214
|
+
# and content == 'action="j_security_check"'
|
|
215
|
+
# then alert
|
|
216
|
+
#
|
|
217
|
+
#
|
|
218
|
+
###############################################################################
|
|
219
|
+
## Includes
|
|
220
|
+
###############################################################################
|
|
221
|
+
##
|
|
222
|
+
## It is possible to include additional configuration parts from other files or
|
|
223
|
+
## directories.
|
|
224
|
+
#
|
|
225
|
+
include /etc/monit.d/*
|
|
226
|
+
#
|
|
227
|
+
#
|
|
File without changes
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
create database <%= @username %>_prod;
|
|
2
|
+
GRANT USAGE ON *.* TO '<%= @username %>_db'@'%';
|
|
3
|
+
|
|
4
|
+
GRANT ALL PRIVILEGES
|
|
5
|
+
ON <%= @username %>_prod.*
|
|
6
|
+
TO <%= @username %>_db@'%'
|
|
7
|
+
IDENTIFIED BY '<%= @mysqlpass %>';
|
|
8
|
+
|
|
9
|
+
FLUSH PRIVILEGES;
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
create database <%= @username %>_stage;
|
|
13
|
+
GRANT USAGE ON *.* TO '<%= @username %>_db'@'%';
|
|
14
|
+
|
|
15
|
+
GRANT ALL PRIVILEGES
|
|
16
|
+
ON <%= @username %>_stage.*
|
|
17
|
+
TO <%= @username %>_db@'%'
|
|
18
|
+
IDENTIFIED BY '<%= @mysqlpass %>';
|
|
19
|
+
|
|
20
|
+
FLUSH PRIVILEGES;
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
#
|
|
2
|
+
# The MySQL database server configuration file.
|
|
3
|
+
#
|
|
4
|
+
# You can copy this to one of:
|
|
5
|
+
# - "/etc/mysql/my.cnf" to set global options,
|
|
6
|
+
# - "~/.my.cnf" to set user-specific options.
|
|
7
|
+
#
|
|
8
|
+
# One can use all long options that the program supports.
|
|
9
|
+
# Run program with --help to get a list of available options and with
|
|
10
|
+
# --print-defaults to see which it would actually understand and use.
|
|
11
|
+
#
|
|
12
|
+
# For explanations see
|
|
13
|
+
# http://dev.mysql.com/doc/mysql/en/server-system-variables.html
|
|
14
|
+
|
|
15
|
+
# This will be passed to all mysql clients
|
|
16
|
+
# It has been reported that passwords should be enclosed with ticks/quotes
|
|
17
|
+
# especially if they contain "#" chars...
|
|
18
|
+
# Remember to edit /etc/mysql/debian.cnf when changing the socket location.
|
|
19
|
+
[client]
|
|
20
|
+
port = 3306
|
|
21
|
+
socket = /var/run/mysqld/mysqld.sock
|
|
22
|
+
|
|
23
|
+
# Here are entries for some specific programs
|
|
24
|
+
# The following values assume you have at least 32M ram
|
|
25
|
+
|
|
26
|
+
# This was formerly known as [safe_mysqld]. Both versions are currently parsed.
|
|
27
|
+
[mysqld_safe]
|
|
28
|
+
socket = /var/run/mysqld/mysqld.sock
|
|
29
|
+
nice = 0
|
|
30
|
+
|
|
31
|
+
[mysqld]
|
|
32
|
+
#
|
|
33
|
+
# * Basic Settings
|
|
34
|
+
#
|
|
35
|
+
user = mysql
|
|
36
|
+
pid-file = /var/run/mysqld/mysqld.pid
|
|
37
|
+
socket = /var/run/mysqld/mysqld.sock
|
|
38
|
+
port = 3306
|
|
39
|
+
basedir = /usr
|
|
40
|
+
datadir = /var/lib/mysql
|
|
41
|
+
tmpdir = /tmp
|
|
42
|
+
language = /usr/share/mysql/english
|
|
43
|
+
skip-external-locking
|
|
44
|
+
#
|
|
45
|
+
# Instead of skip-networking the default is now to listen only on
|
|
46
|
+
# localhost which is more compatible and is not less secure.
|
|
47
|
+
bind-address = 127.0.0.1
|
|
48
|
+
#
|
|
49
|
+
# * Fine Tuning
|
|
50
|
+
#
|
|
51
|
+
key_buffer = 16M
|
|
52
|
+
max_allowed_packet = 16M
|
|
53
|
+
thread_stack = 128K
|
|
54
|
+
thread_cache_size = 8
|
|
55
|
+
#max_connections = 100
|
|
56
|
+
#table_cache = 64
|
|
57
|
+
#thread_concurrency = 10
|
|
58
|
+
#
|
|
59
|
+
# * Query Cache Configuration
|
|
60
|
+
#
|
|
61
|
+
query_cache_limit = 1M
|
|
62
|
+
query_cache_size = 16M
|
|
63
|
+
#
|
|
64
|
+
# * Logging and Replication
|
|
65
|
+
#
|
|
66
|
+
# Both locations get rotated by the cronjob.
|
|
67
|
+
# Be aware that this log type is a performance killer.
|
|
68
|
+
#log = /var/log/mysql/mysql.log
|
|
69
|
+
#
|
|
70
|
+
# Error logging goes to syslog. This is a Debian improvement :)
|
|
71
|
+
#
|
|
72
|
+
# Here you can see queries with especially long duration
|
|
73
|
+
#log_slow_queries = /var/log/mysql/mysql-slow.log
|
|
74
|
+
#long_query_time = 2
|
|
75
|
+
#log-queries-not-using-indexes
|
|
76
|
+
#
|
|
77
|
+
# The following can be used as easy to replay backup logs or for replication.
|
|
78
|
+
# note: if you are setting up a replication slave, see README.Debian about
|
|
79
|
+
# other settings you may need to change.
|
|
80
|
+
#server-id = 1
|
|
81
|
+
log_bin = /var/log/mysql/mysql-bin.log
|
|
82
|
+
# WARNING: Using expire_logs_days without bin_log crashes the server! See README.Debian!
|
|
83
|
+
expire_logs_days = 10
|
|
84
|
+
max_binlog_size = 100M
|
|
85
|
+
#binlog_do_db = include_database_name
|
|
86
|
+
#binlog_ignore_db = include_database_name
|
|
87
|
+
#
|
|
88
|
+
# * BerkeleyDB
|
|
89
|
+
#
|
|
90
|
+
# Using BerkeleyDB is now discouraged as its support will cease in 5.1.12.
|
|
91
|
+
skip-bdb
|
|
92
|
+
#
|
|
93
|
+
# * InnoDB
|
|
94
|
+
#
|
|
95
|
+
# InnoDB is enabled by default with a 10MB datafile in /var/lib/mysql/.
|
|
96
|
+
# Read the manual for more InnoDB related options. There are many!
|
|
97
|
+
# You might want to disable InnoDB to shrink the mysqld process by circa 100MB.
|
|
98
|
+
#skip-innodb
|
|
99
|
+
#
|
|
100
|
+
# * Security Features
|
|
101
|
+
#
|
|
102
|
+
# Read the manual, too, if you want chroot!
|
|
103
|
+
# chroot = /var/lib/mysql/
|
|
104
|
+
#
|
|
105
|
+
# For generating SSL certificates I recommend the OpenSSL GUI "tinyca".
|
|
106
|
+
#
|
|
107
|
+
# ssl-ca=/etc/mysql/cacert.pem
|
|
108
|
+
# ssl-cert=/etc/mysql/server-cert.pem
|
|
109
|
+
# ssl-key=/etc/mysql/server-key.pem
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
[mysqldump]
|
|
114
|
+
quick
|
|
115
|
+
quote-names
|
|
116
|
+
max_allowed_packet = 16M
|
|
117
|
+
|
|
118
|
+
[mysql]
|
|
119
|
+
#no-auto-rehash # faster start of mysql but no tab completion
|
|
120
|
+
|
|
121
|
+
[isamchk]
|
|
122
|
+
key_buffer = 16M
|
|
123
|
+
|
|
124
|
+
#
|
|
125
|
+
# * NDB Cluster
|
|
126
|
+
#
|
|
127
|
+
# See /usr/share/doc/mysql-server-*/README.Debian for more information.
|
|
128
|
+
#
|
|
129
|
+
# The following configuration is read by the NDB Data Nodes (ndbd processes)
|
|
130
|
+
# not from the NDB Management Nodes (ndb_mgmd processes).
|
|
131
|
+
#
|
|
132
|
+
# [MYSQL_CLUSTER]
|
|
133
|
+
# ndb-connectstring=127.0.0.1
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
#
|
|
137
|
+
# * IMPORTANT: Additional settings that can override those from this file!
|
|
138
|
+
#
|
|
139
|
+
!includedir /etc/mysql/conf.d/
|
|
140
|
+
|
|
@@ -0,0 +1,542 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Sphinx configuration file sample
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
#############################################################################
|
|
6
|
+
## data source definition
|
|
7
|
+
#############################################################################
|
|
8
|
+
|
|
9
|
+
source src1
|
|
10
|
+
{
|
|
11
|
+
# data source type
|
|
12
|
+
# for now, known types are 'mysql', 'pgsql' and 'xmlpipe'
|
|
13
|
+
# MUST be defined
|
|
14
|
+
type = mysql
|
|
15
|
+
|
|
16
|
+
# whether to strip HTML
|
|
17
|
+
# values can be 0 (don't strip) or 1 (do strip)
|
|
18
|
+
# WARNING, only works with mysql source for now
|
|
19
|
+
# WARNING, should work ok for PERFECTLY formed XHTML for now
|
|
20
|
+
# WARNING, POSSIBLE TO BUG on malformed everyday HTML
|
|
21
|
+
# optional, default is 0
|
|
22
|
+
strip_html = 0
|
|
23
|
+
|
|
24
|
+
# what HTML attributes to index if stripping HTML
|
|
25
|
+
# format is as follows:
|
|
26
|
+
#
|
|
27
|
+
# index_html_attrs = img=alt,title; a=title;
|
|
28
|
+
#
|
|
29
|
+
# optional, default is to not index anything
|
|
30
|
+
index_html_attrs =
|
|
31
|
+
|
|
32
|
+
#####################################################################
|
|
33
|
+
|
|
34
|
+
# some straightforward parameters for 'mysql' source type
|
|
35
|
+
sql_host = <%= @mysql_host %>
|
|
36
|
+
sql_user = <%= @username %>_db
|
|
37
|
+
sql_pass = <%= @mysqlpass %>
|
|
38
|
+
sql_db = <%= @username %>_prod
|
|
39
|
+
sql_port = 3306 # optional, default is 3306
|
|
40
|
+
|
|
41
|
+
# sql_sock = /tmp/mysql.sock
|
|
42
|
+
#
|
|
43
|
+
# optional
|
|
44
|
+
# usually '/var/lib/mysql/mysql.sock' on Linux
|
|
45
|
+
# usually '/tmp/mysql.sock' on FreeBSD
|
|
46
|
+
|
|
47
|
+
# pre-query, executed before the main fetch query
|
|
48
|
+
# useful eg. to setup encoding or mark records
|
|
49
|
+
# optional, default is empty
|
|
50
|
+
#
|
|
51
|
+
# sql_query_pre = SET CHARACTER_SET_RESULTS=cp1251
|
|
52
|
+
sql_query_pre =
|
|
53
|
+
|
|
54
|
+
# main document fetch query
|
|
55
|
+
#
|
|
56
|
+
# you can specify up to 32 (formally SPH_MAX_FIELDS in sphinx.h) fields;
|
|
57
|
+
# all of the fields which are not document_id or attributes (see below)
|
|
58
|
+
# will be full-text indexed
|
|
59
|
+
#
|
|
60
|
+
# document_id MUST be the very first field
|
|
61
|
+
# document_id MUST be positive (non-zero, non-negative)
|
|
62
|
+
# document_id MUST fit into 32 bits
|
|
63
|
+
# document_id MUST be unique
|
|
64
|
+
#
|
|
65
|
+
# mandatory
|
|
66
|
+
sql_query = \
|
|
67
|
+
SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
|
|
68
|
+
FROM documents
|
|
69
|
+
|
|
70
|
+
# query range setup
|
|
71
|
+
#
|
|
72
|
+
# useful to avoid MyISAM table locks and big result sets
|
|
73
|
+
# when indexing lots of data
|
|
74
|
+
#
|
|
75
|
+
# to use query ranges, you should
|
|
76
|
+
# 1) provide a query to fetch min/max id (ie. id range) from data set;
|
|
77
|
+
# 2) configure step size in which this range will be walked;
|
|
78
|
+
# 3) use $start and $end macros somewhere in the main fetch query.
|
|
79
|
+
#
|
|
80
|
+
# 'sql_query_range' must return exactly two integer fields
|
|
81
|
+
# in exactly min_id, max_id order
|
|
82
|
+
#
|
|
83
|
+
# 'sql_range_step' must be a positive integer
|
|
84
|
+
# optional, default is 1024
|
|
85
|
+
#
|
|
86
|
+
# 'sql_query' must contain both '$start' and '$end' macros
|
|
87
|
+
# if you are using query ranges (because it obviously would be an
|
|
88
|
+
# error to index the whole table many times)
|
|
89
|
+
#
|
|
90
|
+
# note that the intervals specified by $start/$end do not
|
|
91
|
+
# overlap, so you should NOT remove document ids which are exactly
|
|
92
|
+
# equal to $start or $end in your query
|
|
93
|
+
#
|
|
94
|
+
# here's an example which will index 'documents' table
|
|
95
|
+
# fetching (at most) one thousand entries at a time:
|
|
96
|
+
#
|
|
97
|
+
# sql_query_range = SELECT MIN(id),MAX(id) FROM documents
|
|
98
|
+
# sql_range_step = 1000
|
|
99
|
+
# sql_query = \
|
|
100
|
+
# SELECT doc.id, doc.id AS group, doc.title, doc.data \
|
|
101
|
+
# FROM documents doc \
|
|
102
|
+
# WHERE id>=$start AND id<=$end
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# attribute columns
|
|
106
|
+
#
|
|
107
|
+
# attribute values MUST be positive (non-zero, non-negative) integers
|
|
108
|
+
# attribute values MUST fit into 32 bits
|
|
109
|
+
#
|
|
110
|
+
# attributes are additional values associated with each document which
|
|
111
|
+
# may be used to perform additional filtering and sorting during search.
|
|
112
|
+
# attributes are NOT full-text indexed; they are stored in the full text
|
|
113
|
+
# index as is.
|
|
114
|
+
#
|
|
115
|
+
# a good example would be a forum posts table. one might need to search
|
|
116
|
+
# through 'title' and 'content' fields but to limit search to specific
|
|
117
|
+
# values of 'author_id', or 'forum_id', or to sort by 'post_date', or to
|
|
118
|
+
# group matches by 'thread_id', or to group posts by month of the
|
|
119
|
+
# 'post_date' and provide statistics.
|
|
120
|
+
#
|
|
121
|
+
# this all can be achieved by specifying all the mentioned columns
|
|
122
|
+
# (excluding 'title' and 'content' which are full-text fields) as
|
|
123
|
+
# attributes and then using API calls to setup filtering, sorting,
|
|
124
|
+
# and grouping.
|
|
125
|
+
#
|
|
126
|
+
# sql_group_column is used to declare integer attributes.
|
|
127
|
+
#
|
|
128
|
+
# sql_date_column is used to declare UNIX timestamp attributes.
|
|
129
|
+
#
|
|
130
|
+
# sql_str2ordinal_column is used to declare integer attributes which
|
|
131
|
+
# values are computed as ordinal numbers of corresponding column value
|
|
132
|
+
# in sorted list of column values. WARNING, all such strings values
|
|
133
|
+
# are going to be stored in RAM while indexing, and "C" locale will
|
|
134
|
+
# be used when sorting!
|
|
135
|
+
#
|
|
136
|
+
# starting with 0.9.7, there may be multiple attribute columns specified.
|
|
137
|
+
# here's an example for that mentioned posts table:
|
|
138
|
+
#
|
|
139
|
+
# sql_group_column = author_id
|
|
140
|
+
# sql_group_column = forum_id
|
|
141
|
+
# sql_group_column = thread_id
|
|
142
|
+
# sql_date_column = post_unix_timestamp
|
|
143
|
+
# sql_date_column = last_edit_unix_timestamp
|
|
144
|
+
#
|
|
145
|
+
# optional, default is empty
|
|
146
|
+
sql_group_column = group_id
|
|
147
|
+
sql_date_column = date_added
|
|
148
|
+
# sql_str2ordinal_column = author_name
|
|
149
|
+
|
|
150
|
+
# post-query, executed on the end of main fetch query
|
|
151
|
+
#
|
|
152
|
+
# note that indexing is NOT completed at the point when post-query
|
|
153
|
+
# gets executed and might very well fail
|
|
154
|
+
#
|
|
155
|
+
# optional, default is empty
|
|
156
|
+
sql_query_post =
|
|
157
|
+
|
|
158
|
+
# post-index-query, executed on successfully completed indexing
|
|
159
|
+
#
|
|
160
|
+
# $maxid macro is the max document ID which was actually
|
|
161
|
+
# fetched from the database
|
|
162
|
+
#
|
|
163
|
+
# optional, default is empty
|
|
164
|
+
#
|
|
165
|
+
# sql_query_post_index = REPLACE INTO counters ( id, val ) \
|
|
166
|
+
# VALUES ( 'max_indexed_id', $maxid )
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
# document info query
|
|
170
|
+
#
|
|
171
|
+
# ONLY used by search utility to display document information
|
|
172
|
+
# MUST be able to fetch document info by its id, therefore
|
|
173
|
+
# MUST contain '$id' macro
|
|
174
|
+
#
|
|
175
|
+
# optional, default is empty
|
|
176
|
+
sql_query_info = SELECT * FROM documents WHERE id=$id
|
|
177
|
+
|
|
178
|
+
#####################################################################
|
|
179
|
+
|
|
180
|
+
# demo config for 'xmlpipe' source type is a little below
|
|
181
|
+
#
|
|
182
|
+
# with xmlpipe, indexer opens a pipe to a given command,
|
|
183
|
+
# and then reads documents from stdin
|
|
184
|
+
#
|
|
185
|
+
# indexer expects one or more documents from xmlpipe stdin
|
|
186
|
+
# each document must be formatted exactly as follows:
|
|
187
|
+
#
|
|
188
|
+
# <document>
|
|
189
|
+
# <id>123</id>
|
|
190
|
+
# <group>45</group>
|
|
191
|
+
# <timestamp>1132223498</timestamp>
|
|
192
|
+
# <title>test title</title>
|
|
193
|
+
# <body>
|
|
194
|
+
# this is my document body
|
|
195
|
+
# </body>
|
|
196
|
+
# </document>
|
|
197
|
+
#
|
|
198
|
+
# timestamp element is optional, its default value is 1
|
|
199
|
+
# all the other elements are mandatory
|
|
200
|
+
|
|
201
|
+
# type = xmlpipe
|
|
202
|
+
# xmlpipe_command = cat /usr/local/var/test.xml
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
# inherited source example
|
|
207
|
+
#
|
|
208
|
+
# all the parameters are copied from the parent source,
|
|
209
|
+
# and may then be overridden in this source definition
|
|
210
|
+
source src1stripped : src1
|
|
211
|
+
{
|
|
212
|
+
strip_html = 1
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
#############################################################################
|
|
216
|
+
## index definition
|
|
217
|
+
#############################################################################
|
|
218
|
+
|
|
219
|
+
# local index example
|
|
220
|
+
#
|
|
221
|
+
# this is an index which is stored locally in the filesystem
|
|
222
|
+
#
|
|
223
|
+
# all indexing-time options (such as morphology and charsets)
|
|
224
|
+
# are configured per local index
|
|
225
|
+
index test1
|
|
226
|
+
{
|
|
227
|
+
# which document source to index
|
|
228
|
+
# at least one MUST be defined
|
|
229
|
+
#
|
|
230
|
+
# multiple sources MAY be specified; to do so, just add more
|
|
231
|
+
# "source = NAME" lines. in this case, ALL the document IDs
|
|
232
|
+
# in ALL the specified sources MUST be unique
|
|
233
|
+
source = src1
|
|
234
|
+
|
|
235
|
+
# this is path and index file name without extension
|
|
236
|
+
#
|
|
237
|
+
# indexer will append different extensions to this path to
|
|
238
|
+
# generate names for both permanent and temporary index files
|
|
239
|
+
#
|
|
240
|
+
# .tmp* files are temporary and can be safely removed
|
|
241
|
+
# if indexer fails to remove them automatically
|
|
242
|
+
#
|
|
243
|
+
# .sp* files are fulltext index data files. specifically,
|
|
244
|
+
# .spa contains attribute values attached to each document id
|
|
245
|
+
# .spd contains doclists and hitlists
|
|
246
|
+
# .sph contains index header (schema and other settings)
|
|
247
|
+
# .spi contains wordlists
|
|
248
|
+
#
|
|
249
|
+
# MUST be defined
|
|
250
|
+
path = /data/<%= @username %>/shared/log/indexes/test1
|
|
251
|
+
|
|
252
|
+
# docinfo (ie. per-document attribute values) storage strategy
|
|
253
|
+
# defines how docinfo will be stored
|
|
254
|
+
#
|
|
255
|
+
# available values are "none", "inline" and "extern"
|
|
256
|
+
#
|
|
257
|
+
# "none" means there'll be no docinfo at all (no groups/dates)
|
|
258
|
+
#
|
|
259
|
+
# "inline" means that the docinfo will be stored in the .spd
|
|
260
|
+
# file along with the document ID lists (doclists)
|
|
261
|
+
#
|
|
262
|
+
# "extern" means that the docinfo will be stored in the .spa
|
|
263
|
+
# file separately
|
|
264
|
+
#
|
|
265
|
+
# externally stored docinfo should (basically) be kept in RAM
|
|
266
|
+
# when querying; therefore, "inline" may be the only viable option
|
|
267
|
+
# for really huge (50-100+ million docs) datasets. however, for
|
|
268
|
+
# smaller datasets "extern" storage makes both indexing and
|
|
269
|
+
# searching MUCH more efficient.
|
|
270
|
+
#
|
|
271
|
+
# additional search-time memory requirements for extern storage are
|
|
272
|
+
#
|
|
273
|
+
# ( 1 + number_of_attrs )*number_of_docs*4 bytes
|
|
274
|
+
#
|
|
275
|
+
# so 10 million docs with 2 groups and 1 timestamp will take
|
|
276
|
+
# (1+2+1)*10M*4 = 160 MB of RAM. this is PER DAEMON, ie. searchd
|
|
277
|
+
# will alloc 160 MB on startup, read the data and keep it shared
|
|
278
|
+
# between queries; the children will NOT allocate additional
|
|
279
|
+
# copies of this data.
|
|
280
|
+
#
|
|
281
|
+
# default is "extern" (as most collections are smaller than 100M docs)
|
|
282
|
+
docinfo = extern
|
|
283
|
+
|
|
284
|
+
# morphology
|
|
285
|
+
#
|
|
286
|
+
# currently supported morphology preprocessors are Porter stemmers
|
|
287
|
+
# for English and Russian, and Soundex. more stemmers could be added
|
|
288
|
+
# at users' request.
|
|
289
|
+
#
|
|
290
|
+
# available values are "none", "stem_en", "stem_ru", "stem_enru",
|
|
291
|
+
# and "soundex"
|
|
292
|
+
#
|
|
293
|
+
# optional, default is "none"
|
|
294
|
+
#
|
|
295
|
+
# morphology = none
|
|
296
|
+
# morphology = stem_en
|
|
297
|
+
# morphology = stem_ru
|
|
298
|
+
# morphology = stem_enru
|
|
299
|
+
# morphology = soundex
|
|
300
|
+
morphology = none
|
|
301
|
+
|
|
302
|
+
# stopwords file
|
|
303
|
+
#
|
|
304
|
+
# format is plain text in whatever encoding you use
|
|
305
|
+
# optional, default is empty
|
|
306
|
+
#
|
|
307
|
+
# stopwords = /usr/local/var/data/stopwords.txt
|
|
308
|
+
stopwords =
|
|
309
|
+
|
|
310
|
+
# minimum word length
|
|
311
|
+
#
|
|
312
|
+
# only the words that are of this length and above will be indexed;
|
|
313
|
+
# for example, if min_word_len is 4, "the" won't be indexed,
|
|
314
|
+
# but "they" will be.
|
|
315
|
+
#
|
|
316
|
+
# default is 1, which (obviously) means to index everything
|
|
317
|
+
min_word_len = 1
|
|
318
|
+
|
|
319
|
+
# charset encoding type
|
|
320
|
+
#
|
|
321
|
+
# known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
|
|
322
|
+
#
|
|
323
|
+
# optional, default is sbcs
|
|
324
|
+
charset_type = sbcs
|
|
325
|
+
|
|
326
|
+
# charset definition and case folding rules "table"
|
|
327
|
+
#
|
|
328
|
+
# optional, default value depends on charset_type
|
|
329
|
+
#
|
|
330
|
+
# for now, defaults are configured to support English and Russian
|
|
331
|
+
# this behavior MAY change in future versions
|
|
332
|
+
#
|
|
333
|
+
# 'sbcs' default value is
|
|
334
|
+
# charset_table = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
|
|
335
|
+
#
|
|
336
|
+
# 'utf-8' default value is
|
|
337
|
+
# charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
|
|
338
|
+
|
|
339
|
+
# minimum prefix length
|
|
340
|
+
#
|
|
341
|
+
# if prefix length is positive, indexer will not only index all words,
|
|
342
|
+
# but all the possible prefixes (ie. word beginnings) as well
|
|
343
|
+
#
|
|
344
|
+
# for instance, "exam" query against such index will match documents
|
|
345
|
+
# which contain "example" word, even if they do not contain "exam"
|
|
346
|
+
#
|
|
347
|
+
# indexing prefixes will make the index grow significantly
|
|
348
|
+
# and could degrade search times
|
|
349
|
+
#
|
|
350
|
+
# currently there's no way to rank perfect word matches higher
|
|
351
|
+
# than prefix matches using only one index; you could setup two
|
|
352
|
+
# indexes for that
|
|
353
|
+
#
|
|
354
|
+
# default is 0, which means NOT to index prefixes
|
|
355
|
+
min_prefix_len = 0
|
|
356
|
+
|
|
357
|
+
# minimum infix length
|
|
358
|
+
#
|
|
359
|
+
# if infix length is positive, indexer will not only index all words,
|
|
360
|
+
# but all the possible infixes (ie. character subsequences starting
|
|
361
|
+
# anywhere inside the word) as well
|
|
362
|
+
#
|
|
363
|
+
# for instance, "amp" query against such index will match documents
|
|
364
|
+
# which contain "example" word, even if they do not contain "amp"
|
|
365
|
+
#
|
|
366
|
+
# indexing infixes will make the index grow significantly
|
|
367
|
+
# and could degrade search times
|
|
368
|
+
#
|
|
369
|
+
# currently there's no way to rank perfect word matches higher
|
|
370
|
+
# than infix matches using only one index; you could setup two
|
|
371
|
+
# indexes for that
|
|
372
|
+
#
|
|
373
|
+
# default is 0, which means NOT to index infixes
|
|
374
|
+
min_infix_len = 0
|
|
375
|
+
|
|
376
|
+
# n-grams length
|
|
377
|
+
#
|
|
378
|
+
# n-grams provide basic CJK support for unsegmented texts. if using
|
|
379
|
+
# n-grams, streams of CJK characters are indexed as n-grams. for example,
|
|
380
|
+
# if incoming stream is ABCDEF and n is 2, this text would be indexed
|
|
381
|
+
# as if it was AB BC CD DE EF.
|
|
382
|
+
#
|
|
383
|
+
# this feature is in alpha version state and only n=1 is currently
|
|
384
|
+
# supported; this is going to be improved.
|
|
385
|
+
#
|
|
386
|
+
# note that if search query is segmented (ie. words are separated with
|
|
387
|
+
# whitespace), words are in quotes and extended matching mode is used,
|
|
388
|
+
# then all matching documents will be returned even if their text was
|
|
389
|
+
# *not* segmented. in the example above, ABCDEF text will be indexed as
|
|
390
|
+
# A B C D E F, and "BCD" query will be transformed to "B C D" (where
|
|
391
|
+
# quotes is phrase matching operator), so the document will match.
|
|
392
|
+
#
|
|
393
|
+
# optional, default is 0, which means NOT to use n-grams
|
|
394
|
+
#
|
|
395
|
+
# ngram_len = 1
|
|
396
|
+
|
|
397
|
+
# n-gram characters table
|
|
398
|
+
#
|
|
399
|
+
# specifies what specific characters are subject to n-gram
|
|
400
|
+
# extraction. format is similar to charset_table.
|
|
401
|
+
#
|
|
402
|
+
# optional, default is empty
|
|
403
|
+
#
|
|
404
|
+
# ngram_chars = U+3000..U+2FA1F
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
# inherited index example
|
|
409
|
+
#
|
|
410
|
+
# all the parameters are copied from the parent index,
|
|
411
|
+
# and may then be overridden in this index definition
|
|
412
|
+
index test1stemmed : test1
|
|
413
|
+
{
|
|
414
|
+
path = /data/<%= @username %>/shared/log/indexes/test1stemmed
|
|
415
|
+
morphology = stem_en
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
# distributed index example
|
|
420
|
+
#
|
|
421
|
+
# this is a virtual index which can NOT be directly indexed,
|
|
422
|
+
# and only contains references to other local and/or remote indexes
|
|
423
|
+
#
|
|
424
|
+
# if searchd receives a query against this index,
|
|
425
|
+
# it does the following:
|
|
426
|
+
#
|
|
427
|
+
# 1) connects to all the specified remote agents,
|
|
428
|
+
# 2) issues the query,
|
|
429
|
+
# 3) searches local indexes (while the remote agents are searching),
|
|
430
|
+
# 4) collects remote search results,
|
|
431
|
+
# 5) merges all the results together (removing the duplicates),
|
|
432
|
+
# 6) sends the merged results to client.
|
|
433
|
+
#
|
|
434
|
+
# this index type is primarily intended to be able to split huge (100GB+)
|
|
435
|
+
# datasets into chunks placed on different physical servers and then search
|
|
436
|
+
# through those chunks in parallel, reducing response times and server load;
|
|
437
|
+
# it seems, however, that it could also be used to take advantage of
|
|
438
|
+
# multi-processor systems or to implement HA (high-availability) search.
|
|
439
|
+
index dist1
|
|
440
|
+
{
|
|
441
|
+
# 'distributed' index type MUST be specified
|
|
442
|
+
type = distributed
|
|
443
|
+
|
|
444
|
+
# local index to be searched
|
|
445
|
+
# there can be many local indexes configured
|
|
446
|
+
local = test1
|
|
447
|
+
local = test1stemmed
|
|
448
|
+
|
|
449
|
+
# remote agent
|
|
450
|
+
# multiple remote agents may be specified
|
|
451
|
+
# syntax is 'hostname:port:index1[,index2[,...]]'
|
|
452
|
+
agent = localhost:3313:remote1
|
|
453
|
+
agent = localhost:3314:remote2,remote3
|
|
454
|
+
|
|
455
|
+
# remote agent connection timeout, milliseconds
|
|
456
|
+
# optional, default is 1000 ms, ie. 1 sec
|
|
457
|
+
agent_connect_timeout = 1000
|
|
458
|
+
|
|
459
|
+
# remote agent query timeout, milliseconds
|
|
460
|
+
# optional, default is 3000 ms, ie. 3 sec
|
|
461
|
+
agent_query_timeout = 3000
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
#############################################################################
|
|
465
|
+
## indexer settings
|
|
466
|
+
#############################################################################
|
|
467
|
+
|
|
468
|
+
indexer
|
|
469
|
+
{
|
|
470
|
+
# memory limit
|
|
471
|
+
#
|
|
472
|
+
# may be specified in bytes (no postfix), kilobytes (mem_limit=1000K)
|
|
473
|
+
# or megabytes (mem_limit=10M)
|
|
474
|
+
#
|
|
475
|
+
# will grow if set unacceptably low
|
|
476
|
+
# will warn if set too low and potentially hurting the performance
|
|
477
|
+
#
|
|
478
|
+
# optional, default is 32M
|
|
479
|
+
mem_limit = 32M
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
#############################################################################
|
|
483
|
+
## searchd settings
|
|
484
|
+
#############################################################################
|
|
485
|
+
|
|
486
|
+
searchd
|
|
487
|
+
{
|
|
488
|
+
# IP address on which search daemon will bind and accept
|
|
489
|
+
# incoming network requests
|
|
490
|
+
#
|
|
491
|
+
# optional, default is to listen on all addresses,
|
|
492
|
+
# ie. address = 0.0.0.0
|
|
493
|
+
#
|
|
494
|
+
# address = 127.0.0.1
|
|
495
|
+
# address = 192.168.0.1
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
# port on which search daemon will listen
|
|
499
|
+
port = 3312
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
# log file
|
|
503
|
+
# searchd run info is logged here
|
|
504
|
+
log = /var/log/engineyard/sphinx/<%= @username %>/searchd.log
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
# query log file
|
|
508
|
+
# all the search queries are logged here
|
|
509
|
+
query_log = /var/log/engineyard/sphinx/<%= @username %>/searchd_query.log
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
# client read timeout, seconds
|
|
513
|
+
read_timeout = 5
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
# maximum amount of children to fork
|
|
517
|
+
# useful to control server load
|
|
518
|
+
max_children = 30
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
# a file which will contain searchd process ID
|
|
522
|
+
# used for different external automation scripts
|
|
523
|
+
# MUST be present
|
|
524
|
+
pid_file = /data/<%= @username %>/shared/log/searchd.pid
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
# maximum amount of matches this daemon would ever retrieve
|
|
528
|
+
# from each index and serve to client
|
|
529
|
+
#
|
|
530
|
+
# this parameter affects per-client memory and CPU usage
|
|
531
|
+
# (16+ bytes per match) in match sorting phase; so blindly raising
|
|
532
|
+
# it to 1 million is definitely NOT recommended
|
|
533
|
+
#
|
|
534
|
+
# starting from 0.9.7, it can be decreased on the fly through
|
|
535
|
+
# the corresponding API call; increasing is prohibited to protect
|
|
536
|
+
# against malicious and/or malformed requests
|
|
537
|
+
#
|
|
538
|
+
# default is 1000 (just like with Google)
|
|
539
|
+
max_matches = 1000
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
# --eof--
|