le1t0-deprec 2.1.6.001
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +444 -0
- data/COPYING +19 -0
- data/LICENSE +339 -0
- data/README +154 -0
- data/THANKS +17 -0
- data/bin/depify +139 -0
- data/docs/EXAMPLE-installing_tracks.txt +41 -0
- data/docs/README.nagios +22 -0
- data/docs/README.rails +17 -0
- data/docs/config_gen_explained.txt +39 -0
- data/docs/deprec-1.x/deprec-1.x.quickstart +50 -0
- data/docs/deprec-1.x/notes.txt +12 -0
- data/docs/old/deprec_banner.gif +0 -0
- data/docs/windows_linux.txt +350 -0
- data/docs/xen/traffic_monitoring_with_vnstat.txt +95 -0
- data/docs/xen/xen-tools-notes.txt +31 -0
- data/docs/xen/xen_on_hardy.txt +39 -0
- data/lib/deprec.rb +42 -0
- data/lib/deprec/capistrano_extensions.rb +446 -0
- data/lib/deprec/recipes.rb +7 -0
- data/lib/deprec/recipes/aoe.rb +79 -0
- data/lib/deprec/recipes/app/mongrel.rb +213 -0
- data/lib/deprec/recipes/app/passenger.rb +190 -0
- data/lib/deprec/recipes/ar_sendmail.rb +67 -0
- data/lib/deprec/recipes/aspell.rb +22 -0
- data/lib/deprec/recipes/canonical.rb +68 -0
- data/lib/deprec/recipes/cap.rb +39 -0
- data/lib/deprec/recipes/chef.rb +68 -0
- data/lib/deprec/recipes/collectd.rb +112 -0
- data/lib/deprec/recipes/db/couchdb.rb +107 -0
- data/lib/deprec/recipes/db/mysql.rb +194 -0
- data/lib/deprec/recipes/db/postgresql.rb +104 -0
- data/lib/deprec/recipes/db/sqlite.rb +37 -0
- data/lib/deprec/recipes/ddclient.rb +51 -0
- data/lib/deprec/recipes/deprec.rb +167 -0
- data/lib/deprec/recipes/deprecated.rb +71 -0
- data/lib/deprec/recipes/drbd.rb +137 -0
- data/lib/deprec/recipes/dummy.rb +22 -0
- data/lib/deprec/recipes/example.rb +115 -0
- data/lib/deprec/recipes/git.rb +97 -0
- data/lib/deprec/recipes/gitosis.rb.disabled +55 -0
- data/lib/deprec/recipes/glusterfs.rb +176 -0
- data/lib/deprec/recipes/god.rb +70 -0
- data/lib/deprec/recipes/haproxy.rb +115 -0
- data/lib/deprec/recipes/heartbeat.rb +138 -0
- data/lib/deprec/recipes/imagemagick/imagemagick_bin.rb +24 -0
- data/lib/deprec/recipes/imagemagick/imagemagick_src.rb +39 -0
- data/lib/deprec/recipes/integrity.rb +108 -0
- data/lib/deprec/recipes/iptables.rb +94 -0
- data/lib/deprec/recipes/java.rb +23 -0
- data/lib/deprec/recipes/keepalived.rb +78 -0
- data/lib/deprec/recipes/ldap.rb +52 -0
- data/lib/deprec/recipes/logrotate.rb +56 -0
- data/lib/deprec/recipes/lvm.rb +20 -0
- data/lib/deprec/recipes/memcache.rb +49 -0
- data/lib/deprec/recipes/mongodb.rb +94 -0
- data/lib/deprec/recipes/monit.rb +135 -0
- data/lib/deprec/recipes/mysql_proxy.rb +67 -0
- data/lib/deprec/recipes/nagios.rb +361 -0
- data/lib/deprec/recipes/network.rb +116 -0
- data/lib/deprec/recipes/ntp.rb +103 -0
- data/lib/deprec/recipes/php.rb +58 -0
- data/lib/deprec/recipes/postfix.rb +115 -0
- data/lib/deprec/recipes/profiles.rb +125 -0
- data/lib/deprec/recipes/raid/amcc_3ware.rb +21 -0
- data/lib/deprec/recipes/rails.rb +330 -0
- data/lib/deprec/recipes/redhat_cluster.rb +228 -0
- data/lib/deprec/recipes/redis.rb +90 -0
- data/lib/deprec/recipes/ruby/mri.rb +55 -0
- data/lib/deprec/recipes/ruby/ree.rb +40 -0
- data/lib/deprec/recipes/rvm.rb +32 -0
- data/lib/deprec/recipes/s3utils.rb +63 -0
- data/lib/deprec/recipes/sphinx/thinking_sphinx.rb +105 -0
- data/lib/deprec/recipes/sphinx/ultrasphinx.rb +86 -0
- data/lib/deprec/recipes/ssh.rb +147 -0
- data/lib/deprec/recipes/ssl.rb +56 -0
- data/lib/deprec/recipes/starling.rb +119 -0
- data/lib/deprec/recipes/svn.rb +171 -0
- data/lib/deprec/recipes/syslog.rb +63 -0
- data/lib/deprec/recipes/trac.rb.disabled +277 -0
- data/lib/deprec/recipes/tsung.rb +74 -0
- data/lib/deprec/recipes/ubuntu.rb +90 -0
- data/lib/deprec/recipes/users.rb +90 -0
- data/lib/deprec/recipes/utils.rb +58 -0
- data/lib/deprec/recipes/vnstat.rb +85 -0
- data/lib/deprec/recipes/web/apache.rb +143 -0
- data/lib/deprec/recipes/web/nginx.rb +172 -0
- data/lib/deprec/recipes/wordpress.rb.notworking +96 -0
- data/lib/deprec/recipes/wpmu.rb +103 -0
- data/lib/deprec/recipes/xen.rb +364 -0
- data/lib/deprec/recipes/xentools.rb +101 -0
- data/lib/deprec/recipes_minus_rails.rb +125 -0
- data/lib/deprec/templates/aoe/aoe-init +55 -0
- data/lib/deprec/templates/aoe/fence_aoemask +351 -0
- data/lib/deprec/templates/apache/namevirtualhosts.conf +5 -0
- data/lib/deprec/templates/apache/ports.conf.erb +5 -0
- data/lib/deprec/templates/apache/status.conf.erb +17 -0
- data/lib/deprec/templates/ar_sendmail/logrotate.conf.erb +9 -0
- data/lib/deprec/templates/ar_sendmail/monit.conf.erb +5 -0
- data/lib/deprec/templates/chef/chef.json.erb +1 -0
- data/lib/deprec/templates/chef/solo.rb +2 -0
- data/lib/deprec/templates/collectd/collectd-init.d +153 -0
- data/lib/deprec/templates/collectd/collectd.conf.erb +686 -0
- data/lib/deprec/templates/ddclient/ddclient.conf.erb +11 -0
- data/lib/deprec/templates/ddclient/ddclient.erb +15 -0
- data/lib/deprec/templates/deprec/caprc.erb +14 -0
- data/lib/deprec/templates/drbd/drbd.conf.erb +531 -0
- data/lib/deprec/templates/glusterfs/glusterfsd-init.erb +95 -0
- data/lib/deprec/templates/gnbd_client/cluster.conf.erb +21 -0
- data/lib/deprec/templates/gnbd_client/clvm-default.erb +3 -0
- data/lib/deprec/templates/gnbd_client/cman-default.erb +6 -0
- data/lib/deprec/templates/gnbd_client/gnbdimports.conf.erb +1 -0
- data/lib/deprec/templates/gnbd_server/cluster.conf.erb +20 -0
- data/lib/deprec/templates/gnbd_server/clvm-default.erb +3 -0
- data/lib/deprec/templates/gnbd_server/cman-default.erb +6 -0
- data/lib/deprec/templates/gnbd_server/gnbd-server-default.erb +1 -0
- data/lib/deprec/templates/gnbd_server/gnbdexports.conf.erb +1 -0
- data/lib/deprec/templates/god/god-conf.erb +2 -0
- data/lib/deprec/templates/god/god-init.erb +47 -0
- data/lib/deprec/templates/haproxy/haproxy-init.d +120 -0
- data/lib/deprec/templates/haproxy/haproxy.cfg.erb +31 -0
- data/lib/deprec/templates/heartbeat/authkeys.erb +2 -0
- data/lib/deprec/templates/heartbeat/ha.cf.erb +15 -0
- data/lib/deprec/templates/heartbeat/haresources.erb +1 -0
- data/lib/deprec/templates/integrity/apache_vhost.erb +4 -0
- data/lib/deprec/templates/integrity/config.ru.erb +21 -0
- data/lib/deprec/templates/integrity/config.yml.erb +43 -0
- data/lib/deprec/templates/iptables/firewall-default.erb +13 -0
- data/lib/deprec/templates/iptables/firewall-init.erb +171 -0
- data/lib/deprec/templates/keepalived/keepalived.conf.erb +18 -0
- data/lib/deprec/templates/logrotate/logrotate.conf.erb +32 -0
- data/lib/deprec/templates/mongodb/mongodb-init.d +88 -0
- data/lib/deprec/templates/mongrel/apache_vhost.conf.erb +148 -0
- data/lib/deprec/templates/mongrel/logrotate.conf.erb +11 -0
- data/lib/deprec/templates/mongrel/mongrel_cluster-init-script +54 -0
- data/lib/deprec/templates/mongrel/mongrel_cluster.yml.erb +10 -0
- data/lib/deprec/templates/mongrel/monit.conf.erb +17 -0
- data/lib/deprec/templates/mongrel/nginx_vhost.conf.erb +41 -0
- data/lib/deprec/templates/monit/monit-init-script +104 -0
- data/lib/deprec/templates/monit/monitrc.erb +250 -0
- data/lib/deprec/templates/monit/nothing.monitrc +0 -0
- data/lib/deprec/templates/mysql/create_databases.sql +20 -0
- data/lib/deprec/templates/mysql/database.yml.prod +6 -0
- data/lib/deprec/templates/mysql/database.yml.stage +6 -0
- data/lib/deprec/templates/mysql/my.cnf.erb +140 -0
- data/lib/deprec/templates/mysql/sphinx.conf.prod +542 -0
- data/lib/deprec/templates/mysql/sphinx.conf.stage +542 -0
- data/lib/deprec/templates/mysql_proxy/mysql-proxy-default.erb +4 -0
- data/lib/deprec/templates/nagios/README +32 -0
- data/lib/deprec/templates/nagios/cgi.cfg.erb +357 -0
- data/lib/deprec/templates/nagios/check_linux_free_memory.pl +118 -0
- data/lib/deprec/templates/nagios/check_mongrel_cluster.rb +82 -0
- data/lib/deprec/templates/nagios/htpasswd.users +1 -0
- data/lib/deprec/templates/nagios/mrtg.cfg +180 -0
- data/lib/deprec/templates/nagios/nagios.cfg.erb +1325 -0
- data/lib/deprec/templates/nagios/nrpe.cfg.erb +222 -0
- data/lib/deprec/templates/nagios/nrpe.xinetd.erb +16 -0
- data/lib/deprec/templates/nagios/objects/commands.cfg.erb +265 -0
- data/lib/deprec/templates/nagios/objects/contacts.cfg.erb +89 -0
- data/lib/deprec/templates/nagios/objects/hosts.cfg.erb +114 -0
- data/lib/deprec/templates/nagios/objects/localhost.cfg.erb +116 -0
- data/lib/deprec/templates/nagios/objects/services.cfg.erb +165 -0
- data/lib/deprec/templates/nagios/objects/timeperiods.cfg.erb +94 -0
- data/lib/deprec/templates/nagios/resource.cfg.erb +34 -0
- data/lib/deprec/templates/network/hostname.erb +1 -0
- data/lib/deprec/templates/network/hosts.erb +2 -0
- data/lib/deprec/templates/network/interfaces.erb +18 -0
- data/lib/deprec/templates/network/resolv.conf.erb +6 -0
- data/lib/deprec/templates/nginx/logrotate.conf.erb +13 -0
- data/lib/deprec/templates/nginx/mime.types.erb +70 -0
- data/lib/deprec/templates/nginx/nginx-init-script +62 -0
- data/lib/deprec/templates/nginx/nginx.conf.erb +125 -0
- data/lib/deprec/templates/nginx/nginx.logrotate.d +12 -0
- data/lib/deprec/templates/nginx/nothing.conf +1 -0
- data/lib/deprec/templates/nginx/rails_nginx_vhost.conf.erb +41 -0
- data/lib/deprec/templates/ntp/ntp.conf.erb +42 -0
- data/lib/deprec/templates/passenger/apache_vhost.erb +29 -0
- data/lib/deprec/templates/passenger/logrotate.conf.erb +12 -0
- data/lib/deprec/templates/passenger/passenger.conf.erb +21 -0
- data/lib/deprec/templates/passenger/passenger.load.erb +3 -0
- data/lib/deprec/templates/postfix/aliases.erb +3 -0
- data/lib/deprec/templates/postfix/dynamicmaps.cf.erb +8 -0
- data/lib/deprec/templates/postfix/main.cf.erb +36 -0
- data/lib/deprec/templates/postfix/master.cf.erb +77 -0
- data/lib/deprec/templates/redis/redis-conf.erb +132 -0
- data/lib/deprec/templates/redis/redis-init.erb +50 -0
- data/lib/deprec/templates/s3utils/s3cfg +35 -0
- data/lib/deprec/templates/s3utils/s3config.yml +3 -0
- data/lib/deprec/templates/sphinx/monit.conf.erb +5 -0
- data/lib/deprec/templates/ssh/ssh_config.erb +50 -0
- data/lib/deprec/templates/ssh/sshd_config.erb +78 -0
- data/lib/deprec/templates/ssl/make-ssl-cert +138 -0
- data/lib/deprec/templates/ssl/ssl-cert-snakeoil.key +15 -0
- data/lib/deprec/templates/ssl/ssl-cert-snakeoil.pem +19 -0
- data/lib/deprec/templates/starling/monit.conf.erb +14 -0
- data/lib/deprec/templates/starling/starling-init-script.erb +71 -0
- data/lib/deprec/templates/subversion/svn.apache.vhost.erb +43 -0
- data/lib/deprec/templates/syslog/syslog.conf.erb +71 -0
- data/lib/deprec/templates/syslog/syslogd.erb +13 -0
- data/lib/deprec/templates/trac/apache_vhost.conf.erb +24 -0
- data/lib/deprec/templates/trac/nginx_vhost.conf.erb +26 -0
- data/lib/deprec/templates/trac/trac.ini.erb +169 -0
- data/lib/deprec/templates/trac/trac_deprec.png +0 -0
- data/lib/deprec/templates/trac/tracd-init.erb +43 -0
- data/lib/deprec/templates/trac/users.htdigest.erb +0 -0
- data/lib/deprec/templates/tsung/tsung.xml.erb +47 -0
- data/lib/deprec/templates/vnstat/config.php +57 -0
- data/lib/deprec/templates/wordpress/apache2_wordpress_vhost.conf.erb +31 -0
- data/lib/deprec/templates/wordpress/wp-config.php.erb +31 -0
- data/lib/deprec/templates/wpmu/apache_vhost.conf.erb +13 -0
- data/lib/deprec/templates/xen/network-bridge-wrapper +3 -0
- data/lib/deprec/templates/xen/xend-config.sxp.erb +195 -0
- data/lib/deprec/templates/xen/xend-init.erb +57 -0
- data/lib/deprec/templates/xen/xendomains.erb +137 -0
- data/lib/deprec/templates/xentools/100-ubuntu-setup +26 -0
- data/lib/deprec/templates/xentools/15-disable-hwclock +40 -0
- data/lib/deprec/templates/xentools/30-disable-gettys +57 -0
- data/lib/deprec/templates/xentools/31-ubuntu-setup +32 -0
- data/lib/deprec/templates/xentools/40-setup-networking +145 -0
- data/lib/deprec/templates/xentools/98-custom +17 -0
- data/lib/deprec/templates/xentools/xen-tools.conf.erb +278 -0
- data/lib/deprec/templates/xentools/xm.tmpl.erb +138 -0
- data/lib/deprec_cmd_completion.sh +26 -0
- data/lib/deprec_minus_rails.rb +12 -0
- data/lib/vmbuilder_plugins/all.rb +20 -0
- data/lib/vmbuilder_plugins/apt.rb +93 -0
- data/lib/vmbuilder_plugins/emerge.rb +76 -0
- data/lib/vmbuilder_plugins/gem.rb +100 -0
- data/lib/vmbuilder_plugins/std.rb +203 -0
- metadata +304 -0
@@ -0,0 +1,542 @@
|
|
1
|
+
#
|
2
|
+
# Sphinx configuration file sample
|
3
|
+
#
|
4
|
+
|
5
|
+
#############################################################################
|
6
|
+
## data source definition
|
7
|
+
#############################################################################
|
8
|
+
|
9
|
+
source src1
|
10
|
+
{
|
11
|
+
# data source type
|
12
|
+
# for now, known types are 'mysql', 'pgsql' and 'xmlpipe'
|
13
|
+
# MUST be defined
|
14
|
+
type = mysql
|
15
|
+
|
16
|
+
# whether to strip HTML
|
17
|
+
# values can be 0 (don't strip) or 1 (do strip)
|
18
|
+
# WARNING, only works with mysql source for now
|
19
|
+
# WARNING, should work ok for PERFECTLY formed XHTML for now
|
20
|
+
# WARNING, POSSIBLE TO BUG on malformed everyday HTML
|
21
|
+
# optional, default is 0
|
22
|
+
strip_html = 0
|
23
|
+
|
24
|
+
# what HTML attributes to index if stripping HTML
|
25
|
+
# format is as follows:
|
26
|
+
#
|
27
|
+
# index_html_attrs = img=alt,title; a=title;
|
28
|
+
#
|
29
|
+
# optional, default is to not index anything
|
30
|
+
index_html_attrs =
|
31
|
+
|
32
|
+
#####################################################################
|
33
|
+
|
34
|
+
# some straightforward parameters for 'mysql' source type
|
35
|
+
sql_host = <%= @mysql_host %>
|
36
|
+
sql_user = <%= @username %>_db
|
37
|
+
sql_pass = <%= @mysqlpass %>
|
38
|
+
sql_db = <%= @username %>_stage
|
39
|
+
sql_port = 3306 # optional, default is 3306
|
40
|
+
|
41
|
+
# sql_sock = /tmp/mysql.sock
|
42
|
+
#
|
43
|
+
# optional
|
44
|
+
# usually '/var/lib/mysql/mysql.sock' on Linux
|
45
|
+
# usually '/tmp/mysql.sock' on FreeBSD
|
46
|
+
|
47
|
+
# pre-query, executed before the main fetch query
|
48
|
+
# useful eg. to setup encoding or mark records
|
49
|
+
# optional, default is empty
|
50
|
+
#
|
51
|
+
# sql_query_pre = SET CHARACTER_SET_RESULTS=cp1251
|
52
|
+
sql_query_pre =
|
53
|
+
|
54
|
+
# main document fetch query
|
55
|
+
#
|
56
|
+
# you can specify up to 32 (formally SPH_MAX_FIELDS in sphinx.h) fields;
|
57
|
+
# all of the fields which are not document_id or attributes (see below)
|
58
|
+
# will be full-text indexed
|
59
|
+
#
|
60
|
+
# document_id MUST be the very first field
|
61
|
+
# document_id MUST be positive (non-zero, non-negative)
|
62
|
+
# document_id MUST fit into 32 bits
|
63
|
+
# document_id MUST be unique
|
64
|
+
#
|
65
|
+
# mandatory
|
66
|
+
sql_query = \
|
67
|
+
SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
|
68
|
+
FROM documents
|
69
|
+
|
70
|
+
# query range setup
|
71
|
+
#
|
72
|
+
# useful to avoid MyISAM table locks and big result sets
|
73
|
+
# when indexing lots of data
|
74
|
+
#
|
75
|
+
# to use query ranges, you should
|
76
|
+
# 1) provide a query to fetch min/max id (ie. id range) from data set;
|
77
|
+
# 2) configure step size in which this range will be walked;
|
78
|
+
# 3) use $start and $end macros somewhere in the main fetch query.
|
79
|
+
#
|
80
|
+
# 'sql_query_range' must return exactly two integer fields
|
81
|
+
# in exactly min_id, max_id order
|
82
|
+
#
|
83
|
+
# 'sql_range_step' must be a positive integer
|
84
|
+
# optional, default is 1024
|
85
|
+
#
|
86
|
+
# 'sql_query' must contain both '$start' and '$end' macros
|
87
|
+
# if you are using query ranges (because it obviously would be an
|
88
|
+
# error to index the whole table many times)
|
89
|
+
#
|
90
|
+
# note that the intervals specified by $start/$end do not
|
91
|
+
# overlap, so you should NOT remove document ids which are exactly
|
92
|
+
# equal to $start or $end in your query
|
93
|
+
#
|
94
|
+
# here's an example which will index 'documents' table
|
95
|
+
# fetching (at most) one thousand entries at a time:
|
96
|
+
#
|
97
|
+
# sql_query_range = SELECT MIN(id),MAX(id) FROM documents
|
98
|
+
# sql_range_step = 1000
|
99
|
+
# sql_query = \
|
100
|
+
# SELECT doc.id, doc.id AS group, doc.title, doc.data \
|
101
|
+
# FROM documents doc \
|
102
|
+
# WHERE id>=$start AND id<=$end
|
103
|
+
|
104
|
+
|
105
|
+
# attribute columns
|
106
|
+
#
|
107
|
+
# attribute values MUST be positive (non-zero, non-negative) integers
|
108
|
+
# attribute values MUST fit into 32 bits
|
109
|
+
#
|
110
|
+
# attributes are additional values associated with each document which
|
111
|
+
# may be used to perform additional filtering and sorting during search.
|
112
|
+
# attributes are NOT full-text indexed; they are stored in the full text
|
113
|
+
# index as is.
|
114
|
+
#
|
115
|
+
# a good example would be a forum posts table. one might need to search
|
116
|
+
# through 'title' and 'content' fields but to limit search to specific
|
117
|
+
# values of 'author_id', or 'forum_id', or to sort by 'post_date', or to
|
118
|
+
# group matches by 'thread_id', or to group posts by month of the
|
119
|
+
# 'post_date' and provide statistics.
|
120
|
+
#
|
121
|
+
# this all can be achieved by specifying all the mentioned columns
|
122
|
+
# (excluding 'title' and 'content' which are full-text fields) as
|
123
|
+
# attributes and then using API calls to setup filtering, sorting,
|
124
|
+
# and grouping.
|
125
|
+
#
|
126
|
+
# sql_group_column is used to declare integer attributes.
|
127
|
+
#
|
128
|
+
# sql_date_column is used to declare UNIX timestamp attributes.
|
129
|
+
#
|
130
|
+
# sql_str2ordinal_column is used to declare integer attributes whose
|
131
|
+
# values are computed as ordinal numbers of corresponding column value
|
132
|
+
# in sorted list of column values. WARNING, all such string values
|
133
|
+
# are going to be stored in RAM while indexing, and "C" locale will
|
134
|
+
# be used when sorting!
|
135
|
+
#
|
136
|
+
# starting with 0.9.7, there may be multiple attribute columns specified.
|
137
|
+
# here's an example for that mentioned posts table:
|
138
|
+
#
|
139
|
+
# sql_group_column = author_id
|
140
|
+
# sql_group_column = forum_id
|
141
|
+
# sql_group_column = thread_id
|
142
|
+
# sql_date_column = post_unix_timestamp
|
143
|
+
# sql_date_column = last_edit_unix_timestamp
|
144
|
+
#
|
145
|
+
# optional, default is empty
|
146
|
+
sql_group_column = group_id
|
147
|
+
sql_date_column = date_added
|
148
|
+
# sql_str2ordinal_column = author_name
|
149
|
+
|
150
|
+
# post-query, executed on the end of main fetch query
|
151
|
+
#
|
152
|
+
# note that indexing is NOT completed at the point when post-query
|
153
|
+
# gets executed and might very well fail
|
154
|
+
#
|
155
|
+
# optional, default is empty
|
156
|
+
sql_query_post =
|
157
|
+
|
158
|
+
# post-index-query, executed on successfully completed indexing
|
159
|
+
#
|
160
|
+
# $maxid macro is the max document ID which was actually
|
161
|
+
# fetched from the database
|
162
|
+
#
|
163
|
+
# optional, default is empty
|
164
|
+
#
|
165
|
+
# sql_query_post_index = REPLACE INTO counters ( id, val ) \
|
166
|
+
# VALUES ( 'max_indexed_id', $maxid )
|
167
|
+
|
168
|
+
|
169
|
+
# document info query
|
170
|
+
#
|
171
|
+
# ONLY used by search utility to display document information
|
172
|
+
# MUST be able to fetch document info by its id, therefore
|
173
|
+
# MUST contain '$id' macro
|
174
|
+
#
|
175
|
+
# optional, default is empty
|
176
|
+
sql_query_info = SELECT * FROM documents WHERE id=$id
|
177
|
+
|
178
|
+
#####################################################################
|
179
|
+
|
180
|
+
# demo config for 'xmlpipe' source type is a little below
|
181
|
+
#
|
182
|
+
# with xmlpipe, indexer opens a pipe to a given command,
|
183
|
+
# and then reads documents from stdin
|
184
|
+
#
|
185
|
+
# indexer expects one or more documents from xmlpipe stdin
|
186
|
+
# each document must be formatted exactly as follows:
|
187
|
+
#
|
188
|
+
# <document>
|
189
|
+
# <id>123</id>
|
190
|
+
# <group>45</group>
|
191
|
+
# <timestamp>1132223498</timestamp>
|
192
|
+
# <title>test title</title>
|
193
|
+
# <body>
|
194
|
+
# this is my document body
|
195
|
+
# </body>
|
196
|
+
# </document>
|
197
|
+
#
|
198
|
+
# timestamp element is optional, its default value is 1
|
199
|
+
# all the other elements are mandatory
|
200
|
+
|
201
|
+
# type = xmlpipe
|
202
|
+
# xmlpipe_command = cat /usr/local/var/test.xml
|
203
|
+
}
|
204
|
+
|
205
|
+
|
206
|
+
# inherited source example
|
207
|
+
#
|
208
|
+
# all the parameters are copied from the parent source,
|
209
|
+
# and may then be overridden in this source definition
|
210
|
+
source src1stripped : src1
|
211
|
+
{
|
212
|
+
strip_html = 1
|
213
|
+
}
|
214
|
+
|
215
|
+
#############################################################################
|
216
|
+
## index definition
|
217
|
+
#############################################################################
|
218
|
+
|
219
|
+
# local index example
|
220
|
+
#
|
221
|
+
# this is an index which is stored locally in the filesystem
|
222
|
+
#
|
223
|
+
# all indexing-time options (such as morphology and charsets)
|
224
|
+
# are configured per local index
|
225
|
+
index test1
|
226
|
+
{
|
227
|
+
# which document source to index
|
228
|
+
# at least one MUST be defined
|
229
|
+
#
|
230
|
+
# multiple sources MAY be specified; to do so, just add more
|
231
|
+
# "source = NAME" lines. in this case, ALL the document IDs
|
232
|
+
# in ALL the specified sources MUST be unique
|
233
|
+
source = src1
|
234
|
+
|
235
|
+
# this is path and index file name without extension
|
236
|
+
#
|
237
|
+
# indexer will append different extensions to this path to
|
238
|
+
# generate names for both permanent and temporary index files
|
239
|
+
#
|
240
|
+
# .tmp* files are temporary and can be safely removed
|
241
|
+
# if indexer fails to remove them automatically
|
242
|
+
#
|
243
|
+
# .sp* files are fulltext index data files. specifically,
|
244
|
+
# .spa contains attribute values attached to each document id
|
245
|
+
# .spd contains doclists and hitlists
|
246
|
+
# .sph contains index header (schema and other settings)
|
247
|
+
# .spi contains wordlists
|
248
|
+
#
|
249
|
+
# MUST be defined
|
250
|
+
path = /data/<%= @username %>/shared/log/indexes/test1
|
251
|
+
|
252
|
+
# docinfo (ie. per-document attribute values) storage strategy
|
253
|
+
# defines how docinfo will be stored
|
254
|
+
#
|
255
|
+
# available values are "none", "inline" and "extern"
|
256
|
+
#
|
257
|
+
# "none" means there'll be no docinfo at all (no groups/dates)
|
258
|
+
#
|
259
|
+
# "inline" means that the docinfo will be stored in the .spd
|
260
|
+
# file along with the document ID lists (doclists)
|
261
|
+
#
|
262
|
+
# "extern" means that the docinfo will be stored in the .spa
|
263
|
+
# file separately
|
264
|
+
#
|
265
|
+
# externally stored docinfo should (basically) be kept in RAM
|
266
|
+
# when querying; therefore, "inline" may be the only viable option
|
267
|
+
# for really huge (50-100+ million docs) datasets. however, for
|
268
|
+
# smaller datasets "extern" storage makes both indexing and
|
269
|
+
# searching MUCH more efficient.
|
270
|
+
#
|
271
|
+
# additional search-time memory requirements for extern storage are
|
272
|
+
#
|
273
|
+
# ( 1 + number_of_attrs )*number_of_docs*4 bytes
|
274
|
+
#
|
275
|
+
# so 10 million docs with 2 groups and 1 timestamp will take
|
276
|
+
# (1+2+1)*10M*4 = 160 MB of RAM. this is PER DAEMON, ie. searchd
|
277
|
+
# will alloc 160 MB on startup, read the data and keep it shared
|
278
|
+
# between queries; the children will NOT allocate additional
|
279
|
+
# copies of this data.
|
280
|
+
#
|
281
|
+
# default is "extern" (as most collections are smaller than 100M docs)
|
282
|
+
docinfo = extern
|
283
|
+
|
284
|
+
# morphology
|
285
|
+
#
|
286
|
+
# currently supported morphology preprocessors are Porter stemmers
|
287
|
+
# for English and Russian, and Soundex. more stemmers could be added
|
288
|
+
# at users' request.
|
289
|
+
#
|
290
|
+
# available values are "none", "stem_en", "stem_ru", "stem_enru",
|
291
|
+
# and "soundex"
|
292
|
+
#
|
293
|
+
# optional, default is "none"
|
294
|
+
#
|
295
|
+
# morphology = none
|
296
|
+
# morphology = stem_en
|
297
|
+
# morphology = stem_ru
|
298
|
+
# morphology = stem_enru
|
299
|
+
# morphology = soundex
|
300
|
+
morphology = none
|
301
|
+
|
302
|
+
# stopwords file
|
303
|
+
#
|
304
|
+
# format is plain text in whatever encoding you use
|
305
|
+
# optional, default is empty
|
306
|
+
#
|
307
|
+
# stopwords = /usr/local/var/data/stopwords.txt
|
308
|
+
stopwords =
|
309
|
+
|
310
|
+
# minimum word length
|
311
|
+
#
|
312
|
+
# only the words that are of this length and above will be indexed;
|
313
|
+
# for example, if min_word_len is 4, "the" won't be indexed,
|
314
|
+
# but "they" will be.
|
315
|
+
#
|
316
|
+
# default is 1, which (obviously) means to index everything
|
317
|
+
min_word_len = 1
|
318
|
+
|
319
|
+
# charset encoding type
|
320
|
+
#
|
321
|
+
# known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
|
322
|
+
#
|
323
|
+
# optional, default is sbcs
|
324
|
+
charset_type = sbcs
|
325
|
+
|
326
|
+
# charset definition and case folding rules "table"
|
327
|
+
#
|
328
|
+
# optional, default value depends on charset_type
|
329
|
+
#
|
330
|
+
# for now, defaults are configured to support English and Russian
|
331
|
+
# this behavior MAY change in future versions
|
332
|
+
#
|
333
|
+
# 'sbcs' default value is
|
334
|
+
# charset_table = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
|
335
|
+
#
|
336
|
+
# 'utf-8' default value is
|
337
|
+
# charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
|
338
|
+
|
339
|
+
# minimum prefix length
|
340
|
+
#
|
341
|
+
# if prefix length is positive, indexer will not only index all words,
|
342
|
+
# but all the possible prefixes (ie. word beginnings) as well
|
343
|
+
#
|
344
|
+
# for instance, "exam" query against such index will match documents
|
345
|
+
# which contain "example" word, even if they do not contain "exam"
|
346
|
+
#
|
347
|
+
# indexing prefixes will make the index grow significantly
|
348
|
+
# and could degrade search times
|
349
|
+
#
|
350
|
+
# currently there's no way to rank perfect word matches higher
|
351
|
+
# than prefix matches using only one index; you could setup two
|
352
|
+
# indexes for that
|
353
|
+
#
|
354
|
+
# default is 0, which means NOT to index prefixes
|
355
|
+
min_prefix_len = 0
|
356
|
+
|
357
|
+
# minimum infix length
|
358
|
+
#
|
359
|
+
# if infix length is positive, indexer will not only index all words,
|
360
|
+
# but all the possible infixes (ie. character subsequences starting
|
361
|
+
# anywhere inside the word) as well
|
362
|
+
#
|
363
|
+
# for instance, "amp" query against such index will match documents
|
364
|
+
# which contain "example" word, even if they do not contain "amp"
|
365
|
+
#
|
366
|
+
# indexing infixes will make the index grow significantly
|
367
|
+
# and could degrade search times
|
368
|
+
#
|
369
|
+
# currently there's no way to rank perfect word matches higher
|
370
|
+
# than infix matches using only one index; you could setup two
|
371
|
+
# indexes for that
|
372
|
+
#
|
373
|
+
# default is 0, which means NOT to index infixes
|
374
|
+
min_infix_len = 0
|
375
|
+
|
376
|
+
# n-grams length
|
377
|
+
#
|
378
|
+
# n-grams provide basic CJK support for unsegmented texts. if using
|
379
|
+
# n-grams, streams of CJK characters are indexed as n-grams. for example,
|
380
|
+
# if incoming stream is ABCDEF and n is 2, this text would be indexed
|
381
|
+
# as if it was AB BC CD DE EF.
|
382
|
+
#
|
383
|
+
# this feature is in alpha version state and only n=1 is currently
|
384
|
+
# supported; this is going to be improved.
|
385
|
+
#
|
386
|
+
# note that if search query is segmented (ie. words are separated with
|
387
|
+
# whitespace), words are in quotes and extended matching mode is used,
|
388
|
+
# then all matching documents will be returned even if their text was
|
389
|
+
# *not* segmented. in the example above, ABCDEF text will be indexed as
|
390
|
+
# A B C D E F, and "BCD" query will be transformed to "B C D" (where
|
391
|
+
# quotes is phrase matching operator), so the document will match.
|
392
|
+
#
|
393
|
+
# optional, default is 0, which means NOT to use n-grams
|
394
|
+
#
|
395
|
+
# ngram_len = 1
|
396
|
+
|
397
|
+
# n-gram characters table
|
398
|
+
#
|
399
|
+
# specifies what specific characters are subject to n-gram
|
400
|
+
# extraction. format is similar to charset_table.
|
401
|
+
#
|
402
|
+
# optional, default is empty
|
403
|
+
#
|
404
|
+
# ngram_chars = U+3000..U+2FA1F
|
405
|
+
}
|
406
|
+
|
407
|
+
|
408
|
+
# inherited index example
|
409
|
+
#
|
410
|
+
# all the parameters are copied from the parent index,
|
411
|
+
# and may then be overridden in this index definition
|
412
|
+
index test1stemmed : test1
|
413
|
+
{
|
414
|
+
path = /data/<%= @username %>/shared/log/indexes/test1stemmed
|
415
|
+
morphology = stem_en
|
416
|
+
}
|
417
|
+
|
418
|
+
|
419
|
+
# distributed index example
|
420
|
+
#
|
421
|
+
# this is a virtual index which can NOT be directly indexed,
|
422
|
+
# and only contains references to other local and/or remote indexes
|
423
|
+
#
|
424
|
+
# if searchd receives a query against this index,
|
425
|
+
# it does the following:
|
426
|
+
#
|
427
|
+
# 1) connects to all the specified remote agents,
|
428
|
+
# 2) issues the query,
|
429
|
+
# 3) searches local indexes (while the remote agents are searching),
|
430
|
+
# 4) collects remote search results,
|
431
|
+
# 5) merges all the results together (removing the duplicates),
|
432
|
+
# 6) sends the merged results to client.
|
433
|
+
#
|
434
|
+
# this index type is primarily intended to be able to split huge (100GB+)
|
435
|
+
# datasets into chunks placed on different physical servers and then search
|
436
|
+
# through those chunks in parallel, reducing response times and server load;
|
437
|
+
# it seems, however, that it could also be used to take advantage of
|
438
|
+
# multi-processor systems or to implement HA (high-availability) search.
|
439
|
+
index dist1
|
440
|
+
{
|
441
|
+
# 'distributed' index type MUST be specified
|
442
|
+
type = distributed
|
443
|
+
|
444
|
+
# local index to be searched
|
445
|
+
# there can be many local indexes configured
|
446
|
+
local = test1
|
447
|
+
local = test1stemmed
|
448
|
+
|
449
|
+
# remote agent
|
450
|
+
# multiple remote agents may be specified
|
451
|
+
# syntax is 'hostname:port:index1[,index2[,...]]'
|
452
|
+
agent = localhost:3313:remote1
|
453
|
+
agent = localhost:3314:remote2,remote3
|
454
|
+
|
455
|
+
# remote agent connection timeout, milliseconds
|
456
|
+
# optional, default is 1000 ms, ie. 1 sec
|
457
|
+
agent_connect_timeout = 1000
|
458
|
+
|
459
|
+
# remote agent query timeout, milliseconds
|
460
|
+
# optional, default is 3000 ms, ie. 3 sec
|
461
|
+
agent_query_timeout = 3000
|
462
|
+
}
|
463
|
+
|
464
|
+
#############################################################################
|
465
|
+
## indexer settings
|
466
|
+
#############################################################################
|
467
|
+
|
468
|
+
indexer
|
469
|
+
{
|
470
|
+
# memory limit
|
471
|
+
#
|
472
|
+
# may be specified in bytes (no postfix), kilobytes (mem_limit=1000K)
|
473
|
+
# or megabytes (mem_limit=10M)
|
474
|
+
#
|
475
|
+
# will grow if set unacceptably low
|
476
|
+
# will warn if set too low and potentially hurting the performance
|
477
|
+
#
|
478
|
+
# optional, default is 32M
|
479
|
+
mem_limit = 32M
|
480
|
+
}
|
481
|
+
|
482
|
+
#############################################################################
|
483
|
+
## searchd settings
|
484
|
+
#############################################################################
|
485
|
+
|
486
|
+
searchd
|
487
|
+
{
|
488
|
+
# IP address on which search daemon will bind and accept
|
489
|
+
# incoming network requests
|
490
|
+
#
|
491
|
+
# optional, default is to listen on all addresses,
|
492
|
+
# ie. address = 0.0.0.0
|
493
|
+
#
|
494
|
+
# address = 127.0.0.1
|
495
|
+
# address = 192.168.0.1
|
496
|
+
|
497
|
+
|
498
|
+
# port on which search daemon will listen
|
499
|
+
port = 3312
|
500
|
+
|
501
|
+
|
502
|
+
# log file
|
503
|
+
# searchd run info is logged here
|
504
|
+
log = /var/log/engineyard/sphinx/<%= @username %>/searchd.log
|
505
|
+
|
506
|
+
|
507
|
+
# query log file
|
508
|
+
# all the search queries are logged here
|
509
|
+
query_log = /var/log/engineyard/sphinx/<%= @username %>/searchd_query.log
|
510
|
+
|
511
|
+
|
512
|
+
# client read timeout, seconds
|
513
|
+
read_timeout = 5
|
514
|
+
|
515
|
+
|
516
|
+
# maximum amount of children to fork
|
517
|
+
# useful to control server load
|
518
|
+
max_children = 30
|
519
|
+
|
520
|
+
|
521
|
+
# a file which will contain searchd process ID
|
522
|
+
# used for different external automation scripts
|
523
|
+
# MUST be present
|
524
|
+
pid_file = /data/<%= @username %>/shared/log/searchd.pid
|
525
|
+
|
526
|
+
|
527
|
+
# maximum amount of matches this daemon would ever retrieve
|
528
|
+
# from each index and serve to client
|
529
|
+
#
|
530
|
+
# this parameter affects per-client memory and CPU usage
|
531
|
+
# (16+ bytes per match) in match sorting phase; so blindly raising
|
532
|
+
# it to 1 million is definitely NOT recommended
|
533
|
+
#
|
534
|
+
# starting from 0.9.7, it can be decreased on the fly through
|
535
|
+
# the corresponding API call; increasing is prohibited to protect
|
536
|
+
# against malicious and/or malformed requests
|
537
|
+
#
|
538
|
+
# default is 1000 (just like with Google)
|
539
|
+
max_matches = 1000
|
540
|
+
}
|
541
|
+
|
542
|
+
# --eof--
|