capitate 0.1.9 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Capfile +3 -2
- data/History.txt +6 -0
- data/Manifest.txt +28 -11
- data/bin/capitate +13 -61
- data/config/hoe.rb +2 -2
- data/docs/nginx.README +12 -0
- data/docs/recipes/centos.txt +33 -0
- data/docs/recipes/deploy-pending.txt +25 -0
- data/docs/recipes/deploy-web.txt +33 -0
- data/docs/recipes/deploy.txt +159 -0
- data/docs/recipes/docs.txt +26 -0
- data/docs/recipes/imagemagick-centos.txt +20 -0
- data/docs/recipes/imagemagick.txt +8 -0
- data/docs/recipes/index.txt +26 -0
- data/docs/recipes/memcached-centos.txt +32 -0
- data/docs/recipes/memcached.txt +35 -0
- data/docs/recipes/mongrel_cluster-centos.txt +27 -0
- data/docs/recipes/mongrel_cluster.txt +40 -0
- data/docs/recipes/monit-centos.txt +32 -0
- data/docs/recipes/monit.txt +8 -0
- data/docs/recipes/mysql-centos.txt +22 -0
- data/docs/recipes/mysql.txt +58 -0
- data/docs/recipes/nginx-centos.txt +43 -0
- data/docs/recipes/nginx.txt +51 -0
- data/docs/recipes/rails.txt +58 -0
- data/docs/recipes/ruby-centos.txt +22 -0
- data/docs/recipes/ruby.txt +8 -0
- data/docs/recipes/sphinx-centos.txt +35 -0
- data/docs/recipes/sphinx.txt +89 -0
- data/lib/capitate/cap_ext/run_via.rb +14 -0
- data/lib/capitate/plugins/gem.rb +2 -3
- data/lib/capitate/plugins/prompt.rb +24 -0
- data/lib/capitate/plugins/script.rb +29 -15
- data/lib/capitate/plugins/templates.rb +45 -40
- data/lib/capitate/plugins/yum.rb +12 -22
- data/lib/capitate/task_node.rb +28 -9
- data/lib/capitate/version.rb +2 -2
- data/lib/capitate.rb +4 -5
- data/lib/deployment/deploy.rb +42 -0
- data/lib/deployment/install-centos-rubyweb.rb +131 -0
- data/lib/recipes/centos/centos.rb +30 -21
- data/lib/recipes/centos/imagemagick.rb +9 -8
- data/lib/recipes/centos/memcached.rb +11 -21
- data/lib/recipes/centos/mongrel_cluster.rb +7 -11
- data/lib/recipes/centos/monit.rb +13 -18
- data/lib/recipes/centos/mysql.rb +6 -9
- data/lib/recipes/centos/nginx.rb +17 -30
- data/lib/recipes/centos/ruby.rb +14 -19
- data/lib/recipes/centos/sphinx.rb +14 -28
- data/lib/recipes/docs.rb +22 -6
- data/lib/recipes/memcached.rb +14 -8
- data/lib/recipes/mongrel_cluster.rb +11 -14
- data/lib/recipes/mysql.rb +22 -40
- data/lib/recipes/nginx.rb +9 -4
- data/lib/recipes/rails.rb +53 -6
- data/lib/recipes/sphinx.rb +86 -17
- data/lib/templates/capistrano/Capfile +2 -6
- data/lib/templates/monit/cert.sh +1 -1
- data/lib/templates/mysql/install_db.sql.erb +2 -2
- data/lib/templates/sphinx/sphinx.conf.erb +28 -399
- data/lib/templates/sphinx/sphinx_app.initd.centos.erb +1 -1
- data/website/index.html +20 -34
- data/website/index.txt +12 -22
- data/website/stylesheets/screen.css +22 -11
- data/website/template.rhtml +7 -5
- data/website/template_recipe.rhtml +7 -3
- metadata +55 -15
- data/lib/capitate/plugins/package.rb +0 -30
- data/lib/capitate/plugins/profiles.rb +0 -33
- data/lib/capitate/plugins/wget.rb +0 -23
- data/lib/profiles/centos-sick.rb +0 -116
- data/lib/recipes/README +0 -5
- data/lib/recipes/centos/README +0 -3
- data/lib/recipes/gems.rb +0 -20
- data/lib/recipes/packages.rb +0 -39
- data/lib/recipes/recipes.rb +0 -30
- data/lib/templates/centos/setup_for_web.sh +0 -17
- data/lib/templates/centos/sudoers +0 -95
data/lib/recipes/sphinx.rb
CHANGED
@@ -1,38 +1,107 @@
|
|
1
1
|
# Sphinx recipes
|
2
2
|
namespace :sphinx do
|
3
3
|
|
4
|
-
desc
|
4
|
+
desc <<-DESC
|
5
|
+
Create monit configuration for sphinx.\n
|
6
|
+
*monit_conf_dir*: Destination for monitrc. _Defaults to "/etc/monit"_\n
|
7
|
+
*sphinx_pid_path*: Location for sphinx pid. _Defaults to "[shared_path]/pids/searchd.pid"_\n
|
8
|
+
DESC
|
5
9
|
task :setup_monit do
|
6
|
-
|
10
|
+
|
11
|
+
# Settings
|
12
|
+
fetch_or_default(:monit_conf_dir, "/etc/monit")
|
13
|
+
fetch_or_default(:sphinx_pid_path, "#{shared_path}/pids/searchd.pid")
|
7
14
|
|
8
15
|
put template.load("sphinx/sphinx.monitrc.erb"), "/tmp/sphinx_#{application}.monitrc"
|
9
|
-
sudo "install -o root /tmp/sphinx_#{application}.monitrc /
|
16
|
+
sudo "install -o root /tmp/sphinx_#{application}.monitrc #{monit_conf_dir}/sphinx_#{application}.monitrc"
|
10
17
|
end
|
11
18
|
|
12
|
-
desc
|
13
|
-
|
19
|
+
desc <<-DESC
|
20
|
+
Update sphinx for application.
|
21
|
+
|
22
|
+
*sphinx_conf_template*: Path to sphinx.conf.erb. _Defaults to "config/templates/sphinx.conf.erb"_\n
|
23
|
+
*sphinx_conf_path*: Path to sphinx.conf. _Defaults to "[shared_path]/config/sphinx.conf"_\n
|
24
|
+
*sphinx_port*: Sphinx port. _Defaults to 3312_\n
|
25
|
+
*sphinx_conf_root*: Directory for sphinx configuration, like stopwords.txt. _Defaults to [current_path]/config_\n
|
26
|
+
*sphinx_index_root*: Directory for sphinx indexes. _Defaults to "[shared_path]/var/index"_\n
|
27
|
+
*sphinx_log_root*: Directory for sphinx logs. _Defaults to "[shared_path]/log"_\n
|
28
|
+
*sphinx_pid_path*: Location for sphinx pid. _Defaults to "[shared_path]/pids/searchd.pid"_\n
|
29
|
+
|
30
|
+
*sphinx_db_user*: Sphinx DB user. _Defaults to db_user_\n
|
31
|
+
*sphinx_db_pass*: Sphinx DB password. _Defaults to db_pass_\n
|
32
|
+
*sphinx_db_name*: Sphinx DB name. _Defaults to db_name_\n
|
33
|
+
|
34
|
+
*sphinx_db_host*: Sphinx DB host. _Defaults to location for primary :db role_\n
|
35
|
+
*sphinx_host*: Sphinx (searchd) host. _Defaults to location for :search role_\n
|
36
|
+
|
37
|
+
DESC
|
38
|
+
task :update_conf do
|
39
|
+
|
40
|
+
fetch_or_default(:sphinx_conf_template, "config/templates/sphinx.conf.erb")
|
41
|
+
fetch_or_default(:sphinx_port, 3312)
|
42
|
+
fetch_or_default(:sphinx_conf_path, "#{shared_path}/config/sphinx.conf")
|
43
|
+
fetch_or_default(:sphinx_conf_root, "#{current_path}/config")
|
44
|
+
fetch_or_default(:sphinx_index_root, "#{shared_path}/var/index")
|
45
|
+
fetch_or_default(:sphinx_log_root, "#{shared_path}/log")
|
46
|
+
fetch_or_default(:sphinx_pid_path, "#{shared_path}/pids/searchd.pid")
|
47
|
+
|
48
|
+
fetch_or_default(:sphinx_db_user, db_user)
|
49
|
+
fetch_or_default(:sphinx_db_pass, db_pass)
|
50
|
+
fetch_or_default(:sphinx_db_name, db_name)
|
14
51
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
52
|
+
unless exists?(:sphinx_db_host)
|
53
|
+
db_servers = roles[:db]
|
54
|
+
unless db_servers.empty?
|
55
|
+
set :sphinx_db_host, db_servers.first.host
|
56
|
+
else
|
57
|
+
raise "No :db roles, and no :sphinx_db_host setting specified"
|
58
|
+
end
|
59
|
+
end
|
19
60
|
|
20
|
-
|
61
|
+
unless exists?(:sphinx_host)
|
62
|
+
search_servers = roles[:search]
|
63
|
+
unless search_servers.empty?
|
64
|
+
set :sphinx_host, search_servers.first.host
|
65
|
+
else
|
66
|
+
raise "No :search roles, and no :sphinx_host setting specified"
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
put template.load(sphinx_conf_template), sphinx_conf_path
|
21
71
|
end
|
22
72
|
|
23
|
-
desc
|
73
|
+
desc <<-DESC
|
74
|
+
Rotate sphinx index for application.
|
75
|
+
|
76
|
+
*sphinx_prefix*: Location to sphinx install. _Defaults to nil_\n
|
77
|
+
*sphinx_conf*: Location to sphinx conf. _Defaults to "[shared_path]/config/sphinx.conf"_\n
|
78
|
+
DESC
|
24
79
|
task :rotate_all do
|
25
|
-
|
80
|
+
fetch_or_default(:sphinx_prefix, nil)
|
81
|
+
fetch_or_default(:sphinx_conf, "#{shared_path}/config/sphinx.conf")
|
82
|
+
|
83
|
+
indexer_path = sphinx_prefix ? "#{sphinx_prefix}/bin/indexer" : "indexer"
|
84
|
+
|
85
|
+
run "#{indexer_path} --config #{sphinx_conf} --rotate --all"
|
26
86
|
end
|
27
87
|
|
28
|
-
desc
|
88
|
+
desc <<-DESC
|
89
|
+
Build sphinx indexes for application.
|
90
|
+
|
91
|
+
*sphinx_prefix*: Location to sphinx install. _Defaults to nil_\n
|
92
|
+
*sphinx_conf*: Location to sphinx conf. _Defaults to "[shared_path]/config/sphinx.conf"_\n
|
93
|
+
DESC
|
29
94
|
task :index_all do
|
30
|
-
|
95
|
+
fetch_or_default(:sphinx_prefix, nil)
|
96
|
+
fetch_or_default(:sphinx_conf, "#{shared_path}/config/sphinx.conf")
|
97
|
+
|
98
|
+
indexer_path = sphinx_prefix ? "#{sphinx_prefix}/bin/indexer" : "indexer"
|
99
|
+
|
100
|
+
run "#{indexer_path} --config #{sphinx_conf} --all"
|
31
101
|
end
|
32
102
|
|
33
|
-
desc "
|
34
|
-
task :
|
35
|
-
# TODO: Monit
|
103
|
+
desc "Restart sphinx"
|
104
|
+
task :restart do
|
36
105
|
sudo "/sbin/service monit restart sphinx_#{application}"
|
37
106
|
end
|
38
107
|
end
|
data/lib/templates/monit/cert.sh
CHANGED
@@ -7,7 +7,7 @@ mkdir -p /var/certs
|
|
7
7
|
mv /tmp/monit.cnf /var/certs/monit.cnf
|
8
8
|
|
9
9
|
echo "Generating PEM..."
|
10
|
-
openssl req -new -x509 -days 365 -nodes -config /var/certs/monit.cnf -out /var/certs/monit.pem -keyout /var/certs/monit.pem -batch > /var/certs/debug_req.log
|
10
|
+
openssl req -new -x509 -days 365 -nodes -config /var/certs/monit.cnf -out /var/certs/monit.pem -keyout /var/certs/monit.pem -batch > /var/certs/debug_req.log 2>&1
|
11
11
|
openssl gendh 512 >> /var/certs/monit.pem 2> /var/certs/debug_gendh.log
|
12
12
|
echo "Generating x509..."
|
13
13
|
openssl x509 -subject -dates -fingerprint -noout -in /var/certs/monit.pem > /var/certs/debug_x509.log
|
@@ -1,5 +1,5 @@
|
|
1
|
-
<%
|
2
|
-
GRANT
|
1
|
+
<% grant_locations.each do |location| %>
|
2
|
+
GRANT <%= grant %> ON <%= db_name %>.* TO '<%= db_user %>'@'<%= location %>' IDENTIFIED BY '<%= db_pass %>';
|
3
3
|
<% end %>
|
4
4
|
|
5
5
|
CREATE DATABASE IF NOT EXISTS <%= db_name %>;
|
@@ -1,422 +1,51 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
# TODO: This isn't currently used by any recipe. The current setup recipe uses sphinx conf on per application
|
4
|
-
# basis.
|
5
|
-
#
|
6
|
-
|
7
|
-
#############################################################################
|
8
|
-
## data source definition
|
9
|
-
#############################################################################
|
10
|
-
|
11
|
-
source <%= application %>
|
1
|
+
# sphinx config
|
2
|
+
source pages
|
12
3
|
{
|
13
|
-
# data source type
|
14
|
-
# for now, known types are 'mysql', 'pgsql' and 'xmlpipe'
|
15
|
-
# MUST be defined
|
16
4
|
type = mysql
|
17
|
-
|
18
5
|
# whether to strip HTML
|
19
6
|
# values can be 0 (don't strip) or 1 (do strip)
|
20
7
|
# WARNING, only works with mysql source for now
|
21
8
|
# WARNING, should work ok for PERFECTLY formed XHTML for now
|
22
9
|
# WARNING, POSSIBLE TO BUG on malformed everyday HTML
|
23
10
|
# optional, default is 0
|
24
|
-
strip_html =
|
11
|
+
strip_html = 1
|
25
12
|
|
26
13
|
# what HTML attributes to index if stripping HTML
|
27
14
|
# format is as follows:
|
28
15
|
#
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
# some straightforward parameters for 'mysql' source type
|
37
|
-
sql_host = 127.0.0.1
|
38
|
-
sql_user = <%= db_user %>
|
39
|
-
sql_pass = <%= db_pass %>
|
40
|
-
sql_db = <%= db_name %>
|
41
|
-
sql_port = 3306 # optional, default is 3306
|
42
|
-
|
43
|
-
# sql_sock = /tmp/mysql.sock
|
44
|
-
#
|
45
|
-
# optional
|
46
|
-
# usually '/var/lib/mysql/mysql.sock' on Linux
|
47
|
-
# usually '/tmp/mysql.sock' on FreeBSD
|
48
|
-
|
49
|
-
# pre-query, executed before the main fetch query
|
50
|
-
# useful eg. to setup encoding or mark records
|
51
|
-
# optional, default is empty
|
52
|
-
#
|
16
|
+
index_html_attrs = img=alt,title; a=title;
|
17
|
+
|
18
|
+
sql_host = <%= sphinx_db_host %>
|
19
|
+
sql_user = <%= sphinx_db_user %>
|
20
|
+
sql_pass = <%= sphinx_db_pass %>
|
21
|
+
sql_db = <%= sphinx_db_name %>
|
22
|
+
sql_port = <%= sphinx_db_port %> # optional, default is 3306
|
53
23
|
# sql_query_pre = SET CHARACTER_SET_RESULTS=cp1251
|
54
24
|
sql_query_pre = SET NAMES UTF8
|
55
|
-
|
56
|
-
# main document fetch query
|
57
|
-
#
|
58
|
-
# you can specify up to 32 (formally SPH_MAX_FIELDS in sphinx.h) fields;
|
59
|
-
# all of the fields which are not document_id or attributes (see below)
|
60
|
-
# will be full-text indexed
|
61
|
-
#
|
62
|
-
# document_id MUST be the very first field
|
63
|
-
# document_id MUST be positive (non-zero, non-negative)
|
64
|
-
# document_id MUST fit into 32 bits
|
65
|
-
# document_id MUST be unique
|
66
|
-
#
|
67
25
|
# mandatory
|
68
|
-
sql_query
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
#
|
76
|
-
# to use query ranges, you should
|
77
|
-
# 1) provide a query to fetch min/max id (ie. id range) from data set;
|
78
|
-
# 2) configure step size in which this range will be walked;
|
79
|
-
# 3) use $start and $end macros somewhere in the main fetch query.
|
80
|
-
#
|
81
|
-
# 'sql_query_range' must return exactly two integer fields
|
82
|
-
# in exactly min_id, max_id order
|
83
|
-
#
|
84
|
-
# 'sql_range_step' must be a positive integer
|
85
|
-
# optional, default is 1024
|
86
|
-
#
|
87
|
-
# 'sql_query' must contain both '$start' and '$end' macros
|
88
|
-
# if you are using query ranges (because it obviously would be an
|
89
|
-
# error to index the whole table many times)
|
90
|
-
#
|
91
|
-
# note that the intervals specified by $start/$end do not
|
92
|
-
# overlap, so you should NOT remove document ids which are exactly
|
93
|
-
# equal to $start or $end in your query
|
94
|
-
#
|
95
|
-
# here's an example which will index 'documents' table
|
96
|
-
# fetching (at most) one thousand entries at a time:
|
97
|
-
#
|
98
|
-
# sql_query_range = SELECT MIN(id),MAX(id) FROM documents
|
99
|
-
# sql_range_step = 1000
|
100
|
-
# sql_query = \
|
101
|
-
# SELECT doc.id, doc.id AS group, doc.title, doc.data \
|
102
|
-
# FROM documents doc \
|
103
|
-
# WHERE id>=$start AND id<=$end
|
104
|
-
|
105
|
-
|
106
|
-
# attribute columns
|
107
|
-
#
|
108
|
-
# attribute values MUST be positive (non-zero, non-negative) integers
|
109
|
-
# attribute values MUST fit into 32 bits
|
110
|
-
#
|
111
|
-
# attributes are additional values associated with each document which
|
112
|
-
# may be used to perform additional filtering and sorting during search.
|
113
|
-
# attributes are NOT full-text indexed; they are stored in the full text
|
114
|
-
# index as is.
|
115
|
-
#
|
116
|
-
# a good example would be a forum posts table. one might need to search
|
117
|
-
# through 'title' and 'content' fields but to limit search to specific
|
118
|
-
# values of 'author_id', or 'forum_id', or to sort by 'post_date', or to
|
119
|
-
# group matches by 'thread_id', or to group posts by month of the
|
120
|
-
# 'post_date' and provide statistics.
|
121
|
-
#
|
122
|
-
# this all can be achieved by specifying all the mentioned columns
|
123
|
-
# (excluding 'title' and 'content' which are full-text fields) as
|
124
|
-
# attributes and then using API calls to setup filtering, sorting,
|
125
|
-
# and grouping.
|
126
|
-
#
|
127
|
-
# sql_group_column is used to declare integer attributes.
|
128
|
-
#
|
129
|
-
# sql_date_column is used to declare UNIX timestamp attributes.
|
130
|
-
#
|
131
|
-
# sql_str2ordinal_column is used to declare integer attributes which
|
132
|
-
# values are computed as ordinal numbers of corresponding column value
|
133
|
-
# in sorted list of column values. WARNING, all such strings values
|
134
|
-
# are going to be stored in RAM while indexing, and "C" locale will
|
135
|
-
# be used when sorting!
|
136
|
-
#
|
137
|
-
# starting with 0.9.7, there may be multiple attribute columns specified.
|
138
|
-
# here's an example for that mentioned posts table:
|
139
|
-
#
|
140
|
-
# sql_group_column = author_id
|
141
|
-
# sql_group_column = forum_id
|
142
|
-
# sql_group_column = thread_id
|
143
|
-
# sql_date_column = post_unix_timestamp
|
144
|
-
# sql_date_column = last_edit_unix_timestamp
|
145
|
-
#
|
146
|
-
# optional, default is empty
|
147
|
-
##sql_group_column = group_id
|
148
|
-
sql_date_column = created_at
|
149
|
-
# sql_str2ordinal_column = author_name
|
150
|
-
|
151
|
-
# post-query, executed on the end of main fetch query
|
152
|
-
#
|
153
|
-
# note that indexing is NOT completed at the point when post-query
|
154
|
-
# gets executed and might very well fail
|
155
|
-
#
|
156
|
-
# optional, default is empty
|
157
|
-
##sql_query_post =
|
158
|
-
|
159
|
-
# post-index-query, executed on succsefully completed indexing
|
160
|
-
#
|
161
|
-
# $maxid macro is the max document ID which was actually
|
162
|
-
# fetched from the database
|
163
|
-
#
|
164
|
-
# optional, default is empty
|
165
|
-
#
|
166
|
-
# sql_query_post_index = REPLACE INTO counters ( id, val ) \
|
167
|
-
# VALUES ( 'max_indexed_id', $maxid )
|
168
|
-
|
169
|
-
|
170
|
-
# document info query
|
171
|
-
#
|
172
|
-
# ONLY used by search utility to display document information
|
173
|
-
# MUST be able to fetch document info by its id, therefore
|
174
|
-
# MUST contain '$id' macro
|
175
|
-
#
|
176
|
-
# optional, default is empty
|
177
|
-
##sql_query_info = SELECT * FROM documents WHERE id=$id
|
178
|
-
|
179
|
-
#####################################################################
|
180
|
-
|
181
|
-
# demo config for 'xmlpipe' source type is a little below
|
182
|
-
#
|
183
|
-
# with xmlpipe, indexer opens a pipe to a given command,
|
184
|
-
# and then reads documents from stdin
|
185
|
-
#
|
186
|
-
# indexer expects one or more documents from xmlpipe stdin
|
187
|
-
# each document must be formatted exactly as follows:
|
188
|
-
#
|
189
|
-
# <document>
|
190
|
-
# <id>123</id>
|
191
|
-
# <group>45</group>
|
192
|
-
# <timestamp>1132223498</timestamp>
|
193
|
-
# <title>test title</title>
|
194
|
-
# <body>
|
195
|
-
# this is my document body
|
196
|
-
# </body>
|
197
|
-
# </document>
|
198
|
-
#
|
199
|
-
# timestamp element is optional, its default value is 1
|
200
|
-
# all the other elements are mandatory
|
201
|
-
|
202
|
-
# type = xmlpipe
|
203
|
-
# xmlpipe_command = cat /var/test.xml
|
26
|
+
sql_query = QUERY
|
27
|
+
sql_query_range = SELECT MIN(id),MAX(id) FROM TABLE_NAME
|
28
|
+
sql_range_step = 1000
|
29
|
+
sql_group_column = user_id
|
30
|
+
sql_group_column = language
|
31
|
+
sql_date_column = published_date
|
32
|
+
sql_date_column = last_modified
|
204
33
|
}
|
205
34
|
|
206
35
|
|
207
|
-
|
208
|
-
#
|
209
|
-
# all the parameters are copied from the parent source,
|
210
|
-
# and may then be overridden in this source definition
|
211
|
-
##source src1stripped : src1
|
212
|
-
##{
|
213
|
-
## strip_html = 1
|
214
|
-
##}
|
215
|
-
|
216
|
-
#############################################################################
|
217
|
-
## index definition
|
218
|
-
#############################################################################
|
219
|
-
|
220
|
-
# local index example
|
221
|
-
#
|
222
|
-
# this is an index which is stored locally in the filesystem
|
223
|
-
#
|
224
|
-
# all indexing-time options (such as morphology and charsets)
|
225
|
-
# are configured per local index
|
226
|
-
index <%= application %>
|
36
|
+
index pages
|
227
37
|
{
|
228
|
-
|
229
|
-
|
230
|
-
#
|
231
|
-
# multiple sources MAY be specified; to do so, just add more
|
232
|
-
# "source = NAME" lines. in this case, ALL the document IDs
|
233
|
-
# in ALL the specified sources MUST be unique
|
234
|
-
source = <%= application %>
|
235
|
-
|
236
|
-
# this is path and index file name without extension
|
237
|
-
#
|
238
|
-
# indexer will append different extensions to this path to
|
239
|
-
# generate names for both permanent and temporary index files
|
240
|
-
#
|
241
|
-
# .tmp* files are temporary and can be safely removed
|
242
|
-
# if indexer fails to remove them automatically
|
243
|
-
#
|
244
|
-
# .sp* files are fulltext index data files. specifically,
|
245
|
-
# .spa contains attribute values attached to each document id
|
246
|
-
# .spd contains doclists and hitlists
|
247
|
-
# .sph contains index header (schema and other settings)
|
248
|
-
# .spi contains wordlists
|
249
|
-
#
|
250
|
-
# MUST be defined
|
251
|
-
path = /var/sphinx/<%= application %>
|
252
|
-
|
253
|
-
# docinfo (ie. per-document attribute values) storage strategy
|
254
|
-
# defines how docinfo will be stored
|
255
|
-
#
|
256
|
-
# available values are "none", "inline" and "extern"
|
257
|
-
#
|
258
|
-
# "none" means there'll be no docinfo at all (no groups/dates)
|
259
|
-
#
|
260
|
-
# "inline" means that the docinfo will be stored in the .spd
|
261
|
-
# file along with the document ID lists (doclists)
|
262
|
-
#
|
263
|
-
# "extern" means that the docinfo will be stored in the .spa
|
264
|
-
# file separately
|
265
|
-
#
|
266
|
-
# externally stored docinfo should (basically) be kept in RAM
|
267
|
-
# when querying; therefore, "inline" may be the only viable option
|
268
|
-
# for really huge (50-100+ million docs) datasets. however, for
|
269
|
-
# smaller datasets "extern" storage makes both indexing and
|
270
|
-
# searching MUCH more efficient.
|
271
|
-
#
|
272
|
-
# additional search-time memory requirements for extern storage are
|
273
|
-
#
|
274
|
-
# ( 1 + number_of_attrs )*number_of_docs*4 bytes
|
275
|
-
#
|
276
|
-
# so 10 million docs with 2 groups and 1 timestamp will take
|
277
|
-
# (1+2+1)*10M*4 = 160 MB of RAM. this is PER DAEMON, ie. searchd
|
278
|
-
# will alloc 160 MB on startup, read the data and keep it shared
|
279
|
-
# between queries; the children will NOT allocate additional
|
280
|
-
# copies of this data.
|
281
|
-
#
|
282
|
-
# default is "extern" (as most collections are smaller than 100M docs)
|
38
|
+
source = pages
|
39
|
+
path = <%= sphinx_index_root %>/pages
|
283
40
|
docinfo = extern
|
284
|
-
|
285
|
-
|
286
|
-
#
|
287
|
-
# currently supported morphology preprocessors are Porter stemmers
|
288
|
-
# for English and Russian, and Soundex. more stemmers could be added
|
289
|
-
# at users request.
|
290
|
-
#
|
291
|
-
# available values are "none", "stem_en", "stem_ru", "stem_enru",
|
292
|
-
# and "soundex"
|
293
|
-
#
|
294
|
-
# optional, default is "none"
|
295
|
-
#
|
296
|
-
# morphology = none
|
297
|
-
# morphology = stem_en
|
298
|
-
# morphology = stem_ru
|
299
|
-
# morphology = stem_enru
|
300
|
-
# morphology = soundex
|
301
|
-
morphology = none
|
302
|
-
|
303
|
-
# stopwords file
|
304
|
-
#
|
305
|
-
# format is plain text in whatever encoding you use
|
306
|
-
# optional, default is empty
|
307
|
-
#
|
308
|
-
# stopwords = /var/data/stopwords.txt
|
309
|
-
#stopwords =
|
310
|
-
|
311
|
-
# minimum word length
|
312
|
-
#
|
313
|
-
# only the words that are of this length and above will be indexed;
|
314
|
-
# for example, if min_word_len is 4, "the" won't be indexed,
|
315
|
-
# but "they" will be.
|
316
|
-
#
|
317
|
-
# default is 1, which (obviously) means to index everything
|
41
|
+
morphology = stem_en
|
42
|
+
stopwords = <%= sphinx_conf_path %>/stopwords.txt
|
318
43
|
min_word_len = 1
|
319
|
-
|
320
|
-
# charset encoding type
|
321
|
-
#
|
322
|
-
# known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
|
323
|
-
#
|
324
|
-
# optional, default is sbcs
|
325
44
|
charset_type = utf-8
|
326
|
-
|
327
|
-
# charset definition and case folding rules "table"
|
328
|
-
#
|
329
|
-
# optional, default value depends on charset_type
|
330
|
-
#
|
331
|
-
# for now, defaults are configured to support English and Russian
|
332
|
-
# this behavior MAY change in future versions
|
333
|
-
#
|
334
|
-
# 'sbcs' default value is
|
335
|
-
# charset_table = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
|
336
|
-
#
|
337
|
-
# 'utf-8' default value is
|
338
|
-
# charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
|
339
|
-
|
340
|
-
# minimum prefix length
|
341
|
-
#
|
342
|
-
# if prefix length is positive, indexer will not only index all words,
|
343
|
-
# but all the possible prefixes (ie. word beginnings) as well
|
344
|
-
#
|
345
|
-
# for instance, "exam" query against such index will match documents
|
346
|
-
# which contain "example" word, even if they do not contain "exam"
|
347
|
-
#
|
348
|
-
# indexing prefixes will make the index grow significantly
|
349
|
-
# and could degrade search times
|
350
|
-
#
|
351
|
-
# currently there's no way to rank perfect word matches higher
|
352
|
-
# than prefix matches using only one index; you could setup two
|
353
|
-
# indexes for that
|
354
|
-
#
|
355
|
-
# default is 0, which means NOT to index prefixes
|
356
45
|
min_prefix_len = 0
|
357
|
-
|
358
|
-
# minimum infix length
|
359
|
-
#
|
360
|
-
# if infix length is positive, indexer will not only index all words,
|
361
|
-
# but all the possible infixes (ie. characters subsequences starting
|
362
|
-
# anywhere inside the word) as well
|
363
|
-
#
|
364
|
-
# for instance, "amp" query against such index will match documents
|
365
|
-
# which contain "example" word, even if they do not contain "amp"
|
366
|
-
#
|
367
|
-
# indexing prefixes will make the index grow significantly
|
368
|
-
# and could degrade search times
|
369
|
-
#
|
370
|
-
# currently there's no way to rank perfect word matches higher
|
371
|
-
# than infix matches using only one index; you could setup two
|
372
|
-
# indexes for that
|
373
|
-
#
|
374
|
-
# default is 0, which means NOT to index infixes
|
375
46
|
min_infix_len = 0
|
376
|
-
|
377
|
-
# n-grams length
|
378
|
-
#
|
379
|
-
# n-grams provide basic CJK support for unsegmented texts. if using
|
380
|
-
# n-grams, streams of CJK characters are indexed as n-grams. for example,
|
381
|
-
# if incoming stream is ABCDEF and n is 2, this text would be indexed
|
382
|
-
# as if it was AB BC CD DE EF.
|
383
|
-
#
|
384
|
-
# this feature is in alpha version state and only n=1 is currently
|
385
|
-
# supported; this is going to be improved.
|
386
|
-
#
|
387
|
-
# note that if search query is segmented (ie. words are separated with
|
388
|
-
# whitespace), words are in quotes and extended matching mode is used,
|
389
|
-
# then all matching documents will be returned even if their text was
|
390
|
-
# *not* segmented. in the example above, ABCDEF text will be indexed as
|
391
|
-
# A B C D E F, and "BCD" query will be transformed to "B C D" (where
|
392
|
-
# quotes is phrase matching operator), so the document will match.
|
393
|
-
#
|
394
|
-
# optional, default is 0, which means NOT to use n-grams
|
395
|
-
#
|
396
|
-
# ngram_len = 1
|
397
|
-
|
398
|
-
# n-gram characters table
|
399
|
-
#
|
400
|
-
# specifies what specific characters are subject to n-gram
|
401
|
-
# extraction. format is similar to charset_table.
|
402
|
-
#
|
403
|
-
# optional, default is empty
|
404
|
-
#
|
405
|
-
# ngrams_chars = U+3000..U+2FA1F
|
406
47
|
}
|
407
48
|
|
408
|
-
|
409
|
-
# inherited index example
|
410
|
-
#
|
411
|
-
# all the parameters are copied from the parent index,
|
412
|
-
# and may then be overridden in this index definition
|
413
|
-
##index test1stemmed : test1
|
414
|
-
##{
|
415
|
-
## path = /var/data/test1stemmed
|
416
|
-
## morphology = stem_en
|
417
|
-
##}
|
418
|
-
|
419
|
-
|
420
49
|
#############################################################################
|
421
50
|
## indexer settings
|
422
51
|
#############################################################################
|
@@ -432,7 +61,7 @@ indexer
|
|
432
61
|
# will warn if set too low and potentially hurting the performance
|
433
62
|
#
|
434
63
|
# optional, default is 32M
|
435
|
-
mem_limit =
|
64
|
+
mem_limit = 64M
|
436
65
|
}
|
437
66
|
|
438
67
|
#############################################################################
|
@@ -447,22 +76,22 @@ searchd
|
|
447
76
|
# optional, default is to listen on all addresses,
|
448
77
|
# ie. address = 0.0.0.0
|
449
78
|
#
|
450
|
-
address =
|
79
|
+
address = <%= sphinx_host %>
|
451
80
|
# address = 192.168.0.1
|
452
81
|
|
453
82
|
|
454
83
|
# port on which search daemon will listen
|
455
|
-
port =
|
84
|
+
port = <%= sphinx_port %>
|
456
85
|
|
457
86
|
|
458
87
|
# log file
|
459
88
|
# searchd run info is logged here
|
460
|
-
log =
|
89
|
+
log = <%= sphinx_log_root %>/searchd.log
|
461
90
|
|
462
91
|
|
463
92
|
# query log file
|
464
93
|
# all the search queries are logged here
|
465
|
-
query_log =
|
94
|
+
query_log = <%= sphinx_log_root %>/query.log
|
466
95
|
|
467
96
|
|
468
97
|
# client read timeout, seconds
|
@@ -477,7 +106,7 @@ searchd
|
|
477
106
|
# a file which will contain searchd process ID
|
478
107
|
# used for different external automation scripts
|
479
108
|
# MUST be present
|
480
|
-
pid_file =
|
109
|
+
pid_file = <%= sphinx_pid_path %>
|
481
110
|
|
482
111
|
|
483
112
|
# maximum amount of matches this daemon would ever retrieve
|