capitate 0.1.9 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Capfile +3 -2
- data/History.txt +6 -0
- data/Manifest.txt +28 -11
- data/bin/capitate +13 -61
- data/config/hoe.rb +2 -2
- data/docs/nginx.README +12 -0
- data/docs/recipes/centos.txt +33 -0
- data/docs/recipes/deploy-pending.txt +25 -0
- data/docs/recipes/deploy-web.txt +33 -0
- data/docs/recipes/deploy.txt +159 -0
- data/docs/recipes/docs.txt +26 -0
- data/docs/recipes/imagemagick-centos.txt +20 -0
- data/docs/recipes/imagemagick.txt +8 -0
- data/docs/recipes/index.txt +26 -0
- data/docs/recipes/memcached-centos.txt +32 -0
- data/docs/recipes/memcached.txt +35 -0
- data/docs/recipes/mongrel_cluster-centos.txt +27 -0
- data/docs/recipes/mongrel_cluster.txt +40 -0
- data/docs/recipes/monit-centos.txt +32 -0
- data/docs/recipes/monit.txt +8 -0
- data/docs/recipes/mysql-centos.txt +22 -0
- data/docs/recipes/mysql.txt +58 -0
- data/docs/recipes/nginx-centos.txt +43 -0
- data/docs/recipes/nginx.txt +51 -0
- data/docs/recipes/rails.txt +58 -0
- data/docs/recipes/ruby-centos.txt +22 -0
- data/docs/recipes/ruby.txt +8 -0
- data/docs/recipes/sphinx-centos.txt +35 -0
- data/docs/recipes/sphinx.txt +89 -0
- data/lib/capitate/cap_ext/run_via.rb +14 -0
- data/lib/capitate/plugins/gem.rb +2 -3
- data/lib/capitate/plugins/prompt.rb +24 -0
- data/lib/capitate/plugins/script.rb +29 -15
- data/lib/capitate/plugins/templates.rb +45 -40
- data/lib/capitate/plugins/yum.rb +12 -22
- data/lib/capitate/task_node.rb +28 -9
- data/lib/capitate/version.rb +2 -2
- data/lib/capitate.rb +4 -5
- data/lib/deployment/deploy.rb +42 -0
- data/lib/deployment/install-centos-rubyweb.rb +131 -0
- data/lib/recipes/centos/centos.rb +30 -21
- data/lib/recipes/centos/imagemagick.rb +9 -8
- data/lib/recipes/centos/memcached.rb +11 -21
- data/lib/recipes/centos/mongrel_cluster.rb +7 -11
- data/lib/recipes/centos/monit.rb +13 -18
- data/lib/recipes/centos/mysql.rb +6 -9
- data/lib/recipes/centos/nginx.rb +17 -30
- data/lib/recipes/centos/ruby.rb +14 -19
- data/lib/recipes/centos/sphinx.rb +14 -28
- data/lib/recipes/docs.rb +22 -6
- data/lib/recipes/memcached.rb +14 -8
- data/lib/recipes/mongrel_cluster.rb +11 -14
- data/lib/recipes/mysql.rb +22 -40
- data/lib/recipes/nginx.rb +9 -4
- data/lib/recipes/rails.rb +53 -6
- data/lib/recipes/sphinx.rb +86 -17
- data/lib/templates/capistrano/Capfile +2 -6
- data/lib/templates/monit/cert.sh +1 -1
- data/lib/templates/mysql/install_db.sql.erb +2 -2
- data/lib/templates/sphinx/sphinx.conf.erb +28 -399
- data/lib/templates/sphinx/sphinx_app.initd.centos.erb +1 -1
- data/website/index.html +20 -34
- data/website/index.txt +12 -22
- data/website/stylesheets/screen.css +22 -11
- data/website/template.rhtml +7 -5
- data/website/template_recipe.rhtml +7 -3
- metadata +55 -15
- data/lib/capitate/plugins/package.rb +0 -30
- data/lib/capitate/plugins/profiles.rb +0 -33
- data/lib/capitate/plugins/wget.rb +0 -23
- data/lib/profiles/centos-sick.rb +0 -116
- data/lib/recipes/README +0 -5
- data/lib/recipes/centos/README +0 -3
- data/lib/recipes/gems.rb +0 -20
- data/lib/recipes/packages.rb +0 -39
- data/lib/recipes/recipes.rb +0 -30
- data/lib/templates/centos/setup_for_web.sh +0 -17
- data/lib/templates/centos/sudoers +0 -95
data/lib/recipes/sphinx.rb
CHANGED
@@ -1,38 +1,107 @@
|
|
1
1
|
# Sphinx recipes
|
2
2
|
namespace :sphinx do
|
3
3
|
|
4
|
-
desc
|
4
|
+
desc <<-DESC
|
5
|
+
Create monit configuration for sphinx.\n
|
6
|
+
*monit_conf_dir*: Destination for monitrc. _Defaults to "/etc/monit"_\n
|
7
|
+
*sphinx_pid_path*: Location for sphinx pid. _Defaults to "[shared_path]/pids/searchd.pid"_\n
|
8
|
+
DESC
|
5
9
|
task :setup_monit do
|
6
|
-
|
10
|
+
|
11
|
+
# Settings
|
12
|
+
fetch_or_default(:monit_conf_dir, "/etc/monit")
|
13
|
+
fetch_or_default(:sphinx_pid_path, "#{shared_path}/pids/searchd.pid")
|
7
14
|
|
8
15
|
put template.load("sphinx/sphinx.monitrc.erb"), "/tmp/sphinx_#{application}.monitrc"
|
9
|
-
sudo "install -o root /tmp/sphinx_#{application}.monitrc /
|
16
|
+
sudo "install -o root /tmp/sphinx_#{application}.monitrc #{monit_conf_dir}/sphinx_#{application}.monitrc"
|
10
17
|
end
|
11
18
|
|
12
|
-
desc
|
13
|
-
|
19
|
+
desc <<-DESC
|
20
|
+
Update sphinx for application.
|
21
|
+
|
22
|
+
*sphinx_conf_template*: Path to sphinx.conf.erb. _Defaults to "config/templates/sphinx.conf.erb"_\n
|
23
|
+
*sphinx_conf_path*: Path to sphinx.conf. _Defaults to "[shared_path]/config/sphinx.conf"_\n
|
24
|
+
*sphinx_port*: Sphinx port. _Defaults to 3312_\n
|
25
|
+
*sphinx_conf_root*: Directory for sphinx configuration, like stopwords.txt. _Defaults to [current_path]/config_\n
|
26
|
+
*sphinx_index_root*: Directory for sphinx indexes. _Defaults to "[shared_path]/var/index"_\n
|
27
|
+
*sphinx_log_root*: Directory for sphinx logs. _Defaults to "[shared_path]/log"_\n
|
28
|
+
*sphinx_pid_root*: Directory for sphinx pids. _Defaults to "[shared_path]/pids"_\n
|
29
|
+
|
30
|
+
*sphinx_db_user*: Sphinx DB user. _Defaults to db_user_\n
|
31
|
+
*sphinx_db_pass*: Sphinx DB password. _Defaults to db_pass_\n
|
32
|
+
*sphinx_db_name*: Sphinx DB name. _Defaults to db_name_\n
|
33
|
+
|
34
|
+
*sphinx_db_host*: Sphinx DB host. _Defaults to location for primary :db role_\n
|
35
|
+
*sphinx_host*: Sphinx DB host. _Defaults to location for :search role_\n
|
36
|
+
|
37
|
+
DESC
|
38
|
+
task :update_conf do
|
39
|
+
|
40
|
+
fetch_or_default(:sphinx_conf_template, "config/templates/sphinx.conf.erb")
|
41
|
+
fetch_or_default(:sphinx_port, 3312)
|
42
|
+
fetch_or_default(:sphinx_conf_path, "#{shared_path}/config/sphinx.conf")
|
43
|
+
fetch_or_default(:sphinx_conf_root, "#{current_path}/config")
|
44
|
+
fetch_or_default(:sphinx_index_root, "#{shared_path}/var/index")
|
45
|
+
fetch_or_default(:sphinx_log_root, "#{shared_path}/log")
|
46
|
+
fetch_or_default(:sphinx_pid_path, "#{shared_path}/pids/searchd.pid")
|
47
|
+
|
48
|
+
fetch_or_default(:sphinx_db_user, db_user)
|
49
|
+
fetch_or_default(:sphinx_db_pass, db_pass)
|
50
|
+
fetch_or_default(:sphinx_db_name, db_name)
|
14
51
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
52
|
+
unless exists?(:sphinx_db_host)
|
53
|
+
db_servers = roles[:db]
|
54
|
+
unless db_servers.empty?
|
55
|
+
set :sphinx_db_host, db_servers.first.host
|
56
|
+
else
|
57
|
+
raise "No :db roles, and no :sphinx_db_host setting specified"
|
58
|
+
end
|
59
|
+
end
|
19
60
|
|
20
|
-
|
61
|
+
unless exists?(:sphinx_host)
|
62
|
+
search_servers = roles[:search]
|
63
|
+
unless search_servers.empty?
|
64
|
+
set :sphinx_host, search_servers.first.host
|
65
|
+
else
|
66
|
+
raise "No :search roles, and no :sphinx_host setting specified"
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
put template.load(sphinx_conf_template), sphinx_conf_path
|
21
71
|
end
|
22
72
|
|
23
|
-
desc
|
73
|
+
desc <<-DESC
|
74
|
+
Rotate sphinx index for application.
|
75
|
+
|
76
|
+
*sphinx_prefix*: Location to sphinx install. _Defaults to nil_\n
|
77
|
+
*sphinx_conf*: Location to sphinx conf. _Defaults to "[shared_path]/config/sphinx.conf"_\n
|
78
|
+
DESC
|
24
79
|
task :rotate_all do
|
25
|
-
|
80
|
+
fetch_or_default(:sphinx_prefix, nil)
|
81
|
+
fetch_or_default(:sphinx_conf, "#{shared_path}/config/sphinx.conf")
|
82
|
+
|
83
|
+
indexer_path = sphinx_prefix ? "#{sphinx_prefix}/bin/indexer" : "indexer"
|
84
|
+
|
85
|
+
run "#{indexer_path} --config #{sphinx_conf} --rotate --all"
|
26
86
|
end
|
27
87
|
|
28
|
-
desc
|
88
|
+
desc <<-DESC
|
89
|
+
Build sphinx indexes for application.
|
90
|
+
|
91
|
+
*sphinx_prefix*: Location to sphinx install. _Defaults to nil_\n
|
92
|
+
*sphinx_conf*: Location to sphinx conf. _Defaults to "[shared_path]/config/sphinx.conf"_\n
|
93
|
+
DESC
|
29
94
|
task :index_all do
|
30
|
-
|
95
|
+
fetch_or_default(:sphinx_prefix, nil)
|
96
|
+
fetch_or_default(:sphinx_conf, "#{shared_path}/config/sphinx.conf")
|
97
|
+
|
98
|
+
indexer_path = sphinx_prefix ? "#{sphinx_prefix}/bin/indexer" : "indexer"
|
99
|
+
|
100
|
+
run "#{indexer_path} --config #{sphinx_conf} --all"
|
31
101
|
end
|
32
102
|
|
33
|
-
desc "
|
34
|
-
task :
|
35
|
-
# TODO: Monit
|
103
|
+
desc "Restart sphinx"
|
104
|
+
task :restart do
|
36
105
|
sudo "/sbin/service monit restart sphinx_#{application}"
|
37
106
|
end
|
38
107
|
end
|
data/lib/templates/monit/cert.sh
CHANGED
@@ -7,7 +7,7 @@ mkdir -p /var/certs
|
|
7
7
|
mv /tmp/monit.cnf /var/certs/monit.cnf
|
8
8
|
|
9
9
|
echo "Generating PEM..."
|
10
|
-
openssl req -new -x509 -days 365 -nodes -config /var/certs/monit.cnf -out /var/certs/monit.pem -keyout /var/certs/monit.pem -batch > /var/certs/debug_req.log
|
10
|
+
openssl req -new -x509 -days 365 -nodes -config /var/certs/monit.cnf -out /var/certs/monit.pem -keyout /var/certs/monit.pem -batch > /var/certs/debug_req.log 2>&1
|
11
11
|
openssl gendh 512 >> /var/certs/monit.pem 2> /var/certs/debug_gendh.log
|
12
12
|
echo "Generating x509..."
|
13
13
|
openssl x509 -subject -dates -fingerprint -noout -in /var/certs/monit.pem > /var/certs/debug_x509.log
|
@@ -1,5 +1,5 @@
|
|
1
|
-
<%
|
2
|
-
GRANT
|
1
|
+
<% grant_locations.each do |location| %>
|
2
|
+
GRANT <%= grant %> ON <%= db_name %>.* TO '<%= db_user %>'@'<%= location %>' IDENTIFIED BY '<%= db_pass %>';
|
3
3
|
<% end %>
|
4
4
|
|
5
5
|
CREATE DATABASE IF NOT EXISTS <%= db_name %>;
|
@@ -1,422 +1,51 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
# TODO: This isn't currently used by any recipe. The current setup recipe uses sphinx conf on per application
|
4
|
-
# basis.
|
5
|
-
#
|
6
|
-
|
7
|
-
#############################################################################
|
8
|
-
## data source definition
|
9
|
-
#############################################################################
|
10
|
-
|
11
|
-
source <%= application %>
|
1
|
+
# sphinx config
|
2
|
+
source pages
|
12
3
|
{
|
13
|
-
# data source type
|
14
|
-
# for now, known types are 'mysql', 'pgsql' and 'xmlpipe'
|
15
|
-
# MUST be defined
|
16
4
|
type = mysql
|
17
|
-
|
18
5
|
# whether to strip HTML
|
19
6
|
# values can be 0 (don't strip) or 1 (do strip)
|
20
7
|
# WARNING, only works with mysql source for now
|
21
8
|
# WARNING, should work ok for PERFECTLY formed XHTML for now
|
22
9
|
# WARNING, POSSIBLE TO BUG on malformed everyday HTML
|
23
10
|
# optional, default is 0
|
24
|
-
strip_html =
|
11
|
+
strip_html = 1
|
25
12
|
|
26
13
|
# what HTML attributes to index if stripping HTML
|
27
14
|
# format is as follows:
|
28
15
|
#
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
# some straightforward parameters for 'mysql' source type
|
37
|
-
sql_host = 127.0.0.1
|
38
|
-
sql_user = <%= db_user %>
|
39
|
-
sql_pass = <%= db_pass %>
|
40
|
-
sql_db = <%= db_name %>
|
41
|
-
sql_port = 3306 # optional, default is 3306
|
42
|
-
|
43
|
-
# sql_sock = /tmp/mysql.sock
|
44
|
-
#
|
45
|
-
# optional
|
46
|
-
# usually '/var/lib/mysql/mysql.sock' on Linux
|
47
|
-
# usually '/tmp/mysql.sock' on FreeBSD
|
48
|
-
|
49
|
-
# pre-query, executed before the main fetch query
|
50
|
-
# useful eg. to setup encoding or mark records
|
51
|
-
# optional, default is empty
|
52
|
-
#
|
16
|
+
index_html_attrs = img=alt,title; a=title;
|
17
|
+
|
18
|
+
sql_host = <%= sphinx_db_host %>
|
19
|
+
sql_user = <%= sphinx_db_user %>
|
20
|
+
sql_pass = <%= sphinx_db_pass %>
|
21
|
+
sql_db = <%= sphinx_db_name %>
|
22
|
+
sql_port = <%= sphinx_db_port %> # optional, default is 3306
|
53
23
|
# sql_query_pre = SET CHARACTER_SET_RESULTS=cp1251
|
54
24
|
sql_query_pre = SET NAMES UTF8
|
55
|
-
|
56
|
-
# main document fetch query
|
57
|
-
#
|
58
|
-
# you can specify up to 32 (formally SPH_MAX_FIELDS in sphinx.h) fields;
|
59
|
-
# all of the fields which are not document_id or attributes (see below)
|
60
|
-
# will be full-text indexed
|
61
|
-
#
|
62
|
-
# document_id MUST be the very first field
|
63
|
-
# document_id MUST be positive (non-zero, non-negative)
|
64
|
-
# document_id MUST fit into 32 bits
|
65
|
-
# document_id MUST be unique
|
66
|
-
#
|
67
25
|
# mandatory
|
68
|
-
sql_query
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
#
|
76
|
-
# to use query ranges, you should
|
77
|
-
# 1) provide a query to fetch min/max id (ie. id range) from data set;
|
78
|
-
# 2) configure step size in which this range will be walked;
|
79
|
-
# 3) use $start and $end macros somewhere in the main fetch query.
|
80
|
-
#
|
81
|
-
# 'sql_query_range' must return exactly two integer fields
|
82
|
-
# in exactly min_id, max_id order
|
83
|
-
#
|
84
|
-
# 'sql_range_step' must be a positive integer
|
85
|
-
# optional, default is 1024
|
86
|
-
#
|
87
|
-
# 'sql_query' must contain both '$start' and '$end' macros
|
88
|
-
# if you are using query ranges (because it obviously would be an
|
89
|
-
# error to index the whole table many times)
|
90
|
-
#
|
91
|
-
# note that the intervals specified by $start/$end do not
|
92
|
-
# overlap, so you should NOT remove document ids which are exactly
|
93
|
-
# equal to $start or $end in your query
|
94
|
-
#
|
95
|
-
# here's an example which will index 'documents' table
|
96
|
-
# fetching (at most) one thousand entries at a time:
|
97
|
-
#
|
98
|
-
# sql_query_range = SELECT MIN(id),MAX(id) FROM documents
|
99
|
-
# sql_range_step = 1000
|
100
|
-
# sql_query = \
|
101
|
-
# SELECT doc.id, doc.id AS group, doc.title, doc.data \
|
102
|
-
# FROM documents doc \
|
103
|
-
# WHERE id>=$start AND id<=$end
|
104
|
-
|
105
|
-
|
106
|
-
# attribute columns
|
107
|
-
#
|
108
|
-
# attribute values MUST be positive (non-zero, non-negative) integers
|
109
|
-
# attribute values MUST fit into 32 bits
|
110
|
-
#
|
111
|
-
# attributes are additional values associated with each document which
|
112
|
-
# may be used to perform additional filtering and sorting during search.
|
113
|
-
# attributes are NOT full-text indexed; they are stored in the full text
|
114
|
-
# index as is.
|
115
|
-
#
|
116
|
-
# a good example would be a forum posts table. one might need to search
|
117
|
-
# through 'title' and 'content' fields but to limit search to specific
|
118
|
-
# values of 'author_id', or 'forum_id', or to sort by 'post_date', or to
|
119
|
-
# group matches by 'thread_id', or to group posts by month of the
|
120
|
-
# 'post_date' and provide statistics.
|
121
|
-
#
|
122
|
-
# this all can be achieved by specifying all the mentioned columns
|
123
|
-
# (excluding 'title' and 'content' which are full-text fields) as
|
124
|
-
# attributes and then using API calls to setup filtering, sorting,
|
125
|
-
# and grouping.
|
126
|
-
#
|
127
|
-
# sql_group_column is used to declare integer attributes.
|
128
|
-
#
|
129
|
-
# sql_date_column is used to declare UNIX timestamp attributes.
|
130
|
-
#
|
131
|
-
# sql_str2ordinal_column is used to declare integer attributes which
|
132
|
-
# values are computed as ordinal numbers of corresponding column value
|
133
|
-
# in sorted list of column values. WARNING, all such strings values
|
134
|
-
# are going to be stored in RAM while indexing, and "C" locale will
|
135
|
-
# be used when sorting!
|
136
|
-
#
|
137
|
-
# starting with 0.9.7, there may be multiple attribute columns specified.
|
138
|
-
# here's an example for that mentioned posts table:
|
139
|
-
#
|
140
|
-
# sql_group_column = author_id
|
141
|
-
# sql_group_column = forum_id
|
142
|
-
# sql_group_column = thread_id
|
143
|
-
# sql_date_column = post_unix_timestamp
|
144
|
-
# sql_date_column = last_edit_unix_timestamp
|
145
|
-
#
|
146
|
-
# optional, default is empty
|
147
|
-
##sql_group_column = group_id
|
148
|
-
sql_date_column = created_at
|
149
|
-
# sql_str2ordinal_column = author_name
|
150
|
-
|
151
|
-
# post-query, executed on the end of main fetch query
|
152
|
-
#
|
153
|
-
# note that indexing is NOT completed at the point when post-query
|
154
|
-
# gets executed and might very well fail
|
155
|
-
#
|
156
|
-
# optional, default is empty
|
157
|
-
##sql_query_post =
|
158
|
-
|
159
|
-
# post-index-query, executed on successfully completed indexing
|
160
|
-
#
|
161
|
-
# $maxid macro is the max document ID which was actually
|
162
|
-
# fetched from the database
|
163
|
-
#
|
164
|
-
# optional, default is empty
|
165
|
-
#
|
166
|
-
# sql_query_post_index = REPLACE INTO counters ( id, val ) \
|
167
|
-
# VALUES ( 'max_indexed_id', $maxid )
|
168
|
-
|
169
|
-
|
170
|
-
# document info query
|
171
|
-
#
|
172
|
-
# ONLY used by search utility to display document information
|
173
|
-
# MUST be able to fetch document info by its id, therefore
|
174
|
-
# MUST contain '$id' macro
|
175
|
-
#
|
176
|
-
# optional, default is empty
|
177
|
-
##sql_query_info = SELECT * FROM documents WHERE id=$id
|
178
|
-
|
179
|
-
#####################################################################
|
180
|
-
|
181
|
-
# demo config for 'xmlpipe' source type is a little below
|
182
|
-
#
|
183
|
-
# with xmlpipe, indexer opens a pipe to a given command,
|
184
|
-
# and then reads documents from stdin
|
185
|
-
#
|
186
|
-
# indexer expects one or more documents from xmlpipe stdin
|
187
|
-
# each document must be formatted exactly as follows:
|
188
|
-
#
|
189
|
-
# <document>
|
190
|
-
# <id>123</id>
|
191
|
-
# <group>45</group>
|
192
|
-
# <timestamp>1132223498</timestamp>
|
193
|
-
# <title>test title</title>
|
194
|
-
# <body>
|
195
|
-
# this is my document body
|
196
|
-
# </body>
|
197
|
-
# </document>
|
198
|
-
#
|
199
|
-
# timestamp element is optional, its default value is 1
|
200
|
-
# all the other elements are mandatory
|
201
|
-
|
202
|
-
# type = xmlpipe
|
203
|
-
# xmlpipe_command = cat /var/test.xml
|
26
|
+
sql_query = QUERY
|
27
|
+
sql_query_range = SELECT MIN(id),MAX(id) FROM TABLE_NAME
|
28
|
+
sql_range_step = 1000
|
29
|
+
sql_group_column = user_id
|
30
|
+
sql_group_column = language
|
31
|
+
sql_date_column = published_date
|
32
|
+
sql_date_column = last_modified
|
204
33
|
}
|
205
34
|
|
206
35
|
|
207
|
-
|
208
|
-
#
|
209
|
-
# all the parameters are copied from the parent source,
|
210
|
-
# and may then be overridden in this source definition
|
211
|
-
##source src1stripped : src1
|
212
|
-
##{
|
213
|
-
## strip_html = 1
|
214
|
-
##}
|
215
|
-
|
216
|
-
#############################################################################
|
217
|
-
## index definition
|
218
|
-
#############################################################################
|
219
|
-
|
220
|
-
# local index example
|
221
|
-
#
|
222
|
-
# this is an index which is stored locally in the filesystem
|
223
|
-
#
|
224
|
-
# all indexing-time options (such as morphology and charsets)
|
225
|
-
# are configured per local index
|
226
|
-
index <%= application %>
|
36
|
+
index pages
|
227
37
|
{
|
228
|
-
|
229
|
-
|
230
|
-
#
|
231
|
-
# multiple sources MAY be specified; to do so, just add more
|
232
|
-
# "source = NAME" lines. in this case, ALL the document IDs
|
233
|
-
# in ALL the specified sources MUST be unique
|
234
|
-
source = <%= application %>
|
235
|
-
|
236
|
-
# this is path and index file name without extension
|
237
|
-
#
|
238
|
-
# indexer will append different extensions to this path to
|
239
|
-
# generate names for both permanent and temporary index files
|
240
|
-
#
|
241
|
-
# .tmp* files are temporary and can be safely removed
|
242
|
-
# if indexer fails to remove them automatically
|
243
|
-
#
|
244
|
-
# .sp* files are fulltext index data files. specifically,
|
245
|
-
# .spa contains attribute values attached to each document id
|
246
|
-
# .spd contains doclists and hitlists
|
247
|
-
# .sph contains index header (schema and other settings)
|
248
|
-
# .spi contains wordlists
|
249
|
-
#
|
250
|
-
# MUST be defined
|
251
|
-
path = /var/sphinx/<%= application %>
|
252
|
-
|
253
|
-
# docinfo (ie. per-document attribute values) storage strategy
|
254
|
-
# defines how docinfo will be stored
|
255
|
-
#
|
256
|
-
# available values are "none", "inline" and "extern"
|
257
|
-
#
|
258
|
-
# "none" means there'll be no docinfo at all (no groups/dates)
|
259
|
-
#
|
260
|
-
# "inline" means that the docinfo will be stored in the .spd
|
261
|
-
# file along with the document ID lists (doclists)
|
262
|
-
#
|
263
|
-
# "extern" means that the docinfo will be stored in the .spa
|
264
|
-
# file separately
|
265
|
-
#
|
266
|
-
# externally stored docinfo should (basically) be kept in RAM
|
267
|
-
# when querying; therefore, "inline" may be the only viable option
|
268
|
-
# for really huge (50-100+ million docs) datasets. however, for
|
269
|
-
# smaller datasets "extern" storage makes both indexing and
|
270
|
-
# searching MUCH more efficient.
|
271
|
-
#
|
272
|
-
# additional search-time memory requirements for extern storage are
|
273
|
-
#
|
274
|
-
# ( 1 + number_of_attrs )*number_of_docs*4 bytes
|
275
|
-
#
|
276
|
-
# so 10 million docs with 2 groups and 1 timestamp will take
|
277
|
-
# (1+2+1)*10M*4 = 160 MB of RAM. this is PER DAEMON, ie. searchd
|
278
|
-
# will alloc 160 MB on startup, read the data and keep it shared
|
279
|
-
# between queries; the children will NOT allocate additional
|
280
|
-
# copies of this data.
|
281
|
-
#
|
282
|
-
# default is "extern" (as most collections are smaller than 100M docs)
|
38
|
+
source = pages
|
39
|
+
path = <%= sphinx_index_root %>/pages
|
283
40
|
docinfo = extern
|
284
|
-
|
285
|
-
|
286
|
-
#
|
287
|
-
# currently supported morphology preprocessors are Porter stemmers
|
288
|
-
# for English and Russian, and Soundex. more stemmers could be added
|
289
|
-
# at users request.
|
290
|
-
#
|
291
|
-
# available values are "none", "stem_en", "stem_ru", "stem_enru",
|
292
|
-
# and "soundex"
|
293
|
-
#
|
294
|
-
# optional, default is "none"
|
295
|
-
#
|
296
|
-
# morphology = none
|
297
|
-
# morphology = stem_en
|
298
|
-
# morphology = stem_ru
|
299
|
-
# morphology = stem_enru
|
300
|
-
# morphology = soundex
|
301
|
-
morphology = none
|
302
|
-
|
303
|
-
# stopwords file
|
304
|
-
#
|
305
|
-
# format is plain text in whatever encoding you use
|
306
|
-
# optional, default is empty
|
307
|
-
#
|
308
|
-
# stopwords = /var/data/stopwords.txt
|
309
|
-
#stopwords =
|
310
|
-
|
311
|
-
# minimum word length
|
312
|
-
#
|
313
|
-
# only the words that are of this length and above will be indexed;
|
314
|
-
# for example, if min_word_len is 4, "the" won't be indexed,
|
315
|
-
# but "they" will be.
|
316
|
-
#
|
317
|
-
# default is 1, which (obviously) means to index everything
|
41
|
+
morphology = stem_en
|
42
|
+
stopwords = <%= sphinx_conf_path %>/stopwords.txt
|
318
43
|
min_word_len = 1
|
319
|
-
|
320
|
-
# charset encoding type
|
321
|
-
#
|
322
|
-
# known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
|
323
|
-
#
|
324
|
-
# optional, default is sbcs
|
325
44
|
charset_type = utf-8
|
326
|
-
|
327
|
-
# charset definition and case folding rules "table"
|
328
|
-
#
|
329
|
-
# optional, default value depends on charset_type
|
330
|
-
#
|
331
|
-
# for now, defaults are configured to support English and Russian
|
332
|
-
# this behavior MAY change in future versions
|
333
|
-
#
|
334
|
-
# 'sbcs' default value is
|
335
|
-
# charset_table = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
|
336
|
-
#
|
337
|
-
# 'utf-8' default value is
|
338
|
-
# charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
|
339
|
-
|
340
|
-
# minimum prefix length
|
341
|
-
#
|
342
|
-
# if prefix length is positive, indexer will not only index all words,
|
343
|
-
# but all the possible prefixes (ie. word beginnings) as well
|
344
|
-
#
|
345
|
-
# for instance, "exam" query against such index will match documents
|
346
|
-
# which contain "example" word, even if they do not contain "exam"
|
347
|
-
#
|
348
|
-
# indexing prefixes will make the index grow significantly
|
349
|
-
# and could degrade search times
|
350
|
-
#
|
351
|
-
# currently there's no way to rank perfect word matches higher
|
352
|
-
# than prefix matches using only one index; you could setup two
|
353
|
-
# indexes for that
|
354
|
-
#
|
355
|
-
# default is 0, which means NOT to index prefixes
|
356
45
|
min_prefix_len = 0
|
357
|
-
|
358
|
-
# minimum infix length
|
359
|
-
#
|
360
|
-
# if infix length is positive, indexer will not only index all words,
|
361
|
-
# but all the possible infixes (ie. characters subsequences starting
|
362
|
-
# anywhere inside the word) as well
|
363
|
-
#
|
364
|
-
# for instance, "amp" query against such index will match documents
|
365
|
-
# which contain "example" word, even if they do not contain "amp"
|
366
|
-
#
|
367
|
-
# indexing prefixes will make the index grow significantly
|
368
|
-
# and could degrade search times
|
369
|
-
#
|
370
|
-
# currently there's no way to rank perfect word matches higher
|
371
|
-
# than infix matches using only one index; you could setup two
|
372
|
-
# indexes for that
|
373
|
-
#
|
374
|
-
# default is 0, which means NOT to index infixes
|
375
46
|
min_infix_len = 0
|
376
|
-
|
377
|
-
# n-grams length
|
378
|
-
#
|
379
|
-
# n-grams provide basic CJK support for unsegmented texts. if using
|
380
|
-
# n-grams, streams of CJK characters are indexed as n-grams. for example,
|
381
|
-
# if incoming stream is ABCDEF and n is 2, this text would be indexed
|
382
|
-
# as if it was AB BC CD DE EF.
|
383
|
-
#
|
384
|
-
# this feature is in alpha version state and only n=1 is currently
|
385
|
-
# supported; this is going to be improved.
|
386
|
-
#
|
387
|
-
# note that if search query is segmented (ie. words are separated with
|
388
|
-
# whitespace), words are in quotes and extended matching mode is used,
|
389
|
-
# then all matching documents will be returned even if their text was
|
390
|
-
# *not* segmented. in the example above, ABCDEF text will be indexed as
|
391
|
-
# A B C D E F, and "BCD" query will be transformed to "B C D" (where
|
392
|
-
# quotes is phrase matching operator), so the document will match.
|
393
|
-
#
|
394
|
-
# optional, default is 0, which means NOT to use n-grams
|
395
|
-
#
|
396
|
-
# ngram_len = 1
|
397
|
-
|
398
|
-
# n-gram characters table
|
399
|
-
#
|
400
|
-
# specifies what specific characters are subject to n-gram
|
401
|
-
# extraction. format is similar to charset_table.
|
402
|
-
#
|
403
|
-
# optional, default is empty
|
404
|
-
#
|
405
|
-
# ngrams_chars = U+3000..U+2FA1F
|
406
47
|
}
|
407
48
|
|
408
|
-
|
409
|
-
# inherited index example
|
410
|
-
#
|
411
|
-
# all the parameters are copied from the parent index,
|
412
|
-
# and may then be overridden in this index definition
|
413
|
-
##index test1stemmed : test1
|
414
|
-
##{
|
415
|
-
## path = /var/data/test1stemmed
|
416
|
-
## morphology = stem_en
|
417
|
-
##}
|
418
|
-
|
419
|
-
|
420
49
|
#############################################################################
|
421
50
|
## indexer settings
|
422
51
|
#############################################################################
|
@@ -432,7 +61,7 @@ indexer
|
|
432
61
|
# will warn if set too low and potentially hurting the performance
|
433
62
|
#
|
434
63
|
# optional, default is 32M
|
435
|
-
mem_limit =
|
64
|
+
mem_limit = 64M
|
436
65
|
}
|
437
66
|
|
438
67
|
#############################################################################
|
@@ -447,22 +76,22 @@ searchd
|
|
447
76
|
# optional, default is to listen on all addresses,
|
448
77
|
# ie. address = 0.0.0.0
|
449
78
|
#
|
450
|
-
address =
|
79
|
+
address = <%= sphinx_host %>
|
451
80
|
# address = 192.168.0.1
|
452
81
|
|
453
82
|
|
454
83
|
# port on which search daemon will listen
|
455
|
-
port =
|
84
|
+
port = <%= sphinx_port %>
|
456
85
|
|
457
86
|
|
458
87
|
# log file
|
459
88
|
# searchd run info is logged here
|
460
|
-
log =
|
89
|
+
log = <%= sphinx_log_root %>/searchd.log
|
461
90
|
|
462
91
|
|
463
92
|
# query log file
|
464
93
|
# all the search queries are logged here
|
465
|
-
query_log =
|
94
|
+
query_log = <%= sphinx_log_root %>/query.log
|
466
95
|
|
467
96
|
|
468
97
|
# client read timeout, seconds
|
@@ -477,7 +106,7 @@ searchd
|
|
477
106
|
# a file which will contain searchd process ID
|
478
107
|
# used for different external automation scripts
|
479
108
|
# MUST be present
|
480
|
-
pid_file =
|
109
|
+
pid_file = <%= sphinx_pid_path %>
|
481
110
|
|
482
111
|
|
483
112
|
# maximum amount of matches this daemon would ever retrieve
|