solrsam 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +22 -0
- data/LICENSE +23 -0
- data/README.rdoc +115 -0
- data/Rakefile +18 -0
- data/config/solr/conf/elevate.xml +31 -0
- data/config/solr/conf/mapping-ISOLatin1Accent.txt +246 -0
- data/config/solr/conf/protwords.txt +22 -0
- data/config/solr/conf/schema.xml +237 -0
- data/config/solr/conf/solrconfig.xml +430 -0
- data/config/solr/conf/spellings.txt +2 -0
- data/config/solr/conf/stopwords.txt +56 -0
- data/config/solr/conf/synonyms.txt +24 -0
- data/config/solr/conf/xslt/example.xsl +132 -0
- data/config/solr/conf/xslt/example_atom.xsl +67 -0
- data/config/solr/conf/xslt/example_rss.xsl +66 -0
- data/config/solr/conf/xslt/luke.xsl +337 -0
- data/config/solr.yml +12 -0
- data/config/solr.yml.example +13 -0
- data/lib/rails/generators/solrsan/config/config_generator.rb +23 -0
- data/lib/rails/generators/solrsan/config/templates/solr.yml +13 -0
- data/lib/rails/generators/solrsan/config/templates/solrsan.rb +5 -0
- data/lib/rails/generators/solrsan_generator.rb +11 -0
- data/lib/solrsam/capistrano.rb +31 -0
- data/lib/solrsam/config.rb +25 -0
- data/lib/solrsam/indexer.rb +83 -0
- data/lib/solrsam/search.rb +195 -0
- data/lib/solrsam/version.rb +3 -0
- data/lib/solrsam.rb +12 -0
- data/lib/tasks/solr.rake +71 -0
- data/solrsam.gemspec +25 -0
- data/test/models/document.rb +11 -0
- data/test/search_test_helper.rb +13 -0
- data/test/test_helper.rb +27 -0
- data/test/unit/indexer_test.rb +25 -0
- data/test/unit/search_test.rb +251 -0
- metadata +124 -0
@@ -0,0 +1,237 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" ?>
|
2
|
+
<schema name="solrsan" version="1.2">
|
3
|
+
<types>
|
4
|
+
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
|
5
|
+
|
6
|
+
<!-- boolean type: "true" or "false" -->
|
7
|
+
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
|
8
|
+
<!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings. -->
|
9
|
+
<fieldType name="binary" class="solr.BinaryField"/>
|
10
|
+
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
11
|
+
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
12
|
+
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
13
|
+
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
14
|
+
|
15
|
+
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
16
|
+
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
17
|
+
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
18
|
+
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
19
|
+
|
20
|
+
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
|
21
|
+
<!-- A Trie based date field for faster date range queries and date faceting. -->
|
22
|
+
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
|
23
|
+
|
24
|
+
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
|
25
|
+
|
26
|
+
<!-- A text field that only splits on whitespace for exact matching of words -->
|
27
|
+
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
28
|
+
<analyzer>
|
29
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
30
|
+
</analyzer>
|
31
|
+
</fieldType>
|
32
|
+
|
33
|
+
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" termVectors="true">
|
34
|
+
<analyzer type="index">
|
35
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
36
|
+
<!-- in this example, we will only use synonyms at query time
|
37
|
+
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
38
|
+
-->
|
39
|
+
<!-- Case insensitive stop word removal.
|
40
|
+
add enablePositionIncrements=true in both the index and query
|
41
|
+
analyzers to leave a 'gap' for more accurate phrase queries.
|
42
|
+
-->
|
43
|
+
<filter class="solr.StopFilterFactory"
|
44
|
+
ignoreCase="true"
|
45
|
+
words="stopwords.txt"
|
46
|
+
enablePositionIncrements="true"
|
47
|
+
/>
|
48
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
49
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
50
|
+
</analyzer>
|
51
|
+
<analyzer type="query">
|
52
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
53
|
+
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
54
|
+
<filter class="solr.StopFilterFactory"
|
55
|
+
ignoreCase="true"
|
56
|
+
words="stopwords.txt"
|
57
|
+
enablePositionIncrements="true"
|
58
|
+
/>
|
59
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
60
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
61
|
+
</analyzer>
|
62
|
+
</fieldType>
|
63
|
+
|
64
|
+
<fieldType name="textFacetEval" class="solr.TextField" positionIncrementGap="100">
|
65
|
+
<analyzer type="index">
|
66
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
67
|
+
<!-- in this example, we will only use synonyms at query time
|
68
|
+
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
69
|
+
-->
|
70
|
+
<!-- Case insensitive stop word removal.
|
71
|
+
add enablePositionIncrements=true in both the index and query
|
72
|
+
analyzers to leave a 'gap' for more accurate phrase queries.
|
73
|
+
-->
|
74
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
75
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
76
|
+
</analyzer>
|
77
|
+
|
78
|
+
<analyzer type="query">
|
79
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
80
|
+
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
81
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
82
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
83
|
+
</analyzer>
|
84
|
+
</fieldType>
|
85
|
+
|
86
|
+
<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
|
87
|
+
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
88
|
+
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
|
89
|
+
<analyzer>
|
90
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
91
|
+
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
92
|
+
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
93
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
94
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
95
|
+
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
96
|
+
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
97
|
+
possible with WordDelimiterFilter in conjunction with stemming. -->
|
98
|
+
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
99
|
+
</analyzer>
|
100
|
+
</fieldType>
|
101
|
+
|
102
|
+
|
103
|
+
<!-- A general unstemmed text field - good if one does not know the language of the field -->
|
104
|
+
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
|
105
|
+
<analyzer type="index">
|
106
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
107
|
+
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
108
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
109
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
110
|
+
</analyzer>
|
111
|
+
<analyzer type="query">
|
112
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
113
|
+
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
114
|
+
<filter class="solr.StopFilterFactory"
|
115
|
+
ignoreCase="true"
|
116
|
+
words="stopwords.txt"
|
117
|
+
enablePositionIncrements="true"
|
118
|
+
/>
|
119
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
120
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
121
|
+
</analyzer>
|
122
|
+
</fieldType>
|
123
|
+
|
124
|
+
|
125
|
+
<!-- A general unstemmed text field that indexes tokens normally and also
|
126
|
+
reversed (via ReversedWildcardFilterFactory), to enable more efficient
|
127
|
+
leading wildcard queries. -->
|
128
|
+
<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
|
129
|
+
<analyzer type="index">
|
130
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
131
|
+
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
132
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
133
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
134
|
+
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
135
|
+
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
|
136
|
+
</analyzer>
|
137
|
+
<analyzer type="query">
|
138
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
139
|
+
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
140
|
+
<filter class="solr.StopFilterFactory"
|
141
|
+
ignoreCase="true"
|
142
|
+
words="stopwords.txt"
|
143
|
+
enablePositionIncrements="true"
|
144
|
+
/>
|
145
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
146
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
147
|
+
</analyzer>
|
148
|
+
</fieldType>
|
149
|
+
|
150
|
+
<fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
|
151
|
+
<analyzer>
|
152
|
+
<!-- KeywordTokenizer does no actual tokenizing, so the entire
|
153
|
+
input string is preserved as a single token
|
154
|
+
-->
|
155
|
+
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
156
|
+
<!-- The LowerCase TokenFilter does what you expect, which can be
|
157
|
+
useful when you want your sorting to be case insensitive
|
158
|
+
-->
|
159
|
+
<filter class="solr.LowerCaseFilterFactory" />
|
160
|
+
<!-- The TrimFilter removes any leading or trailing whitespace -->
|
161
|
+
<filter class="solr.TrimFilterFactory" />
|
162
|
+
<!-- The PatternReplaceFilter gives you the flexibility to use
|
163
|
+
Java Regular expression to replace any sequence of characters
|
164
|
+
matching a pattern with an arbitrary replacement string,
|
165
|
+
which may include back references to portions of the original
|
166
|
+
string matched by the pattern.
|
167
|
+
|
168
|
+
See the Java Regular Expression documentation for more
|
169
|
+
information on pattern and replacement string syntax.
|
170
|
+
|
171
|
+
http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
|
172
|
+
-->
|
173
|
+
<filter class="solr.PatternReplaceFilterFactory"
|
174
|
+
pattern="([^a-z])" replacement="" replace="all"
|
175
|
+
/>
|
176
|
+
</analyzer>
|
177
|
+
</fieldType>
|
178
|
+
|
179
|
+
<!-- Phonetic matching: tokens from the standard tokenizer are run through
     DoubleMetaphoneFilterFactory. With inject="false" the phonetic codes
     replace the original tokens instead of being added alongside them. -->
<fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
|
180
|
+
<analyzer>
|
181
|
+
<tokenizer class="solr.StandardTokenizerFactory"/>
|
182
|
+
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
|
183
|
+
</analyzer>
|
184
|
+
</fieldType>
|
185
|
+
|
186
|
+
<!-- Whitespace-tokenized text whose tokens may carry a payload, split off by
     DelimitedPayloadTokenFilterFactory and encoded with the "float" encoder. -->
<fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
|
187
|
+
<analyzer>
|
188
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
189
|
+
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
|
190
|
+
</analyzer>
|
191
|
+
</fieldType>
|
192
|
+
|
193
|
+
<!-- lowercases the entire field value, keeping it as a single token. -->
|
194
|
+
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
|
195
|
+
<analyzer>
|
196
|
+
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
197
|
+
<filter class="solr.LowerCaseFilterFactory" />
|
198
|
+
</analyzer>
|
199
|
+
</fieldType>
|
200
|
+
|
201
|
+
|
202
|
+
<!-- since fields of this type are by default not stored or indexed,
|
203
|
+
any data added to them will be ignored outright. -->
|
204
|
+
<fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
|
205
|
+
|
206
|
+
</types>
|
207
|
+
|
208
|
+
<fields>
|
209
|
+
<field name="id" type="string" indexed="true" stored="true" required="true" />
|
210
|
+
<field name="db_id" type="string" indexed="false" stored="true" required="true" />
|
211
|
+
<field name="type" type="string" indexed="true" stored="true" required="true"/>
|
212
|
+
|
213
|
+
<field name="title" type="string" indexed="true" stored="true"/>
|
214
|
+
<field name="content" type="text" indexed="true" stored="true"/>
|
215
|
+
<field name="author" type="string" indexed="true" stored="true"/>
|
216
|
+
<field name="review_count" type="tint" indexed="true" stored="true"/>
|
217
|
+
<field name="tags" multiValued="true" type="string" indexed="true" stored="true"/>
|
218
|
+
<field name="scores" multiValued="true" type="string" indexed="true" stored="true"/>
|
219
|
+
<field name="created_at" type="tdate" indexed="true" stored="true"/>
|
220
|
+
|
221
|
+
<!-- Dynamic Fields -->
|
222
|
+
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
|
223
|
+
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
|
224
|
+
<dynamicField name="*_f" type="tfloat" indexed="true" stored="true"/>
|
225
|
+
<dynamicField name="*_i" type="tint" indexed="true" stored="true"/>
|
226
|
+
|
227
|
+
<!-- All Objects -->
|
228
|
+
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
|
229
|
+
<field name="payloads" type="payloads" indexed="true" stored="true"/>
|
230
|
+
</fields>
|
231
|
+
|
232
|
+
<uniqueKey>id</uniqueKey>
|
233
|
+
<defaultSearchField>text</defaultSearchField>
|
234
|
+
<solrQueryParser defaultOperator="OR"/>
|
235
|
+
<copyField source="*" dest="text" />
|
236
|
+
|
237
|
+
</schema>
|
@@ -0,0 +1,430 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" ?>
|
2
|
+
<!--
|
3
|
+
Licensed to the Apache Software Foundation (ASF) under one or more
|
4
|
+
contributor license agreements. See the NOTICE file distributed with
|
5
|
+
this work for additional information regarding copyright ownership.
|
6
|
+
The ASF licenses this file to You under the Apache License, Version 2.0
|
7
|
+
(the "License"); you may not use this file except in compliance with
|
8
|
+
the License. You may obtain a copy of the License at
|
9
|
+
|
10
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
|
12
|
+
Unless required by applicable law or agreed to in writing, software
|
13
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
See the License for the specific language governing permissions and
|
16
|
+
limitations under the License.
|
17
|
+
-->
|
18
|
+
<!--
|
19
|
+
For more details about configurations options that may appear in this
|
20
|
+
file, see http://wiki.apache.org/solr/SolrConfigXml.
|
21
|
+
|
22
|
+
Specifically, the Solr Config can support XInclude, which may make it easier to manage
|
23
|
+
the configuration. See https://issues.apache.org/jira/browse/SOLR-1167
|
24
|
+
-->
|
25
|
+
<config>
|
26
|
+
<abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
|
27
|
+
|
28
|
+
<!-- lib directives can be used to instruct Solr to load any Jars identified
|
29
|
+
and use them to resolve any "plugins" specified in your solrconfig.xml or
|
30
|
+
schema.xml (ie: Analyzers, Request Handlers, etc...).
|
31
|
+
|
32
|
+
All directories and paths are resolved relative to the instanceDir.
|
33
|
+
|
34
|
+
If a "./lib" directory exists in your instanceDir, all files found in it
|
35
|
+
are included as if you had used the following syntax...
|
36
|
+
|
37
|
+
<lib dir="./lib" />
|
38
|
+
-->
|
39
|
+
<!-- A dir option by itself adds any files found in the directory to the
|
40
|
+
classpath, this is useful for including all jars in a directory.
|
41
|
+
-->
|
42
|
+
<lib dir="../../contrib/extraction/lib" />
|
43
|
+
<!-- When a regex is specified in addition to a directory, only the files in that
|
44
|
+
directory which completely match the regex (anchored on both ends)
|
45
|
+
will be included.
|
46
|
+
-->
|
47
|
+
<lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar" />
|
48
|
+
<lib dir="../../dist/" regex="apache-solr-clustering-\d.*\.jar" />
|
49
|
+
<!-- If a dir option (with or without a regex) is used and nothing is found
|
50
|
+
that matches, it will be ignored
|
51
|
+
-->
|
52
|
+
<lib dir="../../contrib/clustering/lib/downloads/" />
|
53
|
+
<lib dir="../../contrib/clustering/lib/" />
|
54
|
+
<lib dir="/total/crap/dir/ignored" />
|
55
|
+
|
56
|
+
<!-- Used to specify an alternate directory to hold all index data
|
57
|
+
other than the default ./data under the Solr home.
|
58
|
+
If replication is in use, this should match the replication configuration. -->
|
59
|
+
<dataDir>${solr.data.dir:./solr/data}</dataDir>
|
60
|
+
|
61
|
+
|
62
|
+
<!-- WARNING: this <indexDefaults> section only provides defaults for index writers
|
63
|
+
in general. See also the <mainIndex> section after that when changing parameters
|
64
|
+
for Solr's main Lucene index. -->
|
65
|
+
<indexDefaults>
|
66
|
+
<!-- Values here affect all index writers and act as a default unless overridden. -->
|
67
|
+
<useCompoundFile>false</useCompoundFile>
|
68
|
+
|
69
|
+
<mergeFactor>10</mergeFactor>
|
70
|
+
<ramBufferSizeMB>32</ramBufferSizeMB>
|
71
|
+
<!-- <maxMergeDocs>2147483647</maxMergeDocs> -->
|
72
|
+
<maxFieldLength>10000</maxFieldLength>
|
73
|
+
<writeLockTimeout>1000</writeLockTimeout>
|
74
|
+
<commitLockTimeout>10000</commitLockTimeout>
|
75
|
+
<lockType>native</lockType>
|
76
|
+
<!--
|
77
|
+
Expert:
|
78
|
+
Controls how often Lucene loads terms into memory -->
|
79
|
+
<!--<termIndexInterval>256</termIndexInterval>-->
|
80
|
+
</indexDefaults>
|
81
|
+
|
82
|
+
<mainIndex>
|
83
|
+
<!-- options specific to the main on-disk lucene index -->
|
84
|
+
<useCompoundFile>false</useCompoundFile>
|
85
|
+
<ramBufferSizeMB>32</ramBufferSizeMB>
|
86
|
+
<mergeFactor>10</mergeFactor>
|
87
|
+
<unlockOnStartup>false</unlockOnStartup>
|
88
|
+
|
89
|
+
<reopenReaders>true</reopenReaders>
|
90
|
+
<deletionPolicy class="solr.SolrDeletionPolicy">
|
91
|
+
<!-- The number of commit points to be kept -->
|
92
|
+
<str name="maxCommitsToKeep">1</str>
|
93
|
+
<!-- The number of optimized commit points to be kept -->
|
94
|
+
<str name="maxOptimizedCommitsToKeep">0</str>
|
95
|
+
</deletionPolicy>
|
96
|
+
|
97
|
+
<infoStream file="INFOSTREAM.txt">false</infoStream>
|
98
|
+
|
99
|
+
</mainIndex>
|
100
|
+
|
101
|
+
<!-- Enables JMX if and only if an existing MBeanServer is found, use this
|
102
|
+
if you want to configure JMX through JVM parameters. Remove this to disable
|
103
|
+
exposing Solr configuration and statistics to JMX.
|
104
|
+
|
105
|
+
If you want to connect to a particular server, specify the agentId
|
106
|
+
e.g. <jmx agentId="myAgent" />
|
107
|
+
|
108
|
+
If you want to start a new MBeanServer, specify the serviceUrl
|
109
|
+
e.g. <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
|
110
|
+
|
111
|
+
For more details see http://wiki.apache.org/solr/SolrJmx
|
112
|
+
-->
|
113
|
+
<jmx />
|
114
|
+
|
115
|
+
<!-- the default high-performance update handler -->
|
116
|
+
<updateHandler class="solr.DirectUpdateHandler2">
|
117
|
+
</updateHandler>
|
118
|
+
|
119
|
+
<query>
|
120
|
+
<maxBooleanClauses>1024</maxBooleanClauses>
|
121
|
+
<filterCache
|
122
|
+
class="solr.FastLRUCache"
|
123
|
+
size="512"
|
124
|
+
initialSize="512"
|
125
|
+
autowarmCount="0"/>
|
126
|
+
<queryResultCache
|
127
|
+
class="solr.LRUCache"
|
128
|
+
size="512"
|
129
|
+
initialSize="512"
|
130
|
+
autowarmCount="0"/>
|
131
|
+
<documentCache
|
132
|
+
class="solr.LRUCache"
|
133
|
+
size="512"
|
134
|
+
initialSize="512"
|
135
|
+
autowarmCount="0"/>
|
136
|
+
<enableLazyFieldLoading>true</enableLazyFieldLoading>
|
137
|
+
|
138
|
+
<!-- An optimization for use with the queryResultCache. When a search
|
139
|
+
is requested, a superset of the requested number of document ids
|
140
|
+
are collected. For example, if a search for a particular query
|
141
|
+
requests matching documents 10 through 19, and queryWindowSize is 50,
|
142
|
+
then documents 0 through 49 will be collected and cached. Any further
|
143
|
+
requests in that range can be satisfied via the cache. -->
|
144
|
+
<queryResultWindowSize>20</queryResultWindowSize>
|
145
|
+
|
146
|
+
<!-- Maximum number of documents to cache for any entry in the
|
147
|
+
queryResultCache. -->
|
148
|
+
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
|
149
|
+
|
150
|
+
<!-- a newSearcher event is fired whenever a new searcher is being prepared
|
151
|
+
and there is a current searcher handling requests (aka registered).
|
152
|
+
It can be used to prime certain caches to prevent long request times for
|
153
|
+
certain requests.
|
154
|
+
-->
|
155
|
+
<!-- QuerySenderListener takes an array of NamedList and executes a
|
156
|
+
local query request for each NamedList in sequence. -->
|
157
|
+
<listener event="newSearcher" class="solr.QuerySenderListener">
|
158
|
+
<arr name="queries">
|
159
|
+
<!--
|
160
|
+
<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
|
161
|
+
<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
|
162
|
+
<lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst>
|
163
|
+
-->
|
164
|
+
</arr>
|
165
|
+
</listener>
|
166
|
+
|
167
|
+
<listener event="firstSearcher" class="solr.QuerySenderListener">
|
168
|
+
<arr name="queries">
|
169
|
+
<lst> <str name="q">solr rocks</str><str name="start">0</str><str name="rows">10</str></lst>
|
170
|
+
<lst><str name="q">static firstSearcher warming query from solrconfig.xml</str></lst>
|
171
|
+
</arr>
|
172
|
+
</listener>
|
173
|
+
<useColdSearcher>false</useColdSearcher>
|
174
|
+
<maxWarmingSearchers>2</maxWarmingSearchers>
|
175
|
+
</query>
|
176
|
+
|
177
|
+
<requestDispatcher handleSelect="true" >
|
178
|
+
<!-- NOTE(review): enableRemoteStreaming="true" presumably lets requests pull content from caller-supplied URLs/files (stream.url / stream.file); confirm this is intended and that Solr is not reachable by untrusted clients. -->
<requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048000" />
|
179
|
+
<httpCaching lastModifiedFrom="openTime"
|
180
|
+
etagSeed="Solr">
|
181
|
+
</httpCaching>
|
182
|
+
</requestDispatcher>
|
183
|
+
|
184
|
+
<!-- <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
|
185
|
+
<lst name="defaults">
|
186
|
+
<str name="config">xml-data-config.xml</str>
|
187
|
+
</lst>
|
188
|
+
</requestHandler> -->
|
189
|
+
|
190
|
+
<requestHandler name="standard" class="solr.SearchHandler" default="true">
|
191
|
+
<lst name="defaults">
|
192
|
+
<str name="echoParams">explicit</str>
|
193
|
+
</lst>
|
194
|
+
</requestHandler>
|
195
|
+
|
196
|
+
|
197
|
+
<!-- DisMaxRequestHandler allows easy searching across multiple fields
|
198
|
+
for simple user-entered phrases. Its implementation is now
|
199
|
+
just the standard SearchHandler with a default query type
|
200
|
+
of "dismax".
|
201
|
+
see http://wiki.apache.org/solr/DisMaxRequestHandler
|
202
|
+
-->
|
203
|
+
<requestHandler name="dismax" class="solr.SearchHandler">
|
204
|
+
<lst name="defaults">
|
205
|
+
<str name="defType">dismax</str>
|
206
|
+
<str name="echoParams">explicit</str>
|
207
|
+
<float name="tie">0.01</float>
|
208
|
+
<str name="qf">
|
209
|
+
text
|
210
|
+
</str>
|
211
|
+
<str name="mm">3</str>
|
212
|
+
<int name="ps">100</int>
|
213
|
+
<str name="q.alt">*:*</str>
|
214
|
+
<str name="f.name.hl.alternateField">name</str>
|
215
|
+
<str name="f.text.hl.fragmenter">regex</str> <!-- defined below -->
|
216
|
+
</lst>
|
217
|
+
</requestHandler>
|
218
|
+
|
219
|
+
<!-- The spell check component can return a list of alternative spelling
|
220
|
+
suggestions. -->
|
221
|
+
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
|
222
|
+
<str name="queryAnalyzerFieldType">textSpell</str>
|
223
|
+
|
224
|
+
<lst name="spellchecker">
|
225
|
+
<str name="name">default</str>
|
226
|
+
<str name="field">name</str>
|
227
|
+
<str name="spellcheckIndexDir">./spellchecker</str>
|
228
|
+
</lst>
|
229
|
+
</searchComponent>
|
230
|
+
|
231
|
+
<!-- A request handler utilizing the spellcheck component.
|
232
|
+
#############################################################################
|
233
|
+
NOTE: This is purely as an example. The whole purpose of the
|
234
|
+
SpellCheckComponent is to hook it into the request handler that handles (i.e.
|
235
|
+
the standard or dismax SearchHandler) queries such that a separate request is
|
236
|
+
not needed to get suggestions.
|
237
|
+
|
238
|
+
IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS NOT WHAT YOU
|
239
|
+
WANT FOR YOUR PRODUCTION SYSTEM!
|
240
|
+
#############################################################################
|
241
|
+
-->
|
242
|
+
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
|
243
|
+
<lst name="defaults">
|
244
|
+
<!-- omp = Only More Popular -->
|
245
|
+
<str name="spellcheck.onlyMorePopular">false</str>
|
246
|
+
<!-- exr = Extended Results -->
|
247
|
+
<str name="spellcheck.extendedResults">false</str>
|
248
|
+
<!-- The number of suggestions to return -->
|
249
|
+
<str name="spellcheck.count">1</str>
|
250
|
+
</lst>
|
251
|
+
<arr name="last-components">
|
252
|
+
<str>spellcheck</str>
|
253
|
+
</arr>
|
254
|
+
</requestHandler>
|
255
|
+
|
256
|
+
<searchComponent name="tvComponent" class="org.apache.solr.handler.component.TermVectorComponent"/>
|
257
|
+
<!-- A Req Handler for working with the tvComponent. This is purely as an example.
|
258
|
+
You will likely want to add the component to your already specified request handlers. -->
|
259
|
+
<requestHandler name="tvrh" class="org.apache.solr.handler.component.SearchHandler">
|
260
|
+
<lst name="defaults">
|
261
|
+
<bool name="tv">true</bool>
|
262
|
+
</lst>
|
263
|
+
<arr name="last-components">
|
264
|
+
<str>tvComponent</str>
|
265
|
+
</arr>
|
266
|
+
</requestHandler>
|
267
|
+
|
268
|
+
<!-- Clustering Component
|
269
|
+
http://wiki.apache.org/solr/ClusteringComponent
|
270
|
+
This relies on third party jars which are not included in the release.
|
271
|
+
To use this component (and the "/clustering" handler)
|
272
|
+
Those jars will need to be downloaded, and you'll need to set the
|
273
|
+
solr.clustering.enabled system property when running solr...
|
274
|
+
java -Dsolr.clustering.enabled=true -jar start.jar
|
275
|
+
-->
|
276
|
+
<searchComponent
|
277
|
+
name="clusteringComponent"
|
278
|
+
enable="${solr.clustering.enabled:false}"
|
279
|
+
class="org.apache.solr.handler.clustering.ClusteringComponent" >
|
280
|
+
<!-- Declare an engine -->
|
281
|
+
<lst name="engine">
|
282
|
+
<!-- The name, only one can be named "default" -->
|
283
|
+
<str name="name">default</str>
|
284
|
+
<!--
|
285
|
+
Class name of Carrot2 clustering algorithm. Currently available algorithms are:
|
286
|
+
|
287
|
+
* org.carrot2.clustering.lingo.LingoClusteringAlgorithm
|
288
|
+
* org.carrot2.clustering.stc.STCClusteringAlgorithm
|
289
|
+
|
290
|
+
See http://project.carrot2.org/algorithms.html for the algorithm's characteristics.
|
291
|
+
-->
|
292
|
+
<str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
|
293
|
+
<!--
|
294
|
+
Overriding values for Carrot2 default algorithm attributes. For a description
|
295
|
+
of all available attributes, see: http://download.carrot2.org/stable/manual/#chapter.components.
|
296
|
+
Use attribute key as name attribute of str elements below. These can be further
|
297
|
+
overridden for individual requests by specifying attribute key as request
|
298
|
+
parameter name and attribute value as parameter value.
|
299
|
+
-->
|
300
|
+
<str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
|
301
|
+
</lst>
|
302
|
+
<lst name="engine">
|
303
|
+
<str name="name">stc</str>
|
304
|
+
<str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
|
305
|
+
</lst>
|
306
|
+
</searchComponent>
|
307
|
+
|
308
|
+
<requestHandler name="/clustering"
|
309
|
+
enable="${solr.clustering.enabled:false}"
|
310
|
+
class="solr.SearchHandler">
|
311
|
+
<lst name="defaults">
|
312
|
+
<bool name="clustering">true</bool>
|
313
|
+
<str name="clustering.engine">default</str>
|
314
|
+
<bool name="clustering.results">true</bool>
|
315
|
+
<!-- The title field -->
|
316
|
+
<str name="carrot.title">name</str>
|
317
|
+
<str name="carrot.url">id</str>
|
318
|
+
<!-- The field to cluster on -->
|
319
|
+
<str name="carrot.snippet">features</str>
|
320
|
+
<!-- produce summaries -->
|
321
|
+
<bool name="carrot.produceSummary">true</bool>
|
322
|
+
<!-- the maximum number of labels per cluster -->
|
323
|
+
<!--<int name="carrot.numDescriptions">5</int>-->
|
324
|
+
<!-- produce sub clusters -->
|
325
|
+
<bool name="carrot.outputSubClusters">false</bool>
|
326
|
+
</lst>
|
327
|
+
<arr name="last-components">
|
328
|
+
<str>clusteringComponent</str>
|
329
|
+
</arr>
|
330
|
+
</requestHandler>
|
331
|
+
|
332
|
+
<!-- A component to return terms and document frequency of those terms.
|
333
|
+
This component does not yet support distributed search. -->
|
334
|
+
<searchComponent name="termsComponent" class="org.apache.solr.handler.component.TermsComponent"/>
|
335
|
+
|
336
|
+
<requestHandler name="/terms" class="org.apache.solr.handler.component.SearchHandler">
|
337
|
+
<lst name="defaults">
|
338
|
+
<bool name="terms">true</bool>
|
339
|
+
</lst>
|
340
|
+
<arr name="components">
|
341
|
+
<str>termsComponent</str>
|
342
|
+
</arr>
|
343
|
+
</requestHandler>
|
344
|
+
|
345
|
+
|
346
|
+
<!-- a search component that enables you to configure the top results for
|
347
|
+
a given query regardless of the normal lucene scoring.-->
|
348
|
+
<searchComponent name="elevator" class="solr.QueryElevationComponent" >
|
349
|
+
<!-- pick a fieldType to analyze queries -->
|
350
|
+
<str name="queryFieldType">string</str>
|
351
|
+
<str name="config-file">elevate.xml</str>
|
352
|
+
</searchComponent>
|
353
|
+
|
354
|
+
<!-- a request handler utilizing the elevator component -->
|
355
|
+
<requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
|
356
|
+
<lst name="defaults">
|
357
|
+
<str name="echoParams">explicit</str>
|
358
|
+
</lst>
|
359
|
+
<arr name="last-components">
|
360
|
+
<str>elevator</str>
|
361
|
+
</arr>
|
362
|
+
</requestHandler>
|
363
|
+
|
364
|
+
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
|
365
|
+
<requestHandler name="/update/javabin" class="solr.BinaryUpdateRequestHandler" />
|
366
|
+
<requestHandler name="/analysis/document" class="solr.DocumentAnalysisRequestHandler" />
|
367
|
+
<requestHandler name="/analysis/field" class="solr.FieldAnalysisRequestHandler" />
|
368
|
+
<!-- CSV update handler, loaded on demand -->
|
369
|
+
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />
|
370
|
+
<requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
|
371
|
+
|
372
|
+
<!-- ping/healthcheck -->
|
373
|
+
<requestHandler name="/admin/ping" class="PingRequestHandler">
|
374
|
+
<lst name="defaults">
|
375
|
+
<str name="qt">standard</str>
|
376
|
+
<str name="q">solrpingquery</str>
|
377
|
+
<str name="echoParams">all</str>
|
378
|
+
</lst>
|
379
|
+
</requestHandler>
|
380
|
+
|
381
|
+
<!-- Echo the request contents back to the client -->
|
382
|
+
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
|
383
|
+
<lst name="defaults">
|
384
|
+
<str name="echoParams">explicit</str> <!-- for all params (including the default etc) use: 'all' -->
|
385
|
+
<str name="echoHandler">true</str>
|
386
|
+
</lst>
|
387
|
+
</requestHandler>
|
388
|
+
|
389
|
+
<highlighting>
|
390
|
+
<!-- Configure the standard fragmenter -->
|
391
|
+
<!-- This could most likely be commented out in the "default" case -->
|
392
|
+
<fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">
|
393
|
+
<lst name="defaults">
|
394
|
+
<int name="hl.fragsize">100</int>
|
395
|
+
</lst>
|
396
|
+
</fragmenter>
|
397
|
+
|
398
|
+
<!-- A regular-expression-based fragmenter (e.g., for sentence extraction) -->
|
399
|
+
<fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter">
|
400
|
+
<lst name="defaults">
|
401
|
+
<!-- slightly smaller fragsizes work better because of slop -->
|
402
|
+
<int name="hl.fragsize">70</int>
|
403
|
+
<!-- allow 50% slop on fragment sizes -->
|
404
|
+
<float name="hl.regex.slop">0.5</float>
|
405
|
+
<!-- a basic sentence pattern -->
|
406
|
+
<str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
|
407
|
+
</lst>
|
408
|
+
</fragmenter>
|
409
|
+
|
410
|
+
<!-- Configure the standard formatter -->
|
411
|
+
<formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
|
412
|
+
<lst name="defaults">
|
413
|
+
<str name="hl.simple.pre"><![CDATA[<mark>]]></str>
|
414
|
+
<str name="hl.simple.post"><![CDATA[</mark>]]></str>
|
415
|
+
</lst>
|
416
|
+
</formatter>
|
417
|
+
</highlighting>
|
418
|
+
|
419
|
+
|
420
|
+
<queryResponseWriter name="xslt" class="org.apache.solr.request.XSLTResponseWriter">
|
421
|
+
<int name="xsltCacheLifetimeSeconds">5</int>
|
422
|
+
</queryResponseWriter>
|
423
|
+
|
424
|
+
<admin>
|
425
|
+
<defaultQuery>solr</defaultQuery>
|
426
|
+
|
427
|
+
</admin>
|
428
|
+
|
429
|
+
</config>
|
430
|
+
|