solrsan 0.0.20
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +26 -0
- data/LICENSE +23 -0
- data/README.markdown +54 -0
- data/Rakefile +18 -0
- data/config/solr/conf/elevate.xml +31 -0
- data/config/solr/conf/mapping-ISOLatin1Accent.txt +246 -0
- data/config/solr/conf/protwords.txt +22 -0
- data/config/solr/conf/schema.xml +237 -0
- data/config/solr/conf/solrconfig.xml +430 -0
- data/config/solr/conf/spellings.txt +2 -0
- data/config/solr/conf/stopwords.txt +56 -0
- data/config/solr/conf/synonyms.txt +24 -0
- data/config/solr/conf/xslt/example.xsl +132 -0
- data/config/solr/conf/xslt/example_atom.xsl +67 -0
- data/config/solr/conf/xslt/example_rss.xsl +66 -0
- data/config/solr/conf/xslt/luke.xsl +337 -0
- data/config/solr.yml +12 -0
- data/config/solr.yml.example +13 -0
- data/lib/rails/generators/solrsan/config/config_generator.rb +30 -0
- data/lib/rails/generators/solrsan/config/templates/solr.yml +13 -0
- data/lib/rails/generators/solrsan/config/templates/solrsan.rb +5 -0
- data/lib/rails/generators/solrsan_generator.rb +11 -0
- data/lib/solrsan/config.rb +10 -0
- data/lib/solrsan/indexer.rb +83 -0
- data/lib/solrsan/search.rb +149 -0
- data/lib/solrsan/version.rb +3 -0
- data/lib/solrsan.rb +12 -0
- data/lib/tasks/solr.rake +63 -0
- data/solrsan.gemspec +25 -0
- data/test/models/document.rb +11 -0
- data/test/search_test_helper.rb +13 -0
- data/test/test_helper.rb +24 -0
- data/test/unit/indexer_test.rb +25 -0
- data/test/unit/search_test.rb +171 -0
- metadata +123 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
solrsan (0.0.20)
|
5
|
+
activemodel (~> 3.0.5)
|
6
|
+
activesupport (~> 3.0.5)
|
7
|
+
rsolr (~> 1.0.0)
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: http://rubygems.org/
|
11
|
+
specs:
|
12
|
+
activemodel (3.0.5)
|
13
|
+
activesupport (= 3.0.5)
|
14
|
+
builder (~> 2.1.2)
|
15
|
+
i18n (~> 0.4)
|
16
|
+
activesupport (3.0.5)
|
17
|
+
builder (2.1.2)
|
18
|
+
i18n (0.5.0)
|
19
|
+
rsolr (1.0.0)
|
20
|
+
builder (>= 2.1.2)
|
21
|
+
|
22
|
+
PLATFORMS
|
23
|
+
ruby
|
24
|
+
|
25
|
+
DEPENDENCIES
|
26
|
+
solrsan!
|
data/LICENSE
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
MIT License
|
2
|
+
---
|
3
|
+
|
4
|
+
Copyright (c) 2010 Tommy Chheng
|
5
|
+
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
8
|
+
in the Software without restriction, including without limitation the rights
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
11
|
+
furnished to do so, subject to the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
14
|
+
all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
THE SOFTWARE.
|
23
|
+
|
data/README.markdown
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
# solrsan
|
2
|
+
This gem is a lightweight wrapper for the Apache Solr API.
|
3
|
+
|
4
|
+
Before you start, read the documentation for solr at http://wiki.apache.org/solr/
|
5
|
+
|
6
|
+
It'll be invaluable for knowing parameters and error messages. I made a few test cases for further examples at http://github.com/tc/solrsan/tree/master/test/
|
7
|
+
|
8
|
+
## HOWTO
|
9
|
+
Install jetty:
|
10
|
+
Download jetty 7 from http://download.eclipse.org/jetty/stable-7/dist/
|
11
|
+
|
12
|
+
Install solr:
|
13
|
+
Download solr from http://www.apache.org/dyn/closer.cgi/lucene/solr/
|
14
|
+
Extract the downloaded archive (.tgz):
|
15
|
+
tar -zxvf apache-solr-*.tgz
|
16
|
+
|
17
|
+
Copy dist/apache-solr-*.war into jetty's webapps directory as solr.war:
|
18
|
+
cd apache-solr-*
|
19
|
+
cp dist/apache-solr-*.war JETTY_PATH/webapps/solr.war
|
20
|
+
|
21
|
+
Create solrsan and solr configuration files using:
|
22
|
+
rails generate Solrsan:Config
|
23
|
+
|
24
|
+
The generator will copy the following files into your application.
|
25
|
+
config/solr.yml
|
26
|
+
config/solr
|
27
|
+
config/initializers/solrsan.rb
|
28
|
+
lib/tasks/solr.rake
|
29
|
+
|
30
|
+
##
|
31
|
+
The following fields are required for each Solr document:
|
32
|
+
id, db_id, type
|
33
|
+
|
34
|
+
In each model, you can include the Solrsan::Search module, which adds a few interface helper methods:
|
35
|
+
index
|
36
|
+
destroy_index_document
|
37
|
+
search(params)
|
38
|
+
|
39
|
+
You can also add hooks for these methods:
|
40
|
+
class Document < ActiveRecord::Base
|
41
|
+
include Solrsan::Search
|
42
|
+
after_save :index
|
43
|
+
before_destroy :destroy_index_document
|
44
|
+
end
|
45
|
+
|
46
|
+
---
|
47
|
+
## Changelog
|
48
|
+
0.0.1
|
49
|
+
First release.
|
50
|
+
|
51
|
+
## Copyright
|
52
|
+
|
53
|
+
Copyright (c) 2011 Tommy Chheng. See LICENSE for details.
|
54
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
require 'rake/testtask'
|
3
|
+
Bundler::GemHelper.install_tasks
|
4
|
+
Dir[File.join(File.dirname(__FILE__), "lib", "tasks", "**", "*.rake")].each { |ext| load ext }
|
5
|
+
|
6
|
+
desc "Default: run all tests"
|
7
|
+
task :default => :test
|
8
|
+
|
9
|
+
desc "Run tests"
|
10
|
+
task :test => %w(test:units)
|
11
|
+
namespace :test do
|
12
|
+
desc "Run unit tests"
|
13
|
+
Rake::TestTask.new(:units) do |t|
|
14
|
+
t.libs << 'lib' << 'test'
|
15
|
+
t.test_files = FileList["test/unit/*_test.rb", "test/unit/*/*_test.rb"]
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
@@ -0,0 +1,31 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" ?>
|
2
|
+
<!--
|
3
|
+
Licensed to the Apache Software Foundation (ASF) under one or more
|
4
|
+
contributor license agreements. See the NOTICE file distributed with
|
5
|
+
this work for additional information regarding copyright ownership.
|
6
|
+
The ASF licenses this file to You under the Apache License, Version 2.0
|
7
|
+
(the "License"); you may not use this file except in compliance with
|
8
|
+
the License. You may obtain a copy of the License at
|
9
|
+
|
10
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
|
12
|
+
Unless required by applicable law or agreed to in writing, software
|
13
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
See the License for the specific language governing permissions and
|
16
|
+
limitations under the License.
|
17
|
+
-->
|
18
|
+
|
19
|
+
<!-- If this file is found in the config directory, it will only be
|
20
|
+
loaded once at startup. If it is found in Solr's data
|
21
|
+
directory, it will be re-loaded every commit.
|
22
|
+
-->
|
23
|
+
|
24
|
+
<elevate>
|
25
|
+
<query text="foo bar">
|
26
|
+
<doc id="1" />
|
27
|
+
<doc id="2" />
|
28
|
+
<doc id="3" />
|
29
|
+
</query>
|
30
|
+
|
31
|
+
</elevate>
|
@@ -0,0 +1,246 @@
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
3
|
+
# the License. You may obtain a copy of the License at
|
4
|
+
#
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the License for the specific language governing permissions and
|
11
|
+
# limitations under the License.
|
12
|
+
|
13
|
+
# Syntax:
|
14
|
+
# "source" => "target"
|
15
|
+
# "source".length() > 0 (source cannot be empty.)
|
16
|
+
# "target".length() >= 0 (target can be empty.)
|
17
|
+
|
18
|
+
# example:
|
19
|
+
# "À" => "A"
|
20
|
+
# "\u00C0" => "A"
|
21
|
+
# "\u00C0" => "\u0041"
|
22
|
+
# "ß" => "ss"
|
23
|
+
# "\t" => " "
|
24
|
+
# "\n" => ""
|
25
|
+
|
26
|
+
# À => A
|
27
|
+
"\u00C0" => "A"
|
28
|
+
|
29
|
+
# Á => A
|
30
|
+
"\u00C1" => "A"
|
31
|
+
|
32
|
+
# Â => A
|
33
|
+
"\u00C2" => "A"
|
34
|
+
|
35
|
+
# Ã => A
|
36
|
+
"\u00C3" => "A"
|
37
|
+
|
38
|
+
# Ä => A
|
39
|
+
"\u00C4" => "A"
|
40
|
+
|
41
|
+
# Å => A
|
42
|
+
"\u00C5" => "A"
|
43
|
+
|
44
|
+
# Æ => AE
|
45
|
+
"\u00C6" => "AE"
|
46
|
+
|
47
|
+
# Ç => C
|
48
|
+
"\u00C7" => "C"
|
49
|
+
|
50
|
+
# È => E
|
51
|
+
"\u00C8" => "E"
|
52
|
+
|
53
|
+
# É => E
|
54
|
+
"\u00C9" => "E"
|
55
|
+
|
56
|
+
# Ê => E
|
57
|
+
"\u00CA" => "E"
|
58
|
+
|
59
|
+
# Ë => E
|
60
|
+
"\u00CB" => "E"
|
61
|
+
|
62
|
+
# Ì => I
|
63
|
+
"\u00CC" => "I"
|
64
|
+
|
65
|
+
# Í => I
|
66
|
+
"\u00CD" => "I"
|
67
|
+
|
68
|
+
# Î => I
|
69
|
+
"\u00CE" => "I"
|
70
|
+
|
71
|
+
# Ï => I
|
72
|
+
"\u00CF" => "I"
|
73
|
+
|
74
|
+
# Ĳ => IJ
|
75
|
+
"\u0132" => "IJ"
|
76
|
+
|
77
|
+
# Ð => D
|
78
|
+
"\u00D0" => "D"
|
79
|
+
|
80
|
+
# Ñ => N
|
81
|
+
"\u00D1" => "N"
|
82
|
+
|
83
|
+
# Ò => O
|
84
|
+
"\u00D2" => "O"
|
85
|
+
|
86
|
+
# Ó => O
|
87
|
+
"\u00D3" => "O"
|
88
|
+
|
89
|
+
# Ô => O
|
90
|
+
"\u00D4" => "O"
|
91
|
+
|
92
|
+
# Õ => O
|
93
|
+
"\u00D5" => "O"
|
94
|
+
|
95
|
+
# Ö => O
|
96
|
+
"\u00D6" => "O"
|
97
|
+
|
98
|
+
# Ø => O
|
99
|
+
"\u00D8" => "O"
|
100
|
+
|
101
|
+
# Œ => OE
|
102
|
+
"\u0152" => "OE"
|
103
|
+
|
104
|
+
# Þ => TH
|
105
|
+
"\u00DE" => "TH"
|
106
|
+
|
107
|
+
# Ù => U
|
108
|
+
"\u00D9" => "U"
|
109
|
+
|
110
|
+
# Ú => U
|
111
|
+
"\u00DA" => "U"
|
112
|
+
|
113
|
+
# Û => U
|
114
|
+
"\u00DB" => "U"
|
115
|
+
|
116
|
+
# Ü => U
|
117
|
+
"\u00DC" => "U"
|
118
|
+
|
119
|
+
# Ý => Y
|
120
|
+
"\u00DD" => "Y"
|
121
|
+
|
122
|
+
# Ÿ => Y
|
123
|
+
"\u0178" => "Y"
|
124
|
+
|
125
|
+
# à => a
|
126
|
+
"\u00E0" => "a"
|
127
|
+
|
128
|
+
# á => a
|
129
|
+
"\u00E1" => "a"
|
130
|
+
|
131
|
+
# â => a
|
132
|
+
"\u00E2" => "a"
|
133
|
+
|
134
|
+
# ã => a
|
135
|
+
"\u00E3" => "a"
|
136
|
+
|
137
|
+
# ä => a
|
138
|
+
"\u00E4" => "a"
|
139
|
+
|
140
|
+
# å => a
|
141
|
+
"\u00E5" => "a"
|
142
|
+
|
143
|
+
# æ => ae
|
144
|
+
"\u00E6" => "ae"
|
145
|
+
|
146
|
+
# ç => c
|
147
|
+
"\u00E7" => "c"
|
148
|
+
|
149
|
+
# è => e
|
150
|
+
"\u00E8" => "e"
|
151
|
+
|
152
|
+
# é => e
|
153
|
+
"\u00E9" => "e"
|
154
|
+
|
155
|
+
# ê => e
|
156
|
+
"\u00EA" => "e"
|
157
|
+
|
158
|
+
# ë => e
|
159
|
+
"\u00EB" => "e"
|
160
|
+
|
161
|
+
# ì => i
|
162
|
+
"\u00EC" => "i"
|
163
|
+
|
164
|
+
# í => i
|
165
|
+
"\u00ED" => "i"
|
166
|
+
|
167
|
+
# î => i
|
168
|
+
"\u00EE" => "i"
|
169
|
+
|
170
|
+
# ï => i
|
171
|
+
"\u00EF" => "i"
|
172
|
+
|
173
|
+
# ĳ => ij
|
174
|
+
"\u0133" => "ij"
|
175
|
+
|
176
|
+
# ð => d
|
177
|
+
"\u00F0" => "d"
|
178
|
+
|
179
|
+
# ñ => n
|
180
|
+
"\u00F1" => "n"
|
181
|
+
|
182
|
+
# ò => o
|
183
|
+
"\u00F2" => "o"
|
184
|
+
|
185
|
+
# ó => o
|
186
|
+
"\u00F3" => "o"
|
187
|
+
|
188
|
+
# ô => o
|
189
|
+
"\u00F4" => "o"
|
190
|
+
|
191
|
+
# õ => o
|
192
|
+
"\u00F5" => "o"
|
193
|
+
|
194
|
+
# ö => o
|
195
|
+
"\u00F6" => "o"
|
196
|
+
|
197
|
+
# ø => o
|
198
|
+
"\u00F8" => "o"
|
199
|
+
|
200
|
+
# œ => oe
|
201
|
+
"\u0153" => "oe"
|
202
|
+
|
203
|
+
# ß => ss
|
204
|
+
"\u00DF" => "ss"
|
205
|
+
|
206
|
+
# þ => th
|
207
|
+
"\u00FE" => "th"
|
208
|
+
|
209
|
+
# ù => u
|
210
|
+
"\u00F9" => "u"
|
211
|
+
|
212
|
+
# ú => u
|
213
|
+
"\u00FA" => "u"
|
214
|
+
|
215
|
+
# û => u
|
216
|
+
"\u00FB" => "u"
|
217
|
+
|
218
|
+
# ü => u
|
219
|
+
"\u00FC" => "u"
|
220
|
+
|
221
|
+
# ý => y
|
222
|
+
"\u00FD" => "y"
|
223
|
+
|
224
|
+
# ÿ => y
|
225
|
+
"\u00FF" => "y"
|
226
|
+
|
227
|
+
# ﬀ => ff
|
228
|
+
"\uFB00" => "ff"
|
229
|
+
|
230
|
+
# ﬁ => fi
|
231
|
+
"\uFB01" => "fi"
|
232
|
+
|
233
|
+
# ﬂ => fl
|
234
|
+
"\uFB02" => "fl"
|
235
|
+
|
236
|
+
# ﬃ => ffi
|
237
|
+
"\uFB03" => "ffi"
|
238
|
+
|
239
|
+
# ﬄ => ffl
|
240
|
+
"\uFB04" => "ffl"
|
241
|
+
|
242
|
+
# ﬅ => ft
|
243
|
+
"\uFB05" => "ft"
|
244
|
+
|
245
|
+
# ﬆ => st
|
246
|
+
"\uFB06" => "st"
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
3
|
+
# the License. You may obtain a copy of the License at
|
4
|
+
#
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the License for the specific language governing permissions and
|
11
|
+
# limitations under the License.
|
12
|
+
|
13
|
+
#-----------------------------------------------------------------------
|
14
|
+
# Use a protected word file to protect against the stemmer reducing two
|
15
|
+
# unrelated words to the same base word.
|
16
|
+
|
17
|
+
# Some non-words that normally won't be encountered,
|
18
|
+
# just to test that they won't be stemmed.
|
19
|
+
dontstems
|
20
|
+
zwhacky
|
21
|
+
|
22
|
+
|
@@ -0,0 +1,237 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" ?>
|
2
|
+
<schema name="solrsan" version="1.2">
|
3
|
+
<types>
|
4
|
+
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
|
5
|
+
|
6
|
+
<!-- boolean type: "true" or "false" -->
|
7
|
+
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
|
8
|
+
<!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings -->
|
9
|
+
<fieldtype name="binary" class="solr.BinaryField"/>
|
10
|
+
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
11
|
+
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
12
|
+
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
13
|
+
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
14
|
+
|
15
|
+
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
16
|
+
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
17
|
+
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
18
|
+
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
19
|
+
|
20
|
+
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
|
21
|
+
<!-- A Trie based date field for faster date range queries and date faceting. -->
|
22
|
+
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
|
23
|
+
|
24
|
+
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
|
25
|
+
|
26
|
+
<!-- A text field that only splits on whitespace for exact matching of words -->
|
27
|
+
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
28
|
+
<analyzer>
|
29
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
30
|
+
</analyzer>
|
31
|
+
</fieldType>
|
32
|
+
|
33
|
+
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" termVectors="true">
|
34
|
+
<analyzer type="index">
|
35
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
36
|
+
<!-- in this example, we will only use synonyms at query time
|
37
|
+
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
38
|
+
-->
|
39
|
+
<!-- Case insensitive stop word removal.
|
40
|
+
add enablePositionIncrements=true in both the index and query
|
41
|
+
analyzers to leave a 'gap' for more accurate phrase queries.
|
42
|
+
-->
|
43
|
+
<filter class="solr.StopFilterFactory"
|
44
|
+
ignoreCase="true"
|
45
|
+
words="stopwords.txt"
|
46
|
+
enablePositionIncrements="true"
|
47
|
+
/>
|
48
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
49
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
50
|
+
</analyzer>
|
51
|
+
<analyzer type="query">
|
52
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
53
|
+
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
54
|
+
<filter class="solr.StopFilterFactory"
|
55
|
+
ignoreCase="true"
|
56
|
+
words="stopwords.txt"
|
57
|
+
enablePositionIncrements="true"
|
58
|
+
/>
|
59
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
60
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
61
|
+
</analyzer>
|
62
|
+
</fieldType>
|
63
|
+
|
64
|
+
<fieldType name="textFacetEval" class="solr.TextField" positionIncrementGap="100">
|
65
|
+
<analyzer type="index">
|
66
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
67
|
+
<!-- in this example, we will only use synonyms at query time
|
68
|
+
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
69
|
+
-->
|
70
|
+
<!-- Case insensitive stop word removal.
|
71
|
+
add enablePositionIncrements=true in both the index and query
|
72
|
+
analyzers to leave a 'gap' for more accurate phrase queries.
|
73
|
+
-->
|
74
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
75
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
76
|
+
</analyzer>
|
77
|
+
|
78
|
+
<analyzer type="query">
|
79
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
80
|
+
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
81
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
82
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
83
|
+
</analyzer>
|
84
|
+
</fieldType>
|
85
|
+
|
86
|
+
<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
|
87
|
+
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
88
|
+
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
|
89
|
+
<analyzer>
|
90
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
91
|
+
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
92
|
+
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
93
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
94
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
95
|
+
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
96
|
+
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
97
|
+
possible with WordDelimiterFilter in conjunction with stemming. -->
|
98
|
+
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
99
|
+
</analyzer>
|
100
|
+
</fieldType>
|
101
|
+
|
102
|
+
|
103
|
+
<!-- A general unstemmed text field - good if one does not know the language of the field -->
|
104
|
+
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
|
105
|
+
<analyzer type="index">
|
106
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
107
|
+
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
108
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
109
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
110
|
+
</analyzer>
|
111
|
+
<analyzer type="query">
|
112
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
113
|
+
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
114
|
+
<filter class="solr.StopFilterFactory"
|
115
|
+
ignoreCase="true"
|
116
|
+
words="stopwords.txt"
|
117
|
+
enablePositionIncrements="true"
|
118
|
+
/>
|
119
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
120
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
121
|
+
</analyzer>
|
122
|
+
</fieldType>
|
123
|
+
|
124
|
+
|
125
|
+
<!-- A general unstemmed text field that indexes tokens normally and also
|
126
|
+
reversed (via ReversedWildcardFilterFactory), to enable more efficient
|
127
|
+
leading wildcard queries. -->
|
128
|
+
<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
|
129
|
+
<analyzer type="index">
|
130
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
131
|
+
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
132
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
133
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
134
|
+
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
135
|
+
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
|
136
|
+
</analyzer>
|
137
|
+
<analyzer type="query">
|
138
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
139
|
+
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
140
|
+
<filter class="solr.StopFilterFactory"
|
141
|
+
ignoreCase="true"
|
142
|
+
words="stopwords.txt"
|
143
|
+
enablePositionIncrements="true"
|
144
|
+
/>
|
145
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
146
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
147
|
+
</analyzer>
|
148
|
+
</fieldType>
|
149
|
+
|
150
|
+
<fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
|
151
|
+
<analyzer>
|
152
|
+
<!-- KeywordTokenizer does no actual tokenizing, so the entire
|
153
|
+
input string is preserved as a single token
|
154
|
+
-->
|
155
|
+
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
156
|
+
<!-- The LowerCase TokenFilter does what you expect, which can be
|
157
|
+
useful when you want your sorting to be case insensitive
|
158
|
+
-->
|
159
|
+
<filter class="solr.LowerCaseFilterFactory" />
|
160
|
+
<!-- The TrimFilter removes any leading or trailing whitespace -->
|
161
|
+
<filter class="solr.TrimFilterFactory" />
|
162
|
+
<!-- The PatternReplaceFilter gives you the flexibility to use
|
163
|
+
Java Regular expression to replace any sequence of characters
|
164
|
+
matching a pattern with an arbitrary replacement string,
|
165
|
+
which may include back references to portions of the original
|
166
|
+
string matched by the pattern.
|
167
|
+
|
168
|
+
See the Java Regular Expression documentation for more
|
169
|
+
information on pattern and replacement string syntax.
|
170
|
+
|
171
|
+
http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
|
172
|
+
-->
|
173
|
+
<filter class="solr.PatternReplaceFilterFactory"
|
174
|
+
pattern="([^a-z])" replacement="" replace="all"
|
175
|
+
/>
|
176
|
+
</analyzer>
|
177
|
+
</fieldType>
|
178
|
+
|
179
|
+
<fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
|
180
|
+
<analyzer>
|
181
|
+
<tokenizer class="solr.StandardTokenizerFactory"/>
|
182
|
+
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
|
183
|
+
</analyzer>
|
184
|
+
</fieldtype>
|
185
|
+
|
186
|
+
<fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
|
187
|
+
<analyzer>
|
188
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
189
|
+
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
|
190
|
+
</analyzer>
|
191
|
+
</fieldtype>
|
192
|
+
|
193
|
+
<!-- lowercases the entire field value, keeping it as a single token. -->
|
194
|
+
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
|
195
|
+
<analyzer>
|
196
|
+
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
197
|
+
<filter class="solr.LowerCaseFilterFactory" />
|
198
|
+
</analyzer>
|
199
|
+
</fieldType>
|
200
|
+
|
201
|
+
|
202
|
+
<!-- since fields of this type are by default not stored or indexed,
|
203
|
+
any data added to them will be ignored outright. -->
|
204
|
+
<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
|
205
|
+
|
206
|
+
</types>
|
207
|
+
|
208
|
+
<fields>
|
209
|
+
<field name="id" type="string" indexed="true" stored="true" required="true" />
|
210
|
+
<field name="db_id" type="string" indexed="false" stored="true" required="true" />
|
211
|
+
<field name="type" type="string" indexed="true" stored="true" required="true"/>
|
212
|
+
|
213
|
+
<field name="title" type="string" indexed="true" stored="true"/>
|
214
|
+
<field name="content" type="text" indexed="true" stored="true"/>
|
215
|
+
<field name="author" type="string" indexed="true" stored="true"/>
|
216
|
+
<field name="review_count" type="tint" indexed="true" stored="true"/>
|
217
|
+
<field name="tags" multiValued="true" type="string" indexed="true" stored="true"/>
|
218
|
+
<field name="scores" multiValued="true" type="string" indexed="true" stored="true"/>
|
219
|
+
<field name="created_at" type="tdate" indexed="true" stored="true"/>
|
220
|
+
|
221
|
+
<!-- Dynamic Fields -->
|
222
|
+
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
|
223
|
+
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
|
224
|
+
<dynamicField name="*_f" type="tfloat" indexed="true" stored="true"/>
|
225
|
+
<dynamicField name="*_i" type="tint" indexed="true" stored="true"/>
|
226
|
+
|
227
|
+
<!-- All Objects -->
|
228
|
+
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
|
229
|
+
<field name="payloads" type="payloads" indexed="true" stored="true"/>
|
230
|
+
</fields>
|
231
|
+
|
232
|
+
<uniqueKey>id</uniqueKey>
|
233
|
+
<defaultSearchField>text</defaultSearchField>
|
234
|
+
<solrQueryParser defaultOperator="OR"/>
|
235
|
+
<copyField source="*" dest="text" />
|
236
|
+
|
237
|
+
</schema>
|