rere 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (168) hide show
  1. data/.classpath +260 -0
  2. data/.gitignore +28 -0
  3. data/.project +14 -0
  4. data/.ruby-version +1 -0
  5. data/.settings/org.eclim.prefs +3 -0
  6. data/.settings/org.eclipse.jdt.core.prefs +5 -0
  7. data/.settings/org.eclipse.jdt.ui.prefs +2 -0
  8. data/Gemfile +7 -0
  9. data/LICENSE +22 -0
  10. data/README.md +44 -0
  11. data/Rakefile +65 -0
  12. data/bin/solr +61 -0
  13. data/example/config/solr.yml +23 -0
  14. data/example/log/.gitkeep +0 -0
  15. data/example/solr/README.txt +63 -0
  16. data/example/solr/collection1/README.txt +50 -0
  17. data/example/solr/collection1/conf/admin-extra.html +24 -0
  18. data/example/solr/collection1/conf/admin-extra.menu-bottom.html +25 -0
  19. data/example/solr/collection1/conf/admin-extra.menu-top.html +25 -0
  20. data/example/solr/collection1/conf/currency.xml +67 -0
  21. data/example/solr/collection1/conf/elevate.xml +38 -0
  22. data/example/solr/collection1/conf/lang/contractions_ca.txt +8 -0
  23. data/example/solr/collection1/conf/lang/contractions_fr.txt +15 -0
  24. data/example/solr/collection1/conf/lang/contractions_ga.txt +5 -0
  25. data/example/solr/collection1/conf/lang/contractions_it.txt +23 -0
  26. data/example/solr/collection1/conf/lang/hyphenations_ga.txt +5 -0
  27. data/example/solr/collection1/conf/lang/stemdict_nl.txt +6 -0
  28. data/example/solr/collection1/conf/lang/stoptags_ja.txt +420 -0
  29. data/example/solr/collection1/conf/lang/stopwords_ar.txt +125 -0
  30. data/example/solr/collection1/conf/lang/stopwords_bg.txt +193 -0
  31. data/example/solr/collection1/conf/lang/stopwords_ca.txt +220 -0
  32. data/example/solr/collection1/conf/lang/stopwords_cz.txt +172 -0
  33. data/example/solr/collection1/conf/lang/stopwords_da.txt +108 -0
  34. data/example/solr/collection1/conf/lang/stopwords_de.txt +292 -0
  35. data/example/solr/collection1/conf/lang/stopwords_el.txt +78 -0
  36. data/example/solr/collection1/conf/lang/stopwords_en.txt +54 -0
  37. data/example/solr/collection1/conf/lang/stopwords_es.txt +354 -0
  38. data/example/solr/collection1/conf/lang/stopwords_eu.txt +99 -0
  39. data/example/solr/collection1/conf/lang/stopwords_fa.txt +313 -0
  40. data/example/solr/collection1/conf/lang/stopwords_fi.txt +95 -0
  41. data/example/solr/collection1/conf/lang/stopwords_fr.txt +184 -0
  42. data/example/solr/collection1/conf/lang/stopwords_ga.txt +110 -0
  43. data/example/solr/collection1/conf/lang/stopwords_gl.txt +161 -0
  44. data/example/solr/collection1/conf/lang/stopwords_hi.txt +235 -0
  45. data/example/solr/collection1/conf/lang/stopwords_hu.txt +209 -0
  46. data/example/solr/collection1/conf/lang/stopwords_hy.txt +46 -0
  47. data/example/solr/collection1/conf/lang/stopwords_id.txt +359 -0
  48. data/example/solr/collection1/conf/lang/stopwords_it.txt +301 -0
  49. data/example/solr/collection1/conf/lang/stopwords_ja.txt +127 -0
  50. data/example/solr/collection1/conf/lang/stopwords_lv.txt +172 -0
  51. data/example/solr/collection1/conf/lang/stopwords_nl.txt +117 -0
  52. data/example/solr/collection1/conf/lang/stopwords_no.txt +192 -0
  53. data/example/solr/collection1/conf/lang/stopwords_pt.txt +251 -0
  54. data/example/solr/collection1/conf/lang/stopwords_ro.txt +233 -0
  55. data/example/solr/collection1/conf/lang/stopwords_ru.txt +241 -0
  56. data/example/solr/collection1/conf/lang/stopwords_sv.txt +131 -0
  57. data/example/solr/collection1/conf/lang/stopwords_th.txt +119 -0
  58. data/example/solr/collection1/conf/lang/stopwords_tr.txt +212 -0
  59. data/example/solr/collection1/conf/lang/userdict_ja.txt +29 -0
  60. data/example/solr/collection1/conf/mapping-FoldToASCII.txt +3813 -0
  61. data/example/solr/collection1/conf/mapping-ISOLatin1Accent.txt +246 -0
  62. data/example/solr/collection1/conf/protwords.txt +21 -0
  63. data/example/solr/collection1/conf/schema.xml +1125 -0
  64. data/example/solr/collection1/conf/scripts.conf +24 -0
  65. data/example/solr/collection1/conf/solrconfig.xml +1816 -0
  66. data/example/solr/collection1/conf/spellings.txt +2 -0
  67. data/example/solr/collection1/conf/stopwords.txt +14 -0
  68. data/example/solr/collection1/conf/synonyms.txt +29 -0
  69. data/example/solr/collection1/conf/update-script.js +53 -0
  70. data/example/solr/collection1/conf/velocity/VM_global_library.vm +170 -0
  71. data/example/solr/collection1/conf/velocity/browse.vm +50 -0
  72. data/example/solr/collection1/conf/velocity/cluster.vm +9 -0
  73. data/example/solr/collection1/conf/velocity/clusterResults.vm +12 -0
  74. data/example/solr/collection1/conf/velocity/debug.vm +17 -0
  75. data/example/solr/collection1/conf/velocity/did_you_mean.vm +4 -0
  76. data/example/solr/collection1/conf/velocity/facet_fields.vm +15 -0
  77. data/example/solr/collection1/conf/velocity/facet_pivot.vm +3 -0
  78. data/example/solr/collection1/conf/velocity/facet_queries.vm +3 -0
  79. data/example/solr/collection1/conf/velocity/facet_ranges.vm +15 -0
  80. data/example/solr/collection1/conf/velocity/facets.vm +5 -0
  81. data/example/solr/collection1/conf/velocity/footer.vm +17 -0
  82. data/example/solr/collection1/conf/velocity/head.vm +32 -0
  83. data/example/solr/collection1/conf/velocity/header.vm +3 -0
  84. data/example/solr/collection1/conf/velocity/hit.vm +11 -0
  85. data/example/solr/collection1/conf/velocity/hitGrouped.vm +24 -0
  86. data/example/solr/collection1/conf/velocity/join-doc.vm +4 -0
  87. data/example/solr/collection1/conf/velocity/jquery.autocomplete.css +48 -0
  88. data/example/solr/collection1/conf/velocity/jquery.autocomplete.js +763 -0
  89. data/example/solr/collection1/conf/velocity/layout.vm +20 -0
  90. data/example/solr/collection1/conf/velocity/main.css +208 -0
  91. data/example/solr/collection1/conf/velocity/product-doc.vm +27 -0
  92. data/example/solr/collection1/conf/velocity/query.vm +42 -0
  93. data/example/solr/collection1/conf/velocity/queryGroup.vm +19 -0
  94. data/example/solr/collection1/conf/velocity/querySpatial.vm +40 -0
  95. data/example/solr/collection1/conf/velocity/richtext-doc.vm +114 -0
  96. data/example/solr/collection1/conf/velocity/suggest.vm +3 -0
  97. data/example/solr/collection1/conf/velocity/tabs.vm +6 -0
  98. data/example/solr/collection1/conf/xslt/example.xsl +132 -0
  99. data/example/solr/collection1/conf/xslt/example_atom.xsl +67 -0
  100. data/example/solr/collection1/conf/xslt/example_rss.xsl +66 -0
  101. data/example/solr/collection1/conf/xslt/luke.xsl +337 -0
  102. data/example/solr/collection1/conf/xslt/updateXml.xsl +70 -0
  103. data/example/solr/collection1/data/index/segments.gen +0 -0
  104. data/example/solr/collection1/data/index/segments_1 +0 -0
  105. data/example/solr/data/development/index/segments.gen +0 -0
  106. data/example/solr/data/development/index/segments_1 +0 -0
  107. data/example/solr/solr.xml +53 -0
  108. data/example/solr/zoo.cfg +17 -0
  109. data/lib/rere.rb +66 -0
  110. data/lib/rere/railtie.rb +8 -0
  111. data/lib/rere/server.rb +380 -0
  112. data/lib/rere/tasks/solr.rake +47 -0
  113. data/lib/rere/version.rb +3 -0
  114. data/pom.xml +168 -0
  115. data/rere.gemspec +26 -0
  116. data/server/README.txt +78 -0
  117. data/server/cloud-scripts/zkcli.bat +12 -0
  118. data/server/cloud-scripts/zkcli.sh +14 -0
  119. data/server/contexts/solr-jetty-context.xml +8 -0
  120. data/server/etc/create-solrtest.keystore.sh +37 -0
  121. data/server/etc/jetty.xml +205 -0
  122. data/server/etc/logging.properties +38 -0
  123. data/server/etc/solrtest.keystore +0 -0
  124. data/server/etc/webdefault.xml +527 -0
  125. data/server/exampledocs/books.csv +11 -0
  126. data/server/exampledocs/books.json +51 -0
  127. data/server/exampledocs/gb18030-example.xml +32 -0
  128. data/server/exampledocs/hd.xml +56 -0
  129. data/server/exampledocs/ipod_other.xml +60 -0
  130. data/server/exampledocs/ipod_video.xml +40 -0
  131. data/server/exampledocs/manufacturers.xml +75 -0
  132. data/server/exampledocs/mem.xml +77 -0
  133. data/server/exampledocs/money.xml +65 -0
  134. data/server/exampledocs/monitor.xml +35 -0
  135. data/server/exampledocs/monitor2.xml +34 -0
  136. data/server/exampledocs/mp500.xml +43 -0
  137. data/server/exampledocs/post.jar +0 -0
  138. data/server/exampledocs/post.sh +30 -0
  139. data/server/exampledocs/sd500.xml +38 -0
  140. data/server/exampledocs/solr.xml +38 -0
  141. data/server/exampledocs/test_utf8.sh +93 -0
  142. data/server/exampledocs/utf8-example.xml +42 -0
  143. data/server/exampledocs/vidcard.xml +62 -0
  144. data/server/lib/ext/jcl-over-slf4j-1.6.6.jar +0 -0
  145. data/server/lib/ext/jul-to-slf4j-1.6.6.jar +0 -0
  146. data/server/lib/ext/log4j-1.2.16.jar +0 -0
  147. data/server/lib/ext/slf4j-api-1.6.6.jar +0 -0
  148. data/server/lib/ext/slf4j-log4j12-1.6.6.jar +0 -0
  149. data/server/lib/jetty-continuation-8.1.8.v20121106.jar +0 -0
  150. data/server/lib/jetty-deploy-8.1.8.v20121106.jar +0 -0
  151. data/server/lib/jetty-http-8.1.8.v20121106.jar +0 -0
  152. data/server/lib/jetty-io-8.1.8.v20121106.jar +0 -0
  153. data/server/lib/jetty-jmx-8.1.8.v20121106.jar +0 -0
  154. data/server/lib/jetty-security-8.1.8.v20121106.jar +0 -0
  155. data/server/lib/jetty-server-8.1.8.v20121106.jar +0 -0
  156. data/server/lib/jetty-servlet-8.1.8.v20121106.jar +0 -0
  157. data/server/lib/jetty-util-8.1.8.v20121106.jar +0 -0
  158. data/server/lib/jetty-webapp-8.1.8.v20121106.jar +0 -0
  159. data/server/lib/jetty-xml-8.1.8.v20121106.jar +0 -0
  160. data/server/lib/servlet-api-3.0.jar +0 -0
  161. data/server/resources/log4j.properties +19 -0
  162. data/server/solr/README.txt +63 -0
  163. data/server/solr/solr.xml +53 -0
  164. data/server/solr/zoo.cfg +17 -0
  165. data/server/start.jar +0 -0
  166. data/server/webapps/solr.war +0 -0
  167. data/solr/lib/solr-winds-0.1.jar +0 -0
  168. metadata +284 -0
@@ -0,0 +1,246 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ # Syntax:
14
+ # "source" => "target"
15
+ # "source".length() > 0 (source cannot be empty.)
16
+ # "target".length() >= 0 (target can be empty.)
17
+
18
+ # example:
19
+ # "À" => "A"
20
+ # "\u00C0" => "A"
21
+ # "\u00C0" => "\u0041"
22
+ # "ß" => "ss"
23
+ # "\t" => " "
24
+ # "\n" => ""
25
+
26
+ # À => A
27
+ "\u00C0" => "A"
28
+
29
+ # Á => A
30
+ "\u00C1" => "A"
31
+
32
+ # Â => A
33
+ "\u00C2" => "A"
34
+
35
+ # Ã => A
36
+ "\u00C3" => "A"
37
+
38
+ # Ä => A
39
+ "\u00C4" => "A"
40
+
41
+ # Å => A
42
+ "\u00C5" => "A"
43
+
44
+ # Æ => AE
45
+ "\u00C6" => "AE"
46
+
47
+ # Ç => C
48
+ "\u00C7" => "C"
49
+
50
+ # È => E
51
+ "\u00C8" => "E"
52
+
53
+ # É => E
54
+ "\u00C9" => "E"
55
+
56
+ # Ê => E
57
+ "\u00CA" => "E"
58
+
59
+ # Ë => E
60
+ "\u00CB" => "E"
61
+
62
+ # Ì => I
63
+ "\u00CC" => "I"
64
+
65
+ # Í => I
66
+ "\u00CD" => "I"
67
+
68
+ # Î => I
69
+ "\u00CE" => "I"
70
+
71
+ # Ï => I
72
+ "\u00CF" => "I"
73
+
74
+ # Ĳ => IJ
75
+ "\u0132" => "IJ"
76
+
77
+ # Ð => D
78
+ "\u00D0" => "D"
79
+
80
+ # Ñ => N
81
+ "\u00D1" => "N"
82
+
83
+ # Ò => O
84
+ "\u00D2" => "O"
85
+
86
+ # Ó => O
87
+ "\u00D3" => "O"
88
+
89
+ # Ô => O
90
+ "\u00D4" => "O"
91
+
92
+ # Õ => O
93
+ "\u00D5" => "O"
94
+
95
+ # Ö => O
96
+ "\u00D6" => "O"
97
+
98
+ # Ø => O
99
+ "\u00D8" => "O"
100
+
101
+ # Π=> OE
102
+ "\u0152" => "OE"
103
+
104
+ # Þ => TH
105
+ "\u00DE" => "TH"
106
+
107
+ # Ù => U
108
+ "\u00D9" => "U"
109
+
110
+ # Ú => U
111
+ "\u00DA" => "U"
112
+
113
+ # Û => U
114
+ "\u00DB" => "U"
115
+
116
+ # Ü => U
117
+ "\u00DC" => "U"
118
+
119
+ # Ý => Y
120
+ "\u00DD" => "Y"
121
+
122
+ # Ÿ => Y
123
+ "\u0178" => "Y"
124
+
125
+ # à => a
126
+ "\u00E0" => "a"
127
+
128
+ # á => a
129
+ "\u00E1" => "a"
130
+
131
+ # â => a
132
+ "\u00E2" => "a"
133
+
134
+ # ã => a
135
+ "\u00E3" => "a"
136
+
137
+ # ä => a
138
+ "\u00E4" => "a"
139
+
140
+ # å => a
141
+ "\u00E5" => "a"
142
+
143
+ # æ => ae
144
+ "\u00E6" => "ae"
145
+
146
+ # ç => c
147
+ "\u00E7" => "c"
148
+
149
+ # è => e
150
+ "\u00E8" => "e"
151
+
152
+ # é => e
153
+ "\u00E9" => "e"
154
+
155
+ # ê => e
156
+ "\u00EA" => "e"
157
+
158
+ # ë => e
159
+ "\u00EB" => "e"
160
+
161
+ # ì => i
162
+ "\u00EC" => "i"
163
+
164
+ # í => i
165
+ "\u00ED" => "i"
166
+
167
+ # î => i
168
+ "\u00EE" => "i"
169
+
170
+ # ï => i
171
+ "\u00EF" => "i"
172
+
173
+ # ĳ => ij
174
+ "\u0133" => "ij"
175
+
176
+ # ð => d
177
+ "\u00F0" => "d"
178
+
179
+ # ñ => n
180
+ "\u00F1" => "n"
181
+
182
+ # ò => o
183
+ "\u00F2" => "o"
184
+
185
+ # ó => o
186
+ "\u00F3" => "o"
187
+
188
+ # ô => o
189
+ "\u00F4" => "o"
190
+
191
+ # õ => o
192
+ "\u00F5" => "o"
193
+
194
+ # ö => o
195
+ "\u00F6" => "o"
196
+
197
+ # ø => o
198
+ "\u00F8" => "o"
199
+
200
+ # œ => oe
201
+ "\u0153" => "oe"
202
+
203
+ # ß => ss
204
+ "\u00DF" => "ss"
205
+
206
+ # þ => th
207
+ "\u00FE" => "th"
208
+
209
+ # ù => u
210
+ "\u00F9" => "u"
211
+
212
+ # ú => u
213
+ "\u00FA" => "u"
214
+
215
+ # û => u
216
+ "\u00FB" => "u"
217
+
218
+ # ü => u
219
+ "\u00FC" => "u"
220
+
221
+ # ý => y
222
+ "\u00FD" => "y"
223
+
224
+ # ÿ => y
225
+ "\u00FF" => "y"
226
+
227
+ # ﬀ => ff
228
+ "\uFB00" => "ff"
229
+
230
+ # ﬁ => fi
231
+ "\uFB01" => "fi"
232
+
233
+ # ﬂ => fl
234
+ "\uFB02" => "fl"
235
+
236
+ # ﬃ => ffi
237
+ "\uFB03" => "ffi"
238
+
239
+ # ﬄ => ffl
240
+ "\uFB04" => "ffl"
241
+
242
+ # ﬅ => ft
243
+ "\uFB05" => "ft"
244
+
245
+ # ﬆ => st
246
+ "\uFB06" => "st"
@@ -0,0 +1,21 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ #-----------------------------------------------------------------------
14
+ # Use a protected word file to protect against the stemmer reducing two
15
+ # unrelated words to the same base word.
16
+
17
+ # Some non-words that normally won't be encountered,
18
+ # just to test that they won't be stemmed.
19
+ dontstems
20
+ zwhacky
21
+
@@ -0,0 +1,1125 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ This is the Solr schema file. This file should be named "schema.xml" and
21
+ should be in the conf directory under the solr home
22
+ (i.e. ./solr/conf/schema.xml by default)
23
+ or located where the classloader for the Solr webapp can find it.
24
+
25
+ This example schema is the recommended starting point for users.
26
+ It should be kept correct and concise, usable out-of-the-box.
27
+
28
+ For more information on how to customize this file, please see
29
+ http://wiki.apache.org/solr/SchemaXml
30
+
31
+ PERFORMANCE NOTE: this schema includes many optional features and should not
32
+ be used for benchmarking. To improve performance one could
33
+ - set stored="false" for all fields possible (esp large fields) when you
34
+ only need to search on the field but don't need to return the original
35
+ value.
36
+ - set indexed="false" if you don't need to search on the field, but only
37
+ return the field as a result of searching on other indexed fields.
38
+ - remove all unneeded copyField statements
39
+ - for best index size and searching performance, set "indexed" to false
40
+ for all general text fields, use copyField to copy them to the
41
+ catchall "text" field, and use that for searching.
42
+ - For maximum indexing performance, use the StreamingUpdateSolrServer
43
+ java client.
44
+ - Remember to run the JVM in server mode, and use a higher logging level
45
+ that avoids logging every request
46
+ -->
47
+
48
+ <schema name="example" version="1.5">
49
+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
50
+ version="x.y" is Solr's version number for the schema syntax and
51
+ semantics. It should not normally be changed by applications.
52
+
53
+ 1.0: multiValued attribute did not exist, all fields are multiValued
54
+ by nature
55
+ 1.1: multiValued attribute introduced, false by default
56
+ 1.2: omitTermFreqAndPositions attribute introduced, true by default
57
+ except for text fields.
58
+ 1.3: removed optional field compress feature
59
+ 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser
60
+ behavior when a single string produces multiple tokens. Defaults
61
+ to off for version >= 1.4
62
+ 1.5: omitNorms defaults to true for primitive field types
63
+ (int, float, boolean, string...)
64
+ -->
65
+
66
+ <fields>
67
+ <!-- Valid attributes for fields:
68
+ name: mandatory - the name for the field
69
+ type: mandatory - the name of a field type from the
70
+ <types> fieldType section
71
+ indexed: true if this field should be indexed (searchable or sortable)
72
+ stored: true if this field should be retrievable
73
+ docValues: true if this field should have doc values. Doc values are
74
+ useful for faceting, grouping, sorting and function queries. Although not
75
+ required, doc values will make the index faster to load, more
76
+ NRT-friendly and more memory-efficient. They however come with some
77
+ limitations: they are currently only supported by StrField, UUIDField
78
+ and all Trie*Fields, and depending on the field type, they might
79
+ require the field to be single-valued, be required or have a default
80
+ value (check the documentation of the field type you're interested in
81
+ for more information)
82
+ multiValued: true if this field may contain multiple values per document
83
+ omitNorms: (expert) set to true to omit the norms associated with
84
+ this field (this disables length normalization and index-time
85
+ boosting for the field, and saves some memory). Only full-text
86
+ fields or fields that need an index-time boost need norms.
87
+ Norms are omitted for primitive (non-analyzed) types by default.
88
+ termVectors: [false] set to true to store the term vector for a
89
+ given field.
90
+ When using MoreLikeThis, fields used for similarity should be
91
+ stored for best performance.
92
+ termPositions: Store position information with the term vector.
93
+ This will increase storage costs.
94
+ termOffsets: Store offset information with the term vector. This
95
+ will increase storage costs.
96
+ required: The field is required. It will throw an error if the
97
+ value does not exist
98
+ default: a value that should be used if no value is specified
99
+ when adding a document.
100
+ -->
101
+
102
+ <!-- field names should consist of alphanumeric or underscore characters only and
103
+ not start with a digit. This is not currently strictly enforced,
104
+ but other field names will not have first class support from all components
105
+ and back compatibility is not guaranteed. Names with both leading and
106
+ trailing underscores (e.g. _version_) are reserved.
107
+ -->
108
+
109
+ <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
110
+ <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
111
+ <field name="name" type="text_general" indexed="true" stored="true"/>
112
+ <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
113
+ <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
114
+ <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
115
+ <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
116
+
117
+ <field name="weight" type="float" indexed="true" stored="true"/>
118
+ <field name="price" type="float" indexed="true" stored="true"/>
119
+ <field name="popularity" type="int" indexed="true" stored="true" />
120
+ <field name="inStock" type="boolean" indexed="true" stored="true" />
121
+
122
+ <field name="store" type="location" indexed="true" stored="true"/>
123
+
124
+ <!-- Common metadata fields, named specifically to match up with
125
+ SolrCell metadata when parsing rich documents such as Word, PDF.
126
+ Some fields are multiValued only because Tika currently may return
127
+ multiple values for them. Some metadata is parsed from the documents,
128
+ but there are some which come from the client context:
129
+ "content_type": From the HTTP headers of incoming stream
130
+ "resourcename": From SolrCell request param resource.name
131
+ -->
132
+ <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
133
+ <field name="subject" type="text_general" indexed="true" stored="true"/>
134
+ <field name="description" type="text_general" indexed="true" stored="true"/>
135
+ <field name="comments" type="text_general" indexed="true" stored="true"/>
136
+ <field name="author" type="text_general" indexed="true" stored="true"/>
137
+ <field name="keywords" type="text_general" indexed="true" stored="true"/>
138
+ <field name="category" type="text_general" indexed="true" stored="true"/>
139
+ <field name="resourcename" type="text_general" indexed="true" stored="true"/>
140
+ <field name="url" type="text_general" indexed="true" stored="true"/>
141
+ <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
142
+ <field name="last_modified" type="date" indexed="true" stored="true"/>
143
+ <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
144
+
145
+ <!-- Main body of document extracted by SolrCell.
146
+ NOTE: This field is not indexed by default, since it is also copied to "text"
147
+ using copyField below. This is to save space. Use this field for returning and
148
+ highlighting document content. Use the "text" field to search the content. -->
149
+ <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>
150
+
151
+
152
+ <!-- catchall field, containing all other searchable text fields (implemented
153
+ via copyField further on in this schema) -->
154
+ <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
155
+
156
+ <!-- catchall text field that indexes tokens both normally and in reverse for efficient
157
+ leading wildcard queries. -->
158
+ <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
159
+
160
+ <!-- non-tokenized version of manufacturer to make it easier to sort or group
161
+ results by manufacturer. copied from "manu" via copyField -->
162
+ <field name="manu_exact" type="string" indexed="true" stored="false"/>
163
+
164
+ <field name="payloads" type="payloads" indexed="true" stored="true"/>
165
+
166
+ <field name="_version_" type="long" indexed="true" stored="true"/>
167
+
168
+ <!--
169
+ Some fields such as popularity and manu_exact could be modified to
170
+ leverage doc values:
171
+ <field name="popularity" type="int" indexed="true" stored="true" docValues="true" default="0" />
172
+ <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" default="" />
173
+
174
+ Although it would make indexing slightly slower and the index bigger, it
175
+ would also make the index faster to load, more memory-efficient and more
176
+ NRT-friendly.
177
+ -->
178
+
179
+ <!-- Dynamic field definitions allow using convention over configuration
180
+ for fields via the specification of patterns to match field names.
181
+ EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
182
+ RESTRICTION: the glob-like pattern in the name attribute must have
183
+ a "*" only at the start or the end. -->
184
+
185
+ <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
186
+ <dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/>
187
+ <dynamicField name="*_s" type="string" indexed="true" stored="true" />
188
+ <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
189
+ <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
190
+ <dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/>
191
+ <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
192
+ <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
193
+ <dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/>
194
+ <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
195
+ <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
196
+ <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
197
+ <dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/>
198
+ <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
199
+ <dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/>
200
+
201
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
202
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
203
+
204
+ <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
205
+ <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
206
+ <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
207
+
208
+ <!-- some trie-coded dynamic fields for faster range queries -->
209
+ <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
210
+ <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
211
+ <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
212
+ <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
213
+ <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
214
+
215
+ <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
216
+ <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
217
+
218
+ <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
219
+ <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
220
+
221
+ <dynamicField name="random_*" type="random" />
222
+
223
+ <!-- uncomment the following to ignore any fields that don't already match an existing
224
+ field name or dynamic field, rather than reporting them as an error.
225
+ alternately, change the type="ignored" to some other type e.g. "text" if you want
226
+ unknown fields indexed and/or stored by default -->
227
+ <!--dynamicField name="*" type="ignored" multiValued="true" /-->
228
+
229
+ </fields>
230
+
231
+
232
+ <!-- Field to use to determine and enforce document uniqueness.
233
+ Unless this field is marked with required="false", it will be a required field
234
+ -->
235
+ <uniqueKey>id</uniqueKey>
236
+
237
+ <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
238
+ parsing a query string that isn't explicit about the field. Machine (non-user)
239
+ generated queries are best made explicit, or they can use the "df" request parameter
240
+ which takes precedence over this.
241
+ Note: Un-commenting defaultSearchField will be insufficient if your request handler
242
+ in solrconfig.xml defines "df", which takes precedence. That would need to be removed.
243
+ <defaultSearchField>text</defaultSearchField> -->
244
+
245
+ <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query parsers
246
+ when parsing a query string to determine if a clause of the query should be marked as
247
+ required or optional, assuming the clause isn't already marked by some operator.
248
+ The default is OR, which is generally assumed so it is not a good idea to change it
249
+ globally here. The "q.op" request parameter takes precedence over this.
250
+ <solrQueryParser defaultOperator="OR"/> -->
251
+
252
+ <!-- copyField commands copy one field to another at the time a document
253
+ is added to the index. It's used either to index the same field differently,
254
+ or to add multiple fields to the same field for easier/faster searching. -->
255
+
256
+ <copyField source="cat" dest="text"/>
257
+ <copyField source="name" dest="text"/>
258
+ <copyField source="manu" dest="text"/>
259
+ <copyField source="features" dest="text"/>
260
+ <copyField source="includes" dest="text"/>
261
+ <copyField source="manu" dest="manu_exact"/>
262
+
263
+ <!-- Copy the price into a currency enabled field (default USD) -->
264
+ <copyField source="price" dest="price_c"/>
265
+
266
+ <!-- Text fields from SolrCell to search by default in our catch-all field -->
267
+ <copyField source="title" dest="text"/>
268
+ <copyField source="author" dest="text"/>
269
+ <copyField source="description" dest="text"/>
270
+ <copyField source="keywords" dest="text"/>
271
+ <copyField source="content" dest="text"/>
272
+ <copyField source="content_type" dest="text"/>
273
+ <copyField source="resourcename" dest="text"/>
274
+ <copyField source="url" dest="text"/>
275
+
276
+ <!-- Create a string version of author for faceting -->
277
+ <copyField source="author" dest="author_s"/>
278
+
279
+ <!-- Above, multiple source fields are copied to the [text] field.
280
+ Another way to map multiple source fields to the same
281
+ destination field is to use the dynamic field syntax.
282
+ copyField also supports a maxChars to copy setting. -->
283
+
284
+ <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
285
+
286
+ <!-- copy name to alphaNameSort, a field designed for sorting by name -->
287
+ <!-- <copyField source="name" dest="alphaNameSort"/> -->
288
+
289
+ <types>
290
+ <!-- field type definitions. The "name" attribute is
291
+ just a label to be used by field definitions. The "class"
292
+ attribute and any other attributes determine the real
293
+ behavior of the fieldType.
294
+ Class names starting with "solr" refer to java classes in a
295
+ standard package such as org.apache.solr.analysis
296
+ -->
297
+
298
+ <!-- The StrField type is not analyzed, but indexed/stored verbatim.
299
+ It supports doc values but in that case the field needs to be
300
+ single-valued and either required or have a default value.
301
+ -->
302
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
303
+
304
+ <!-- boolean type: "true" or "false" -->
305
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
306
+
307
+ <!-- The sortMissingLast and sortMissingFirst attributes are optional and are
308
+ currently supported on types that are sorted internally as strings
309
+ and on numeric types.
310
+ This includes "string","boolean", and, as of 3.5 (and 4.x),
311
+ int, float, long, date, double, including the "Trie" variants.
312
+ - If sortMissingLast="true", then a sort on this field will cause documents
313
+ without the field to come after documents with the field,
314
+ regardless of the requested sort order (asc or desc).
315
+ - If sortMissingFirst="true", then a sort on this field will cause documents
316
+ without the field to come before documents with the field,
317
+ regardless of the requested sort order.
318
+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
319
+ then default lucene sorting will be used which places docs without the
320
+ field first in an ascending sort and last in a descending sort.
321
+ -->
322
+
323
+ <!--
324
+ Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
325
+
326
+ These fields support doc values, but they require the field to be
327
+ single-valued and either be required or have a default value.
328
+ -->
329
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
330
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
331
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
332
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
333
+
334
+ <!--
335
+ Numeric field types that index each value at various levels of precision
336
+ to accelerate range queries when the number of values between the range
337
+ endpoints is large. See the javadoc for NumericRangeQuery for internal
338
+ implementation details.
339
+
340
+ Smaller precisionStep values (specified in bits) will lead to more tokens
341
+ indexed per value, slightly larger index size, and faster range queries.
342
+ A precisionStep of 0 disables indexing at different precision levels.
343
+ -->
344
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
345
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
346
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
347
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
348
+
349
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
350
+ is a more restricted form of the canonical representation of dateTime
351
+ http://www.w3.org/TR/xmlschema-2/#dateTime
352
+ The trailing "Z" designates UTC time and is mandatory.
353
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
354
+ All other components are mandatory.
355
+
356
+ Expressions can also be used to denote calculations that should be
357
+ performed relative to "NOW" to determine the value, e.g.:
358
+
359
+ NOW/HOUR
360
+ ... Round to the start of the current hour
361
+ NOW-1DAY
362
+ ... Exactly 1 day prior to now
363
+ NOW/DAY+6MONTHS+3DAYS
364
+ ... 6 months and 3 days in the future from the start of
365
+ the current day
366
+
367
+ Consult the DateField javadocs for more information.
368
+
369
+ Note: For faster range queries, consider the tdate type
370
+ -->
371
+ <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
372
+
373
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
374
+ <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
375
+
376
+
377
+ <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
378
+ <fieldtype name="binary" class="solr.BinaryField"/>
379
+
380
+ <!--
381
+ Note:
382
+ These should only be used for compatibility with existing indexes (created with lucene or older Solr versions).
383
+ Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last
384
+
385
+ Plain numeric field types that store and index the text
386
+ value verbatim (and hence don't correctly support range queries, since the
387
+ lexicographic ordering isn't equal to the numeric ordering)
388
+ -->
389
+ <fieldType name="pint" class="solr.IntField"/>
390
+ <fieldType name="plong" class="solr.LongField"/>
391
+ <fieldType name="pfloat" class="solr.FloatField"/>
392
+ <fieldType name="pdouble" class="solr.DoubleField"/>
393
+ <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
394
+
395
+ <!-- The "RandomSortField" is not used to store or search any
396
+ data. You can declare fields of this type in your schema
397
+ to generate pseudo-random orderings of your docs for sorting
398
+ or function purposes. The ordering is generated based on the field
399
+ name and the version of the index. As long as the index version
400
+ remains unchanged, and the same field name is reused,
401
+ the ordering of the docs will be consistent.
402
+ If you want different pseudo-random orderings of documents,
403
+ for the same version of the index, use a dynamicField and
404
+ change the field name in the request.
405
+ -->
406
+ <fieldType name="random" class="solr.RandomSortField" indexed="true" />
407
+
408
+ <!-- solr.TextField allows the specification of custom text analyzers
409
+ specified as a tokenizer and a list of token filters. Different
410
+ analyzers may be specified for indexing and querying.
411
+
412
+ The optional positionIncrementGap puts space between multiple fields of
413
+ this type on the same document, with the purpose of preventing false phrase
414
+ matching across fields.
415
+
416
+ For more info on customizing your analyzer chain, please see
417
+ http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
418
+ -->
419
+
420
+ <!-- One can also specify an existing Analyzer class that has a
421
+ default constructor via the class attribute on the analyzer element.
422
+ Example:
423
+ <fieldType name="text_greek" class="solr.TextField">
424
+ <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
425
+ </fieldType>
426
+ -->
427
+
428
+ <!-- A text field that only splits on whitespace for exact matching of words -->
429
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
430
+ <analyzer>
431
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
432
+ </analyzer>
433
+ </fieldType>
434
+
435
+ <!-- A general text field that has reasonable, generic
436
+ cross-language defaults: it tokenizes with StandardTokenizer,
437
+ removes stop words from case-insensitive "stopwords.txt"
438
+ (empty by default), and down cases. At query time only, it
439
+ also applies synonyms. -->
440
+ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
441
+ <analyzer type="index">
442
+ <tokenizer class="solr.StandardTokenizerFactory"/>
443
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
444
+ <!-- in this example, we will only use synonyms at query time
445
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
446
+ -->
447
+ <filter class="solr.LowerCaseFilterFactory"/>
448
+ </analyzer>
449
+ <analyzer type="query">
450
+ <tokenizer class="solr.StandardTokenizerFactory"/>
451
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
452
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
453
+ <filter class="solr.LowerCaseFilterFactory"/>
454
+ </analyzer>
455
+ </fieldType>
456
+
457
+ <!-- A text field with defaults appropriate for English: it
458
+ tokenizes with StandardTokenizer, removes English stop words
459
+ (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
460
+ finally applies Porter's stemming. The query time analyzer
461
+ also applies synonyms from synonyms.txt. -->
462
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
463
+ <analyzer type="index">
464
+ <tokenizer class="solr.StandardTokenizerFactory"/>
465
+ <!-- in this example, we will only use synonyms at query time
466
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
467
+ -->
468
+ <!-- Case insensitive stop word removal.
469
+ add enablePositionIncrements=true in both the index and query
470
+ analyzers to leave a 'gap' for more accurate phrase queries.
471
+ -->
472
+ <filter class="solr.StopFilterFactory"
473
+ ignoreCase="true"
474
+ words="lang/stopwords_en.txt"
475
+ enablePositionIncrements="true"
476
+ />
477
+ <filter class="solr.LowerCaseFilterFactory"/>
478
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
479
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
480
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
481
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
482
+ -->
483
+ <filter class="solr.PorterStemFilterFactory"/>
484
+ </analyzer>
485
+ <analyzer type="query">
486
+ <tokenizer class="solr.StandardTokenizerFactory"/>
487
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
488
+ <filter class="solr.StopFilterFactory"
489
+ ignoreCase="true"
490
+ words="lang/stopwords_en.txt"
491
+ enablePositionIncrements="true"
492
+ />
493
+ <filter class="solr.LowerCaseFilterFactory"/>
494
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
495
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
496
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
497
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
498
+ -->
499
+ <filter class="solr.PorterStemFilterFactory"/>
500
+ </analyzer>
501
+ </fieldType>
502
+
503
+ <!-- A text field with defaults appropriate for English, plus
504
+ aggressive word-splitting and autophrase features enabled.
505
+ This field is just like text_en, except it adds
506
+ WordDelimiterFilter to enable splitting and matching of
507
+ words on case-change, alpha numeric boundaries, and
508
+ non-alphanumeric chars. This means certain compound word
509
+ cases will work, for example query "wi fi" will match
510
+ document "WiFi" or "wi-fi".
511
+ -->
512
+ <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
513
+ <analyzer type="index">
514
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
515
+ <!-- in this example, we will only use synonyms at query time
516
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
517
+ -->
518
+ <!-- Case insensitive stop word removal.
519
+ add enablePositionIncrements=true in both the index and query
520
+ analyzers to leave a 'gap' for more accurate phrase queries.
521
+ -->
522
+ <filter class="solr.StopFilterFactory"
523
+ ignoreCase="true"
524
+ words="lang/stopwords_en.txt"
525
+ enablePositionIncrements="true"
526
+ />
527
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
528
+ <filter class="solr.LowerCaseFilterFactory"/>
529
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
530
+ <filter class="solr.PorterStemFilterFactory"/>
531
+ </analyzer>
532
+ <analyzer type="query">
533
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
534
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
535
+ <filter class="solr.StopFilterFactory"
536
+ ignoreCase="true"
537
+ words="lang/stopwords_en.txt"
538
+ enablePositionIncrements="true"
539
+ />
540
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
541
+ <filter class="solr.LowerCaseFilterFactory"/>
542
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
543
+ <filter class="solr.PorterStemFilterFactory"/>
544
+ </analyzer>
545
+ </fieldType>
546
+
547
+ <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
548
+ but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
549
+ <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
550
+ <analyzer>
551
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
552
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
553
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
554
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
555
+ <filter class="solr.LowerCaseFilterFactory"/>
556
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
557
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
558
+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
559
+ possible with WordDelimiterFilter in conjunction with stemming. -->
560
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
561
+ </analyzer>
562
+ </fieldType>
563
+
564
+ <!-- Just like text_general except it reverses the characters of
565
+ each token, to enable more efficient leading wildcard queries. -->
566
+ <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
567
+ <analyzer type="index">
568
+ <tokenizer class="solr.StandardTokenizerFactory"/>
569
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
570
+ <filter class="solr.LowerCaseFilterFactory"/>
571
+ <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
572
+ maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
573
+ </analyzer>
574
+ <analyzer type="query">
575
+ <tokenizer class="solr.StandardTokenizerFactory"/>
576
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
577
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
578
+ <filter class="solr.LowerCaseFilterFactory"/>
579
+ </analyzer>
580
+ </fieldType>
581
+
582
+ <!-- charFilter + WhitespaceTokenizer -->
583
+ <!--
584
+ <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
585
+ <analyzer>
586
+ <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
587
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
588
+ </analyzer>
589
+ </fieldType>
590
+ -->
591
+
592
+ <!-- This is an example of using the KeywordTokenizer along
593
+ with various TokenFilterFactories to produce a sortable field
594
+ that does not include some properties of the source text
595
+ -->
596
+ <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
597
+ <analyzer>
598
+ <!-- KeywordTokenizer does no actual tokenizing, so the entire
599
+ input string is preserved as a single token
600
+ -->
601
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
602
+ <!-- The LowerCase TokenFilter does what you expect, which can be useful
603
+ when you want your sorting to be case insensitive
604
+ -->
605
+ <filter class="solr.LowerCaseFilterFactory" />
606
+ <!-- The TrimFilter removes any leading or trailing whitespace -->
607
+ <filter class="solr.TrimFilterFactory" />
608
+ <!-- The PatternReplaceFilter gives you the flexibility to use
609
+ Java Regular expression to replace any sequence of characters
610
+ matching a pattern with an arbitrary replacement string,
611
+ which may include back references to portions of the original
612
+ string matched by the pattern.
613
+
614
+ See the Java Regular Expression documentation for more
615
+ information on pattern and replacement string syntax.
616
+
617
+ http://java.sun.com/j2se/1.6.0/docs/api/java/util/regex/package-summary.html
618
+ -->
619
+ <filter class="solr.PatternReplaceFilterFactory"
620
+ pattern="([^a-z])" replacement="" replace="all"
621
+ />
622
+ </analyzer>
623
+ </fieldType>
624
+
625
+ <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
626
+ <analyzer>
627
+ <tokenizer class="solr.StandardTokenizerFactory"/>
628
+ <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
629
+ </analyzer>
630
+ </fieldtype>
631
+
632
+ <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
633
+ <analyzer>
634
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
635
+ <!--
636
+ The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
637
+ a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
638
+ Attributes of the DelimitedPayloadTokenFilterFactory :
639
+ "delimiter" - a one character delimiter. Default is | (pipe)
640
+ "encoder" - how to encode the following value into a payload
641
+ float -> org.apache.lucene.analysis.payloads.FloatEncoder,
642
+ integer -> o.a.l.a.p.IntegerEncoder
643
+ identity -> o.a.l.a.p.IdentityEncoder
644
+ Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
645
+ -->
646
+ <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
647
+ </analyzer>
648
+ </fieldtype>
649
+
650
+ <!-- lowercases the entire field value, keeping it as a single token. -->
651
+ <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
652
+ <analyzer>
653
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
654
+ <filter class="solr.LowerCaseFilterFactory" />
655
+ </analyzer>
656
+ </fieldType>
657
+
658
+ <!--
659
+ Example of using PathHierarchyTokenizerFactory at index time, so
660
+ queries for paths match documents at that path, or in descendent paths
661
+ -->
662
+ <fieldType name="descendent_path" class="solr.TextField">
663
+ <analyzer type="index">
664
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
665
+ </analyzer>
666
+ <analyzer type="query">
667
+ <tokenizer class="solr.KeywordTokenizerFactory" />
668
+ </analyzer>
669
+ </fieldType>
670
+ <!--
671
+ Example of using PathHierarchyTokenizerFactory at query time, so
672
+ queries for paths match documents at that path, or in ancestor paths
673
+ -->
674
+ <fieldType name="ancestor_path" class="solr.TextField">
675
+ <analyzer type="index">
676
+ <tokenizer class="solr.KeywordTokenizerFactory" />
677
+ </analyzer>
678
+ <analyzer type="query">
679
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
680
+ </analyzer>
681
+ </fieldType>
682
+
683
+ <!-- since fields of this type are by default not stored or indexed,
684
+ any data added to them will be ignored outright. -->
685
+ <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
686
+
687
+ <!-- This point type indexes the coordinates as separate fields (subFields)
688
+ If subFieldType is defined, it references a type, and a dynamic field
689
+ definition is created matching *___<typename>. Alternately, if
690
+ subFieldSuffix is defined, that is used to create the subFields.
691
+ Example: if subFieldType="double", then the coordinates would be
692
+ indexed in fields myloc_0___double,myloc_1___double.
693
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
694
+ in fields myloc_0_d,myloc_1_d
695
+ The subFields are an implementation detail of the fieldType, and end
696
+ users normally should not need to know about them.
697
+ -->
698
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
699
+
700
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
701
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
702
+
703
+ <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
704
+ For more information about this and other Spatial fields new to Solr 4, see:
705
+ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
706
+ -->
707
+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
708
+ geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
709
+
710
+ <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
711
+ Parameters:
712
+ defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
713
+ precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
714
+ providerClass: Lets you plug in other exchange provider backend:
715
+ solr.FileExchangeRateProvider is the default and takes one parameter:
716
+ currencyConfig: name of an xml file holding exchange rates
717
+ solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
718
+ ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
719
+ refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
720
+ -->
721
+ <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
722
+
723
+
724
+
725
+ <!-- some examples for different languages (generally ordered by ISO code) -->
726
+
727
+ <!-- Arabic -->
728
+ <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
729
+ <analyzer>
730
+ <tokenizer class="solr.StandardTokenizerFactory"/>
731
+ <!-- for any non-arabic -->
732
+ <filter class="solr.LowerCaseFilterFactory"/>
733
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" enablePositionIncrements="true"/>
734
+ <!-- normalizes ﻯ to ﻱ, etc -->
735
+ <filter class="solr.ArabicNormalizationFilterFactory"/>
736
+ <filter class="solr.ArabicStemFilterFactory"/>
737
+ </analyzer>
738
+ </fieldType>
739
+
740
+ <!-- Bulgarian -->
741
+ <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
742
+ <analyzer>
743
+ <tokenizer class="solr.StandardTokenizerFactory"/>
744
+ <filter class="solr.LowerCaseFilterFactory"/>
745
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" enablePositionIncrements="true"/>
746
+ <filter class="solr.BulgarianStemFilterFactory"/>
747
+ </analyzer>
748
+ </fieldType>
749
+
750
+ <!-- Catalan -->
751
+ <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
752
+ <analyzer>
753
+ <tokenizer class="solr.StandardTokenizerFactory"/>
754
+ <!-- removes l', etc -->
755
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
756
+ <filter class="solr.LowerCaseFilterFactory"/>
757
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" enablePositionIncrements="true"/>
758
+ <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
759
+ </analyzer>
760
+ </fieldType>
761
+
762
+ <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
763
+ <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
764
+ <analyzer>
765
+ <tokenizer class="solr.StandardTokenizerFactory"/>
766
+ <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
767
+ <filter class="solr.CJKWidthFilterFactory"/>
768
+ <!-- for any non-CJK -->
769
+ <filter class="solr.LowerCaseFilterFactory"/>
770
+ <filter class="solr.CJKBigramFilterFactory"/>
771
+ </analyzer>
772
+ </fieldType>
773
+
774
+ <!-- Czech -->
775
+ <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
776
+ <analyzer>
777
+ <tokenizer class="solr.StandardTokenizerFactory"/>
778
+ <filter class="solr.LowerCaseFilterFactory"/>
779
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" enablePositionIncrements="true"/>
780
+ <filter class="solr.CzechStemFilterFactory"/>
781
+ </analyzer>
782
+ </fieldType>
783
+
784
+ <!-- Danish -->
785
+ <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
786
+ <analyzer>
787
+ <tokenizer class="solr.StandardTokenizerFactory"/>
788
+ <filter class="solr.LowerCaseFilterFactory"/>
789
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" enablePositionIncrements="true"/>
790
+ <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
791
+ </analyzer>
792
+ </fieldType>
793
+
794
+ <!-- German -->
795
+ <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
796
+ <analyzer>
797
+ <tokenizer class="solr.StandardTokenizerFactory"/>
798
+ <filter class="solr.LowerCaseFilterFactory"/>
799
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" enablePositionIncrements="true"/>
800
+ <filter class="solr.GermanNormalizationFilterFactory"/>
801
+ <filter class="solr.GermanLightStemFilterFactory"/>
802
+ <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
803
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
804
+ </analyzer>
805
+ </fieldType>
806
+
807
+ <!-- Greek -->
808
+ <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
809
+ <analyzer>
810
+ <tokenizer class="solr.StandardTokenizerFactory"/>
811
+ <!-- greek specific lowercase for sigma -->
812
+ <filter class="solr.GreekLowerCaseFilterFactory"/>
813
+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
814
+ <filter class="solr.GreekStemFilterFactory"/>
815
+ </analyzer>
816
+ </fieldType>
817
+
818
+ <!-- Spanish -->
819
+ <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
820
+ <analyzer>
821
+ <tokenizer class="solr.StandardTokenizerFactory"/>
822
+ <filter class="solr.LowerCaseFilterFactory"/>
823
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" enablePositionIncrements="true"/>
824
+ <filter class="solr.SpanishLightStemFilterFactory"/>
825
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
826
+ </analyzer>
827
+ </fieldType>
828
+
829
+ <!-- Basque -->
830
+ <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
831
+ <analyzer>
832
+ <tokenizer class="solr.StandardTokenizerFactory"/>
833
+ <filter class="solr.LowerCaseFilterFactory"/>
834
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" enablePositionIncrements="true"/>
835
+ <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
836
+ </analyzer>
837
+ </fieldType>
838
+
839
+ <!-- Persian -->
840
+ <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
841
+ <analyzer>
842
+ <!-- for ZWNJ -->
843
+ <charFilter class="solr.PersianCharFilterFactory"/>
844
+ <tokenizer class="solr.StandardTokenizerFactory"/>
845
+ <filter class="solr.LowerCaseFilterFactory"/>
846
+ <filter class="solr.ArabicNormalizationFilterFactory"/>
847
+ <filter class="solr.PersianNormalizationFilterFactory"/>
848
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" enablePositionIncrements="true"/>
849
+ </analyzer>
850
+ </fieldType>
851
+
852
+ <!-- Finnish -->
853
+ <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
854
+ <analyzer>
855
+ <tokenizer class="solr.StandardTokenizerFactory"/>
856
+ <filter class="solr.LowerCaseFilterFactory"/>
857
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" enablePositionIncrements="true"/>
858
+ <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
859
+ <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
860
+ </analyzer>
861
+ </fieldType>
862
+
863
+ <!-- French -->
864
+ <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
865
+ <analyzer>
866
+ <tokenizer class="solr.StandardTokenizerFactory"/>
867
+ <!-- removes l', etc -->
868
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
869
+ <filter class="solr.LowerCaseFilterFactory"/>
870
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" enablePositionIncrements="true"/>
871
+ <filter class="solr.FrenchLightStemFilterFactory"/>
872
+ <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
873
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
874
+ </analyzer>
875
+ </fieldType>
876
+
877
+ <!-- Irish -->
878
+ <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
879
+ <analyzer>
880
+ <tokenizer class="solr.StandardTokenizerFactory"/>
881
+ <!-- removes d', etc -->
882
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
883
+ <!-- removes n-, etc. position increments is intentionally false! -->
884
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" enablePositionIncrements="false"/>
885
+ <filter class="solr.IrishLowerCaseFilterFactory"/>
886
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" enablePositionIncrements="true"/>
887
+ <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
888
+ </analyzer>
889
+ </fieldType>
890
+
891
+ <!-- Galician -->
892
+ <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
893
+ <analyzer>
894
+ <tokenizer class="solr.StandardTokenizerFactory"/>
895
+ <filter class="solr.LowerCaseFilterFactory"/>
896
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" enablePositionIncrements="true"/>
897
+ <filter class="solr.GalicianStemFilterFactory"/>
898
+ <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
899
+ </analyzer>
900
+ </fieldType>
901
+
902
+ <!-- Hindi -->
903
+ <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
904
+ <analyzer>
905
+ <tokenizer class="solr.StandardTokenizerFactory"/>
906
+ <filter class="solr.LowerCaseFilterFactory"/>
907
+ <!-- normalizes unicode representation -->
908
+ <filter class="solr.IndicNormalizationFilterFactory"/>
909
+ <!-- normalizes variation in spelling -->
910
+ <filter class="solr.HindiNormalizationFilterFactory"/>
911
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" enablePositionIncrements="true"/>
912
+ <filter class="solr.HindiStemFilterFactory"/>
913
+ </analyzer>
914
+ </fieldType>
915
+
916
+ <!-- Hungarian (text_hu): standard tokenizer, lowercasing, snowball-format stop words from lang/stopwords_hu.txt, then Snowball stemming with language="Hungarian" -->
917
+ <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
918
+ <analyzer>
919
+ <tokenizer class="solr.StandardTokenizerFactory"/>
920
+ <filter class="solr.LowerCaseFilterFactory"/>
921
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" enablePositionIncrements="true"/>
922
+ <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
923
+ <!-- less aggressive alternative to the stemmer above: <filter class="solr.HungarianLightStemFilterFactory"/> -->
924
+ </analyzer>
925
+ </fieldType>
926
+
927
+ <!-- Armenian (text_hy): standard tokenizer, lowercasing, stop words from lang/stopwords_hy.txt, then Snowball stemming with language="Armenian" -->
928
+ <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
929
+ <analyzer>
930
+ <tokenizer class="solr.StandardTokenizerFactory"/>
931
+ <filter class="solr.LowerCaseFilterFactory"/>
932
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" enablePositionIncrements="true"/>
933
+ <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
934
+ </analyzer>
935
+ </fieldType>
936
+
937
+ <!-- Indonesian (text_id): standard tokenizer, lowercasing, stop words from lang/stopwords_id.txt, then IndonesianStemFilterFactory with stemDerivational="true" -->
938
+ <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
939
+ <analyzer>
940
+ <tokenizer class="solr.StandardTokenizerFactory"/>
941
+ <filter class="solr.LowerCaseFilterFactory"/>
942
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" enablePositionIncrements="true"/>
943
+ <!-- for a less aggressive approach (inflectional suffixes only), set stemDerivational to false on the filter below -->
944
+ <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
945
+ </analyzer>
946
+ </fieldType>
947
+
948
+ <!-- Italian (text_it): standard tokenizer, elision removal, lowercasing, snowball-format stop words from lang/stopwords_it.txt, then ItalianLightStemFilterFactory -->
949
+ <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
950
+ <analyzer>
951
+ <tokenizer class="solr.StandardTokenizerFactory"/>
952
+ <!-- removes elided articles such as l', using the list in lang/contractions_it.txt -->
953
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
954
+ <filter class="solr.LowerCaseFilterFactory"/>
955
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" enablePositionIncrements="true"/>
956
+ <filter class="solr.ItalianLightStemFilterFactory"/>
957
+ <!-- more aggressive alternative to the stemmer above: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
958
+ </analyzer>
959
+ </fieldType>
960
+
961
+ <!-- Japanese (text_ja) using morphological analysis (see text_cjk for a configuration using bigramming)
962
+
963
+ NOTE: If you want to optimize search for precision, use the default operator AND in your query
964
+ parser config with <solrQueryParser defaultOperator="AND"/> further down in this file. Use
965
+ OR if you would like to optimize for recall (default).
966
+ -->
967
+ <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
968
+ <analyzer>
969
+ <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
970
+
971
+ Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic
972
+ is used to segment compounds into their parts, and the compound itself is kept as a synonym.
973
+
974
+ Valid values for the mode attribute are:
975
+ normal: regular segmentation
976
+ search: segmentation useful for search, keeping compounds as synonyms (default)
977
+ extended: same as search mode, but also splits unknown words into unigrams (experimental)
978
+
979
+ For some applications it might be good to use search mode for indexing and normal mode for
980
+ queries to reduce recall and prevent parts of compounds from being matched and highlighted.
981
+ Use <analyzer type="index"> and <analyzer type="query"> for this, with mode normal in the query analyzer.
982
+
983
+ Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
984
+ model with your own entries for segmentation, part-of-speech tags and readings without a need
985
+ to specify weights. Note that user dictionaries have not been subject to extensive testing.
986
+
987
+ User dictionary attributes are:
988
+ userDictionary: user dictionary filename
989
+ userDictionaryEncoding: user dictionary encoding (default is UTF-8)
990
+
991
+ See lang/userdict_ja.txt for a sample user dictionary file.
992
+
993
+ Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
994
+
995
+ See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
996
+ -->
997
+ <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
998
+ <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
999
+ <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
1000
+ <filter class="solr.JapaneseBaseFormFilterFactory"/>
1001
+ <!-- Removes tokens whose part-of-speech tags are listed in lang/stoptags_ja.txt -->
1002
+ <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" enablePositionIncrements="true"/>
1003
+ <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
1004
+ <filter class="solr.CJKWidthFilterFactory"/>
1005
+ <!-- Removes common tokens that are typically not useful for search but have a negative effect on ranking -->
1006
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" enablePositionIncrements="true" />
1007
+ <!-- Normalizes common katakana spelling variations by removing any trailing long sound character (U+30FC) -->
1008
+ <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
1009
+ <!-- Lower-cases romaji characters -->
1010
+ <filter class="solr.LowerCaseFilterFactory"/>
1011
+ </analyzer>
1012
+ </fieldType>
1013
+
1014
+ <!-- Latvian (text_lv): standard tokenizer, lowercasing, stop words from lang/stopwords_lv.txt, then LatvianStemFilterFactory -->
1015
+ <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
1016
+ <analyzer>
1017
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1018
+ <filter class="solr.LowerCaseFilterFactory"/>
1019
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" enablePositionIncrements="true"/>
1020
+ <filter class="solr.LatvianStemFilterFactory"/>
1021
+ </analyzer>
1022
+ </fieldType>
1023
+
1024
+ <!-- Dutch (text_nl): standard tokenizer, lowercasing, snowball-format stop words from lang/stopwords_nl.txt, stemmer overrides from lang/stemdict_nl.txt, then Snowball stemming with language="Dutch" -->
1025
+ <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
1026
+ <analyzer>
1027
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1028
+ <filter class="solr.LowerCaseFilterFactory"/>
1029
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" enablePositionIncrements="true"/>
1030
+ <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
1031
+ <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
1032
+ </analyzer>
1033
+ </fieldType>
1034
+
1035
+ <!-- Norwegian (text_no): standard tokenizer, lowercasing, snowball-format stop words from lang/stopwords_no.txt, then Snowball stemming with language="Norwegian" -->
1036
+ <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
1037
+ <analyzer>
1038
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1039
+ <filter class="solr.LowerCaseFilterFactory"/>
1040
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" enablePositionIncrements="true"/>
1041
+ <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
1042
+ <!-- less aggressive alternative to the stemmer above: <filter class="solr.NorwegianLightStemFilterFactory"/> -->
1043
+ <!-- singular/plural only alternative: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
1044
+ </analyzer>
1045
+ </fieldType>
1046
+
1047
+ <!-- Portuguese (text_pt): standard tokenizer, lowercasing, snowball-format stop words from lang/stopwords_pt.txt, then PortugueseLightStemFilterFactory -->
1048
+ <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
1049
+ <analyzer>
1050
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1051
+ <filter class="solr.LowerCaseFilterFactory"/>
1052
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" enablePositionIncrements="true"/>
1053
+ <filter class="solr.PortugueseLightStemFilterFactory"/>
1054
+ <!-- less aggressive alternative to the stemmer above: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
1055
+ <!-- more aggressive alternative: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
1056
+ <!-- most aggressive alternative: <filter class="solr.PortugueseStemFilterFactory"/> -->
1057
+ </analyzer>
1058
+ </fieldType>
1059
+
1060
+ <!-- Romanian (text_ro): standard tokenizer, lowercasing, stop words from lang/stopwords_ro.txt, then Snowball stemming with language="Romanian" -->
1061
+ <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
1062
+ <analyzer>
1063
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1064
+ <filter class="solr.LowerCaseFilterFactory"/>
1065
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" enablePositionIncrements="true"/>
1066
+ <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
1067
+ </analyzer>
1068
+ </fieldType>
1069
+
1070
+ <!-- Russian (text_ru): standard tokenizer, lowercasing, snowball-format stop words from lang/stopwords_ru.txt, then Snowball stemming with language="Russian" -->
1071
+ <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
1072
+ <analyzer>
1073
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1074
+ <filter class="solr.LowerCaseFilterFactory"/>
1075
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" enablePositionIncrements="true"/>
1076
+ <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
1077
+ <!-- less aggressive alternative to the stemmer above: <filter class="solr.RussianLightStemFilterFactory"/> -->
1078
+ </analyzer>
1079
+ </fieldType>
1080
+
1081
+ <!-- Swedish (text_sv): standard tokenizer, lowercasing, snowball-format stop words from lang/stopwords_sv.txt, then Snowball stemming with language="Swedish" -->
1082
+ <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
1083
+ <analyzer>
1084
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1085
+ <filter class="solr.LowerCaseFilterFactory"/>
1086
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" enablePositionIncrements="true"/>
1087
+ <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
1088
+ <!-- less aggressive alternative to the stemmer above: <filter class="solr.SwedishLightStemFilterFactory"/> -->
1089
+ </analyzer>
1090
+ </fieldType>
1091
+
1092
+ <!-- Thai (text_th): standard tokenizer, lowercasing, ThaiWordFilterFactory, then stop words from lang/stopwords_th.txt; no stemmer is configured in this chain -->
1093
+ <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
1094
+ <analyzer>
1095
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1096
+ <filter class="solr.LowerCaseFilterFactory"/>
1097
+ <filter class="solr.ThaiWordFilterFactory"/>
1098
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" enablePositionIncrements="true"/>
1099
+ </analyzer>
1100
+ </fieldType>
1101
+
1102
+ <!-- Turkish (text_tr): standard tokenizer, TurkishLowerCaseFilterFactory (instead of the generic lowercase filter), stop words from lang/stopwords_tr.txt matched with ignoreCase="false", then Snowball stemming with language="Turkish" -->
1103
+ <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
1104
+ <analyzer>
1105
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1106
+ <filter class="solr.TurkishLowerCaseFilterFactory"/>
1107
+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" enablePositionIncrements="true"/>
1108
+ <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
1109
+ </analyzer>
1110
+ </fieldType>
1111
+
1112
+ </types>
1113
+
1114
+ <!-- Similarity is the scoring routine for each document vs. a query.
1115
+ A custom Similarity or SimilarityFactory may be specified here, but
1116
+ the default is fine for most applications.
1117
+ For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
1118
+ -->
1119
+ <!--
1120
+ <similarity class="com.example.solr.CustomSimilarityFactory">
1121
+ <str name="paramkey">param value</str>
1122
+ </similarity>
1123
+ -->
1124
+
1125
+ </schema>