blacklight-access_controls 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/Gemfile +32 -0
  4. data/README.textile +74 -0
  5. data/Rakefile +47 -0
  6. data/VERSION +1 -0
  7. data/blacklight-access_controls.gemspec +29 -0
  8. data/lib/blacklight-access_controls.rb +23 -0
  9. data/lib/blacklight/access_controls.rb +14 -0
  10. data/lib/blacklight/access_controls/ability.rb +148 -0
  11. data/lib/blacklight/access_controls/catalog.rb +27 -0
  12. data/lib/blacklight/access_controls/config.rb +39 -0
  13. data/lib/blacklight/access_controls/enforcement.rb +103 -0
  14. data/lib/blacklight/access_controls/permissions_cache.rb +19 -0
  15. data/lib/blacklight/access_controls/permissions_query.rb +53 -0
  16. data/lib/blacklight/access_controls/permissions_solr_document.rb +2 -0
  17. data/lib/blacklight/access_controls/user.rb +23 -0
  18. data/lib/generators/blacklight/ability.rb +4 -0
  19. data/lib/generators/blacklight/access_controls_generator.rb +49 -0
  20. data/solr_conf/conf/abc123 +0 -0
  21. data/solr_conf/conf/admin-extra.html +24 -0
  22. data/solr_conf/conf/admin-extra.menu-bottom.html +25 -0
  23. data/solr_conf/conf/admin-extra.menu-top.html +25 -0
  24. data/solr_conf/conf/clustering/carrot2/kmeans-attributes.xml +19 -0
  25. data/solr_conf/conf/clustering/carrot2/lingo-attributes.xml +24 -0
  26. data/solr_conf/conf/clustering/carrot2/stc-attributes.xml +19 -0
  27. data/solr_conf/conf/currency.xml +67 -0
  28. data/solr_conf/conf/dataimport.properties +3 -0
  29. data/solr_conf/conf/db-data-config.xml +93 -0
  30. data/solr_conf/conf/elevate.xml +38 -0
  31. data/solr_conf/conf/lang/contractions_ca.txt +8 -0
  32. data/solr_conf/conf/lang/contractions_fr.txt +15 -0
  33. data/solr_conf/conf/lang/contractions_ga.txt +5 -0
  34. data/solr_conf/conf/lang/contractions_it.txt +23 -0
  35. data/solr_conf/conf/lang/hyphenations_ga.txt +5 -0
  36. data/solr_conf/conf/lang/stemdict_nl.txt +6 -0
  37. data/solr_conf/conf/lang/stoptags_ja.txt +420 -0
  38. data/solr_conf/conf/lang/stopwords_ar.txt +125 -0
  39. data/solr_conf/conf/lang/stopwords_bg.txt +193 -0
  40. data/solr_conf/conf/lang/stopwords_ca.txt +220 -0
  41. data/solr_conf/conf/lang/stopwords_ckb.txt +136 -0
  42. data/solr_conf/conf/lang/stopwords_cz.txt +172 -0
  43. data/solr_conf/conf/lang/stopwords_da.txt +110 -0
  44. data/solr_conf/conf/lang/stopwords_de.txt +294 -0
  45. data/solr_conf/conf/lang/stopwords_el.txt +78 -0
  46. data/solr_conf/conf/lang/stopwords_en.txt +54 -0
  47. data/solr_conf/conf/lang/stopwords_es.txt +356 -0
  48. data/solr_conf/conf/lang/stopwords_eu.txt +99 -0
  49. data/solr_conf/conf/lang/stopwords_fa.txt +313 -0
  50. data/solr_conf/conf/lang/stopwords_fi.txt +97 -0
  51. data/solr_conf/conf/lang/stopwords_fr.txt +186 -0
  52. data/solr_conf/conf/lang/stopwords_ga.txt +110 -0
  53. data/solr_conf/conf/lang/stopwords_gl.txt +161 -0
  54. data/solr_conf/conf/lang/stopwords_hi.txt +235 -0
  55. data/solr_conf/conf/lang/stopwords_hu.txt +211 -0
  56. data/solr_conf/conf/lang/stopwords_hy.txt +46 -0
  57. data/solr_conf/conf/lang/stopwords_id.txt +359 -0
  58. data/solr_conf/conf/lang/stopwords_it.txt +303 -0
  59. data/solr_conf/conf/lang/stopwords_ja.txt +127 -0
  60. data/solr_conf/conf/lang/stopwords_lv.txt +172 -0
  61. data/solr_conf/conf/lang/stopwords_nl.txt +119 -0
  62. data/solr_conf/conf/lang/stopwords_no.txt +194 -0
  63. data/solr_conf/conf/lang/stopwords_pt.txt +253 -0
  64. data/solr_conf/conf/lang/stopwords_ro.txt +233 -0
  65. data/solr_conf/conf/lang/stopwords_ru.txt +243 -0
  66. data/solr_conf/conf/lang/stopwords_sv.txt +133 -0
  67. data/solr_conf/conf/lang/stopwords_th.txt +119 -0
  68. data/solr_conf/conf/lang/stopwords_tr.txt +212 -0
  69. data/solr_conf/conf/lang/userdict_ja.txt +29 -0
  70. data/solr_conf/conf/mapping-FoldToASCII.txt +3813 -0
  71. data/solr_conf/conf/mapping-ISOLatin1Accent.txt +246 -0
  72. data/solr_conf/conf/protwords.txt +21 -0
  73. data/solr_conf/conf/schema.blacklight.xml +724 -0
  74. data/solr_conf/conf/schema.xml +1268 -0
  75. data/solr_conf/conf/schema.xml.orig +1524 -0
  76. data/solr_conf/conf/solrconfig.adams.xml +1903 -0
  77. data/solr_conf/conf/solrconfig.blacklight.xml +411 -0
  78. data/solr_conf/conf/solrconfig.old.xml +1634 -0
  79. data/solr_conf/conf/solrconfig.xml +332 -0
  80. data/solr_conf/conf/solrconfig.xml.orig +3531 -0
  81. data/solr_conf/conf/spellings.txt +2 -0
  82. data/solr_conf/conf/stopwords.txt +14 -0
  83. data/solr_conf/conf/synonyms.txt +29 -0
  84. data/solr_conf/conf/update-script.js +53 -0
  85. data/solr_conf/conf/xslt/example.xsl +132 -0
  86. data/solr_conf/conf/xslt/example_atom.xsl +67 -0
  87. data/solr_conf/conf/xslt/example_rss.xsl +66 -0
  88. data/solr_conf/conf/xslt/luke.xsl +337 -0
  89. data/solr_conf/conf/xslt/updateXml.xsl +70 -0
  90. data/spec/factories/user.rb +6 -0
  91. data/spec/spec_helper.rb +29 -0
  92. data/spec/support/solr_support.rb +11 -0
  93. data/spec/test_app_templates/blacklight.yml +18 -0
  94. data/spec/test_app_templates/lib/generators/test_app_generator.rb +25 -0
  95. data/spec/unit/ability_spec.rb +202 -0
  96. data/spec/unit/catalog_spec.rb +41 -0
  97. data/spec/unit/config_spec.rb +69 -0
  98. data/spec/unit/enforcement_spec.rb +147 -0
  99. metadata +265 -0
@@ -0,0 +1,1268 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ This is the Solr schema file. This file should be named "schema.xml" and
21
+ should be in the conf directory under the solr home
22
+ (i.e. ./solr/conf/schema.xml by default)
23
+ or located where the classloader for the Solr webapp can find it.
24
+
25
+ This example schema is the recommended starting point for users.
26
+ It should be kept correct and concise, usable out-of-the-box.
27
+
28
+ For more information on how to customize this file, please see
29
+ http://wiki.apache.org/solr/SchemaXml
30
+
31
+ PERFORMANCE NOTE: this schema includes many optional features and should not
32
+ be used for benchmarking. To improve performance one could
33
+ - set stored="false" for all fields possible (esp large fields) when you
34
+ only need to search on the field but don't need to return the original
35
+ value.
36
+ - set indexed="false" if you don't need to search on the field, but only
37
+ return the field as a result of searching on other indexed fields.
38
+ - remove all unneeded copyField statements
39
+ - for best index size and searching performance, set indexed="false"
40
+ for all general text fields, use copyField to copy them to the
41
+ catchall "text" field, and use that for searching.
42
+ - For maximum indexing performance, use the ConcurrentUpdateSolrServer
43
+ java client.
44
+ - Remember to run the JVM in server mode, and use a higher logging level
45
+ that avoids logging every request
46
+ -->
47
+
48
+ <schema name="example-DIH-db" version="1.5">
49
+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
50
+ version="x.y" is Solr's version number for the schema syntax and
51
+ semantics. It should not normally be changed by applications.
52
+
53
+ 1.0: multiValued attribute did not exist, all fields are multiValued
54
+ by nature
55
+ 1.1: multiValued attribute introduced, false by default
56
+ 1.2: omitTermFreqAndPositions attribute introduced, true by default
57
+ except for text fields.
58
+ 1.3: removed optional field compress feature
59
+ 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser
60
+ behavior when a single string produces multiple tokens. Defaults
61
+ to off for version >= 1.4
62
+ 1.5: omitNorms defaults to true for primitive field types
63
+ (int, float, boolean, string...)
64
+ -->
65
+
66
+
67
+ <!-- Valid attributes for fields:
68
+ name: mandatory - the name for the field
69
+ type: mandatory - the name of a field type from the
70
+ <types> fieldType section
71
+ indexed: true if this field should be indexed (searchable or sortable)
72
+ stored: true if this field should be retrievable
73
+ docValues: true if this field should have doc values. Doc values are
74
+ useful for faceting, grouping, sorting and function queries. Although not
75
+ required, doc values will make the index faster to load, more
76
+ NRT-friendly and more memory-efficient. They however come with some
77
+ limitations: they are currently only supported by StrField, UUIDField
78
+ and all Trie*Fields, and depending on the field type, they might
79
+ require the field to be single-valued, be required or have a default
80
+ value (check the documentation of the field type you're interested in
81
+ for more information)
82
+ multiValued: true if this field may contain multiple values per document
83
+ omitNorms: (expert) set to true to omit the norms associated with
84
+ this field (this disables length normalization and index-time
85
+ boosting for the field, and saves some memory). Only full-text
86
+ fields or fields that need an index-time boost need norms.
87
+ Norms are omitted for primitive (non-analyzed) types by default.
88
+ termVectors: [false] set to true to store the term vector for a
89
+ given field.
90
+ When using MoreLikeThis, fields used for similarity should be
91
+ stored for best performance.
92
+ termPositions: Store position information with the term vector.
93
+ This will increase storage costs.
94
+ termOffsets: Store offset information with the term vector. This
95
+ will increase storage costs.
96
+ required: The field is required. It will throw an error if the
97
+ value does not exist
98
+ default: a value that should be used if no value is specified
99
+ when adding a document.
100
+ -->
101
+
102
+ <!-- field names should consist of alphanumeric or underscore characters only and
103
+ not start with a digit. This is not currently strictly enforced,
104
+ but other field names will not have first class support from all components
105
+ and back compatibility is not guaranteed. Names with both leading and
106
+ trailing underscores (e.g. _version_) are reserved.
107
+ -->
108
+
109
+ <!-- If you remove this field, you must _also_ disable the update log in solrconfig.xml
110
+ or Solr won't start. _version_ and update log are required for SolrCloud
111
+ -->
112
+ <field name="_version_" type="long" indexed="true" stored="true"/>
113
+
114
+ <!-- points to the root document of a block of nested documents. Required for nested
115
+ document support, may be removed otherwise
116
+ -->
117
+ <field name="_root_" type="string" indexed="true" stored="false"/>
118
+
119
+ <!-- Only remove the "id" field if you have a very good reason to. While not strictly
120
+ required, it is highly recommended. A <uniqueKey> is present in almost all Solr
121
+ installations. See the <uniqueKey> declaration below where <uniqueKey> is set to "id".
122
+ -->
123
+ <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
124
+
125
+ <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
126
+ <field name="name" type="text_general" indexed="true" stored="true"/>
127
+ <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
128
+ <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
129
+ <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
130
+ <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
131
+
132
+ <field name="weight" type="float" indexed="true" stored="true"/>
133
+ <field name="price" type="float" indexed="true" stored="true"/>
134
+ <field name="popularity" type="int" indexed="true" stored="true" />
135
+ <field name="inStock" type="boolean" indexed="true" stored="true" />
136
+
137
+ <field name="store" type="location" indexed="true" stored="true"/>
138
+
139
+ <!-- Common metadata fields, named specifically to match up with
140
+ SolrCell metadata when parsing rich documents such as Word, PDF.
141
+ Some fields are multiValued only because Tika currently may return
142
+ multiple values for them. Some metadata is parsed from the documents,
143
+ but there are some which come from the client context:
144
+ "content_type": From the HTTP headers of incoming stream
145
+ "resourcename": From SolrCell request param resource.name
146
+ -->
147
+ <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
148
+ <field name="subject" type="text_general" indexed="true" stored="true"/>
149
+ <field name="description" type="text_general" indexed="true" stored="true"/>
150
+ <field name="comments" type="text_general" indexed="true" stored="true"/>
151
+ <field name="author" type="text_general" indexed="true" stored="true"/>
152
+ <field name="keywords" type="text_general" indexed="true" stored="true"/>
153
+ <field name="category" type="text_general" indexed="true" stored="true"/>
154
+ <field name="resourcename" type="text_general" indexed="true" stored="true"/>
155
+ <field name="url" type="text_general" indexed="true" stored="true"/>
156
+ <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
157
+ <field name="last_modified" type="date" indexed="true" stored="true"/>
158
+ <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
159
+
160
+ <!-- Main body of document extracted by SolrCell.
161
+ NOTE: This field is not indexed by default, since it is also copied to "text"
162
+ using copyField below. This is to save space. Use this field for returning and
163
+ highlighting document content. Use the "text" field to search the content. -->
164
+ <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>
165
+
166
+
167
+ <!-- catchall field, containing all other searchable text fields (implemented
168
+ via copyField further on in this schema) -->
169
+ <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
170
+
171
+ <!-- catchall text field that indexes tokens both normally and in reverse for efficient
172
+ leading wildcard queries. -->
173
+ <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
174
+
175
+ <!-- non-tokenized version of manufacturer to make it easier to sort or group
176
+ results by manufacturer. copied from "manu" via copyField -->
177
+ <field name="manu_exact" type="string" indexed="true" stored="false"/>
178
+
179
+ <field name="payloads" type="payloads" indexed="true" stored="true"/>
180
+
181
+
182
+ <!--
183
+ Some fields such as popularity and manu_exact could be modified to
184
+ leverage doc values:
185
+ <field name="popularity" type="int" indexed="true" stored="true" docValues="true" />
186
+ <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" />
187
+ <field name="cat" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
188
+
189
+
190
+ Although it would make indexing slightly slower and the index bigger, it
191
+ would also make the index faster to load, more memory-efficient and more
192
+ NRT-friendly.
193
+ -->
194
+
195
+ <!-- Dynamic field definitions allow using convention over configuration
196
+ for fields via the specification of patterns to match field names.
197
+ EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
198
+ RESTRICTION: the glob-like pattern in the name attribute must have
199
+ a "*" only at the start or the end. -->
200
+
201
+ <!-- text (_t...) -->
202
+ <dynamicField name="*_ti" type="text_general" stored="false" indexed="true" multiValued="false"/>
203
+ <dynamicField name="*_tim" type="text_general" stored="false" indexed="true" multiValued="true"/>
204
+ <dynamicField name="*_ts" type="text_general" stored="true" indexed="false" multiValued="false"/>
205
+ <dynamicField name="*_tsm" type="text_general" stored="true" indexed="false" multiValued="true"/>
206
+ <dynamicField name="*_tsi" type="text_general" stored="true" indexed="true" multiValued="false"/>
207
+ <dynamicField name="*_tsim" type="text_general" stored="true" indexed="true" multiValued="true"/>
208
+ <dynamicField name="*_tiv" type="text_general" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
209
+ <dynamicField name="*_timv" type="text_general" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
210
+ <dynamicField name="*_tsiv" type="text_general" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
211
+ <dynamicField name="*_tsimv" type="text_general" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
212
+
213
+ <!-- English text (_te...) -->
214
+ <dynamicField name="*_tei" type="text_en" stored="false" indexed="true" multiValued="false"/>
215
+ <dynamicField name="*_teim" type="text_en" stored="false" indexed="true" multiValued="true"/>
216
+ <dynamicField name="*_tes" type="text_en" stored="true" indexed="false" multiValued="false"/>
217
+ <dynamicField name="*_tesm" type="text_en" stored="true" indexed="false" multiValued="true"/>
218
+ <dynamicField name="*_tesi" type="text_en" stored="true" indexed="true" multiValued="false"/>
219
+ <dynamicField name="*_tesim" type="text_en" stored="true" indexed="true" multiValued="true"/>
220
+ <dynamicField name="*_teiv" type="text_en" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
221
+ <dynamicField name="*_teimv" type="text_en" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
222
+ <dynamicField name="*_tesiv" type="text_en" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
223
+ <dynamicField name="*_tesimv" type="text_en" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
224
+
225
+ <!-- string (_s...) -->
226
+ <dynamicField name="*_si" type="string" stored="false" indexed="true" multiValued="false"/>
227
+ <dynamicField name="*_sim" type="string" stored="false" indexed="true" multiValued="true"/>
228
+ <dynamicField name="*_ss" type="string" stored="true" indexed="false" multiValued="false"/>
229
+ <dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true"/>
230
+ <dynamicField name="*_ssi" type="string" stored="true" indexed="true" multiValued="false"/>
231
+ <dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true"/>
232
+ <dynamicField name="*_ssort" type="alphaOnlySort" stored="false" indexed="true" multiValued="false"/>
233
+
234
+ <!-- integer (_i...) -->
235
+ <dynamicField name="*_ii" type="int" stored="false" indexed="true" multiValued="false"/>
236
+ <dynamicField name="*_iim" type="int" stored="false" indexed="true" multiValued="true"/>
237
+ <dynamicField name="*_is" type="int" stored="true" indexed="false" multiValued="false"/>
238
+ <dynamicField name="*_ism" type="int" stored="true" indexed="false" multiValued="true"/>
239
+ <dynamicField name="*_isi" type="int" stored="true" indexed="true" multiValued="false"/>
240
+ <dynamicField name="*_isim" type="int" stored="true" indexed="true" multiValued="true"/>
241
+
242
+ <!-- trie integer (_it...) (for faster range queries) -->
243
+ <dynamicField name="*_iti" type="tint" stored="false" indexed="true" multiValued="false"/>
244
+ <dynamicField name="*_itim" type="tint" stored="false" indexed="true" multiValued="true"/>
245
+ <dynamicField name="*_its" type="tint" stored="true" indexed="false" multiValued="false"/>
246
+ <dynamicField name="*_itsm" type="tint" stored="true" indexed="false" multiValued="true"/>
247
+ <dynamicField name="*_itsi" type="tint" stored="true" indexed="true" multiValued="false"/>
248
+ <dynamicField name="*_itsim" type="tint" stored="true" indexed="true" multiValued="true"/>
249
+
250
+ <!-- date (_dt...) -->
251
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
252
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z -->
253
+ <dynamicField name="*_dti" type="date" stored="false" indexed="true" multiValued="false"/>
254
+ <dynamicField name="*_dtim" type="date" stored="false" indexed="true" multiValued="true"/>
255
+ <dynamicField name="*_dts" type="date" stored="true" indexed="false" multiValued="false"/>
256
+ <dynamicField name="*_dtsm" type="date" stored="true" indexed="false" multiValued="true"/>
257
+ <dynamicField name="*_dtsi" type="date" stored="true" indexed="true" multiValued="false"/>
258
+ <dynamicField name="*_dtsim" type="date" stored="true" indexed="true" multiValued="true"/>
259
+
260
+ <!-- trie date (_dtt...) (for faster range queries) -->
261
+ <dynamicField name="*_dtti" type="tdate" stored="false" indexed="true" multiValued="false"/>
262
+ <dynamicField name="*_dttim" type="tdate" stored="false" indexed="true" multiValued="true"/>
263
+ <dynamicField name="*_dtts" type="tdate" stored="true" indexed="false" multiValued="false"/>
264
+ <dynamicField name="*_dttsm" type="tdate" stored="true" indexed="false" multiValued="true"/>
265
+ <dynamicField name="*_dttsi" type="tdate" stored="true" indexed="true" multiValued="false"/>
266
+ <dynamicField name="*_dttsim" type="tdate" stored="true" indexed="true" multiValued="true"/>
267
+
268
+ <!-- long (_l...) -->
269
+ <dynamicField name="*_li" type="long" stored="false" indexed="true" multiValued="false"/>
270
+ <dynamicField name="*_lim" type="long" stored="false" indexed="true" multiValued="true"/>
271
+ <dynamicField name="*_ls" type="long" stored="true" indexed="false" multiValued="false"/>
272
+ <dynamicField name="*_lsm" type="long" stored="true" indexed="false" multiValued="true"/>
273
+ <dynamicField name="*_lsi" type="long" stored="true" indexed="true" multiValued="false"/>
274
+ <dynamicField name="*_lsim" type="long" stored="true" indexed="true" multiValued="true"/>
275
+
276
+ <!-- trie long (_lt...) (for faster range queries) -->
277
+ <dynamicField name="*_lti" type="tlong" stored="false" indexed="true" multiValued="false"/>
278
+ <dynamicField name="*_ltim" type="tlong" stored="false" indexed="true" multiValued="true"/>
279
+ <dynamicField name="*_lts" type="tlong" stored="true" indexed="false" multiValued="false"/>
280
+ <dynamicField name="*_ltsm" type="tlong" stored="true" indexed="false" multiValued="true"/>
281
+ <dynamicField name="*_ltsi" type="tlong" stored="true" indexed="true" multiValued="false"/>
282
+ <dynamicField name="*_ltsim" type="tlong" stored="true" indexed="true" multiValued="true"/>
283
+
284
+ <!-- double (_db...) -->
285
+ <dynamicField name="*_dbi" type="double" stored="false" indexed="true" multiValued="false"/>
286
+ <dynamicField name="*_dbim" type="double" stored="false" indexed="true" multiValued="true"/>
287
+ <dynamicField name="*_dbs" type="double" stored="true" indexed="false" multiValued="false"/>
288
+ <dynamicField name="*_dbsm" type="double" stored="true" indexed="false" multiValued="true"/>
289
+ <dynamicField name="*_dbsi" type="double" stored="true" indexed="true" multiValued="false"/>
290
+ <dynamicField name="*_dbsim" type="double" stored="true" indexed="true" multiValued="true"/>
291
+
292
+ <!-- trie double (_dbt...) (for faster range queries) -->
293
+ <dynamicField name="*_dbti" type="tdouble" stored="false" indexed="true" multiValued="false"/>
294
+ <dynamicField name="*_dbtim" type="tdouble" stored="false" indexed="true" multiValued="true"/>
295
+ <dynamicField name="*_dbts" type="tdouble" stored="true" indexed="false" multiValued="false"/>
296
+ <dynamicField name="*_dbtsm" type="tdouble" stored="true" indexed="false" multiValued="true"/>
297
+ <dynamicField name="*_dbtsi" type="tdouble" stored="true" indexed="true" multiValued="false"/>
298
+ <dynamicField name="*_dbtsim" type="tdouble" stored="true" indexed="true" multiValued="true"/>
299
+
300
+ <!-- float (_f...) -->
301
+ <dynamicField name="*_fi" type="float" stored="false" indexed="true" multiValued="false"/>
302
+ <dynamicField name="*_fim" type="float" stored="false" indexed="true" multiValued="true"/>
303
+ <dynamicField name="*_fs" type="float" stored="true" indexed="false" multiValued="false"/>
304
+ <dynamicField name="*_fsm" type="float" stored="true" indexed="false" multiValued="true"/>
305
+ <dynamicField name="*_fsi" type="float" stored="true" indexed="true" multiValued="false"/>
306
+ <dynamicField name="*_fsim" type="float" stored="true" indexed="true" multiValued="true"/>
307
+
308
+ <!-- trie float (_ft...) (for faster range queries) -->
309
+ <dynamicField name="*_fti" type="tfloat" stored="false" indexed="true" multiValued="false"/>
310
+ <dynamicField name="*_ftim" type="tfloat" stored="false" indexed="true" multiValued="true"/>
311
+ <dynamicField name="*_fts" type="tfloat" stored="true" indexed="false" multiValued="false"/>
312
+ <dynamicField name="*_ftsm" type="tfloat" stored="true" indexed="false" multiValued="true"/>
313
+ <dynamicField name="*_ftsi" type="tfloat" stored="true" indexed="true" multiValued="false"/>
314
+ <dynamicField name="*_ftsim" type="tfloat" stored="true" indexed="true" multiValued="true"/>
315
+
316
+ <!-- boolean (_b...) -->
317
+ <dynamicField name="*_bi" type="boolean" stored="false" indexed="true" multiValued="false"/>
318
+ <dynamicField name="*_bs" type="boolean" stored="true" indexed="false" multiValued="false"/>
319
+ <dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="false"/>
320
+
321
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
322
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
323
+
324
+ <!-- location (_ll...) -->
325
+ <dynamicField name="*_lli" type="location" stored="false" indexed="true" multiValued="false"/>
326
+ <dynamicField name="*_llim" type="location" stored="false" indexed="true" multiValued="true"/>
327
+ <dynamicField name="*_lls" type="location" stored="true" indexed="false" multiValued="false"/>
328
+ <dynamicField name="*_llsm" type="location" stored="true" indexed="false" multiValued="true"/>
329
+ <dynamicField name="*_llsi" type="location" stored="true" indexed="true" multiValued="false"/>
330
+ <dynamicField name="*_llsim" type="location" stored="true" indexed="true" multiValued="true"/>
331
+
332
+ <!--<dynamicField name="*_i" type="int" indexed="true" stored="true"/>-->
333
+ <!--<dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/>-->
334
+ <!--<dynamicField name="*_s" type="string" indexed="true" stored="true" />-->
335
+ <!--<dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>-->
336
+ <!--<dynamicField name="*_l" type="long" indexed="true" stored="true"/>-->
337
+ <!--<dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/>-->
338
+ <!--<dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>-->
339
+ <!--<dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>-->
340
+ <!--<dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/>-->
341
+ <!--<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>-->
342
+ <!--<dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>-->
343
+ <!--<dynamicField name="*_f" type="float" indexed="true" stored="true"/>-->
344
+ <!--<dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/>-->
345
+ <!--<dynamicField name="*_d" type="double" indexed="true" stored="true"/>-->
346
+ <!--<dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/>-->
347
+
348
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
349
+ <!--<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />-->
350
+
351
+ <!--<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>-->
352
+ <!--<dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>-->
353
+ <!--<dynamicField name="*_p" type="location" indexed="true" stored="true"/>-->
354
+
355
+ <!-- some trie-coded dynamic fields for faster range queries -->
356
+ <!--<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>-->
357
+ <!--<dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>-->
358
+ <!--<dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>-->
359
+ <!--<dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>-->
360
+ <!--<dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>-->
361
+
362
+ <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
363
+
364
+ <!--<dynamicField name="ignored_*" type="ignored" multiValued="true"/>-->
365
+ <!--<dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>-->
366
+
367
+ <!--<dynamicField name="random_*" type="random" />-->
368
+
369
+ <!-- uncomment the following to ignore any fields that don't already match an existing
370
+ field name or dynamic field, rather than reporting them as an error.
371
+ alternately, change the type="ignored" to some other type e.g. "text" if you want
372
+ unknown fields indexed and/or stored by default -->
373
+ <!--dynamicField name="*" type="ignored" multiValued="true" /-->
374
+
375
+
376
+
377
+
378
+ <!-- Field to use to determine and enforce document uniqueness.
379
+ Unless this field is marked with required="false", it will be a required field
380
+ -->
381
+ <uniqueKey>id</uniqueKey>
382
+
383
+ <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
384
+ parsing a query string that isn't explicit about the field. Machine (non-user)
385
+ generated queries are best made explicit, or they can use the "df" request parameter
386
+ which takes precedence over this.
387
+ Note: Un-commenting defaultSearchField will be insufficient if your request handler
388
+ in solrconfig.xml defines "df", which takes precedence. That would need to be removed.
389
+ <defaultSearchField>text</defaultSearchField> -->
390
+
391
+ <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query parsers
392
+ when parsing a query string to determine if a clause of the query should be marked as
393
+ required or optional, assuming the clause isn't already marked by some operator.
394
+ The default is OR, which is generally assumed so it is not a good idea to change it
395
+ globally here. The "q.op" request parameter takes precedence over this.
396
+ <solrQueryParser defaultOperator="OR"/> -->
397
+
398
+ <!-- copyField commands copy one field to another at the time a document
399
+ is added to the index. It's used either to index the same field differently,
400
+ or to add multiple fields to the same field for easier/faster searching. -->
401
+
402
+ <copyField source="cat" dest="text"/>
403
+ <copyField source="name" dest="text"/>
404
+ <copyField source="manu" dest="text"/>
405
+ <copyField source="features" dest="text"/>
406
+ <copyField source="includes" dest="text"/>
407
+ <copyField source="manu" dest="manu_exact"/>
408
+
409
+ <!-- Copy the price into a currency enabled field (default USD) -->
410
+ <copyField source="price" dest="price_c"/>
411
+
412
+ <!-- Text fields from SolrCell to search by default in our catch-all field -->
413
+ <copyField source="title" dest="text"/>
414
+ <copyField source="author" dest="text"/>
415
+ <copyField source="description" dest="text"/>
416
+ <copyField source="keywords" dest="text"/>
417
+ <copyField source="content" dest="text"/>
418
+ <copyField source="content_type" dest="text"/>
419
+ <copyField source="resourcename" dest="text"/>
420
+ <copyField source="url" dest="text"/>
421
+
422
+ <!-- Create a string version of author for faceting -->
423
+ <copyField source="author" dest="author_ssi"/>
424
+
425
+ <!-- Above, multiple source fields are copied to the [text] field.
426
+ Another way to map multiple source fields to the same
427
+ destination field is to use the dynamic field syntax.
428
+ copyField also supports a maxChars to copy setting. -->
429
+
430
+ <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
431
+
432
+ <!-- copy name to alphaNameSort, a field designed for sorting by name -->
433
+ <!-- <copyField source="name" dest="alphaNameSort"/> -->
434
+
435
+
436
+ <!-- field type definitions. The "name" attribute is
437
+ just a label to be used by field definitions. The "class"
438
+ attribute and any other attributes determine the real
439
+ behavior of the fieldType.
440
+ Class names starting with "solr" refer to java classes in a
441
+ standard package such as org.apache.solr.analysis
442
+ -->
443
+
444
+ <!-- The StrField type is not analyzed, but indexed/stored verbatim.
445
+ It supports doc values but in that case the field needs to be
446
+ single-valued and either required or have a default value.
447
+ -->
448
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
449
+
450
+ <!-- boolean type: "true" or "false" -->
451
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
452
+
453
+ <!-- The sortMissingLast and sortMissingFirst attributes are optional and are
454
+ currently supported on types that are sorted internally as strings
455
+ and on numeric types.
456
+ This includes "string","boolean", and, as of 3.5 (and 4.x),
457
+ int, float, long, date, double, including the "Trie" variants.
458
+ - If sortMissingLast="true", then a sort on this field will cause documents
459
+ without the field to come after documents with the field,
460
+ regardless of the requested sort order (asc or desc).
461
+ - If sortMissingFirst="true", then a sort on this field will cause documents
462
+ without the field to come before documents with the field,
463
+ regardless of the requested sort order.
464
+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
465
+ then default lucene sorting will be used which places docs without the
466
+ field first in an ascending sort and last in a descending sort.
467
+ -->
468
+
469
+ <!--
470
+ Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
471
+
472
+ These fields support doc values, but they require the field to be
473
+ single-valued and either be required or have a default value.
474
+ -->
475
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
476
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
477
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
478
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
479
+
480
+ <!--
481
+ Numeric field types that index each value at various levels of precision
482
+ to accelerate range queries when the number of values between the range
483
+ endpoints is large. See the javadoc for NumericRangeQuery for internal
484
+ implementation details.
485
+
486
+ Smaller precisionStep values (specified in bits) will lead to more tokens
487
+ indexed per value, slightly larger index size, and faster range queries.
488
+ A precisionStep of 0 disables indexing at different precision levels.
489
+ -->
490
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
491
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
492
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
493
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
494
+
495
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
496
+ is a more restricted form of the canonical representation of dateTime
497
+ http://www.w3.org/TR/xmlschema-2/#dateTime
498
+ The trailing "Z" designates UTC time and is mandatory.
499
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
500
+ All other components are mandatory.
501
+
502
+ Expressions can also be used to denote calculations that should be
503
+ performed relative to "NOW" to determine the value, ie...
504
+
505
+ NOW/HOUR
506
+ ... Round to the start of the current hour
507
+ NOW-1DAY
508
+ ... Exactly 1 day prior to now
509
+ NOW/DAY+6MONTHS+3DAYS
510
+ ... 6 months and 3 days in the future from the start of
511
+ the current day
512
+
513
+ Consult the TrieDateField javadocs for more information.
514
+
515
+ Note: For faster range queries, consider the tdate type
516
+ -->
517
+ <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
518
+
519
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
520
+ <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
521
+
522
+
523
+ <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
524
+ <fieldType name="binary" class="solr.BinaryField"/>
525
+
526
+ <!-- The "RandomSortField" is not used to store or search any
527
+ data. You can declare fields of this type in your schema
528
+ to generate pseudo-random orderings of your docs for sorting
529
+ or function purposes. The ordering is generated based on the field
530
+ name and the version of the index. As long as the index version
531
+ remains unchanged, and the same field name is reused,
532
+ the ordering of the docs will be consistent.
533
+ If you want different pseudo-random orderings of documents,
534
+ for the same version of the index, use a dynamicField and
535
+ change the field name in the request.
536
+ -->
537
+ <fieldType name="random" class="solr.RandomSortField" indexed="true" />
538
+
539
+ <!-- solr.TextField allows the specification of custom text analyzers
540
+ specified as a tokenizer and a list of token filters. Different
541
+ analyzers may be specified for indexing and querying.
542
+
543
+ The optional positionIncrementGap puts space between multiple fields of
544
+ this type on the same document, with the purpose of preventing false phrase
545
+ matching across fields.
546
+
547
+ For more info on customizing your analyzer chain, please see
548
+ http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
549
+ -->
550
+
551
+ <!-- One can also specify an existing Analyzer class that has a
552
+ default constructor via the class attribute on the analyzer element.
553
+ Example:
554
+ <fieldType name="text_greek" class="solr.TextField">
555
+ <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
556
+ </fieldType>
557
+ -->
558
+
559
+ <!-- A text field that only splits on whitespace for exact matching of words -->
560
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
561
+ <analyzer>
562
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
563
+ </analyzer>
564
+ </fieldType>
565
+
566
+ <!-- A general text field that has reasonable, generic
567
+ cross-language defaults: it tokenizes with StandardTokenizer,
568
+ removes stop words from case-insensitive "stopwords.txt"
569
+ (empty by default), and down cases. At query time only, it
570
+ also applies synonyms. -->
571
+ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
572
+ <analyzer type="index">
573
+ <tokenizer class="solr.StandardTokenizerFactory"/>
574
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
575
+ <!-- in this example, we will only use synonyms at query time
576
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
577
+ -->
578
+ <filter class="solr.LowerCaseFilterFactory"/>
579
+ </analyzer>
580
+ <analyzer type="query">
581
+ <tokenizer class="solr.StandardTokenizerFactory"/>
582
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
583
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
584
+ <filter class="solr.LowerCaseFilterFactory"/>
585
+ </analyzer>
586
+ </fieldType>
587
+
588
+ <!-- A text field with defaults appropriate for English: it
589
+ tokenizes with StandardTokenizer, removes English stop words
590
+ (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
591
+ finally applies Porter's stemming. The query time analyzer
592
+ also applies synonyms from synonyms.txt. -->
593
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
594
+ <analyzer type="index">
595
+ <tokenizer class="solr.StandardTokenizerFactory"/>
596
+ <!-- in this example, we will only use synonyms at query time
597
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
598
+ -->
599
+ <!-- Case insensitive stop word removal.
600
+ -->
601
+ <filter class="solr.StopFilterFactory"
602
+ ignoreCase="true"
603
+ words="lang/stopwords_en.txt"
604
+ />
605
+ <filter class="solr.LowerCaseFilterFactory"/>
606
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
607
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
608
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
609
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
610
+ -->
611
+ <filter class="solr.PorterStemFilterFactory"/>
612
+ </analyzer>
613
+ <analyzer type="query">
614
+ <tokenizer class="solr.StandardTokenizerFactory"/>
615
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
616
+ <filter class="solr.StopFilterFactory"
617
+ ignoreCase="true"
618
+ words="lang/stopwords_en.txt"
619
+ />
620
+ <filter class="solr.LowerCaseFilterFactory"/>
621
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
622
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
623
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
624
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
625
+ -->
626
+ <filter class="solr.PorterStemFilterFactory"/>
627
+ </analyzer>
628
+ </fieldType>
629
+
630
+ <!-- A text field with defaults appropriate for English, plus
631
+ aggressive word-splitting and autophrase features enabled.
632
+ This field is just like text_en, except it adds
633
+ WordDelimiterFilter to enable splitting and matching of
634
+ words on case-change, alpha numeric boundaries, and
635
+ non-alphanumeric chars. This means certain compound word
636
+ cases will work, for example query "wi fi" will match
637
+ document "WiFi" or "wi-fi".
638
+ -->
639
+ <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
640
+ <analyzer type="index">
641
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
642
+ <!-- in this example, we will only use synonyms at query time
643
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
644
+ -->
645
+ <!-- Case insensitive stop word removal.
646
+ -->
647
+ <filter class="solr.StopFilterFactory"
648
+ ignoreCase="true"
649
+ words="lang/stopwords_en.txt"
650
+ />
651
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
652
+ <filter class="solr.LowerCaseFilterFactory"/>
653
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
654
+ <filter class="solr.PorterStemFilterFactory"/>
655
+ </analyzer>
656
+ <analyzer type="query">
657
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
658
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
659
+ <filter class="solr.StopFilterFactory"
660
+ ignoreCase="true"
661
+ words="lang/stopwords_en.txt"
662
+ />
663
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
664
+ <filter class="solr.LowerCaseFilterFactory"/>
665
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
666
+ <filter class="solr.PorterStemFilterFactory"/>
667
+ </analyzer>
668
+ </fieldType>
669
+
670
+ <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
671
+ but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
672
+ <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
673
+ <analyzer>
674
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
675
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
676
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
677
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
678
+ <filter class="solr.LowerCaseFilterFactory"/>
679
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
680
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
681
+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
682
+ possible with WordDelimiterFilter in conjunction with stemming. -->
683
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
684
+ </analyzer>
685
+ </fieldType>
686
+
687
+ <!-- Just like text_general except it reverses the characters of
688
+ each token, to enable more efficient leading wildcard queries. -->
689
+ <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
690
+ <analyzer type="index">
691
+ <tokenizer class="solr.StandardTokenizerFactory"/>
692
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
693
+ <filter class="solr.LowerCaseFilterFactory"/>
694
+ <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
695
+ maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
696
+ </analyzer>
697
+ <analyzer type="query">
698
+ <tokenizer class="solr.StandardTokenizerFactory"/>
699
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
700
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
701
+ <filter class="solr.LowerCaseFilterFactory"/>
702
+ </analyzer>
703
+ </fieldType>
704
+
705
+ <!-- charFilter + WhitespaceTokenizer -->
706
+ <!--
707
+ <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
708
+ <analyzer>
709
+ <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
710
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
711
+ </analyzer>
712
+ </fieldType>
713
+ -->
714
+
715
+ <!-- This is an example of using the KeywordTokenizer along
716
+ with various TokenFilterFactories to produce a sortable field
717
+ that does not include some properties of the source text
718
+ -->
719
+ <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
720
+ <analyzer>
721
+ <!-- KeywordTokenizer does no actual tokenizing, so the entire
722
+ input string is preserved as a single token
723
+ -->
724
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
725
+ <!-- The LowerCase TokenFilter does what you expect, which can be useful
726
+ when you want your sorting to be case insensitive
727
+ -->
728
+ <filter class="solr.LowerCaseFilterFactory" />
729
+ <!-- The TrimFilter removes any leading or trailing whitespace -->
730
+ <filter class="solr.TrimFilterFactory" />
731
+ <!-- The PatternReplaceFilter gives you the flexibility to use
732
+ Java Regular expression to replace any sequence of characters
733
+ matching a pattern with an arbitrary replacement string,
734
+ which may include back references to portions of the original
735
+ string matched by the pattern.
736
+
737
+ See the Java Regular Expression documentation for more
738
+ information on pattern and replacement string syntax.
739
+
740
+ http://docs.oracle.com/javase/7/docs/api/java/util/regex/package-summary.html
741
+ -->
742
+ <filter class="solr.PatternReplaceFilterFactory"
743
+ pattern="([^a-z])" replacement="" replace="all"
744
+ />
745
+ </analyzer>
746
+ </fieldType>
747
+
748
+ <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
749
+ <analyzer>
750
+ <tokenizer class="solr.StandardTokenizerFactory"/>
751
+ <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
752
+ </analyzer>
753
+ </fieldType>
754
+
755
+ <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
756
+ <analyzer>
757
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
758
+ <!--
759
+ The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
760
+ a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
761
+ Attributes of the DelimitedPayloadTokenFilterFactory :
762
+ "delimiter" - a one character delimiter. Default is | (pipe)
763
+ "encoder" - how to encode the following value into a payload
764
+ float -> org.apache.lucene.analysis.payloads.FloatEncoder,
765
+ integer -> o.a.l.a.p.IntegerEncoder
766
+ identity -> o.a.l.a.p.IdentityEncoder
767
+ Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
768
+ -->
769
+ <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
770
+ </analyzer>
771
+ </fieldType>
772
+
773
+ <!-- lowercases the entire field value, keeping it as a single token. -->
774
+ <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
775
+ <analyzer>
776
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
777
+ <filter class="solr.LowerCaseFilterFactory" />
778
+ </analyzer>
779
+ </fieldType>
780
+
781
+ <!--
782
+ Example of using PathHierarchyTokenizerFactory at index time, so
783
+ queries for paths match documents at that path, or in descendent paths
784
+ -->
785
+ <fieldType name="descendent_path" class="solr.TextField">
786
+ <analyzer type="index">
787
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
788
+ </analyzer>
789
+ <analyzer type="query">
790
+ <tokenizer class="solr.KeywordTokenizerFactory" />
791
+ </analyzer>
792
+ </fieldType>
793
+ <!--
794
+ Example of using PathHierarchyTokenizerFactory at query time, so
795
+ queries for paths match documents at that path, or in ancestor paths
796
+ -->
797
+ <fieldType name="ancestor_path" class="solr.TextField">
798
+ <analyzer type="index">
799
+ <tokenizer class="solr.KeywordTokenizerFactory" />
800
+ </analyzer>
801
+ <analyzer type="query">
802
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
803
+ </analyzer>
804
+ </fieldType>
805
+
806
+ <!-- since fields of this type are by default not stored or indexed,
807
+ any data added to them will be ignored outright. -->
808
+ <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
809
+
810
+ <!-- This point type indexes the coordinates as separate fields (subFields)
811
+ If subFieldType is defined, it references a type, and a dynamic field
812
+ definition is created matching *___<typename>. Alternately, if
813
+ subFieldSuffix is defined, that is used to create the subFields.
814
+ Example: if subFieldType="double", then the coordinates would be
815
+ indexed in fields myloc_0___double,myloc_1___double.
816
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
817
+ in fields myloc_0_d,myloc_1_d
818
+ The subFields are an implementation detail of the fieldType, and end
819
+ users normally should not need to know about them.
820
+ -->
821
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
822
+
823
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
824
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
825
+
826
+ <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
827
+ For more information about this and other Spatial fields new to Solr 4, see:
828
+ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
829
+ -->
830
+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
831
+ geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" />
832
+
833
+ <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
834
+ Parameters:
835
+ defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
836
+ precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
837
+ providerClass: Lets you plug in other exchange provider backend:
838
+ solr.FileExchangeRateProvider is the default and takes one parameter:
839
+ currencyConfig: name of an xml file holding exchange rates
840
+ solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
841
+ ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
842
+ refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
843
+ -->
844
+ <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
845
+
846
+
847
+
848
+ <!-- some examples for different languages (generally ordered by ISO code) -->
849
+
850
+ <!-- Arabic -->
851
+ <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
852
+ <analyzer>
853
+ <tokenizer class="solr.StandardTokenizerFactory"/>
854
+ <!-- for any non-arabic -->
855
+ <filter class="solr.LowerCaseFilterFactory"/>
856
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" />
857
+ <!-- normalizes ﻯ to ﻱ, etc -->
858
+ <filter class="solr.ArabicNormalizationFilterFactory"/>
859
+ <filter class="solr.ArabicStemFilterFactory"/>
860
+ </analyzer>
861
+ </fieldType>
862
+
863
+ <!-- Bulgarian -->
864
+ <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
865
+ <analyzer>
866
+ <tokenizer class="solr.StandardTokenizerFactory"/>
867
+ <filter class="solr.LowerCaseFilterFactory"/>
868
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" />
869
+ <filter class="solr.BulgarianStemFilterFactory"/>
870
+ </analyzer>
871
+ </fieldType>
872
+
873
+ <!-- Catalan -->
874
+ <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
875
+ <analyzer>
876
+ <tokenizer class="solr.StandardTokenizerFactory"/>
877
+ <!-- removes l', etc -->
878
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
879
+ <filter class="solr.LowerCaseFilterFactory"/>
880
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" />
881
+ <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
882
+ </analyzer>
883
+ </fieldType>
884
+
885
+ <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
886
+ <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
887
+ <analyzer>
888
+ <tokenizer class="solr.StandardTokenizerFactory"/>
889
+ <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
890
+ <filter class="solr.CJKWidthFilterFactory"/>
891
+ <!-- for any non-CJK -->
892
+ <filter class="solr.LowerCaseFilterFactory"/>
893
+ <filter class="solr.CJKBigramFilterFactory"/>
894
+ </analyzer>
895
+ </fieldType>
896
+
897
+ <!-- Kurdish -->
898
+ <fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100">
899
+ <analyzer>
900
+ <tokenizer class="solr.StandardTokenizerFactory"/>
901
+ <filter class="solr.SoraniNormalizationFilterFactory"/>
902
+ <!-- for any latin text -->
903
+ <filter class="solr.LowerCaseFilterFactory"/>
904
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ckb.txt"/>
905
+ <filter class="solr.SoraniStemFilterFactory"/>
906
+ </analyzer>
907
+ </fieldType>
908
+
909
+ <!-- Czech -->
910
+ <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
911
+ <analyzer>
912
+ <tokenizer class="solr.StandardTokenizerFactory"/>
913
+ <filter class="solr.LowerCaseFilterFactory"/>
914
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" />
915
+ <filter class="solr.CzechStemFilterFactory"/>
916
+ </analyzer>
917
+ </fieldType>
918
+
919
+ <!-- Danish -->
920
+ <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
921
+ <analyzer>
922
+ <tokenizer class="solr.StandardTokenizerFactory"/>
923
+ <filter class="solr.LowerCaseFilterFactory"/>
924
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
925
+ <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
926
+ </analyzer>
927
+ </fieldType>
928
+
929
+ <!-- German -->
930
+ <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
931
+ <analyzer>
932
+ <tokenizer class="solr.StandardTokenizerFactory"/>
933
+ <filter class="solr.LowerCaseFilterFactory"/>
934
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
935
+ <filter class="solr.GermanNormalizationFilterFactory"/>
936
+ <filter class="solr.GermanLightStemFilterFactory"/>
937
+ <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
938
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
939
+ </analyzer>
940
+ </fieldType>
941
+
942
+ <!-- Greek -->
943
+ <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
944
+ <analyzer>
945
+ <tokenizer class="solr.StandardTokenizerFactory"/>
946
+ <!-- greek specific lowercase for sigma -->
947
+ <filter class="solr.GreekLowerCaseFilterFactory"/>
948
+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" />
949
+ <filter class="solr.GreekStemFilterFactory"/>
950
+ </analyzer>
951
+ </fieldType>
952
+
953
+ <!-- Spanish -->
954
+ <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
955
+ <analyzer>
956
+ <tokenizer class="solr.StandardTokenizerFactory"/>
957
+ <filter class="solr.LowerCaseFilterFactory"/>
958
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
959
+ <filter class="solr.SpanishLightStemFilterFactory"/>
960
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
961
+ </analyzer>
962
+ </fieldType>
963
+
964
+ <!-- Basque -->
965
+ <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
966
+ <analyzer>
967
+ <tokenizer class="solr.StandardTokenizerFactory"/>
968
+ <filter class="solr.LowerCaseFilterFactory"/>
969
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" />
970
+ <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
971
+ </analyzer>
972
+ </fieldType>
973
+
974
+ <!-- Persian -->
975
+ <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
976
+ <analyzer>
977
+ <!-- for ZWNJ -->
978
+ <charFilter class="solr.PersianCharFilterFactory"/>
979
+ <tokenizer class="solr.StandardTokenizerFactory"/>
980
+ <filter class="solr.LowerCaseFilterFactory"/>
981
+ <filter class="solr.ArabicNormalizationFilterFactory"/>
982
+ <filter class="solr.PersianNormalizationFilterFactory"/>
983
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" />
984
+ </analyzer>
985
+ </fieldType>
986
+
987
+ <!-- Finnish -->
988
+ <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
989
+ <analyzer>
990
+ <tokenizer class="solr.StandardTokenizerFactory"/>
991
+ <filter class="solr.LowerCaseFilterFactory"/>
992
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
993
+ <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
994
+ <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
995
+ </analyzer>
996
+ </fieldType>
997
+
998
+ <!-- French -->
999
+ <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
1000
+ <analyzer>
1001
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1002
+ <!-- removes l', etc -->
1003
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
1004
+ <filter class="solr.LowerCaseFilterFactory"/>
1005
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
1006
+ <filter class="solr.FrenchLightStemFilterFactory"/>
1007
+ <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
1008
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
1009
+ </analyzer>
1010
+ </fieldType>
1011
+
1012
+ <!-- Irish -->
1013
+ <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
1014
+ <analyzer>
1015
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1016
+ <!-- removes d', etc -->
1017
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
1018
+ <!-- removes n-, etc. position increments is intentionally false! -->
1019
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
1020
+ <filter class="solr.IrishLowerCaseFilterFactory"/>
1021
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/>
1022
+ <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
1023
+ </analyzer>
1024
+ </fieldType>
1025
+
1026
+ <!-- Galician: standard tokenizer, lower-casing, stop words (lang/stopwords_gl.txt), Galician stemmer -->
1027
+ <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
1028
+ <analyzer>
1029
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1030
+ <filter class="solr.LowerCaseFilterFactory"/>
1031
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" />
1032
+ <filter class="solr.GalicianStemFilterFactory"/>
1033
+ <!-- less aggressive alternative to the stemmer above: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
1034
+ </analyzer>
1035
+ </fieldType>
1036
+
1037
+ <!-- Hindi: standard tokenizer, lower-casing, Indic and Hindi normalization, stop words (lang/stopwords_hi.txt), Hindi stemmer -->
1038
+ <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
1039
+ <analyzer>
1040
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1041
+ <filter class="solr.LowerCaseFilterFactory"/>
1042
+ <!-- normalizes the Unicode representation of Indic text -->
1043
+ <filter class="solr.IndicNormalizationFilterFactory"/>
1044
+ <!-- normalizes spelling variations in Hindi -->
1045
+ <filter class="solr.HindiNormalizationFilterFactory"/>
1046
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" />
1047
+ <filter class="solr.HindiStemFilterFactory"/>
1048
+ </analyzer>
1049
+ </fieldType>
1050
+
1051
+ <!-- Hungarian: standard tokenizer, lower-casing, stop words (lang/stopwords_hu.txt, snowball format), Snowball (Hungarian) stemming -->
1052
+ <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
1053
+ <analyzer>
1054
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1055
+ <filter class="solr.LowerCaseFilterFactory"/>
1056
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
1057
+ <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
1058
+ <!-- less aggressive alternative to the stemmer above: <filter class="solr.HungarianLightStemFilterFactory"/> -->
1059
+ </analyzer>
1060
+ </fieldType>
1061
+
1062
+ <!-- Armenian: standard tokenizer, lower-casing, stop words (lang/stopwords_hy.txt), Snowball (Armenian) stemming -->
1063
+ <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
1064
+ <analyzer>
1065
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1066
+ <filter class="solr.LowerCaseFilterFactory"/>
1067
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" />
1068
+ <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
1069
+ </analyzer>
1070
+ </fieldType>
1071
+
1072
+ <!-- Indonesian: standard tokenizer, lower-casing, stop words (lang/stopwords_id.txt), Indonesian stemmer with derivational stemming enabled -->
1073
+ <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
1074
+ <analyzer>
1075
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1076
+ <filter class="solr.LowerCaseFilterFactory"/>
1077
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" />
1078
+ <!-- for a less aggressive approach (inflectional suffixes only), set stemDerivational to "false" -->
1079
+ <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
1080
+ </analyzer>
1081
+ </fieldType>
1082
+
1083
+ <!-- Italian: standard tokenizer, elision removal, lower-casing, stop words (lang/stopwords_it.txt, snowball format), Italian light stemmer -->
1084
+ <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
1085
+ <analyzer>
1086
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1087
+ <!-- removes l', etc. (entries come from lang/contractions_it.txt) -->
1088
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
1089
+ <filter class="solr.LowerCaseFilterFactory"/>
1090
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
1091
+ <filter class="solr.ItalianLightStemFilterFactory"/>
1092
+ <!-- more aggressive alternative to the stemmer above: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
1093
+ </analyzer>
1094
+ </fieldType>
1095
+
1096
+ <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)
1097
+
1098
+ NOTE: If you want to optimize search for precision, use default operator AND in your query
1099
+ parser config with <solrQueryParser defaultOperator="AND"/> further down in this file. Use
1100
+ OR if you would like to optimize for recall (default).
1101
+ -->
1102
+ <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
1103
+ <analyzer>
1104
+ <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
1105
+
1106
+ Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic
1107
+ is used to segment compounds into their parts, and the compound itself is kept as a synonym.
1108
+
1109
+ Valid values for attribute mode are:
1110
+ normal: regular segmentation
1111
+ search: segmentation useful for search, with compounds kept as synonyms (default)
1112
+ extended: same as search mode, but also splits unknown words into unigrams (experimental)
1113
+
1114
+ For some applications it might be good to use search mode for indexing and normal mode for
1115
+ queries to reduce recall and prevent parts of compounds from being matched and highlighted.
1116
+ Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.
1117
+
1118
+ Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
1119
+ model with your own entries for segmentation, part-of-speech tags and readings without a need
1120
+ to specify weights. Note that user dictionaries have not been subject to extensive testing.
1121
+
1122
+ User dictionary attributes are:
1123
+ userDictionary: user dictionary filename
1124
+ userDictionaryEncoding: user dictionary encoding (default is UTF-8)
1125
+
1126
+ See lang/userdict_ja.txt for a sample user dictionary file.
1127
+
1128
+ Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
1129
+
1130
+ See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
1131
+ -->
1132
+ <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
1133
+ <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
1134
+ <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
1135
+ <filter class="solr.JapaneseBaseFormFilterFactory"/>
1136
+ <!-- Removes tokens whose part-of-speech tags are listed in lang/stoptags_ja.txt -->
1137
+ <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" />
1138
+ <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
1139
+ <filter class="solr.CJKWidthFilterFactory"/>
1140
+ <!-- Removes common tokens that are typically not useful for search but have a negative effect on ranking -->
1141
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" />
1142
+ <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
1143
+ <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
1144
+ <!-- Lower-cases romaji characters -->
1145
+ <filter class="solr.LowerCaseFilterFactory"/>
1146
+ </analyzer>
1147
+ </fieldType>
1148
+
1149
+ <!-- Latvian: standard tokenizer, lower-casing, stop words (lang/stopwords_lv.txt), Latvian stemmer -->
1150
+ <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
1151
+ <analyzer>
1152
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1153
+ <filter class="solr.LowerCaseFilterFactory"/>
1154
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" />
1155
+ <filter class="solr.LatvianStemFilterFactory"/>
1156
+ </analyzer>
1157
+ </fieldType>
1158
+
1159
+ <!-- Dutch: standard tokenizer, lower-casing, stop words (lang/stopwords_nl.txt, snowball format), stemmer overrides from lang/stemdict_nl.txt placed ahead of the Snowball (Dutch) stemmer -->
1160
+ <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
1161
+ <analyzer>
1162
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1163
+ <filter class="solr.LowerCaseFilterFactory"/>
1164
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
1165
+ <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
1166
+ <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
1167
+ </analyzer>
1168
+ </fieldType>
1169
+
1170
+ <!-- Norwegian: standard tokenizer, lower-casing, stop words (lang/stopwords_no.txt, snowball format), Snowball (Norwegian) stemming -->
1171
+ <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
1172
+ <analyzer>
1173
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1174
+ <filter class="solr.LowerCaseFilterFactory"/>
1175
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
1176
+ <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
1177
+ <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/> -->
1178
+ <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory" variant="nb"/> -->
1179
+ <!-- The "light" and "minimal" stemmers support these variant values: nb=Bokmål, nn=Nynorsk, no=Both -->
1180
+ </analyzer>
1181
+ </fieldType>
1182
+
1183
+ <!-- Portuguese: standard tokenizer, lower-casing, stop words (lang/stopwords_pt.txt, snowball format), Portuguese light stemmer -->
1184
+ <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
1185
+ <analyzer>
1186
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1187
+ <filter class="solr.LowerCaseFilterFactory"/>
1188
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
1189
+ <filter class="solr.PortugueseLightStemFilterFactory"/>
1190
+ <!-- Alternatives to the light stemmer above, in increasing order of aggressiveness: -->
1191
+ <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
1192
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
1193
+ <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
1194
+ </analyzer>
1195
+
1196
+ <!-- Romanian: standard tokenizer, lower-casing, stop words (lang/stopwords_ro.txt), Snowball (Romanian) stemming -->
1197
+ <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
1198
+ <analyzer>
1199
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1200
+ <filter class="solr.LowerCaseFilterFactory"/>
1201
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" />
1202
+ <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
1203
+ </analyzer>
1204
+ </fieldType>
1205
+
1206
+ <!-- Russian: standard tokenizer, lower-casing, stop words (lang/stopwords_ru.txt, snowball format), Snowball (Russian) stemming -->
1207
+ <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
1208
+ <analyzer>
1209
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1210
+ <filter class="solr.LowerCaseFilterFactory"/>
1211
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
1212
+ <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
1213
+ <!-- less aggressive alternative to the stemmer above: <filter class="solr.RussianLightStemFilterFactory"/> -->
1214
+ </analyzer>
1215
+ </fieldType>
1216
+
1217
+ <!-- Swedish: standard tokenizer, lower-casing, stop words (lang/stopwords_sv.txt, snowball format), Snowball (Swedish) stemming -->
1218
+ <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
1219
+ <analyzer>
1220
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1221
+ <filter class="solr.LowerCaseFilterFactory"/>
1222
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
1223
+ <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
1224
+ <!-- less aggressive alternative to the stemmer above: <filter class="solr.SwedishLightStemFilterFactory"/> -->
1225
+ </analyzer>
1226
+ </fieldType>
1227
+
1228
+ <!-- Thai: Thai word-segmenting tokenizer, lower-casing, stop words (lang/stopwords_th.txt). Documents indexed with the previous chain should be reindexed, because token boundaries may differ. -->
1229
+ <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
1230
+ <analyzer>
1231
+ <tokenizer class="solr.ThaiTokenizerFactory"/>
1232
+ <filter class="solr.LowerCaseFilterFactory"/>
1233
+ <!-- ThaiTokenizerFactory replaces the former StandardTokenizerFactory + ThaiWordFilterFactory pair; ThaiWordFilterFactory was deprecated in Solr 4.8 and removed in 5.0 -->
1234
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" />
1235
+ </analyzer>
1236
+ </fieldType>
1237
+
1238
+ <!-- Turkish: standard tokenizer, apostrophe filter, Turkish-specific lower-casing, case-sensitive stop words (lang/stopwords_tr.txt; ignoreCase is "false" and the filter sits after the lower-case filter), Snowball (Turkish) stemming -->
1239
+ <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
1240
+ <analyzer>
1241
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1242
+ <filter class="solr.ApostropheFilterFactory"/>
1243
+ <filter class="solr.TurkishLowerCaseFilterFactory"/>
1244
+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" />
1245
+ <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
1246
+ </analyzer>
1247
+ </fieldType>
1248
+
1249
+ <!-- Similarity is the scoring routine for each document vs. a query.
1250
+ A custom Similarity or SimilarityFactory may be specified here, but
1251
+ the default is fine for most applications.
1252
+ For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
1253
+ -->
1254
+ <!--
1255
+ <similarity class="com.example.solr.CustomSimilarityFactory">
1256
+ <str name="paramkey">param value</str>
1257
+ </similarity>
1258
+ -->
1259
+ <fieldType class="solr.TextField" name="textSuggest" positionIncrementGap="100">
1260
+ <analyzer>
1261
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
1262
+ <!-- The whole field value is kept as a single token and lower-cased. The former solr.StandardFilterFactory entry was dropped: it does nothing after a keyword tokenizer, and the class was removed in Solr 8. -->
1263
+ <filter class="solr.LowerCaseFilterFactory"/>
1264
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
1265
+ </analyzer>
1266
+ </fieldType>
1267
+
1268
+ </schema>