solr_ead 0.4.5 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/solr/schema.xml ADDED
@@ -0,0 +1,1163 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <schema name="Rock and Roll Hall of Fame - Catalog Solr Schema" version="1.5">
20
+
21
+ <types>
22
+ <!-- field type definitions. The "name" attribute is
23
+ just a label to be used by field definitions. The "class"
24
+ attribute and any other attributes determine the real
25
+ behavior of the fieldType.
26
+ Class names starting with "solr" refer to java classes in a
27
+ standard package such as org.apache.solr.analysis
28
+ -->
29
+
30
+ <!-- The StrField type is not analyzed, but indexed/stored verbatim.
31
+ It supports doc values but in that case the field needs to be
32
+ single-valued and either required or have a default value.
33
+ -->
34
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
35
+
36
+ <!-- boolean type: "true" or "false" -->
37
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
38
+
39
+ <!-- The sortMissingLast and sortMissingFirst attributes are optional and are
40
+ currently supported on types that are sorted internally as strings
41
+ and on numeric types.
42
+ This includes "string","boolean", and, as of 3.5 (and 4.x),
43
+ int, float, long, date, double, including the "Trie" variants.
44
+ - If sortMissingLast="true", then a sort on this field will cause documents
45
+ without the field to come after documents with the field,
46
+ regardless of the requested sort order (asc or desc).
47
+ - If sortMissingFirst="true", then a sort on this field will cause documents
48
+ without the field to come before documents with the field,
49
+ regardless of the requested sort order.
50
+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
51
+ then default lucene sorting will be used which places docs without the
52
+ field first in an ascending sort and last in a descending sort.
53
+ -->
54
+
55
+ <!--
56
+ Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
57
+
58
+ These fields support doc values, but they require the field to be
59
+ single-valued and either be required or have a default value.
60
+ -->
61
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
62
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
63
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
64
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
65
+
66
+ <!--
67
+ Numeric field types that index each value at various levels of precision
68
+ to accelerate range queries when the number of values between the range
69
+ endpoints is large. See the javadoc for NumericRangeQuery for internal
70
+ implementation details.
71
+
72
+ Smaller precisionStep values (specified in bits) will lead to more tokens
73
+ indexed per value, slightly larger index size, and faster range queries.
74
+ A precisionStep of 0 disables indexing at different precision levels.
75
+ -->
76
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
77
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
78
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
79
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
80
+
81
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
82
+ is a more restricted form of the canonical representation of dateTime
83
+ http://www.w3.org/TR/xmlschema-2/#dateTime
84
+ The trailing "Z" designates UTC time and is mandatory.
85
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
86
+ All other components are mandatory.
87
+
88
+ Expressions can also be used to denote calculations that should be
89
+ performed relative to "NOW" to determine the value, ie...
90
+
91
+ NOW/HOUR
92
+ ... Round to the start of the current hour
93
+ NOW-1DAY
94
+ ... Exactly 1 day prior to now
95
+ NOW/DAY+6MONTHS+3DAYS
96
+ ... 6 months and 3 days in the future from the start of
97
+ the current day
98
+
99
+ Consult the DateField javadocs for more information.
100
+
101
+ Note: For faster range queries, consider the tdate type
102
+ -->
103
+ <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
104
+
105
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
106
+ <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
107
+
108
+
109
+ <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
110
+ <fieldType name="binary" class="solr.BinaryField"/>
111
+
112
+ <!--
113
+ Note:
114
+ These should only be used for compatibility with existing indexes (created with lucene or older Solr versions).
115
+ Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last
116
+
117
+ Plain numeric field types that store and index the text
118
+ value verbatim (and hence don't correctly support range queries, since the
119
+ lexicographic ordering isn't equal to the numeric ordering)
120
+ -->
121
+ <fieldType name="pint" class="solr.IntField"/>
122
+ <fieldType name="plong" class="solr.LongField"/>
123
+ <fieldType name="pfloat" class="solr.FloatField"/>
124
+ <fieldType name="pdouble" class="solr.DoubleField"/>
125
+ <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
126
+
127
+ <!-- The "RandomSortField" is not used to store or search any
128
+ data. You can declare fields of this type in your schema
129
+ to generate pseudo-random orderings of your docs for sorting
130
+ or function purposes. The ordering is generated based on the field
131
+ name and the version of the index. As long as the index version
132
+ remains unchanged, and the same field name is reused,
133
+ the ordering of the docs will be consistent.
134
+ If you want different pseudo-random orderings of documents,
135
+ for the same version of the index, use a dynamicField and
136
+ change the field name in the request.
137
+ -->
138
+ <fieldType name="random" class="solr.RandomSortField" indexed="true" />
139
+
140
+ <!-- solr.TextField allows the specification of custom text analyzers
141
+ specified as a tokenizer and a list of token filters. Different
142
+ analyzers may be specified for indexing and querying.
143
+
144
+ The optional positionIncrementGap puts space between multiple fields of
145
+ this type on the same document, with the purpose of preventing false phrase
146
+ matching across fields.
147
+
148
+ For more info on customizing your analyzer chain, please see
149
+ http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
150
+ -->
151
+
152
+ <!-- One can also specify an existing Analyzer class that has a
153
+ default constructor via the class attribute on the analyzer element.
154
+ Example:
155
+ <fieldType name="text_greek" class="solr.TextField">
156
+ <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
157
+ </fieldType>
158
+ -->
159
+
160
+ <!-- A text field that only splits on whitespace for exact matching of words -->
161
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
162
+ <analyzer>
163
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
164
+ </analyzer>
165
+ </fieldType>
166
+
167
+ <!-- A general text field that has reasonable, generic
168
+ cross-language defaults: it tokenizes with StandardTokenizer,
169
+ removes stop words from case-insensitive "stopwords.txt"
170
+ (empty by default), and down cases. At query time only, it
171
+ also applies synonyms. -->
172
+ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
173
+ <analyzer type="index">
174
+ <tokenizer class="solr.StandardTokenizerFactory"/>
175
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
176
+ <!-- in this example, we will only use synonyms at query time
177
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
178
+ -->
179
+ <filter class="solr.LowerCaseFilterFactory"/>
180
+ </analyzer>
181
+ <analyzer type="query">
182
+ <tokenizer class="solr.StandardTokenizerFactory"/>
183
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
184
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
185
+ <filter class="solr.LowerCaseFilterFactory"/>
186
+ </analyzer>
187
+ </fieldType>
188
+
189
+ <!-- Carry-over from solr 4.1 schema. Could be removed and a similar or equal type used instead -->
190
+ <fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100" >
191
+ <analyzer>
192
+ <tokenizer class="solr.StandardTokenizerFactory"/>
193
+ <!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> -->
194
+ <filter class="solr.StandardFilterFactory"/>
195
+ <filter class="solr.LowerCaseFilterFactory"/>
196
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
197
+ </analyzer>
198
+ </fieldType>
199
+
200
+ <!-- A text field with defaults appropriate for English: it
201
+ tokenizes with StandardTokenizer, removes English stop words
202
+ (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
203
+ finally applies Porter's stemming. The query time analyzer
204
+ also applies synonyms from synonyms.txt. -->
205
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
206
+ <analyzer type="index">
207
+ <tokenizer class="solr.StandardTokenizerFactory"/>
208
+ <!-- in this example, we will only use synonyms at query time
209
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
210
+ -->
211
+ <!-- Case insensitive stop word removal.
212
+ add enablePositionIncrements=true in both the index and query
213
+ analyzers to leave a 'gap' for more accurate phrase queries.
214
+ -->
215
+ <filter class="solr.StopFilterFactory"
216
+ ignoreCase="true"
217
+ words="lang/stopwords_en.txt"
218
+ enablePositionIncrements="true"
219
+ />
220
+ <filter class="solr.LowerCaseFilterFactory"/>
221
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
222
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
223
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
224
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
225
+ -->
226
+ <filter class="solr.PorterStemFilterFactory"/>
227
+ </analyzer>
228
+ <analyzer type="query">
229
+ <tokenizer class="solr.StandardTokenizerFactory"/>
230
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
231
+ <filter class="solr.StopFilterFactory"
232
+ ignoreCase="true"
233
+ words="lang/stopwords_en.txt"
234
+ enablePositionIncrements="true"
235
+ />
236
+ <filter class="solr.LowerCaseFilterFactory"/>
237
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
238
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
239
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
240
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
241
+ -->
242
+ <filter class="solr.PorterStemFilterFactory"/>
243
+ </analyzer>
244
+ </fieldType>
245
+
246
+ <!-- A text field with defaults appropriate for English, plus
247
+ aggressive word-splitting and autophrase features enabled.
248
+ This field is just like text_en, except it adds
249
+ WordDelimiterFilter to enable splitting and matching of
250
+ words on case-change, alpha numeric boundaries, and
251
+ non-alphanumeric chars. This means certain compound word
252
+ cases will work, for example query "wi fi" will match
253
+ document "WiFi" or "wi-fi".
254
+ -->
255
+ <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
256
+ <analyzer type="index">
257
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
258
+ <!-- in this example, we will only use synonyms at query time
259
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
260
+ -->
261
+ <!-- Case insensitive stop word removal.
262
+ add enablePositionIncrements=true in both the index and query
263
+ analyzers to leave a 'gap' for more accurate phrase queries.
264
+ -->
265
+ <filter class="solr.StopFilterFactory"
266
+ ignoreCase="true"
267
+ words="lang/stopwords_en.txt"
268
+ enablePositionIncrements="true"
269
+ />
270
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
271
+ <filter class="solr.LowerCaseFilterFactory"/>
272
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
273
+ <filter class="solr.PorterStemFilterFactory"/>
274
+ </analyzer>
275
+ <analyzer type="query">
276
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
277
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
278
+ <filter class="solr.StopFilterFactory"
279
+ ignoreCase="true"
280
+ words="lang/stopwords_en.txt"
281
+ enablePositionIncrements="true"
282
+ />
283
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
284
+ <filter class="solr.LowerCaseFilterFactory"/>
285
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
286
+ <filter class="solr.PorterStemFilterFactory"/>
287
+ </analyzer>
288
+ </fieldType>
289
+
290
+ <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
291
+ but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
292
+ <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
293
+ <analyzer>
294
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
295
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
296
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
297
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
298
+ <filter class="solr.LowerCaseFilterFactory"/>
299
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
300
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
301
+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
302
+ possible with WordDelimiterFilter in conjunction with stemming. -->
303
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
304
+ </analyzer>
305
+ </fieldType>
306
+
307
+ <!-- Just like text_general except it reverses the characters of
308
+ each token, to enable more efficient leading wildcard queries. -->
309
+ <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
310
+ <analyzer type="index">
311
+ <tokenizer class="solr.StandardTokenizerFactory"/>
312
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
313
+ <filter class="solr.LowerCaseFilterFactory"/>
314
+ <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
315
+ maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
316
+ </analyzer>
317
+ <analyzer type="query">
318
+ <tokenizer class="solr.StandardTokenizerFactory"/>
319
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
320
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
321
+ <filter class="solr.LowerCaseFilterFactory"/>
322
+ </analyzer>
323
+ </fieldType>
324
+
325
+ <!-- charFilter + WhitespaceTokenizer -->
326
+ <!--
327
+ <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
328
+ <analyzer>
329
+ <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
330
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
331
+ </analyzer>
332
+ </fieldType>
333
+ -->
334
+
335
+ <!-- This is an example of using the KeywordTokenizer along
336
+ with various TokenFilterFactories to produce a sortable field
337
+ that does not include some properties of the source text
338
+ -->
339
+ <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
340
+ <analyzer>
341
+ <!-- KeywordTokenizer does no actual tokenizing, so the entire
342
+ input string is preserved as a single token
343
+ -->
344
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
345
+ <!-- The LowerCase TokenFilter does what you expect, which can be
346
+ useful when you want your sorting to be case insensitive
347
+ -->
348
+ <filter class="solr.LowerCaseFilterFactory" />
349
+ <!-- The TrimFilter removes any leading or trailing whitespace -->
350
+ <filter class="solr.TrimFilterFactory" />
351
+ <!-- The PatternReplaceFilter gives you the flexibility to use
352
+ Java Regular expression to replace any sequence of characters
353
+ matching a pattern with an arbitrary replacement string,
354
+ which may include back references to portions of the original
355
+ string matched by the pattern.
356
+
357
+ See the Java Regular Expression documentation for more
358
+ information on pattern and replacement string syntax.
359
+
360
+ http://java.sun.com/j2se/1.6.0/docs/api/java/util/regex/package-summary.html
361
+ -->
362
+ <filter class="solr.PatternReplaceFilterFactory"
363
+ pattern="([^a-z])" replacement="" replace="all"
364
+ />
365
+ </analyzer>
366
+ </fieldType>
367
+
368
+ <!-- Phonetic matching: StandardTokenizer output is run through DoubleMetaphoneFilter; with inject="false" the phonetic codes replace the original tokens (per the Solr filter documentation). --> <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
369
+ <analyzer>
370
+ <tokenizer class="solr.StandardTokenizerFactory"/>
371
+ <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
372
+ </analyzer>
373
+ </fieldType>
374
+
375
+ <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
375
+ <analyzer>
376
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
377
+ <!--
378
+ The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
379
+ a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
380
+ Attributes of the DelimitedPayloadTokenFilterFactory :
381
+ "delimiter" - a one character delimiter. Default is | (pipe)
382
+ "encoder" - how to encode the following value into a payload
383
+ float -> org.apache.lucene.analysis.payloads.FloatEncoder,
384
+ integer -> o.a.l.a.p.IntegerEncoder
385
+ identity -> o.a.l.a.p.IdentityEncoder
386
+ Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
387
+ -->
388
+ <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
389
+ </analyzer>
390
+ </fieldType>
392
+
393
+ <!-- lowercases the entire field value, keeping it as a single token. -->
394
+ <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
395
+ <analyzer>
396
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
397
+ <filter class="solr.LowerCaseFilterFactory" />
398
+ </analyzer>
399
+ </fieldType>
400
+
401
+ <!--
402
+ Example of using PathHierarchyTokenizerFactory at index time, so
403
+ queries for paths match documents at that path, or in descendent paths
404
+ -->
405
+ <fieldType name="descendent_path" class="solr.TextField">
406
+ <analyzer type="index">
407
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
408
+ </analyzer>
409
+ <analyzer type="query">
410
+ <tokenizer class="solr.KeywordTokenizerFactory" />
411
+ </analyzer>
412
+ </fieldType>
413
+ <!--
414
+ Example of using PathHierarchyTokenizerFactory at query time, so
415
+ queries for paths match documents at that path, or in ancestor paths
416
+ -->
417
+ <fieldType name="ancestor_path" class="solr.TextField">
418
+ <analyzer type="index">
419
+ <tokenizer class="solr.KeywordTokenizerFactory" />
420
+ </analyzer>
421
+ <analyzer type="query">
422
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
423
+ </analyzer>
424
+ </fieldType>
425
+
426
+ <!-- Since fields of this type are by default not stored or indexed,
426
+ any data added to them will be ignored outright. -->
427
+ <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
429
+
430
+ <!-- This point type indexes the coordinates as separate fields (subFields)
431
+ If subFieldType is defined, it references a type, and a dynamic field
432
+ definition is created matching *___<typename>. Alternately, if
433
+ subFieldSuffix is defined, that is used to create the subFields.
434
+ Example: if subFieldType="double", then the coordinates would be
435
+ indexed in fields myloc_0___double,myloc_1___double.
436
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
437
+ in fields myloc_0_d,myloc_1_d
438
+ The subFields are an implementation detail of the fieldType, and end
439
+ users normally should not need to know about them.
440
+ -->
441
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
442
+
443
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
444
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
445
+
446
+ <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
447
+ For more information about this and other Spatial fields new to Solr 4, see:
448
+ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
449
+ -->
450
+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
451
+ geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
452
+
453
+ <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
454
+ Parameters:
455
+ defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
456
+ precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
457
+ providerClass: Lets you plug in other exchange provider backend:
458
+ solr.FileExchangeRateProvider is the default and takes one parameter:
459
+ currencyConfig: name of an xml file holding exchange rates
460
+ solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
461
+ ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
462
+ refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
463
+ -->
464
+ <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
465
+
466
+
467
+
468
+ <!-- some examples for different languages (generally ordered by ISO code) -->
469
+
470
+ <!-- Arabic -->
471
+ <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
472
+ <analyzer>
473
+ <tokenizer class="solr.StandardTokenizerFactory"/>
474
+ <!-- for any non-arabic -->
475
+ <filter class="solr.LowerCaseFilterFactory"/>
476
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" enablePositionIncrements="true"/>
477
+ <!-- normalizes ﻯ to ﻱ, etc -->
478
+ <filter class="solr.ArabicNormalizationFilterFactory"/>
479
+ <filter class="solr.ArabicStemFilterFactory"/>
480
+ </analyzer>
481
+ </fieldType>
482
+
483
+ <!-- Bulgarian -->
484
+ <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
485
+ <analyzer>
486
+ <tokenizer class="solr.StandardTokenizerFactory"/>
487
+ <filter class="solr.LowerCaseFilterFactory"/>
488
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" enablePositionIncrements="true"/>
489
+ <filter class="solr.BulgarianStemFilterFactory"/>
490
+ </analyzer>
491
+ </fieldType>
492
+
493
+ <!-- Catalan -->
494
+ <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
495
+ <analyzer>
496
+ <tokenizer class="solr.StandardTokenizerFactory"/>
497
+ <!-- removes l', etc -->
498
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
499
+ <filter class="solr.LowerCaseFilterFactory"/>
500
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" enablePositionIncrements="true"/>
501
+ <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
502
+ </analyzer>
503
+ </fieldType>
504
+
505
+ <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
506
+ <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
507
+ <analyzer>
508
+ <tokenizer class="solr.StandardTokenizerFactory"/>
509
+ <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
510
+ <filter class="solr.CJKWidthFilterFactory"/>
511
+ <!-- for any non-CJK -->
512
+ <filter class="solr.LowerCaseFilterFactory"/>
513
+ <filter class="solr.CJKBigramFilterFactory"/>
514
+ </analyzer>
515
+ </fieldType>
516
+
517
+ <!-- Czech -->
518
+ <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
519
+ <analyzer>
520
+ <tokenizer class="solr.StandardTokenizerFactory"/>
521
+ <filter class="solr.LowerCaseFilterFactory"/>
522
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" enablePositionIncrements="true"/>
523
+ <filter class="solr.CzechStemFilterFactory"/>
524
+ </analyzer>
525
+ </fieldType>
526
+
527
+ <!-- Danish -->
528
+ <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
529
+ <analyzer>
530
+ <tokenizer class="solr.StandardTokenizerFactory"/>
531
+ <filter class="solr.LowerCaseFilterFactory"/>
532
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" enablePositionIncrements="true"/>
533
+ <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
534
+ </analyzer>
535
+ </fieldType>
536
+
537
+ <!-- German -->
538
+ <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
539
+ <analyzer>
540
+ <tokenizer class="solr.StandardTokenizerFactory"/>
541
+ <filter class="solr.LowerCaseFilterFactory"/>
542
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" enablePositionIncrements="true"/>
543
+ <filter class="solr.GermanNormalizationFilterFactory"/>
544
+ <filter class="solr.GermanLightStemFilterFactory"/>
545
+ <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
546
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
547
+ </analyzer>
548
+ </fieldType>
549
+
550
+ <!-- Greek -->
551
+ <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
552
+ <analyzer>
553
+ <tokenizer class="solr.StandardTokenizerFactory"/>
554
+ <!-- greek specific lowercase for sigma -->
555
+ <filter class="solr.GreekLowerCaseFilterFactory"/>
556
+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
557
+ <filter class="solr.GreekStemFilterFactory"/>
558
+ </analyzer>
559
+ </fieldType>
560
+
561
+ <!-- Spanish -->
562
+ <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
563
+ <analyzer>
564
+ <tokenizer class="solr.StandardTokenizerFactory"/>
565
+ <filter class="solr.LowerCaseFilterFactory"/>
566
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" enablePositionIncrements="true"/>
567
+ <filter class="solr.SpanishLightStemFilterFactory"/>
568
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
569
+ </analyzer>
570
+ </fieldType>
571
+
572
+ <!-- Basque -->
573
+ <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
574
+ <analyzer>
575
+ <tokenizer class="solr.StandardTokenizerFactory"/>
576
+ <filter class="solr.LowerCaseFilterFactory"/>
577
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" enablePositionIncrements="true"/>
578
+ <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
579
+ </analyzer>
580
+ </fieldType>
581
+
582
+ <!-- Persian -->
583
+ <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
584
+ <analyzer>
585
+ <!-- for ZWNJ -->
586
+ <charFilter class="solr.PersianCharFilterFactory"/>
587
+ <tokenizer class="solr.StandardTokenizerFactory"/>
588
+ <filter class="solr.LowerCaseFilterFactory"/>
589
+ <filter class="solr.ArabicNormalizationFilterFactory"/>
590
+ <filter class="solr.PersianNormalizationFilterFactory"/>
591
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" enablePositionIncrements="true"/>
592
+ </analyzer>
593
+ </fieldType>
594
+
595
+ <!-- Finnish -->
596
+ <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
597
+ <analyzer>
598
+ <tokenizer class="solr.StandardTokenizerFactory"/>
599
+ <filter class="solr.LowerCaseFilterFactory"/>
600
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" enablePositionIncrements="true"/>
601
+ <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
602
+ <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
603
+ </analyzer>
604
+ </fieldType>
605
+
606
+ <!-- French -->
607
+ <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
608
+ <analyzer>
609
+ <tokenizer class="solr.StandardTokenizerFactory"/>
610
+ <!-- removes l', etc -->
611
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
612
+ <filter class="solr.LowerCaseFilterFactory"/>
613
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" enablePositionIncrements="true"/>
614
+ <filter class="solr.FrenchLightStemFilterFactory"/>
615
+ <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
616
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
617
+ </analyzer>
618
+ </fieldType>
619
+
620
+ <!-- Irish -->
621
+ <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
622
+ <analyzer>
623
+ <tokenizer class="solr.StandardTokenizerFactory"/>
624
+ <!-- removes d', etc -->
625
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
626
+ <!-- removes n-, etc. position increments is intentionally false! -->
627
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" enablePositionIncrements="false"/>
628
+ <filter class="solr.IrishLowerCaseFilterFactory"/>
629
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" enablePositionIncrements="true"/>
630
+ <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
631
+ </analyzer>
632
+ </fieldType>
633
+
634
+ <!-- Galician -->
635
+ <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
636
+ <analyzer>
637
+ <tokenizer class="solr.StandardTokenizerFactory"/>
638
+ <filter class="solr.LowerCaseFilterFactory"/>
639
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" enablePositionIncrements="true"/>
640
+ <filter class="solr.GalicianStemFilterFactory"/>
641
+ <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
642
+ </analyzer>
643
+ </fieldType>
644
+
645
+ <!-- Hindi -->
646
+ <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
647
+ <analyzer>
648
+ <tokenizer class="solr.StandardTokenizerFactory"/>
649
+ <filter class="solr.LowerCaseFilterFactory"/>
650
+ <!-- normalizes unicode representation -->
651
+ <filter class="solr.IndicNormalizationFilterFactory"/>
652
+ <!-- normalizes variation in spelling -->
653
+ <filter class="solr.HindiNormalizationFilterFactory"/>
654
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" enablePositionIncrements="true"/>
655
+ <filter class="solr.HindiStemFilterFactory"/>
656
+ </analyzer>
657
+ </fieldType>
658
+
659
+ <!-- Hungarian -->
660
+ <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
661
+ <analyzer>
662
+ <tokenizer class="solr.StandardTokenizerFactory"/>
663
+ <filter class="solr.LowerCaseFilterFactory"/>
664
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" enablePositionIncrements="true"/>
665
+ <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
666
+ <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->
667
+ </analyzer>
668
+ </fieldType>
669
+
670
+ <!-- Armenian -->
671
+ <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
672
+ <analyzer>
673
+ <tokenizer class="solr.StandardTokenizerFactory"/>
674
+ <filter class="solr.LowerCaseFilterFactory"/>
675
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" enablePositionIncrements="true"/>
676
+ <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
677
+ </analyzer>
678
+ </fieldType>
679
+
680
+ <!-- Indonesian -->
681
+ <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
682
+ <analyzer>
683
+ <tokenizer class="solr.StandardTokenizerFactory"/>
684
+ <filter class="solr.LowerCaseFilterFactory"/>
685
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" enablePositionIncrements="true"/>
686
+ <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
687
+ <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
688
+ </analyzer>
689
+ </fieldType>
690
+
691
+ <!-- Italian -->
692
+ <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
693
+ <analyzer>
694
+ <tokenizer class="solr.StandardTokenizerFactory"/>
695
+ <!-- removes l', etc -->
696
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
697
+ <filter class="solr.LowerCaseFilterFactory"/>
698
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" enablePositionIncrements="true"/>
699
+ <filter class="solr.ItalianLightStemFilterFactory"/>
700
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
701
+ </analyzer>
702
+ </fieldType>
703
+
704
+ <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)
705
+
706
+ NOTE: If you want to optimize search for precision, use default operator AND in your query
707
+ parser config with <solrQueryParser defaultOperator="AND"/> further down in this file. Use
708
+ OR if you would like to optimize for recall (default).
709
+ -->
710
+ <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
711
+ <analyzer>
712
+ <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
713
+
714
+ Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic
715
+ is used to segment compounds into their parts, and the compound itself is kept as a synonym.
716
+
717
+ Valid values for attribute mode are:
718
+ normal: regular segmentation
719
+ search: segmentation useful for search, with compounds kept as synonyms (default)
720
+ extended: same as search mode, but unigrams unknown words (experimental)
721
+
722
+ For some applications it might be good to use search mode for indexing and normal mode for
723
+ queries to reduce recall and prevent parts of compounds from being matched and highlighted.
724
+ Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.
725
+
726
+ Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
727
+ model with your own entries for segmentation, part-of-speech tags and readings without a need
728
+ to specify weights. Notice that user dictionaries have not been subject to extensive testing.
729
+
730
+ User dictionary attributes are:
731
+ userDictionary: user dictionary filename
732
+ userDictionaryEncoding: user dictionary encoding (default is UTF-8)
733
+
734
+ See lang/userdict_ja.txt for a sample user dictionary file.
735
+
736
+ Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
737
+
738
+ See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
739
+ -->
740
+ <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
741
+ <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
742
+ <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
743
+ <filter class="solr.JapaneseBaseFormFilterFactory"/>
744
+ <!-- Removes tokens with certain part-of-speech tags -->
745
+ <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" enablePositionIncrements="true"/>
746
+ <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
747
+ <filter class="solr.CJKWidthFilterFactory"/>
748
+ <!-- Removes common tokens that are typically not useful for search but have a negative effect on ranking -->
749
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" enablePositionIncrements="true" />
750
+ <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
751
+ <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
752
+ <!-- Lower-cases romaji characters -->
753
+ <filter class="solr.LowerCaseFilterFactory"/>
754
+ </analyzer>
755
+ </fieldType>
756
+
757
+ <!-- Latvian -->
758
+ <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
759
+ <analyzer>
760
+ <tokenizer class="solr.StandardTokenizerFactory"/>
761
+ <filter class="solr.LowerCaseFilterFactory"/>
762
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" enablePositionIncrements="true"/>
763
+ <filter class="solr.LatvianStemFilterFactory"/>
764
+ </analyzer>
765
+ </fieldType>
766
+
767
+ <!-- Dutch -->
768
+ <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
769
+ <analyzer>
770
+ <tokenizer class="solr.StandardTokenizerFactory"/>
771
+ <filter class="solr.LowerCaseFilterFactory"/>
772
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" enablePositionIncrements="true"/>
773
+ <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
774
+ <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
775
+ </analyzer>
776
+ </fieldType>
777
+
778
+ <!-- Norwegian -->
779
+ <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
780
+ <analyzer>
781
+ <tokenizer class="solr.StandardTokenizerFactory"/>
782
+ <filter class="solr.LowerCaseFilterFactory"/>
783
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" enablePositionIncrements="true"/>
784
+ <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
785
+ <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> -->
786
+ <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
787
+ </analyzer>
788
+ </fieldType>
789
+
790
+ <!-- Portuguese -->
791
+ <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
792
+ <analyzer>
793
+ <tokenizer class="solr.StandardTokenizerFactory"/>
794
+ <filter class="solr.LowerCaseFilterFactory"/>
795
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" enablePositionIncrements="true"/>
796
+ <filter class="solr.PortugueseLightStemFilterFactory"/>
797
+ <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
798
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
799
+ <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
800
+ </analyzer>
801
+ </fieldType>
802
+
803
+ <!-- Romanian -->
804
+ <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
805
+ <analyzer>
806
+ <tokenizer class="solr.StandardTokenizerFactory"/>
807
+ <filter class="solr.LowerCaseFilterFactory"/>
808
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" enablePositionIncrements="true"/>
809
+ <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
810
+ </analyzer>
811
+ </fieldType>
812
+
813
+ <!-- Russian -->
814
+ <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
815
+ <analyzer>
816
+ <tokenizer class="solr.StandardTokenizerFactory"/>
817
+ <filter class="solr.LowerCaseFilterFactory"/>
818
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" enablePositionIncrements="true"/>
819
+ <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
820
+ <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
821
+ </analyzer>
822
+ </fieldType>
823
+
824
+ <!-- Swedish -->
825
+ <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
826
+ <analyzer>
827
+ <tokenizer class="solr.StandardTokenizerFactory"/>
828
+ <filter class="solr.LowerCaseFilterFactory"/>
829
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" enablePositionIncrements="true"/>
830
+ <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
831
+ <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
832
+ </analyzer>
833
+ </fieldType>
834
+
835
+ <!-- Thai -->
836
+ <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
837
+ <analyzer>
838
+ <tokenizer class="solr.StandardTokenizerFactory"/>
839
+ <filter class="solr.LowerCaseFilterFactory"/>
840
+ <filter class="solr.ThaiWordFilterFactory"/>
841
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" enablePositionIncrements="true"/>
842
+ </analyzer>
843
+ </fieldType>
844
+
845
+ <!-- Turkish -->
846
+ <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
847
+ <analyzer>
848
+ <tokenizer class="solr.StandardTokenizerFactory"/>
849
+ <filter class="solr.TurkishLowerCaseFilterFactory"/>
850
+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" enablePositionIncrements="true"/>
851
+ <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
852
+ </analyzer>
853
+ </fieldType>
854
+ </types>
855
+
856
+ <fields>
857
+
858
+ <!-- NOTE: this is not a full list of fields in the index; dynamic fields are also used -->
859
+ <field name="id" type="string" indexed="true" stored="true" required="true" />
860
+ <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
861
+ <!-- default, catch all search field -->
862
+ <field name="text" type="text_en" indexed="true" stored="false" multiValued="true"/>
863
+
864
+ <!-- these display fields are NOT multi-valued -->
865
+ <field name="marc_display" type="string" indexed="false" stored="true" multiValued="false" />
866
+ <field name="heading_display" type="string" indexed="false" stored="true" multiValued="false" />
867
+ <field name="title_display" type="text_en" indexed="false" stored="true" multiValued="false" />
868
+ <field name="title_vern_display" type="text_en" indexed="false" stored="true" multiValued="false" />
869
+ <field name="subtitle_display" type="text_en" indexed="false" stored="true" multiValued="false" />
870
+ <field name="subtitle_vern_display" type="text_en" indexed="false" stored="true" multiValued="false" />
871
+ <field name="author_display" type="text_en" indexed="false" stored="true" multiValued="false" />
872
+ <field name="author_vern_display" type="text_en" indexed="false" stored="true" multiValued="false" />
873
+
874
+ <!-- by default, facet fields are not stored, so they may be displayed differently than they are
875
+ indexed. Here are the exceptions to this, which are facet fields that are displayed the
876
+ same as they are indexed -->
877
+ <field name="subject_facet" type="string" indexed="true" stored="true" multiValued="true" />
878
+ <field name="pub_date" type="string" indexed="true" stored="true" multiValued="true" />
879
+ <field name="genre_facet" type="string" indexed="true" stored="true" multiValued="true" />
880
+ <field name="collection_facet" type="string" indexed="true" stored="true" multiValued="true" />
881
+
882
+ <!-- pub_date_sort uses new trie-based int fields, which are recommended for any int and are
883
+ displayable, sortable, and can be used in range queries.
884
+ Use 'tint' for faster range-queries. -->
885
+ <field name="pub_date_sort" type="tint" indexed="true" stored="true" multiValued="false"/>
886
+
887
+ <!-- format is used for facet, display, and choosing which partial to use for the show view,
888
+ so it must be stored and indexed -->
889
+ <field name="format" type="string" indexed="true" stored="true"/>
890
+
891
+ <!-- Dynamic field definitions. If a field name is not found, dynamicFields
892
+ will be used if the name matches any of the patterns.
893
+ RESTRICTION: the glob-like pattern in the name attribute must have
894
+ a "*" only at the start or the end.
895
+ EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
896
+ Longer patterns will be matched first. If equal-size patterns
897
+ both match, the first appearing in the schema will be used. -->
898
+ <dynamicField name="*_i" type="int" indexed="true" stored="true" multiValued="false" />
899
+ <dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true" />
900
+ <dynamicField name="*_id" type="string" indexed="true" stored="true" multiValued="false" />
901
+ <dynamicField name="*_l" type="long" indexed="true" stored="true" multiValued="false" />
902
+ <dynamicField name="*_t" type="text_en" indexed="true" stored="false" multiValued="true" />
903
+ <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true" />
904
+ <dynamicField name="*_b" type="boolean" indexed="true" stored="true" multiValued="false" />
905
+ <dynamicField name="*_f" type="float" indexed="true" stored="true" multiValued="false" />
906
+ <dynamicField name="*_d" type="double" indexed="true" stored="true" multiValued="false" />
907
+ <dynamicField name="*_display" type="text_en" indexed="false" stored="true" multiValued="true" />
908
+ <dynamicField name="*_label" type="string" indexed="false" stored="true" multiValued="true" />
909
+ <dynamicField name="*_facet" type="string" indexed="true" stored="false" multiValued="true" />
910
+ <dynamicField name="*_sort" type="alphaOnlySort" indexed="true" stored="false" multiValued="false" />
911
+ <dynamicField name="*_unstem_search" type="text_general" indexed="true" stored="false" multiValued="true" />
912
+ <dynamicField name="*spell" type="textSpell" indexed="true" stored="false" multiValued="true" />
913
+ <dynamicField name="*_dt" type="date" indexed="true" stored="true" multiValued="false" />
914
+ <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true" />
915
+ <dynamicField name="*_p" type="location" indexed="true" stored="true" multiValued="false" />
916
+
917
+ <!-- Miscellaneous fields -->
918
+ <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
919
+ <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
920
+ <dynamicField name="random_*" type="random" />
921
+
922
+ <!-- text (_t...): analyzed with text_en, like the _te... group -->
+ <dynamicField name="*_ti" type="text_en" stored="false" indexed="true" multiValued="false" />
923
+ <dynamicField name="*_tim" type="text_en" stored="false" indexed="true" multiValued="true" />
924
+ <dynamicField name="*_ts" type="text_en" stored="true" indexed="false" multiValued="false" />
925
+ <dynamicField name="*_tsm" type="text_en" stored="true" indexed="false" multiValued="true" />
926
+ <dynamicField name="*_tsi" type="text_en" stored="true" indexed="true" multiValued="false" />
927
+ <dynamicField name="*_tsim" type="text_en" stored="true" indexed="true" multiValued="true" />
928
+ <dynamicField name="*_tiv" type="text_en" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
929
+ <dynamicField name="*_timv" type="text_en" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
930
+ <dynamicField name="*_tsiv" type="text_en" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
931
+ <dynamicField name="*_tsimv" type="text_en" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
932
+
933
+ <!-- English text (_te...) -->
934
+ <dynamicField name="*_tei" type="text_en" stored="false" indexed="true" multiValued="false" />
935
+ <dynamicField name="*_teim" type="text_en" stored="false" indexed="true" multiValued="true" />
936
+ <dynamicField name="*_tes" type="text_en" stored="true" indexed="false" multiValued="false" />
937
+ <dynamicField name="*_tesm" type="text_en" stored="true" indexed="false" multiValued="true" />
938
+ <dynamicField name="*_tesi" type="text_en" stored="true" indexed="true" multiValued="false" />
939
+ <dynamicField name="*_tesim" type="text_en" stored="true" indexed="true" multiValued="true" />
940
+ <dynamicField name="*_teiv" type="text_en" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
941
+ <dynamicField name="*_teimv" type="text_en" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
942
+ <dynamicField name="*_tesiv" type="text_en" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
943
+ <dynamicField name="*_tesimv" type="text_en" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
944
+
945
+ <!-- string (_s...) -->
946
+ <dynamicField name="*_si" type="string" stored="false" indexed="true" multiValued="false" />
947
+ <dynamicField name="*_sim" type="string" stored="false" indexed="true" multiValued="true" />
948
+ <dynamicField name="*_ss" type="string" stored="true" indexed="false" multiValued="false" />
949
+ <dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true" />
950
+ <dynamicField name="*_ssi" type="string" stored="true" indexed="true" multiValued="false" />
951
+ <dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true" />
952
+ <dynamicField name="*_ssort" type="alphaSort" stored="false" indexed="true" multiValued="false" />
953
+
954
+ <!-- integer (_i...) -->
955
+ <dynamicField name="*_ii" type="int" stored="false" indexed="true" multiValued="false" />
956
+ <dynamicField name="*_iim" type="int" stored="false" indexed="true" multiValued="true" />
957
+ <dynamicField name="*_is" type="int" stored="true" indexed="false" multiValued="false" />
958
+ <dynamicField name="*_ism" type="int" stored="true" indexed="false" multiValued="true" />
959
+ <dynamicField name="*_isi" type="int" stored="true" indexed="true" multiValued="false" />
960
+ <dynamicField name="*_isim" type="int" stored="true" indexed="true" multiValued="true" />
961
+
962
+ <!-- trie integer (_it...) (for faster range queries) -->
963
+ <dynamicField name="*_iti" type="tint" stored="false" indexed="true" multiValued="false" />
964
+ <dynamicField name="*_itim" type="tint" stored="false" indexed="true" multiValued="true" />
965
+ <dynamicField name="*_its" type="tint" stored="true" indexed="false" multiValued="false" />
966
+ <dynamicField name="*_itsm" type="tint" stored="true" indexed="false" multiValued="true" />
967
+ <dynamicField name="*_itsi" type="tint" stored="true" indexed="true" multiValued="false" />
968
+ <dynamicField name="*_itsim" type="tint" stored="true" indexed="true" multiValued="true" />
969
+
970
+ <!-- date (_dt...) -->
971
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
972
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z -->
973
+ <dynamicField name="*_dti" type="date" stored="false" indexed="true" multiValued="false" />
974
+ <dynamicField name="*_dtim" type="date" stored="false" indexed="true" multiValued="true" />
975
+ <!-- *_dts is already declared with the Blacklight dynamic fields earlier in this file (date, indexed, stored, multi-valued). Declaring the same pattern twice is a duplicate dynamicField definition, so the stored-only variant is disabled here: dynamicField name="*_dts" type="date" stored="true" indexed="false" multiValued="false" /-->
976
+ <dynamicField name="*_dtsm" type="date" stored="true" indexed="false" multiValued="true" />
977
+ <dynamicField name="*_dtsi" type="date" stored="true" indexed="true" multiValued="false" />
978
+ <dynamicField name="*_dtsim" type="date" stored="true" indexed="true" multiValued="true" />
979
+
980
+ <!-- trie date (_dtt...) (for faster range queries) -->
981
+ <dynamicField name="*_dtti" type="tdate" stored="false" indexed="true" multiValued="false" />
982
+ <dynamicField name="*_dttim" type="tdate" stored="false" indexed="true" multiValued="true" />
983
+ <dynamicField name="*_dtts" type="tdate" stored="true" indexed="false" multiValued="false" />
984
+ <dynamicField name="*_dttsm" type="tdate" stored="true" indexed="false" multiValued="true" />
985
+ <dynamicField name="*_dttsi" type="tdate" stored="true" indexed="true" multiValued="false" />
986
+ <dynamicField name="*_dttsim" type="tdate" stored="true" indexed="true" multiValued="true" />
987
+
988
+ <!-- long (_l...) -->
989
+ <dynamicField name="*_li" type="long" stored="false" indexed="true" multiValued="false" />
990
+ <dynamicField name="*_lim" type="long" stored="false" indexed="true" multiValued="true" />
991
+ <dynamicField name="*_ls" type="long" stored="true" indexed="false" multiValued="false" />
992
+ <dynamicField name="*_lsm" type="long" stored="true" indexed="false" multiValued="true" />
993
+ <dynamicField name="*_lsi" type="long" stored="true" indexed="true" multiValued="false" />
994
+ <dynamicField name="*_lsim" type="long" stored="true" indexed="true" multiValued="true" />
995
+
996
+ <!-- trie long (_lt...) (for faster range queries) -->
997
+ <dynamicField name="*_lti" type="tlong" stored="false" indexed="true" multiValued="false" />
998
+ <dynamicField name="*_ltim" type="tlong" stored="false" indexed="true" multiValued="true" />
999
+ <dynamicField name="*_lts" type="tlong" stored="true" indexed="false" multiValued="false" />
1000
+ <dynamicField name="*_ltsm" type="tlong" stored="true" indexed="false" multiValued="true" />
1001
+ <dynamicField name="*_ltsi" type="tlong" stored="true" indexed="true" multiValued="false" />
1002
+ <dynamicField name="*_ltsim" type="tlong" stored="true" indexed="true" multiValued="true" />
1003
+
1004
+ <!-- double (_db...) -->
1005
+ <dynamicField name="*_dbi" type="double" stored="false" indexed="true" multiValued="false" />
1006
+ <dynamicField name="*_dbim" type="double" stored="false" indexed="true" multiValued="true" />
1007
+ <dynamicField name="*_dbs" type="double" stored="true" indexed="false" multiValued="false" />
1008
+ <dynamicField name="*_dbsm" type="double" stored="true" indexed="false" multiValued="true" />
1009
+ <dynamicField name="*_dbsi" type="double" stored="true" indexed="true" multiValued="false" />
1010
+ <dynamicField name="*_dbsim" type="double" stored="true" indexed="true" multiValued="true" />
1011
+
1012
+ <!-- trie double (_dbt...) (for faster range queries) -->
1013
+ <dynamicField name="*_dbti" type="tdouble" stored="false" indexed="true" multiValued="false" />
1014
+ <dynamicField name="*_dbtim" type="tdouble" stored="false" indexed="true" multiValued="true" />
1015
+ <dynamicField name="*_dbts" type="tdouble" stored="true" indexed="false" multiValued="false" />
1016
+ <dynamicField name="*_dbtsm" type="tdouble" stored="true" indexed="false" multiValued="true" />
1017
+ <dynamicField name="*_dbtsi" type="tdouble" stored="true" indexed="true" multiValued="false" />
1018
+ <dynamicField name="*_dbtsim" type="tdouble" stored="true" indexed="true" multiValued="true" />
1019
+
1020
+ <!-- float (_f...) -->
1021
+ <dynamicField name="*_fi" type="float" stored="false" indexed="true" multiValued="false" />
1022
+ <dynamicField name="*_fim" type="float" stored="false" indexed="true" multiValued="true" />
1023
+ <dynamicField name="*_fs" type="float" stored="true" indexed="false" multiValued="false" />
1024
+ <dynamicField name="*_fsm" type="float" stored="true" indexed="false" multiValued="true" />
1025
+ <dynamicField name="*_fsi" type="float" stored="true" indexed="true" multiValued="false" />
1026
+ <dynamicField name="*_fsim" type="float" stored="true" indexed="true" multiValued="true" />
1027
+
1028
+ <!-- trie float (_ft...) (for faster range queries) -->
1029
+ <dynamicField name="*_fti" type="tfloat" stored="false" indexed="true" multiValued="false" />
1030
+ <dynamicField name="*_ftim" type="tfloat" stored="false" indexed="true" multiValued="true" />
1031
+ <dynamicField name="*_fts" type="tfloat" stored="true" indexed="false" multiValued="false" />
1032
+ <dynamicField name="*_ftsm" type="tfloat" stored="true" indexed="false" multiValued="true" />
1033
+ <dynamicField name="*_ftsi" type="tfloat" stored="true" indexed="true" multiValued="false" />
1034
+ <dynamicField name="*_ftsim" type="tfloat" stored="true" indexed="true" multiValued="true" />
1035
+
1036
+ <!-- boolean (_b...) -->
1037
+ <dynamicField name="*_bi" type="boolean" stored="false" indexed="true" multiValued="false" />
1038
+ <dynamicField name="*_bs" type="boolean" stored="true" indexed="false" multiValued="false" />
1039
+ <dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="false" />
1040
+
1041
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
1042
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
1043
+
1044
+ <!-- location (_ll...) -->
1045
+ <dynamicField name="*_lli" type="location" stored="false" indexed="true" multiValued="false" />
1046
+ <dynamicField name="*_llim" type="location" stored="false" indexed="true" multiValued="true" />
1047
+ <dynamicField name="*_lls" type="location" stored="true" indexed="false" multiValued="false" />
1048
+ <dynamicField name="*_llsm" type="location" stored="true" indexed="false" multiValued="true" />
1049
+ <dynamicField name="*_llsi" type="location" stored="true" indexed="true" multiValued="false" />
1050
+ <dynamicField name="*_llsim" type="location" stored="true" indexed="true" multiValued="true" />
1051
+
1052
+ <!-- uncomment the following to ignore any fields that don't already match an existing
1053
+ field name or dynamic field, rather than reporting them as an error.
1054
+ alternately, change the type="ignored" to some other type e.g. "text" if you want
1055
+ unknown fields indexed and/or stored by default -->
1056
+ <!-- dynamicField name="*" type="ignored" multiValued="true" /-->
1057
+ </fields>
1058
+
1059
+ <!-- Field to use to determine and enforce document uniqueness.
1060
+ Unless this field is marked with required="false", it will be a required field -->
1061
+ <uniqueKey>id</uniqueKey>
1062
+
1063
+ <!-- field for the QueryParser to use when an explicit fieldname is absent -->
1064
+ <!-- NOTE(review): believed deprecated since Solr 3.6 in favor of the "df" request parameter; confirm against the target Solr version -->
+ <defaultSearchField>text</defaultSearchField>
1065
+
1066
+ <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
1067
+ <solrQueryParser defaultOperator="OR"/>
1068
+
1069
+ <!-- copyField commands copy one field to another at the time a document
1070
+ is added to the index. It's used either to index the same field differently,
1071
+ or to add multiple fields to the same field for easier/faster searching. -->
1072
+ <!-- Copy Fields -->
1073
+
1074
+ <!-- unstemmed fields -->
1075
+ <copyField source="title_t" dest="title_unstem_search"/>
1076
+ <copyField source="subtitle_t" dest="subtitle_unstem_search"/>
1077
+ <copyField source="title_addl_t" dest="title_addl_unstem_search"/>
1078
+ <copyField source="title_added_entry_t" dest="title_added_entry_unstem_search"/>
1079
+ <copyField source="title_series_t" dest="title_series_unstem_search"/>
1080
+ <copyField source="author_t" dest="author_unstem_search"/>
1081
+ <copyField source="author_addl_t" dest="author_addl_unstem_search"/>
1082
+ <copyField source="subject_t" dest="subject_unstem_search"/>
1083
+ <copyField source="subject_facet" dest="subject_unstem_search"/>
1084
+ <copyField source="contributors_t" dest="contributors_unstem_search"/>
1085
+ <copyField source="lc_callnum_display" dest="lc_callnum_unstem_search"/>
1086
+
1087
+ <!-- display fields that are copied to the text field for searching -->
1088
+ <copyField source="isbn_display" dest="text"/>
1089
+
1090
+ <!-- sort fields -->
1091
+ <!-- NOTE(review): pub_date is declared multiValued="true" while pub_date_sort is multiValued="false"; a document with several pub_date values presumably cannot be copied here - confirm the indexer emits at most one -->
+ <copyField source="pub_date" dest="pub_date_sort"/>
1092
+
1093
+ <!-- spellcheck fields -->
1094
+ <!-- default spell check; should match fields for default request handler -->
1095
+ <!-- it won't work with a copy of a copy field -->
1096
+ <copyField source="*_t" dest="spell" />
1097
+ <copyField source="*_facet" dest="spell" />
1098
+ <!-- title spell check; should match fields for title request handler -->
1099
+ <copyField source="title_t" dest="title_spell" />
1100
+ <copyField source="subtitle_t" dest="title_spell" />
1101
+ <copyField source="addl_titles_t" dest="title_spell" />
1102
+ <copyField source="title_added_entry_t" dest="title_spell" />
1103
+ <copyField source="title_series_t" dest="title_spell" />
1104
+ <!-- author spell check; should match fields for author request handler -->
1105
+ <copyField source="author_t" dest="name_spell" />
1106
+ <copyField source="author_addl_t" dest="name_spell" />
1107
+ <!-- subject spell check; should match fields for subject request handler -->
1108
+ <copyField source="subject_facet" dest="subject_spell" />
1109
+ <copyField source="subject_t" dest="subject_spell" />
1110
+ <!-- Contributor and genre spell check: contributors_t is copied into name_spell (the same field the author rules fill), while genre_t gets its own genre_spell field. -->
1111
+ <copyField source="contributors_t" dest="name_spell" />
1112
+ <copyField source="genre_t" dest="genre_spell" />
1113
+
1114
+ <!-- OpenSearch query field: opensearch_display collects the title, author and subject fields; this should match the request handler search fields. -->
1115
+ <copyField source="title_t" dest="opensearch_display" />
1116
+ <copyField source="subtitle_t" dest="opensearch_display" />
1117
+ <copyField source="addl_titles_t" dest="opensearch_display" />
+ <!-- Additional titles are named title_addl_t by the rule that fills title_addl_unstem_search, so that name is copied as well; otherwise those titles never reach opensearch_display. The addl_titles_t rule above is kept so anything indexed under that name keeps working. -->
+ <copyField source="title_addl_t" dest="opensearch_display" />
1118
+ <copyField source="title_added_entry_t" dest="opensearch_display" />
1119
+ <copyField source="title_series_t" dest="opensearch_display" />
1120
+ <copyField source="author_t" dest="opensearch_display" />
1121
+ <copyField source="author_addl_t" dest="opensearch_display" />
1122
+ <copyField source="subject_facet" dest="opensearch_display" />
1123
+ <copyField source="subject_t" dest="opensearch_display" />
1124
+
1125
+ <!-- Copy the Hydra-style dynamic fields over to Blacklight's suffixes: *_dtsi goes to *_dt; *_teim, *_si, *_sim, *_ssi, *_ssim, *_dtsi and *_dtsim go to *_t; and *_ssm, *_ssi, *_ssim, *_dtsi and *_dtsim go to *_display. The text matched by the source wildcard is reused in the destination name (for example, title_ssm is copied to title_display). -->
1126
+ <copyField source="*_dtsi" dest="*_dt" />
1127
+ <copyField source="*_teim" dest="*_t" />
1128
+ <copyField source="*_si" dest="*_t" />
1129
+ <copyField source="*_sim" dest="*_t" />
1130
+ <copyField source="*_ssi" dest="*_t" />
1131
+ <copyField source="*_ssim" dest="*_t" />
1132
+ <copyField source="*_dtsi" dest="*_t" />
1133
+ <copyField source="*_dtsim" dest="*_t" />
1134
+ <copyField source="*_ssm" dest="*_display" />
1135
+ <copyField source="*_ssi" dest="*_display" />
1136
+ <copyField source="*_ssim" dest="*_display" />
1137
+ <copyField source="*_dtsi" dest="*_display" />
1138
+ <copyField source="*_dtsim" dest="*_display" />
1139
+
1140
+
1141
+ <!-- Above, multiple source fields are copied to the [text] field.
1142
+ Another way to map multiple source fields to the same
1143
+ destination field is to use a wildcard (dynamic field) pattern as the source.
1144
+ copyField also supports a maxChars attribute that limits how many characters are copied. -->
1145
+
1146
+ <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
1147
+
1148
+ <!-- copy name to alphaNameSort, a field designed for sorting by name -->
1149
+ <!-- <copyField source="name" dest="alphaNameSort"/> -->
1150
+
1151
+
1152
+ <!-- Similarity is the scoring routine for each document vs. a query.
1153
+ A custom Similarity or SimilarityFactory may be specified here, but
1154
+ the default is fine for most applications.
1155
+ For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
1156
+ -->
1157
+ <!--
1158
+ <similarity class="com.example.solr.CustomSimilarityFactory">
1159
+ <str name="paramkey">param value</str>
1160
+ </similarity>
1161
+ -->
1162
+
1163
+ </schema>