solr_wrapper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,21 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ #-----------------------------------------------------------------------
14
+ # Use a protected word file to protect against the stemmer reducing two
15
+ # unrelated words to the same base word.
16
+
17
+ # Some non-words that normally won't be encountered,
18
+ # just to test that they won't be stemmed.
19
+ dontstems
20
+ zwhacky
21
+
@@ -0,0 +1,529 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ This is the Solr schema file. This file should be named "schema.xml" and
21
+ should be in the conf directory under the solr home
22
+ (i.e. ./solr/conf/schema.xml by default)
23
+ or located where the classloader for the Solr webapp can find it.
24
+
25
+ This example schema is the recommended starting point for users.
26
+ It should be kept correct and concise, usable out-of-the-box.
27
+
28
+ For more information on how to customize this file, please see
29
+ http://wiki.apache.org/solr/SchemaXml
30
+ -->
31
+
32
+ <schema name="example" version="1.5">
33
+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
34
+ version="x.y" is Solr's version number for the schema syntax and
35
+ semantics. It should not normally be changed by applications.
36
+
37
+ 1.0: multiValued attribute did not exist, all fields are multiValued
38
+ by nature
39
+ 1.1: multiValued attribute introduced, false by default
40
+ 1.2: omitTermFreqAndPositions attribute introduced, true by default
41
+ except for text fields.
42
+ 1.3: removed optional field compress feature
43
+ 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser
44
+ behavior when a single string produces multiple tokens. Defaults
45
+ to off for version >= 1.4
46
+ 1.5: omitNorms defaults to true for primitive field types
47
+ (int, float, boolean, string...)
48
+ -->
49
+
50
+
51
+ <!-- Valid attributes for fields:
52
+ name: mandatory - the name for the field
53
+ type: mandatory - the name of a field type from the
54
+ <types> fieldType section
55
+ indexed: true if this field should be indexed (searchable or sortable)
56
+ stored: true if this field should be retrievable
57
+ docValues: true if this field should have doc values. Doc values are
58
+ useful for faceting, grouping, sorting and function queries. Although not
59
+ required, doc values will make the index faster to load, more
60
+ NRT-friendly and more memory-efficient. They however come with some
61
+ limitations: they are currently only supported by StrField, UUIDField
62
+ and all Trie*Fields, and depending on the field type, they might
63
+ require the field to be single-valued, be required or have a default
64
+ value (check the documentation of the field type you're interested in
65
+ for more information)
66
+ multiValued: true if this field may contain multiple values per document
67
+ omitNorms: (expert) set to true to omit the norms associated with
68
+ this field (this disables length normalization and index-time
69
+ boosting for the field, and saves some memory). Only full-text
70
+ fields or fields that need an index-time boost need norms.
71
+ Norms are omitted for primitive (non-analyzed) types by default.
72
+ termVectors: [false] set to true to store the term vector for a
73
+ given field.
74
+ When using MoreLikeThis, fields used for similarity should be
75
+ stored for best performance.
76
+ termPositions: Store position information with the term vector.
77
+ This will increase storage costs.
78
+ termOffsets: Store offset information with the term vector. This
79
+ will increase storage costs.
80
+ required: The field is required. It will throw an error if the
81
+ value does not exist
82
+ default: a value that should be used if no value is specified
83
+ when adding a document.
84
+ -->
85
+
86
+ <!-- field names should consist of alphanumeric or underscore characters only and
87
+ not start with a digit. This is not currently strictly enforced,
88
+ but other field names will not have first class support from all components
89
+ and back compatibility is not guaranteed. Names with both leading and
90
+ trailing underscores (e.g. _version_) are reserved.
91
+ -->
92
+
93
+ <!-- If you remove this field, you must _also_ disable the update log in solrconfig.xml
94
+ or Solr won't start. _version_ and update log are required for SolrCloud
95
+ -->
96
+ <field name="_version_" type="long" indexed="true" stored="true"/>
97
+
98
+ <!-- points to the root document of a block of nested documents. Required for nested
99
+ document support, may be removed otherwise
100
+ -->
101
+ <field name="_root_" type="string" indexed="true" stored="false"/>
102
+
103
+ <!-- Only remove the "id" field if you have a very good reason to. While not strictly
104
+ required, it is highly recommended. A <uniqueKey> is present in almost all Solr
105
+ installations. See the <uniqueKey> declaration below where <uniqueKey> is set to "id".
106
+ -->
107
+ <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
108
+
109
+ <!-- Dynamic field definitions allow using convention over configuration
110
+ for fields via the specification of patterns to match field names.
111
+ EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
112
+ RESTRICTION: the glob-like pattern in the name attribute must have
113
+ a "*" only at the start or the end. -->
114
+
115
+ <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
116
+ <dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/>
117
+ <dynamicField name="*_s" type="string" indexed="true" stored="true" />
118
+ <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
119
+ <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
120
+ <dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/>
121
+ <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
122
+ <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
123
+ <dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/>
124
+ <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
125
+ <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
126
+ <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
127
+ <dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/>
128
+ <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
129
+ <dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/>
130
+
131
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
132
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
133
+
134
+ <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
135
+ <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
136
+ <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
137
+
138
+ <!-- some trie-coded dynamic fields for faster range queries -->
139
+ <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
140
+ <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
141
+ <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
142
+ <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
143
+ <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
144
+
145
+ <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
146
+
147
+ <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
148
+ <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
149
+
150
+ <dynamicField name="random_*" type="random" />
151
+
152
+ <!-- uncomment the following to ignore any fields that don't already match an existing
153
+ field name or dynamic field, rather than reporting them as an error.
154
+ alternately, change the type="ignored" to some other type e.g. "text" if you want
155
+ unknown fields indexed and/or stored by default -->
156
+ <!--dynamicField name="*" type="ignored" multiValued="true" /-->
157
+
158
+ <!-- Field to use to determine and enforce document uniqueness.
159
+ Unless this field is marked with required="false", it will be a required field
160
+ -->
161
+ <uniqueKey>id</uniqueKey>
162
+
163
+ <!-- copyField commands copy one field to another at the time a document
164
+ is added to the index. It's used either to index the same field differently,
165
+ or to add multiple fields to the same field for easier/faster searching. -->
166
+
167
+ <!--
168
+ <copyField source="title" dest="text"/>
169
+ <copyField source="body" dest="text"/>
170
+ -->
171
+
172
+ <!-- field type definitions. The "name" attribute is
173
+ just a label to be used by field definitions. The "class"
174
+ attribute and any other attributes determine the real
175
+ behavior of the fieldType.
176
+ Class names starting with "solr" refer to java classes in a
177
+ standard package such as org.apache.solr.analysis
178
+ -->
179
+
180
+ <!-- The StrField type is not analyzed, but indexed/stored verbatim.
181
+ It supports doc values but in that case the field needs to be
182
+ single-valued and either required or have a default value.
183
+ -->
184
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
185
+
186
+ <!-- boolean type: "true" or "false" -->
187
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
188
+
189
+ <!-- The optional sortMissingLast and sortMissingFirst attributes are
190
+ currently supported on types that are sorted internally as strings
191
+ and on numeric types.
192
+ This includes "string","boolean", and, as of 3.5 (and 4.x),
193
+ int, float, long, date, double, including the "Trie" variants.
194
+ - If sortMissingLast="true", then a sort on this field will cause documents
195
+ without the field to come after documents with the field,
196
+ regardless of the requested sort order (asc or desc).
197
+ - If sortMissingFirst="true", then a sort on this field will cause documents
198
+ without the field to come before documents with the field,
199
+ regardless of the requested sort order.
200
+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
201
+ then default lucene sorting will be used which places docs without the
202
+ field first in an ascending sort and last in a descending sort.
203
+ -->
204
+
205
+ <!--
206
+ Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
207
+
208
+ These fields support doc values, but they require the field to be
209
+ single-valued and either be required or have a default value.
210
+ -->
211
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
212
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
213
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
214
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
215
+
216
+ <!--
217
+ Numeric field types that index each value at various levels of precision
218
+ to accelerate range queries when the number of values between the range
219
+ endpoints is large. See the javadoc for NumericRangeQuery for internal
220
+ implementation details.
221
+
222
+ Smaller precisionStep values (specified in bits) will lead to more tokens
223
+ indexed per value, slightly larger index size, and faster range queries.
224
+ A precisionStep of 0 disables indexing at different precision levels.
225
+ -->
226
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
227
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
228
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
229
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
230
+
231
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
232
+ is a more restricted form of the canonical representation of dateTime
233
+ http://www.w3.org/TR/xmlschema-2/#dateTime
234
+ The trailing "Z" designates UTC time and is mandatory.
235
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
236
+ All other components are mandatory.
237
+
238
+ Expressions can also be used to denote calculations that should be
239
+ performed relative to "NOW" to determine the value, e.g.
240
+
241
+ NOW/HOUR
242
+ ... Round to the start of the current hour
243
+ NOW-1DAY
244
+ ... Exactly 1 day prior to now
245
+ NOW/DAY+6MONTHS+3DAYS
246
+ ... 6 months and 3 days in the future from the start of
247
+ the current day
248
+
249
+ Consult the TrieDateField javadocs for more information.
250
+
251
+ Note: For faster range queries, consider the tdate type
252
+ -->
253
+ <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
254
+
255
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
256
+ <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
257
+
258
+
259
+ <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
260
+ <fieldType name="binary" class="solr.BinaryField"/>
261
+
262
+ <!-- The "RandomSortField" is not used to store or search any
263
+ data. You can declare fields of this type in your schema
264
+ to generate pseudo-random orderings of your docs for sorting
265
+ or function purposes. The ordering is generated based on the field
266
+ name and the version of the index. As long as the index version
267
+ remains unchanged, and the same field name is reused,
268
+ the ordering of the docs will be consistent.
269
+ If you want different pseudo-random orderings of documents,
270
+ for the same version of the index, use a dynamicField and
271
+ change the field name in the request.
272
+ -->
273
+ <fieldType name="random" class="solr.RandomSortField" indexed="true" />
274
+
275
+ <!-- solr.TextField allows the specification of custom text analyzers
276
+ specified as a tokenizer and a list of token filters. Different
277
+ analyzers may be specified for indexing and querying.
278
+
279
+ The optional positionIncrementGap puts space between multiple fields of
280
+ this type on the same document, with the purpose of preventing false phrase
281
+ matching across fields.
282
+
283
+ For more info on customizing your analyzer chain, please see
284
+ http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
285
+ -->
286
+
287
+ <!-- One can also specify an existing Analyzer class that has a
288
+ default constructor via the class attribute on the analyzer element.
289
+ Example:
290
+ <fieldType name="text_greek" class="solr.TextField">
291
+ <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
292
+ </fieldType>
293
+ -->
294
+
295
+ <!-- A text field that only splits on whitespace for exact matching of words -->
296
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
297
+ <analyzer>
298
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
299
+ </analyzer>
300
+ </fieldType>
301
+
302
+ <!-- A general text field that has reasonable, generic
303
+ cross-language defaults: it tokenizes with StandardTokenizer,
304
+ removes stop words from case-insensitive "stopwords.txt"
305
+ (empty by default), and down cases. At query time only, it
306
+ also applies synonyms. -->
307
+ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
308
+ <analyzer type="index">
309
+ <tokenizer class="solr.StandardTokenizerFactory"/>
310
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
311
+ <!-- in this example, we will only use synonyms at query time
312
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
313
+ -->
314
+ <filter class="solr.LowerCaseFilterFactory"/>
315
+ </analyzer>
316
+ <analyzer type="query">
317
+ <tokenizer class="solr.StandardTokenizerFactory"/>
318
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
319
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
320
+ <filter class="solr.LowerCaseFilterFactory"/>
321
+ </analyzer>
322
+ </fieldType>
323
+
324
+ <!-- A text field with defaults appropriate for English: it
325
+ tokenizes with StandardTokenizer, removes English stop words
326
+ (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
327
+ finally applies Porter's stemming. The query time analyzer
328
+ also applies synonyms from synonyms.txt. -->
329
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
330
+ <analyzer type="index">
331
+ <tokenizer class="solr.StandardTokenizerFactory"/>
332
+ <!-- in this example, we will only use synonyms at query time
333
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
334
+ -->
335
+ <!-- Case insensitive stop word removal.
336
+ -->
337
+ <filter class="solr.StopFilterFactory"
338
+ ignoreCase="true"
339
+ words="lang/stopwords_en.txt"
340
+ />
341
+ <filter class="solr.LowerCaseFilterFactory"/>
342
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
343
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
344
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
345
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
346
+ -->
347
+ <filter class="solr.PorterStemFilterFactory"/>
348
+ </analyzer>
349
+ <analyzer type="query">
350
+ <tokenizer class="solr.StandardTokenizerFactory"/>
351
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
352
+ <filter class="solr.StopFilterFactory"
353
+ ignoreCase="true"
354
+ words="lang/stopwords_en.txt"
355
+ />
356
+ <filter class="solr.LowerCaseFilterFactory"/>
357
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
358
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
359
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
360
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
361
+ -->
362
+ <filter class="solr.PorterStemFilterFactory"/>
363
+ </analyzer>
364
+ </fieldType>
365
+
366
+ <!-- A text field with defaults appropriate for English, plus
367
+ aggressive word-splitting and autophrase features enabled.
368
+ This field is just like text_en, except it adds
369
+ WordDelimiterFilter to enable splitting and matching of
370
+ words on case-change, alpha numeric boundaries, and
371
+ non-alphanumeric chars. This means certain compound word
372
+ cases will work, for example query "wi fi" will match
373
+ document "WiFi" or "wi-fi".
374
+ -->
375
+ <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
376
+ <analyzer type="index">
377
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
378
+ <!-- in this example, we will only use synonyms at query time
379
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
380
+ -->
381
+ <!-- Case insensitive stop word removal.
382
+ -->
383
+ <filter class="solr.StopFilterFactory"
384
+ ignoreCase="true"
385
+ words="lang/stopwords_en.txt"
386
+ />
387
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
388
+ <filter class="solr.LowerCaseFilterFactory"/>
389
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
390
+ <filter class="solr.PorterStemFilterFactory"/>
391
+ </analyzer>
392
+ <analyzer type="query">
393
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
394
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
395
+ <filter class="solr.StopFilterFactory"
396
+ ignoreCase="true"
397
+ words="lang/stopwords_en.txt"
398
+ />
399
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
400
+ <filter class="solr.LowerCaseFilterFactory"/>
401
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
402
+ <filter class="solr.PorterStemFilterFactory"/>
403
+ </analyzer>
404
+ </fieldType>
405
+
406
+ <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
407
+ but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
408
+ <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
409
+ <analyzer>
410
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
411
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
412
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
413
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
414
+ <filter class="solr.LowerCaseFilterFactory"/>
415
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
416
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
417
+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
418
+ possible with WordDelimiterFilter in conjunction with stemming. -->
419
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
420
+ </analyzer>
421
+ </fieldType>
422
+
423
+ <!-- Just like text_general except it reverses the characters of
424
+ each token, to enable more efficient leading wildcard queries. -->
425
+ <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
426
+ <analyzer type="index">
427
+ <tokenizer class="solr.StandardTokenizerFactory"/>
428
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
429
+ <filter class="solr.LowerCaseFilterFactory"/>
430
+ <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
431
+ maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
432
+ </analyzer>
433
+ <analyzer type="query">
434
+ <tokenizer class="solr.StandardTokenizerFactory"/>
435
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
436
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
437
+ <filter class="solr.LowerCaseFilterFactory"/>
438
+ </analyzer>
439
+ </fieldType>
440
+
441
+ <!-- This is an example of using the KeywordTokenizer along
442
+ with various TokenFilterFactories to produce a sortable field
443
+ that does not include some properties of the source text
444
+ -->
445
+ <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
446
+ <analyzer>
447
+ <!-- KeywordTokenizer does no actual tokenizing, so the entire
448
+ input string is preserved as a single token
449
+ -->
450
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
451
+ <!-- The LowerCase TokenFilter does what you expect, which can be useful
452
+ when you want your sorting to be case insensitive
453
+ -->
454
+ <filter class="solr.LowerCaseFilterFactory" />
455
+ <!-- The TrimFilter removes any leading or trailing whitespace -->
456
+ <filter class="solr.TrimFilterFactory" />
457
+ <!-- The PatternReplaceFilter gives you the flexibility to use
458
+ Java Regular expression to replace any sequence of characters
459
+ matching a pattern with an arbitrary replacement string,
460
+ which may include back references to portions of the original
461
+ string matched by the pattern.
462
+
463
+ See the Java Regular Expression documentation for more
464
+ information on pattern and replacement string syntax.
465
+
466
+ http://docs.oracle.com/javase/7/docs/api/java/util/regex/package-summary.html
467
+ -->
468
+ <filter class="solr.PatternReplaceFilterFactory"
469
+ pattern="([^a-z])" replacement="" replace="all"
470
+ />
471
+ </analyzer>
472
+ </fieldType>
473
+
474
+ <!-- lowercases the entire field value, keeping it as a single token. -->
475
+ <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
476
+ <analyzer>
477
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
478
+ <filter class="solr.LowerCaseFilterFactory" />
479
+ </analyzer>
480
+ </fieldType>
481
+
482
+ <!-- since fields of this type are by default not stored or indexed,
483
+ any data added to them will be ignored outright. -->
484
+ <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
485
+
486
+ <!-- This point type indexes the coordinates as separate fields (subFields)
487
+ If subFieldType is defined, it references a type, and a dynamic field
488
+ definition is created matching *___<typename>. Alternately, if
489
+ subFieldSuffix is defined, that is used to create the subFields.
490
+ Example: if subFieldType="double", then the coordinates would be
491
+ indexed in fields myloc_0___double,myloc_1___double.
492
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
493
+ in fields myloc_0_d,myloc_1_d
494
+ The subFields are an implementation detail of the fieldType, and end
495
+ users normally should not need to know about them.
496
+ -->
497
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
498
+
499
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
500
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
501
+
502
+ <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
503
+ For more information about this and other Spatial fields new to Solr 4, see:
504
+ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
505
+ -->
506
+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
507
+ geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" />
508
+
509
+ <!-- Spatial rectangle (bounding box) field. It supports most spatial predicates, and has
510
+ special relevancy modes: score=overlapRatio|area|area2D (local-param to the query). DocValues is recommended for
511
+ relevancy. -->
512
+ <fieldType name="bbox" class="solr.BBoxField"
513
+ geo="true" distanceUnits="kilometers" numberType="_bbox_coord" />
514
+ <fieldType name="_bbox_coord" class="solr.TrieDoubleField" precisionStep="8" docValues="true" stored="false"/>
515
+
516
+ <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
517
+ Parameters:
518
+ defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
519
+ precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
520
+ providerClass: Lets you plug in other exchange provider backend:
521
+ solr.FileExchangeRateProvider is the default and takes one parameter:
522
+ currencyConfig: name of an xml file holding exchange rates
523
+ solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
524
+ ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
525
+ refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
526
+ -->
527
+ <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
528
+
529
+ </schema>