supernova 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +2 -0
  3. data/VERSION +1 -1
  4. data/lib/supernova/criteria.rb +6 -2
  5. data/lib/supernova/solr.rb +1 -0
  6. data/lib/supernova/solr_criteria.rb +10 -1
  7. data/lib/supernova/solr_indexer.rb +71 -0
  8. data/solr/conf/admin-extra.html +31 -0
  9. data/solr/conf/elevate.xml +36 -0
  10. data/solr/conf/mapping-FoldToASCII.txt +3813 -0
  11. data/solr/conf/mapping-ISOLatin1Accent.txt +246 -0
  12. data/solr/conf/protwords.txt +21 -0
  13. data/solr/conf/schema.xml +475 -0
  14. data/solr/conf/scripts.conf +24 -0
  15. data/solr/conf/solrconfig.xml +1508 -0
  16. data/solr/conf/spellings.txt +2 -0
  17. data/solr/conf/stopwords.txt +58 -0
  18. data/solr/conf/synonyms.txt +29 -0
  19. data/solr/conf/velocity/VM_global_library.vm +184 -0
  20. data/solr/conf/velocity/browse.vm +45 -0
  21. data/solr/conf/velocity/cluster.vm +26 -0
  22. data/solr/conf/velocity/clusterResults.vm +29 -0
  23. data/solr/conf/velocity/doc.vm +29 -0
  24. data/solr/conf/velocity/facet_dates.vm +0 -0
  25. data/solr/conf/velocity/facet_fields.vm +12 -0
  26. data/solr/conf/velocity/facet_queries.vm +3 -0
  27. data/solr/conf/velocity/facet_ranges.vm +30 -0
  28. data/solr/conf/velocity/facets.vm +7 -0
  29. data/solr/conf/velocity/footer.vm +17 -0
  30. data/solr/conf/velocity/head.vm +45 -0
  31. data/solr/conf/velocity/header.vm +3 -0
  32. data/solr/conf/velocity/hit.vm +5 -0
  33. data/solr/conf/velocity/jquery.autocomplete.css +48 -0
  34. data/solr/conf/velocity/jquery.autocomplete.js +762 -0
  35. data/solr/conf/velocity/layout.vm +20 -0
  36. data/solr/conf/velocity/main.css +184 -0
  37. data/solr/conf/velocity/query.vm +56 -0
  38. data/solr/conf/velocity/querySpatial.vm +40 -0
  39. data/solr/conf/velocity/suggest.vm +3 -0
  40. data/solr/conf/velocity/tabs.vm +22 -0
  41. data/solr/conf/xslt/example.xsl +132 -0
  42. data/solr/conf/xslt/example_atom.xsl +67 -0
  43. data/solr/conf/xslt/example_rss.xsl +66 -0
  44. data/solr/conf/xslt/luke.xsl +337 -0
  45. data/spec/integration/solr_spec.rb +6 -0
  46. data/spec/spec_helper.rb +1 -1
  47. data/spec/supernova/solr_criteria_spec.rb +16 -0
  48. data/spec/supernova/solr_indexer_spec.rb +167 -0
  49. data/supernova.gemspec +45 -3
  50. metadata +91 -36
@@ -0,0 +1,246 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ # Syntax:
14
+ # "source" => "target"
15
+ # "source".length() > 0 (source cannot be empty.)
16
+ # "target".length() >= 0 (target can be empty.)
17
+
18
+ # example:
19
+ # "À" => "A"
20
+ # "\u00C0" => "A"
21
+ # "\u00C0" => "\u0041"
22
+ # "ß" => "ss"
23
+ # "\t" => " "
24
+ # "\n" => ""
25
+
26
+ # À => A
27
+ "\u00C0" => "A"
28
+
29
+ # Á => A
30
+ "\u00C1" => "A"
31
+
32
+ # Â => A
33
+ "\u00C2" => "A"
34
+
35
+ # Ã => A
36
+ "\u00C3" => "A"
37
+
38
+ # Ä => A
39
+ "\u00C4" => "A"
40
+
41
+ # Å => A
42
+ "\u00C5" => "A"
43
+
44
+ # Æ => AE
45
+ "\u00C6" => "AE"
46
+
47
+ # Ç => C
48
+ "\u00C7" => "C"
49
+
50
+ # È => E
51
+ "\u00C8" => "E"
52
+
53
+ # É => E
54
+ "\u00C9" => "E"
55
+
56
+ # Ê => E
57
+ "\u00CA" => "E"
58
+
59
+ # Ë => E
60
+ "\u00CB" => "E"
61
+
62
+ # Ì => I
63
+ "\u00CC" => "I"
64
+
65
+ # Í => I
66
+ "\u00CD" => "I"
67
+
68
+ # Î => I
69
+ "\u00CE" => "I"
70
+
71
+ # Ï => I
72
+ "\u00CF" => "I"
73
+
74
+ # Ĳ => IJ
75
+ "\u0132" => "IJ"
76
+
77
+ # Ð => D
78
+ "\u00D0" => "D"
79
+
80
+ # Ñ => N
81
+ "\u00D1" => "N"
82
+
83
+ # Ò => O
84
+ "\u00D2" => "O"
85
+
86
+ # Ó => O
87
+ "\u00D3" => "O"
88
+
89
+ # Ô => O
90
+ "\u00D4" => "O"
91
+
92
+ # Õ => O
93
+ "\u00D5" => "O"
94
+
95
+ # Ö => O
96
+ "\u00D6" => "O"
97
+
98
+ # Ø => O
99
+ "\u00D8" => "O"
100
+
101
+ # Œ => OE
102
+ "\u0152" => "OE"
103
+
104
+ # Þ => TH
105
+ "\u00DE" => "TH"
106
+
107
+ # Ù => U
108
+ "\u00D9" => "U"
109
+
110
+ # Ú => U
111
+ "\u00DA" => "U"
112
+
113
+ # Û => U
114
+ "\u00DB" => "U"
115
+
116
+ # Ü => U
117
+ "\u00DC" => "U"
118
+
119
+ # Ý => Y
120
+ "\u00DD" => "Y"
121
+
122
+ # Ÿ => Y
123
+ "\u0178" => "Y"
124
+
125
+ # à => a
126
+ "\u00E0" => "a"
127
+
128
+ # á => a
129
+ "\u00E1" => "a"
130
+
131
+ # â => a
132
+ "\u00E2" => "a"
133
+
134
+ # ã => a
135
+ "\u00E3" => "a"
136
+
137
+ # ä => a
138
+ "\u00E4" => "a"
139
+
140
+ # å => a
141
+ "\u00E5" => "a"
142
+
143
+ # æ => ae
144
+ "\u00E6" => "ae"
145
+
146
+ # ç => c
147
+ "\u00E7" => "c"
148
+
149
+ # è => e
150
+ "\u00E8" => "e"
151
+
152
+ # é => e
153
+ "\u00E9" => "e"
154
+
155
+ # ê => e
156
+ "\u00EA" => "e"
157
+
158
+ # ë => e
159
+ "\u00EB" => "e"
160
+
161
+ # ì => i
162
+ "\u00EC" => "i"
163
+
164
+ # í => i
165
+ "\u00ED" => "i"
166
+
167
+ # î => i
168
+ "\u00EE" => "i"
169
+
170
+ # ï => i
171
+ "\u00EF" => "i"
172
+
173
+ # ĳ => ij
174
+ "\u0133" => "ij"
175
+
176
+ # ð => d
177
+ "\u00F0" => "d"
178
+
179
+ # ñ => n
180
+ "\u00F1" => "n"
181
+
182
+ # ò => o
183
+ "\u00F2" => "o"
184
+
185
+ # ó => o
186
+ "\u00F3" => "o"
187
+
188
+ # ô => o
189
+ "\u00F4" => "o"
190
+
191
+ # õ => o
192
+ "\u00F5" => "o"
193
+
194
+ # ö => o
195
+ "\u00F6" => "o"
196
+
197
+ # ø => o
198
+ "\u00F8" => "o"
199
+
200
+ # œ => oe
201
+ "\u0153" => "oe"
202
+
203
+ # ß => ss
204
+ "\u00DF" => "ss"
205
+
206
+ # þ => th
207
+ "\u00FE" => "th"
208
+
209
+ # ù => u
210
+ "\u00F9" => "u"
211
+
212
+ # ú => u
213
+ "\u00FA" => "u"
214
+
215
+ # û => u
216
+ "\u00FB" => "u"
217
+
218
+ # ü => u
219
+ "\u00FC" => "u"
220
+
221
+ # ý => y
222
+ "\u00FD" => "y"
223
+
224
+ # ÿ => y
225
+ "\u00FF" => "y"
226
+
227
+ # ﬀ => ff
228
+ "\uFB00" => "ff"
229
+
230
+ # ﬁ => fi
231
+ "\uFB01" => "fi"
232
+
233
+ # ﬂ => fl
234
+ "\uFB02" => "fl"
235
+
236
+ # ﬃ => ffi
237
+ "\uFB03" => "ffi"
238
+
239
+ # ﬄ => ffl
240
+ "\uFB04" => "ffl"
241
+
242
+ # ﬅ => ft
243
+ "\uFB05" => "ft"
244
+
245
+ # ﬆ => st
246
+ "\uFB06" => "st"
@@ -0,0 +1,21 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ #-----------------------------------------------------------------------
14
+ # Use a protected word file to protect against the stemmer reducing two
15
+ # unrelated words to the same base word.
16
+
17
+ # Some non-words that normally won't be encountered,
18
+ # just to test that they won't be stemmed.
19
+ dontstems
20
+ zwhacky
21
+
@@ -0,0 +1,475 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ This is the Solr schema file. This file should be named "schema.xml" and
21
+ should be in the conf directory under the solr home
22
+ (i.e. ./solr/conf/schema.xml by default)
23
+ or located where the classloader for the Solr webapp can find it.
24
+
25
+ This example schema is the recommended starting point for users.
26
+ It should be kept correct and concise, usable out-of-the-box.
27
+
28
+ For more information, on how to customize this file, please see
29
+ http://wiki.apache.org/solr/SchemaXml
30
+
31
+ PERFORMANCE NOTE: this schema includes many optional features and should not
32
+ be used for benchmarking. To improve performance one could
33
+ - set stored="false" for all fields possible (esp large fields) when you
34
+ only need to search on the field but don't need to return the original
35
+ value.
36
+ - set indexed="false" if you don't need to search on the field, but only
37
+ return the field as a result of searching on other indexed fields.
38
+ - remove all unneeded copyField statements
39
+ - for best index size and searching performance, set "indexed" to false
40
+ for all general text fields, use copyField to copy them to the
41
+ catchall "text" field, and use that for searching.
42
+ - For maximum indexing performance, use the StreamingUpdateSolrServer
43
+ java client.
44
+ - Remember to run the JVM in server mode, and use a higher logging level
45
+ that avoids logging every request
46
+ -->
47
+
48
+ <schema name="example" version="1.3">
49
+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
50
+ Applications should change this to reflect the nature of the search collection.
51
+ version="1.3" is Solr's version number for the schema syntax and semantics. It should
52
+ not normally be changed by applications.
53
+ 1.0: multiValued attribute did not exist, all fields are multiValued by nature
54
+ 1.1: multiValued attribute introduced, false by default
55
+ 1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
56
+ 1.3: removed optional field compress feature
57
+ -->
58
+
59
+ <types>
60
+ <!-- field type definitions. The "name" attribute is
61
+ just a label to be used by field definitions. The "class"
62
+ attribute and any other attributes determine the real
63
+ behavior of the fieldType.
64
+ Class names starting with "solr" refer to java classes in the
65
+ org.apache.solr.analysis package.
66
+ -->
67
+
68
+ <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
69
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
70
+
71
+ <!-- boolean type: "true" or "false" -->
72
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
73
+ <!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
74
+ <fieldtype name="binary" class="solr.BinaryField"/>
75
+
76
+ <!-- The optional sortMissingLast and sortMissingFirst attributes are
77
+ currently supported on types that are sorted internally as strings.
78
+ This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"
79
+ - If sortMissingLast="true", then a sort on this field will cause documents
80
+ without the field to come after documents with the field,
81
+ regardless of the requested sort order (asc or desc).
82
+ - If sortMissingFirst="true", then a sort on this field will cause documents
83
+ without the field to come before documents with the field,
84
+ regardless of the requested sort order.
85
+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
86
+ then default lucene sorting will be used which places docs without the
87
+ field first in an ascending sort and last in a descending sort.
88
+ -->
89
+
90
+ <!--
91
+ Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
92
+ -->
93
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
94
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
95
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
96
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
97
+
98
+ <!--
99
+ Numeric field types that index each value at various levels of precision
100
+ to accelerate range queries when the number of values between the range
101
+ endpoints is large. See the javadoc for NumericRangeQuery for internal
102
+ implementation details.
103
+
104
+ Smaller precisionStep values (specified in bits) will lead to more tokens
105
+ indexed per value, slightly larger index size, and faster range queries.
106
+ A precisionStep of 0 disables indexing at different precision levels.
107
+ -->
108
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
109
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
110
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
111
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
112
+
113
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
114
+ is a more restricted form of the canonical representation of dateTime
115
+ http://www.w3.org/TR/xmlschema-2/#dateTime
116
+ The trailing "Z" designates UTC time and is mandatory.
117
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
118
+ All other components are mandatory.
119
+
120
+ Expressions can also be used to denote calculations that should be
121
+ performed relative to "NOW" to determine the value, ie...
122
+
123
+ NOW/HOUR
124
+ ... Round to the start of the current hour
125
+ NOW-1DAY
126
+ ... Exactly 1 day prior to now
127
+ NOW/DAY+6MONTHS+3DAYS
128
+ ... 6 months and 3 days in the future from the start of
129
+ the current day
130
+
131
+ Consult the DateField javadocs for more information.
132
+
133
+ Note: For faster range queries, consider the tdate type
134
+ -->
135
+ <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
136
+
137
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
138
+ <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
139
+
140
+
141
+ <!--
142
+ Note:
143
+ These should only be used for compatibility with existing indexes (created with older Solr versions)
144
+ or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
145
+
146
+ Plain numeric field types that store and index the text
147
+ value verbatim (and hence don't support range queries, since the
148
+ lexicographic ordering isn't equal to the numeric ordering)
149
+ -->
150
+ <fieldType name="pint" class="solr.IntField" omitNorms="true"/>
151
+ <fieldType name="plong" class="solr.LongField" omitNorms="true"/>
152
+ <fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
153
+ <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/>
154
+ <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
155
+ <fieldType name="nGram" class="solr.TextField" positionIncrementGap="100" stored="false" multiValued="true">
156
+ <analyzer type="index">
157
+ <tokenizer class="solr.StandardTokenizerFactory"/>
158
+ <filter class="solr.LowerCaseFilterFactory"/>
159
+ <filter class="solr.NGramFilterFactory" minGramSize="2" maxGramSize="15"/>
160
+ </analyzer>
161
+ <analyzer type="query">
162
+ <tokenizer class="solr.StandardTokenizerFactory"/>
163
+ <filter class="solr.LowerCaseFilterFactory"/>
164
+ </analyzer>
165
+ </fieldType>
166
+
167
+ <!--
168
+ Note:
169
+ These should only be used for compatibility with existing indexes (created with older Solr versions)
170
+ or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
171
+
172
+ Numeric field types that manipulate the value into
173
+ a string value that isn't human-readable in its internal form,
174
+ but with a lexicographic ordering the same as the numeric ordering,
175
+ so that range queries work correctly.
176
+ -->
177
+ <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
178
+ <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
179
+ <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
180
+ <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
181
+
182
+
183
+ <!-- The "RandomSortField" is not used to store or search any
184
+ data. You can declare fields of this type in your schema
185
+ to generate pseudo-random orderings of your docs for sorting
186
+ purposes. The ordering is generated based on the field name
187
+ and the version of the index. As long as the index version
188
+ remains unchanged, and the same field name is reused,
189
+ the ordering of the docs will be consistent.
190
+ If you want different pseudo-random orderings of documents,
191
+ for the same version of the index, use a dynamicField and
192
+ change the name
193
+ -->
194
+ <fieldType name="random" class="solr.RandomSortField" indexed="true" />
195
+
196
+ <!-- solr.TextField allows the specification of custom text analyzers
197
+ specified as a tokenizer and a list of token filters. Different
198
+ analyzers may be specified for indexing and querying.
199
+
200
+ The optional positionIncrementGap puts space between multiple fields of
201
+ this type on the same document, with the purpose of preventing false phrase
202
+ matching across fields.
203
+
204
+ For more info on customizing your analyzer chain, please see
205
+ http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
206
+ -->
207
+
208
+ <!-- One can also specify an existing Analyzer class that has a
209
+ default constructor via the class attribute on the analyzer element
210
+ <fieldType name="text_greek" class="solr.TextField">
211
+ <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
212
+ </fieldType>
213
+ -->
214
+
215
+ <!-- A text field that only splits on whitespace for exact matching of words -->
216
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
217
+ <analyzer>
218
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
219
+ </analyzer>
220
+ </fieldType>
221
+
222
+ <!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
223
+ words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
224
+ so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
225
+ Synonyms and stopwords are customized by external files, and stemming is enabled.
226
+ The attribute autoGeneratePhraseQueries="true" (the default) causes words that get split to
227
+ form phrase queries. For example, WordDelimiterFilter splitting text:pdp-11 will cause the parser
228
+ to generate text:"pdp 11" rather than (text:PDP OR text:11).
229
+ NOTE: autoGeneratePhraseQueries="true" tends to not work well for non whitespace delimited languages.
230
+ -->
231
+ <fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
232
+ <analyzer type="index">
233
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
234
+ <!-- in this example, we will only use synonyms at query time
235
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
236
+ -->
237
+ <!-- Case insensitive stop word removal.
238
+ add enablePositionIncrements=true in both the index and query
239
+ analyzers to leave a 'gap' for more accurate phrase queries.
240
+ -->
241
+ <filter class="solr.StopFilterFactory"
242
+ ignoreCase="true"
243
+ words="stopwords.txt"
244
+ enablePositionIncrements="true"
245
+ />
246
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
247
+ <filter class="solr.LowerCaseFilterFactory"/>
248
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
249
+ <filter class="solr.PorterStemFilterFactory"/>
250
+ </analyzer>
251
+ <analyzer type="query">
252
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
253
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
254
+ <filter class="solr.StopFilterFactory"
255
+ ignoreCase="true"
256
+ words="stopwords.txt"
257
+ enablePositionIncrements="true"
258
+ />
259
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
260
+ <filter class="solr.LowerCaseFilterFactory"/>
261
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
262
+ <filter class="solr.PorterStemFilterFactory"/>
263
+ </analyzer>
264
+ </fieldType>
265
+
266
+
267
+ <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
268
+ but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
269
+ <fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
270
+ <analyzer>
271
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
272
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
273
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
274
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
275
+ <filter class="solr.LowerCaseFilterFactory"/>
276
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
277
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
278
+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
279
+ possible with WordDelimiterFilter in conjunction with stemming. -->
280
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
281
+ </analyzer>
282
+ </fieldType>
283
+
284
+
285
+ <!-- A general unstemmed text field - good if one does not know the language of the field -->
286
+ <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
287
+ <analyzer type="index">
288
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
289
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
290
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
291
+ <filter class="solr.LowerCaseFilterFactory"/>
292
+ </analyzer>
293
+ <analyzer type="query">
294
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
295
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
296
+ <filter class="solr.StopFilterFactory"
297
+ ignoreCase="true"
298
+ words="stopwords.txt"
299
+ enablePositionIncrements="true"
300
+ />
301
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
302
+ <filter class="solr.LowerCaseFilterFactory"/>
303
+ </analyzer>
304
+ </fieldType>
305
+
306
+
307
+ <!-- A general unstemmed text field that indexes tokens normally and also
308
+ reversed (via ReversedWildcardFilterFactory), to enable more efficient
309
+ leading wildcard queries. -->
310
+ <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
311
+ <analyzer type="index">
312
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
313
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
314
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
315
+ <filter class="solr.LowerCaseFilterFactory"/>
316
+ <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
317
+ maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
318
+ </analyzer>
319
+ <analyzer type="query">
320
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
321
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
322
+ <filter class="solr.StopFilterFactory"
323
+ ignoreCase="true"
324
+ words="stopwords.txt"
325
+ enablePositionIncrements="true"
326
+ />
327
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
328
+ <filter class="solr.LowerCaseFilterFactory"/>
329
+ </analyzer>
330
+ </fieldType>
331
+
332
+ <!-- charFilter + WhitespaceTokenizer -->
333
+ <!--
334
+ <fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" >
335
+ <analyzer>
336
+ <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
337
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
338
+ </analyzer>
339
+ </fieldType>
340
+ -->
341
+
342
+ <!-- This is an example of using the KeywordTokenizer along
343
+ with various TokenFilterFactories to produce a sortable field
344
+ that does not include some properties of the source text
345
+ -->
346
+ <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
347
+ <analyzer>
348
+ <!-- KeywordTokenizer does no actual tokenizing, so the entire
349
+ input string is preserved as a single token
350
+ -->
351
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
352
+ <!-- The LowerCase TokenFilter does what you expect, which can be
353
+ useful when you want your sorting to be case insensitive
354
+ -->
355
+ <filter class="solr.LowerCaseFilterFactory" />
356
+ <!-- The TrimFilter removes any leading or trailing whitespace -->
357
+ <filter class="solr.TrimFilterFactory" />
358
+ <!-- The PatternReplaceFilter gives you the flexibility to use
359
+ Java Regular expression to replace any sequence of characters
360
+ matching a pattern with an arbitrary replacement string,
361
+ which may include back references to portions of the original
362
+ string matched by the pattern.
363
+
364
+ See the Java Regular Expression documentation for more
365
+ information on pattern and replacement string syntax.
366
+
367
+ http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
368
+ -->
369
+ <filter class="solr.PatternReplaceFilterFactory"
370
+ pattern="([^a-z])" replacement="" replace="all"
371
+ />
372
+ </analyzer>
373
+ </fieldType>
374
+
375
+ <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
376
+ <analyzer>
377
+ <tokenizer class="solr.StandardTokenizerFactory"/>
378
+ <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
379
+ </analyzer>
380
+ </fieldtype>
381
+
382
+ <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
383
+ <analyzer>
384
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
385
+ <!--
386
+ The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
387
+ a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
388
+ Attributes of the DelimitedPayloadTokenFilterFactory :
389
+ "delimiter" - a one character delimiter. Default is | (pipe)
390
+ "encoder" - how to encode the following value into a payload
391
+ float -> org.apache.lucene.analysis.payloads.FloatEncoder,
392
+ integer -> o.a.l.a.p.IntegerEncoder
393
+ identity -> o.a.l.a.p.IdentityEncoder
394
+ Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
395
+ -->
396
+ <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
397
+ </analyzer>
398
+ </fieldtype>
399
+
400
+ <!-- lowercases the entire field value, keeping it as a single token. -->
401
+ <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
402
+ <analyzer>
403
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
404
+ <filter class="solr.LowerCaseFilterFactory" />
405
+ </analyzer>
406
+ </fieldType>
407
+
408
+ <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
409
+ <analyzer>
410
+ <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
411
+ </analyzer>
412
+ </fieldType>
413
+
414
+ <!-- since fields of this type are by default not stored or indexed,
415
+ any data added to them will be ignored outright. -->
416
+ <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
417
+
418
+ <!-- This point type indexes the coordinates as separate fields (subFields)
419
+ If subFieldType is defined, it references a type, and a dynamic field
420
+ definition is created matching *___<typename>. Alternately, if
421
+ subFieldSuffix is defined, that is used to create the subFields.
422
+ Example: if subFieldType="double", then the coordinates would be
423
+ indexed in fields myloc_0___double,myloc_1___double.
424
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
425
+ in fields myloc_0_d,myloc_1_d
426
+ The subFields are an implementation detail of the fieldType, and end
427
+ users normally should not need to know about them.
428
+ -->
429
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
430
+
431
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
432
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
433
+
434
+ <!--
435
+ A Geohash is a compact representation of a latitude longitude pair in a single field.
436
+ See http://wiki.apache.org/solr/SpatialSearch
437
+ -->
438
+ <fieldtype name="geohash" class="solr.GeoHashField"/>
439
+ </types>
440
+
441
+
442
+ <fields>
443
+ <field name="id" type="string" indexed="true" stored="true" required="true" />
444
+ <field name="type" type="string" indexed="true" stored="true" required="true" />
445
+ <field name="enabled" type="boolean" indexed="true" stored="true" multiValued="true" />
446
+ <field name="location" type="location" indexed="true" stored="true" />
447
+ <field name="user_id" type="int" indexed="true" stored="true" />
448
+ <field name="popularity" type="int" indexed="true" stored="true" />
449
+ <field name="text" type="text" indexed="true" stored="true" />
450
+
451
+ <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
452
+ <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
453
+ <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
454
+ <dynamicField name="*_t" type="text" indexed="true" stored="true"/>
455
+ <dynamicField name="*_txt" type="text" indexed="true" stored="true" multiValued="true"/>
456
+ <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
457
+ <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
458
+ <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
459
+ <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
460
+ <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
461
+ <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
462
+ <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
463
+ <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
464
+ <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
465
+ <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
466
+ <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
467
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
468
+ <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
469
+ <dynamicField name="attr_*" type="textgen" indexed="true" stored="true" multiValued="true"/>
470
+ <dynamicField name="random_*" type="random" />
471
+ </fields>
472
+ <uniqueKey>id</uniqueKey>
473
+ <defaultSearchField>text</defaultSearchField>
474
+ <solrQueryParser defaultOperator="AND"/>
475
+ </schema>