warclight 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +7 -0
  2. data/.eslintrc +12 -0
  3. data/.gitignore +19 -0
  4. data/.rspec +2 -0
  5. data/.rubocop.yml +66 -0
  6. data/.solr_wrapper +5 -0
  7. data/.travis.yml +31 -0
  8. data/CONTRIBUTING.md +53 -0
  9. data/Gemfile +42 -0
  10. data/LICENSE.txt +11 -0
  11. data/README.md +79 -0
  12. data/Rakefile +16 -0
  13. data/app/assets/images/blacklight/compact.svg +25 -0
  14. data/app/assets/images/blacklight/logo.png +0 -0
  15. data/app/assets/javascripts/warclight/oembed_viewer.js +39 -0
  16. data/app/assets/javascripts/warclight/warclight.js +4 -0
  17. data/app/assets/stylesheets/warclight/application.scss +1 -0
  18. data/app/assets/stylesheets/warclight/warclight.scss +1 -0
  19. data/app/controllers/concerns/warclight/field_config_helpers.rb +11 -0
  20. data/app/helpers/warclight/application_helper.rb +8 -0
  21. data/app/jobs/warclight/application_job.rb +6 -0
  22. data/app/models/concerns/warclight/catalog.rb +8 -0
  23. data/app/models/concerns/warclight/search_behavior.rb +9 -0
  24. data/app/models/concerns/warclight/solr_document.rb +9 -0
  25. data/app/views/layouts/warclight/application.html.erb +14 -0
  26. data/bin/rails +14 -0
  27. data/config/routes.rb +4 -0
  28. data/lib/generators/warclight/install_generator.rb +55 -0
  29. data/lib/generators/warclight/templates/catalog_controller.rb +129 -0
  30. data/lib/generators/warclight/templates/warclight.js +2 -0
  31. data/lib/generators/warclight/templates/warclight.scss +3 -0
  32. data/lib/generators/warclight/update_generator.rb +22 -0
  33. data/lib/warclight.rb +7 -0
  34. data/lib/warclight/engine.rb +105 -0
  35. data/lib/warclight/version.rb +5 -0
  36. data/package.json +24 -0
  37. data/solr/conf/elevate.xml +42 -0
  38. data/solr/conf/lang/contractions_ca.txt +8 -0
  39. data/solr/conf/lang/contractions_fr.txt +15 -0
  40. data/solr/conf/lang/contractions_ga.txt +5 -0
  41. data/solr/conf/lang/contractions_it.txt +23 -0
  42. data/solr/conf/lang/hyphenations_ga.txt +5 -0
  43. data/solr/conf/lang/stemdict_nl.txt +6 -0
  44. data/solr/conf/lang/stoptags_ja.txt +420 -0
  45. data/solr/conf/lang/stopwords_ar.txt +125 -0
  46. data/solr/conf/lang/stopwords_bg.txt +193 -0
  47. data/solr/conf/lang/stopwords_ca.txt +220 -0
  48. data/solr/conf/lang/stopwords_cz.txt +172 -0
  49. data/solr/conf/lang/stopwords_da.txt +110 -0
  50. data/solr/conf/lang/stopwords_de.txt +294 -0
  51. data/solr/conf/lang/stopwords_el.txt +78 -0
  52. data/solr/conf/lang/stopwords_en.txt +54 -0
  53. data/solr/conf/lang/stopwords_es.txt +356 -0
  54. data/solr/conf/lang/stopwords_eu.txt +99 -0
  55. data/solr/conf/lang/stopwords_fa.txt +313 -0
  56. data/solr/conf/lang/stopwords_fi.txt +97 -0
  57. data/solr/conf/lang/stopwords_fr.txt +186 -0
  58. data/solr/conf/lang/stopwords_ga.txt +110 -0
  59. data/solr/conf/lang/stopwords_gl.txt +161 -0
  60. data/solr/conf/lang/stopwords_hi.txt +235 -0
  61. data/solr/conf/lang/stopwords_hu.txt +211 -0
  62. data/solr/conf/lang/stopwords_hy.txt +46 -0
  63. data/solr/conf/lang/stopwords_id.txt +359 -0
  64. data/solr/conf/lang/stopwords_it.txt +303 -0
  65. data/solr/conf/lang/stopwords_ja.txt +127 -0
  66. data/solr/conf/lang/stopwords_lv.txt +172 -0
  67. data/solr/conf/lang/stopwords_nl.txt +119 -0
  68. data/solr/conf/lang/stopwords_no.txt +194 -0
  69. data/solr/conf/lang/stopwords_pt.txt +253 -0
  70. data/solr/conf/lang/stopwords_ro.txt +233 -0
  71. data/solr/conf/lang/stopwords_ru.txt +243 -0
  72. data/solr/conf/lang/stopwords_sv.txt +133 -0
  73. data/solr/conf/lang/stopwords_th.txt +119 -0
  74. data/solr/conf/lang/stopwords_tr.txt +212 -0
  75. data/solr/conf/lang/userdict_ja.txt +29 -0
  76. data/solr/conf/managed-schema +1045 -0
  77. data/solr/conf/params.json +20 -0
  78. data/solr/conf/protwords.txt +21 -0
  79. data/solr/conf/schema.xml +350 -0
  80. data/solr/conf/solrconfig.xml +1361 -0
  81. data/solr/conf/stopwords.txt +14 -0
  82. data/solr/conf/synonyms.txt +29 -0
  83. data/tasks/warclight.rake +61 -0
  84. data/template.rb +15 -0
  85. data/vendor/assets/javascripts/responsiveTruncator.js +69 -0
  86. data/vendor/assets/javascripts/stickyfill.js +480 -0
  87. data/warclight.gemspec +38 -0
  88. metadata +312 -0
@@ -0,0 +1,212 @@
1
+ # Turkish stopwords from LUCENE-559
2
+ # merged with the list from "Information Retrieval on Turkish Texts"
3
+ # (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
4
+ acaba
5
+ altmış
6
+ altı
7
+ ama
8
+ ancak
9
+ arada
10
+ aslında
11
+ ayrıca
12
+ bana
13
+ bazı
14
+ belki
15
+ ben
16
+ benden
17
+ beni
18
+ benim
19
+ beri
20
+ beş
21
+ bile
22
+ bin
23
+ bir
24
+ birçok
25
+ biri
26
+ birkaç
27
+ birkez
28
+ birşey
29
+ birşeyi
30
+ biz
31
+ bize
32
+ bizden
33
+ bizi
34
+ bizim
35
+ böyle
36
+ böylece
37
+ bu
38
+ buna
39
+ bunda
40
+ bundan
41
+ bunlar
42
+ bunları
43
+ bunların
44
+ bunu
45
+ bunun
46
+ burada
47
+ çok
48
+ çünkü
49
+ da
50
+ daha
51
+ dahi
52
+ de
53
+ defa
54
+ değil
55
+ diğer
56
+ diye
57
+ doksan
58
+ dokuz
59
+ dolayı
60
+ dolayısıyla
61
+ dört
62
+ edecek
63
+ eden
64
+ ederek
65
+ edilecek
66
+ ediliyor
67
+ edilmesi
68
+ ediyor
69
+ eğer
70
+ elli
71
+ en
72
+ etmesi
73
+ etti
74
+ ettiği
75
+ ettiğini
76
+ gibi
77
+ göre
78
+ halen
79
+ hangi
80
+ hatta
81
+ hem
82
+ henüz
83
+ hep
84
+ hepsi
85
+ her
86
+ herhangi
87
+ herkesin
88
+ hiç
89
+ hiçbir
90
+ için
91
+ iki
92
+ ile
93
+ ilgili
94
+ ise
95
+ işte
96
+ itibaren
97
+ itibariyle
98
+ kadar
99
+ karşın
100
+ katrilyon
101
+ kendi
102
+ kendilerine
103
+ kendini
104
+ kendisi
105
+ kendisine
106
+ kendisini
107
+ kez
108
+ ki
109
+ kim
110
+ kimden
111
+ kime
112
+ kimi
113
+ kimse
114
+ kırk
115
+ milyar
116
+ milyon
117
+ mu
118
+ mü
119
+ mı
120
+ nasıl
121
+ ne
122
+ neden
123
+ nedenle
124
+ nerde
125
+ nerede
126
+ nereye
127
+ niye
128
+ niçin
129
+ o
130
+ olan
131
+ olarak
132
+ oldu
133
+ olduğu
134
+ olduğunu
135
+ olduklarını
136
+ olmadı
137
+ olmadığı
138
+ olmak
139
+ olması
140
+ olmayan
141
+ olmaz
142
+ olsa
143
+ olsun
144
+ olup
145
+ olur
146
+ olursa
147
+ oluyor
148
+ on
149
+ ona
150
+ ondan
151
+ onlar
152
+ onlardan
153
+ onları
154
+ onların
155
+ onu
156
+ onun
157
+ otuz
158
+ oysa
159
+ öyle
160
+ pek
161
+ rağmen
162
+ sadece
163
+ sanki
164
+ sekiz
165
+ seksen
166
+ sen
167
+ senden
168
+ seni
169
+ senin
170
+ siz
171
+ sizden
172
+ sizi
173
+ sizin
174
+ şey
175
+ şeyden
176
+ şeyi
177
+ şeyler
178
+ şöyle
179
+ şu
180
+ şuna
181
+ şunda
182
+ şundan
183
+ şunları
184
+ şunu
185
+ tarafından
186
+ trilyon
187
+ tüm
188
+ üç
189
+ üzere
190
+ var
191
+ vardı
192
+ ve
193
+ veya
194
+ ya
195
+ yani
196
+ yapacak
197
+ yapılan
198
+ yapılması
199
+ yapıyor
200
+ yapmak
201
+ yaptı
202
+ yaptığı
203
+ yaptığını
204
+ yaptıkları
205
+ yedi
206
+ yerine
207
+ yetmiş
208
+ yine
209
+ yirmi
210
+ yoksa
211
+ yüz
212
+ zaten
@@ -0,0 +1,29 @@
1
+ #
2
+ # This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
3
+ #
4
+ # Add entries to this file in order to override the statistical model in terms
5
+ # of segmentation, readings and part-of-speech tags. Notice that entries do
6
+ # not have weights since they are always used when found. This is by-design
7
+ # in order to maximize ease-of-use.
8
+ #
9
+ # Entries are defined using the following CSV format:
10
+ # <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
11
+ #
12
+ # Notice that a single half-width space separates tokens and readings, and
13
+ # that the number of tokens and readings must match exactly.
14
+ #
15
+ # Also notice that the behavior of multiple entries with the same <text> is undefined.
16
+ #
17
+ # Whitespace only lines are ignored. Comments are not allowed on entry lines.
18
+ #
19
+
20
+ # Custom segmentation for kanji compounds
21
+ 日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
22
+ 関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
23
+
24
+ # Custom segmentation for compound katakana
25
+ トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
26
+ ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
27
+
28
+ # Custom reading for former sumo wrestler
29
+ 朝青龍,朝青龍,アサショウリュウ,カスタム人名
@@ -0,0 +1,1045 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ This is the Solr schema file. This file should be named "schema.xml" and
21
+ should be in the conf directory under the solr home
22
+ (i.e. ./solr/conf/schema.xml by default)
23
+ or located where the classloader for the Solr webapp can find it.
24
+
25
+ This example schema is the recommended starting point for users.
26
+ It should be kept correct and concise, usable out-of-the-box.
27
+
28
+ For more information on how to customize this file, please see
29
+ http://wiki.apache.org/solr/SchemaXml
30
+
31
+ PERFORMANCE NOTE: this schema includes many optional features and should not
32
+ be used for benchmarking. To improve performance one could
33
+ - set stored="false" for all fields possible (esp large fields) when you
34
+ only need to search on the field but don't need to return the original
35
+ value.
36
+ - set indexed="false" if you don't need to search on the field, but only
37
+ return the field as a result of searching on other indexed fields.
38
+ - remove all unneeded copyField statements
39
+ - for best index size and searching performance, set "indexed" to false
40
+ for all general text fields, use copyField to copy them to the
41
+ catchall "text" field, and use that for searching.
42
+ - For maximum indexing performance, use the ConcurrentUpdateSolrServer
43
+ java client.
44
+ - Remember to run the JVM in server mode, and use a higher logging level
45
+ that avoids logging every request
46
+ -->
47
+
48
+ <schema name="example-basic" version="1.6">
49
+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
50
+ version="x.y" is Solr's version number for the schema syntax and
51
+ semantics. It should not normally be changed by applications.
52
+
53
+ 1.0: multiValued attribute did not exist, all fields are multiValued
54
+ by nature
55
+ 1.1: multiValued attribute introduced, false by default
56
+ 1.2: omitTermFreqAndPositions attribute introduced, true by default
57
+ except for text fields.
58
+ 1.3: removed optional field compress feature
59
+ 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser
60
+ behavior when a single string produces multiple tokens. Defaults
61
+ to off for version >= 1.4
62
+ 1.5: omitNorms defaults to true for primitive field types
63
+ (int, float, boolean, string...)
64
+ 1.6: useDocValuesAsStored defaults to true.
65
+ -->
66
+
67
+ <!-- Valid attributes for fields:
68
+ name: mandatory - the name for the field
69
+ type: mandatory - the name of a field type from the
70
+ fieldTypes section
71
+ indexed: true if this field should be indexed (searchable or sortable)
72
+ stored: true if this field should be retrievable
73
+ docValues: true if this field should have doc values. Doc values are
74
+ useful (required, if you are using *Point fields) for faceting,
75
+ grouping, sorting and function queries. Doc values will make the index
76
+ faster to load, more NRT-friendly and more memory-efficient.
77
+ They however come with some limitations: they are currently only
78
+ supported by StrField, UUIDField, all Trie*Fields and *PointFields,
79
+ and depending on the field type, they might require the field to be
80
+ single-valued, be required or have a default value (check the
81
+ documentation of the field type you're interested in for more information)
82
+ multiValued: true if this field may contain multiple values per document
83
+ omitNorms: (expert) set to true to omit the norms associated with
84
+ this field (this disables length normalization and index-time
85
+ boosting for the field, and saves some memory). Only full-text
86
+ fields or fields that need an index-time boost need norms.
87
+ Norms are omitted for primitive (non-analyzed) types by default.
88
+ termVectors: [false] set to true to store the term vector for a
89
+ given field.
90
+ When using MoreLikeThis, fields used for similarity should be
91
+ stored for best performance.
92
+ termPositions: Store position information with the term vector.
93
+ This will increase storage costs.
94
+ termOffsets: Store offset information with the term vector. This
95
+ will increase storage costs.
96
+ required: The field is required. It will throw an error if the
97
+ value does not exist
98
+ default: a value that should be used if no value is specified
99
+ when adding a document.
100
+ -->
101
+
102
+ <!-- field names should consist of alphanumeric or underscore characters only and
103
+ not start with a digit. This is not currently strictly enforced,
104
+ but other field names will not have first class support from all components
105
+ and back compatibility is not guaranteed. Names with both leading and
106
+ trailing underscores (e.g. _version_) are reserved.
107
+ -->
108
+
109
+ <!-- In this data_driven_schema_configs configset, only four fields are pre-declared:
110
+ id, _version_, _root_, and _text_. All other fields will be type guessed and added via the
111
+ "add-unknown-fields-to-the-schema" update request processor chain declared
112
+ in solrconfig.xml.
113
+
114
+ Note that many dynamic fields are also defined - you can use them to specify a
115
+ field's type via field naming conventions - see below.
116
+
117
+ WARNING: The _text_ catch-all field will significantly increase your index size.
118
+ If you don't need it, consider removing it and the corresponding copyField directive.
119
+ -->
120
+ <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
121
+ <!-- doc values are enabled by default for primitive types such as long so we don't index the version field -->
122
+ <field name="_version_" type="long" indexed="false" stored="false"/>
123
+ <field name="_root_" type="string" indexed="true" stored="false" docValues="false" />
124
+ <field name="_text_" type="text_general" indexed="true" stored="false" multiValued="true"/>
125
+
126
+ <!-- Only enabled in the "schemaless" data-driven example (assuming the client
127
+ does not know what fields may be searched) because it's very expensive to index everything twice. -->
128
+ <!-- <copyField source="*" dest="_text_"/> -->
129
+
130
+ <!-- Dynamic field definitions allow using convention over configuration
131
+ for fields via the specification of patterns to match field names.
132
+ EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
133
+ RESTRICTION: the glob-like pattern in the name attribute must have
134
+ a "*" only at the start or the end. -->
135
+
136
+ <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
137
+ <dynamicField name="*_is" type="ints" indexed="true" stored="true"/>
138
+ <dynamicField name="*_s" type="string" indexed="true" stored="true" />
139
+ <dynamicField name="*_ss" type="strings" indexed="true" stored="true"/>
140
+ <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
141
+ <dynamicField name="*_ls" type="longs" indexed="true" stored="true"/>
142
+ <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
143
+ <dynamicField name="*_txt" type="text_general" indexed="true" stored="true"/>
144
+ <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
145
+ <dynamicField name="*_bs" type="booleans" indexed="true" stored="true"/>
146
+ <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
147
+ <dynamicField name="*_fs" type="floats" indexed="true" stored="true"/>
148
+ <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
149
+ <dynamicField name="*_ds" type="doubles" indexed="true" stored="true"/>
150
+
151
+ <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
152
+ <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
153
+ <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
154
+ <dynamicField name="*_srpt" type="location_rpt" indexed="true" stored="true"/>
155
+
156
+ <!-- KD-tree (point) numerics -->
157
+ <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
158
+ <dynamicField name="*_pis" type="pints" indexed="true" stored="true"/>
159
+ <dynamicField name="*_pl" type="plong" indexed="true" stored="true"/>
160
+ <dynamicField name="*_pls" type="plongs" indexed="true" stored="true"/>
161
+ <dynamicField name="*_pf" type="pfloat" indexed="true" stored="true"/>
162
+ <dynamicField name="*_pfs" type="pfloats" indexed="true" stored="true"/>
163
+ <dynamicField name="*_pd" type="pdouble" indexed="true" stored="true"/>
164
+ <dynamicField name="*_pds" type="pdoubles" indexed="true" stored="true"/>
165
+ <dynamicField name="*_pdt" type="pdate" indexed="true" stored="true"/>
166
+ <dynamicField name="*_pdts" type="pdates" indexed="true" stored="true"/>
167
+
168
+ <!-- some trie-coded dynamic fields -->
169
+ <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
170
+ <dynamicField name="*_tis" type="tints" indexed="true" stored="true"/>
171
+ <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
172
+ <dynamicField name="*_tls" type="tlongs" indexed="true" stored="true"/>
173
+ <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
174
+ <dynamicField name="*_tfs" type="tfloats" indexed="true" stored="true"/>
175
+ <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
176
+ <dynamicField name="*_tds" type="tdoubles" indexed="true" stored="true"/>
177
+ <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
178
+ <dynamicField name="*_tdts" type="tdates" indexed="true" stored="true"/>
179
+
180
+ <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
181
+
182
+ <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
183
+ <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
184
+
185
+ <dynamicField name="random_*" type="random" />
186
+
187
+ <!-- uncomment the following to ignore any fields that don't already match an existing
188
+ field name or dynamic field, rather than reporting them as an error.
189
+ alternately, change the type="ignored" to some other type e.g. "text" if you want
190
+ unknown fields indexed and/or stored by default
191
+
192
+ NB: use of "*" dynamic fields will disable field type guessing and adding
193
+ unknown fields to the schema. -->
194
+ <!--dynamicField name="*" type="ignored" multiValued="true" /-->
195
+
196
+ <!-- Field to use to determine and enforce document uniqueness.
197
+ Unless this field is marked with required="false", it will be a required field
198
+ -->
199
+ <uniqueKey>id</uniqueKey>
200
+
201
+ <!-- copyField commands copy one field to another at the time a document
202
+ is added to the index. It's used either to index the same field differently,
203
+ or to add multiple fields to the same field for easier/faster searching.
204
+
205
+ <copyField source="sourceFieldName" dest="destinationFieldName"/>
206
+ -->
207
+
208
+ <!-- field type definitions. The "name" attribute is
209
+ just a label to be used by field definitions. The "class"
210
+ attribute and any other attributes determine the real
211
+ behavior of the fieldType.
212
+ Class names starting with "solr" refer to java classes in a
213
+ standard package such as org.apache.solr.analysis
214
+ -->
215
+
216
+ <!-- The StrField type is not analyzed, but indexed/stored verbatim.
217
+ It supports doc values but in that case the field needs to be
218
+ single-valued and either required or have a default value.
219
+ -->
220
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true" />
221
+ <fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" />
222
+
223
+ <!-- boolean type: "true" or "false" -->
224
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
225
+
226
+ <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
227
+
228
+ <!-- The sortMissingLast and sortMissingFirst attributes are optional and are
229
+ currently supported on types that are sorted internally as strings
230
+ and on numeric types.
231
+ This includes "string","boolean", "int", "float", "long", "date", "double",
232
+ including the "Trie" and "Point" variants.
233
+ - If sortMissingLast="true", then a sort on this field will cause documents
234
+ without the field to come after documents with the field,
235
+ regardless of the requested sort order (asc or desc).
236
+ - If sortMissingFirst="true", then a sort on this field will cause documents
237
+ without the field to come before documents with the field,
238
+ regardless of the requested sort order.
239
+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
240
+ then default lucene sorting will be used which places docs without the
241
+ field first in an ascending sort and last in a descending sort.
242
+ -->
243
+
244
+ <!--
245
+ Numeric field types that index values using KD-trees. *Point fields are faster and more efficient than Trie* fields, both at
246
+ search time and at index time, but some features are still not supported.
247
+ Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc.
248
+ -->
249
+ <fieldType name="pint" class="solr.IntPointField" docValues="true"/>
250
+ <fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
251
+ <fieldType name="plong" class="solr.LongPointField" docValues="true"/>
252
+ <fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
253
+
254
+ <fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/>
255
+ <fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
256
+ <fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
257
+ <fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
258
+
259
+ <!--
260
+ Default numeric field types. For faster range queries, consider *PointFields (pint/pfloat/plong/pdouble), or the
261
+ tint/tfloat/tlong/tdouble types.
262
+ -->
263
+ <fieldType name="int" class="solr.TrieIntField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
264
+ <fieldType name="float" class="solr.TrieFloatField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
265
+ <fieldType name="long" class="solr.TrieLongField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
266
+ <fieldType name="double" class="solr.TrieDoubleField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
267
+
268
+ <fieldType name="ints" class="solr.TrieIntField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
269
+ <fieldType name="floats" class="solr.TrieFloatField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
270
+ <fieldType name="longs" class="solr.TrieLongField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
271
+ <fieldType name="doubles" class="solr.TrieDoubleField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
272
+
273
+ <!--
274
+ Numeric field types that index each value at various levels of precision
275
+ to accelerate range queries when the number of values between the range
276
+ endpoints is large. See the javadoc for NumericRangeQuery for internal
277
+ implementation details.
278
+
279
+ Smaller precisionStep values (specified in bits) will lead to more tokens
280
+ indexed per value, slightly larger index size, and faster range queries.
281
+ A precisionStep of 0 disables indexing at different precision levels.
282
+
283
+ Consider using pint/pfloat/plong/pdouble instead of Trie* fields if possible
284
+ -->
285
+ <fieldType name="tint" class="solr.TrieIntField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
286
+ <fieldType name="tfloat" class="solr.TrieFloatField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
287
+ <fieldType name="tlong" class="solr.TrieLongField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
288
+ <fieldType name="tdouble" class="solr.TrieDoubleField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
289
+
290
+ <fieldType name="tints" class="solr.TrieIntField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
291
+ <fieldType name="tfloats" class="solr.TrieFloatField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
292
+ <fieldType name="tlongs" class="solr.TrieLongField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
293
+ <fieldType name="tdoubles" class="solr.TrieDoubleField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
294
+
295
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
296
+ is a more restricted form of the canonical representation of dateTime
297
+ http://www.w3.org/TR/xmlschema-2/#dateTime
298
+ The trailing "Z" designates UTC time and is mandatory.
299
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
300
+ All other components are mandatory.
301
+
302
+ Expressions can also be used to denote calculations that should be
303
+ performed relative to "NOW" to determine the value, e.g.:
304
+
305
+ NOW/HOUR
306
+ ... Round to the start of the current hour
307
+ NOW-1DAY
308
+ ... Exactly 1 day prior to now
309
+ NOW/DAY+6MONTHS+3DAYS
310
+ ... 6 months and 3 days in the future from the start of
311
+ the current day
312
+
313
+ Consult the TrieDateField javadocs for more information.
314
+ -->
315
+ <!-- KD-tree versions of date fields -->
316
+ <fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
317
+ <fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>
318
+
319
+ <fieldType name="date" class="solr.TrieDateField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
320
+ <fieldType name="dates" class="solr.TrieDateField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
321
+
322
+ <fieldType name="tdate" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0"/>
323
+ <fieldType name="tdates" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0" multiValued="true"/>
324
+
325
+
326
+ <!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings -->
327
+ <fieldType name="binary" class="solr.BinaryField"/>
328
+
329
+ <!-- The "RandomSortField" is not used to store or search any
330
+ data. You can declare fields of this type in your schema
331
+ to generate pseudo-random orderings of your docs for sorting
332
+ or function purposes. The ordering is generated based on the field
333
+ name and the version of the index. As long as the index version
334
+ remains unchanged, and the same field name is reused,
335
+ the ordering of the docs will be consistent.
336
+ If you want different pseudo-random orderings of documents,
337
+ for the same version of the index, use a dynamicField and
338
+ change the field name in the request.
339
+ -->
340
+ <fieldType name="random" class="solr.RandomSortField" indexed="true" />
341
+
342
+ <!-- solr.TextField allows the specification of custom text analyzers
343
+ specified as a tokenizer and a list of token filters. Different
344
+ analyzers may be specified for indexing and querying.
345
+
346
+ The optional positionIncrementGap puts space between multiple fields of
347
+ this type on the same document, with the purpose of preventing false phrase
348
+ matching across fields.
349
+
350
+ For more info on customizing your analyzer chain, please see
351
+ http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
352
+ -->
353
+
354
+ <!-- One can also specify an existing Analyzer class that has a
355
+ default constructor via the class attribute on the analyzer element.
356
+ Example:
357
+ <fieldType name="text_greek" class="solr.TextField">
358
+ <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
359
+ </fieldType>
360
+ -->
361
+
362
+ <!-- A text field that only splits on whitespace for exact matching of words -->
363
+ <dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/>
364
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
365
+ <analyzer>
366
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
367
+ </analyzer>
368
+ </fieldType>
369
+
370
+ <!-- A general text field that has reasonable, generic
371
+ cross-language defaults: it tokenizes with StandardTokenizer,
372
+ removes stop words from case-insensitive "stopwords.txt"
373
+ (empty by default), and down cases. At query time only, it
374
+ also applies synonyms.
375
+ -->
376
+ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
377
+ <analyzer type="index">
378
+ <tokenizer class="solr.StandardTokenizerFactory"/>
379
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
380
+ <!-- in this example, we will only use synonyms at query time
381
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
382
+ <filter class="solr.FlattenGraphFilterFactory"/>
383
+ -->
384
+ <filter class="solr.LowerCaseFilterFactory"/>
385
+ </analyzer>
386
+ <analyzer type="query">
387
+ <tokenizer class="solr.StandardTokenizerFactory"/>
388
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
389
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
390
+ <filter class="solr.LowerCaseFilterFactory"/>
391
+ </analyzer>
392
+ </fieldType>
393
+
394
+ <!-- A text field with defaults appropriate for English: it
395
+ tokenizes with StandardTokenizer, removes English stop words
396
+ (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
397
+ finally applies Porter's stemming. The query time analyzer
398
+ also applies synonyms from synonyms.txt. -->
399
+ <dynamicField name="*_txt_en" type="text_en" indexed="true" stored="true"/>
400
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
401
+ <analyzer type="index">
402
+ <tokenizer class="solr.StandardTokenizerFactory"/>
403
+ <!-- in this example, we will only use synonyms at query time
404
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
405
+ <filter class="solr.FlattenGraphFilterFactory"/>
406
+ -->
407
+ <!-- Case insensitive stop word removal.
408
+ -->
409
+ <filter class="solr.StopFilterFactory"
410
+ ignoreCase="true"
411
+ words="lang/stopwords_en.txt"
412
+ />
413
+ <filter class="solr.LowerCaseFilterFactory"/>
414
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
415
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
416
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
417
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
418
+ -->
419
+ <filter class="solr.PorterStemFilterFactory"/>
420
+ </analyzer>
421
+ <analyzer type="query">
422
+ <tokenizer class="solr.StandardTokenizerFactory"/>
423
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
424
+ <filter class="solr.StopFilterFactory"
425
+ ignoreCase="true"
426
+ words="lang/stopwords_en.txt"
427
+ />
428
+ <filter class="solr.LowerCaseFilterFactory"/>
429
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
430
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
431
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
432
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
433
+ -->
434
+ <filter class="solr.PorterStemFilterFactory"/>
435
+ </analyzer>
436
+ </fieldType>
437
+
438
+ <!-- A text field with defaults appropriate for English, plus
439
+ aggressive word-splitting and autophrase features enabled.
440
+ This field is just like text_en, except it adds
441
+ WordDelimiterGraphFilter to enable splitting and matching of
442
+ words on case-change, alpha numeric boundaries, and
443
+ non-alphanumeric chars. This means certain compound word
444
+ cases will work, for example query "wi fi" will match
445
+ document "WiFi" or "wi-fi".
446
+ -->
447
+ <dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/>
448
+ <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
449
+ <analyzer type="index">
450
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
451
+ <!-- in this example, we will only use synonyms at query time
452
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
453
+ -->
454
+ <!-- Case insensitive stop word removal.
455
+ -->
456
+ <filter class="solr.StopFilterFactory"
457
+ ignoreCase="true"
458
+ words="lang/stopwords_en.txt"
459
+ />
460
+ <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
461
+ <filter class="solr.LowerCaseFilterFactory"/>
462
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
463
+ <filter class="solr.PorterStemFilterFactory"/>
464
+ <filter class="solr.FlattenGraphFilterFactory" />
465
+ </analyzer>
466
+ <analyzer type="query">
467
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
468
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
469
+ <filter class="solr.StopFilterFactory"
470
+ ignoreCase="true"
471
+ words="lang/stopwords_en.txt"
472
+ />
473
+ <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
474
+ <filter class="solr.LowerCaseFilterFactory"/>
475
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
476
+ <filter class="solr.PorterStemFilterFactory"/>
477
+ </analyzer>
478
+ </fieldType>
479
+
480
+ <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
481
+ but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
482
+ <dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/>
483
+ <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
484
+ <analyzer type="index">
485
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
486
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
487
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
488
+ <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
489
+ <filter class="solr.LowerCaseFilterFactory"/>
490
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
491
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
492
+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
493
+ possible with WordDelimiterGraphFilter in conjunction with stemming. -->
494
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
495
+ <filter class="solr.FlattenGraphFilterFactory" />
496
+ </analyzer>
497
+ <analyzer type="query">
498
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
499
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
500
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
501
+ <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
502
+ <filter class="solr.LowerCaseFilterFactory"/>
503
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
504
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
505
+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
506
+ possible with WordDelimiterGraphFilter in conjunction with stemming. -->
507
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
508
+ </analyzer>
509
+ </fieldType>
510
+
511
+ <!-- Just like text_general except it reverses the characters of
512
+ each token, to enable more efficient leading wildcard queries.
513
+ -->
514
+ <dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/>
515
+ <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
516
+ <analyzer type="index">
517
+ <tokenizer class="solr.StandardTokenizerFactory"/>
518
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
519
+ <filter class="solr.LowerCaseFilterFactory"/>
520
+ <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
521
+ maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
522
+ </analyzer>
523
+ <analyzer type="query">
524
+ <tokenizer class="solr.StandardTokenizerFactory"/>
525
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
526
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
527
+ <filter class="solr.LowerCaseFilterFactory"/>
528
+ </analyzer>
529
+ </fieldType>
530
+
531
+ <dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/>
532
+ <fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" >
533
+ <analyzer>
534
+ <tokenizer class="solr.StandardTokenizerFactory"/>
535
+ <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
536
+ </analyzer>
537
+ </fieldType>
538
+
539
+ <!-- lowercases the entire field value, keeping it as a single token. -->
540
+ <dynamicField name="*_s_lower" type="lowercase" indexed="true" stored="true"/>
541
+ <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
542
+ <analyzer>
543
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
544
+ <filter class="solr.LowerCaseFilterFactory" />
545
+ </analyzer>
546
+ </fieldType>
547
+
548
+ <!--
549
+ Example of using PathHierarchyTokenizerFactory at index time, so
550
+ queries for paths match documents at that path, or in descendent paths
551
+ -->
552
+ <dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/>
553
+ <fieldType name="descendent_path" class="solr.TextField">
554
+ <analyzer type="index">
555
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
556
+ </analyzer>
557
+ <analyzer type="query">
558
+ <tokenizer class="solr.KeywordTokenizerFactory" />
559
+ </analyzer>
560
+ </fieldType>
561
+
562
+ <!--
563
+ Example of using PathHierarchyTokenizerFactory at query time, so
564
+ queries for paths match documents at that path, or in ancestor paths
565
+ -->
566
+ <dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/>
567
+ <fieldType name="ancestor_path" class="solr.TextField">
568
+ <analyzer type="index">
569
+ <tokenizer class="solr.KeywordTokenizerFactory" />
570
+ </analyzer>
571
+ <analyzer type="query">
572
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
573
+ </analyzer>
574
+ </fieldType>
575
+
576
+ <!-- since fields of this type are by default not stored or indexed,
577
+ any data added to them will be ignored outright. -->
578
+ <fieldType name="ignored" stored="false" indexed="false" docValues="false" multiValued="true" class="solr.StrField" />
579
+
580
+ <!-- This point type indexes the coordinates as separate fields (subFields)
581
+ If subFieldType is defined, it references a type, and a dynamic field
582
+ definition is created matching *___<typename>. Alternately, if
583
+ subFieldSuffix is defined, that is used to create the subFields.
584
+ Example: if subFieldType="double", then the coordinates would be
585
+ indexed in fields myloc_0___double,myloc_1___double.
586
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
587
+ in fields myloc_0_d,myloc_1_d
588
+ The subFields are an implementation detail of the fieldType, and end
589
+ users normally should not need to know about them.
590
+ -->
591
+ <dynamicField name="*_point" type="point" indexed="true" stored="true"/>
592
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
593
+
594
+ <!-- A specialized field for geospatial search filters and distance sorting. -->
595
+ <fieldType name="location" class="solr.LatLonPointSpatialField" docValues="true"/>
596
+
597
+ <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
598
+ For more information about this and other Spatial fields new to Solr 4, see:
599
+ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
600
+ -->
601
+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
602
+ geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" />
603
+
604
+ <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
605
+ Parameters:
606
+ defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
607
+ precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
608
+ providerClass: Lets you plug in another exchange rate provider backend:
609
+ solr.FileExchangeRateProvider is the default and takes one parameter:
610
+ currencyConfig: name of an xml file holding exchange rates
611
+ solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
612
+ ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
613
+ refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
614
+ -->
615
+ <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
616
+
617
+
618
+
619
+ <!-- some examples for different languages (generally ordered by ISO code) -->
620
+
621
+ <!-- Arabic -->
622
+ <dynamicField name="*_txt_ar" type="text_ar" indexed="true" stored="true"/>
623
+ <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
624
+ <analyzer>
625
+ <tokenizer class="solr.StandardTokenizerFactory"/>
626
+ <!-- for any non-arabic -->
627
+ <filter class="solr.LowerCaseFilterFactory"/>
628
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" />
629
+ <!-- normalizes ﻯ to ﻱ, etc -->
630
+ <filter class="solr.ArabicNormalizationFilterFactory"/>
631
+ <filter class="solr.ArabicStemFilterFactory"/>
632
+ </analyzer>
633
+ </fieldType>
634
+
635
+ <!-- Bulgarian -->
636
+ <dynamicField name="*_txt_bg" type="text_bg" indexed="true" stored="true"/>
637
+ <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
638
+ <analyzer>
639
+ <tokenizer class="solr.StandardTokenizerFactory"/>
640
+ <filter class="solr.LowerCaseFilterFactory"/>
641
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" />
642
+ <filter class="solr.BulgarianStemFilterFactory"/>
643
+ </analyzer>
644
+ </fieldType>
645
+
646
+ <!-- Catalan -->
647
+ <dynamicField name="*_txt_ca" type="text_ca" indexed="true" stored="true"/>
648
+ <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
649
+ <analyzer>
650
+ <tokenizer class="solr.StandardTokenizerFactory"/>
651
+ <!-- removes l', etc -->
652
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
653
+ <filter class="solr.LowerCaseFilterFactory"/>
654
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" />
655
+ <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
656
+ </analyzer>
657
+ </fieldType>
658
+
659
+ <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
660
+ <dynamicField name="*_txt_cjk" type="text_cjk" indexed="true" stored="true"/>
661
+ <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
662
+ <analyzer>
663
+ <tokenizer class="solr.StandardTokenizerFactory"/>
664
+ <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
665
+ <filter class="solr.CJKWidthFilterFactory"/>
666
+ <!-- for any non-CJK -->
667
+ <filter class="solr.LowerCaseFilterFactory"/>
668
+ <filter class="solr.CJKBigramFilterFactory"/>
669
+ </analyzer>
670
+ </fieldType>
671
+
672
+ <!-- Czech -->
673
+ <dynamicField name="*_txt_cz" type="text_cz" indexed="true" stored="true"/>
674
+ <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
675
+ <analyzer>
676
+ <tokenizer class="solr.StandardTokenizerFactory"/>
677
+ <filter class="solr.LowerCaseFilterFactory"/>
678
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" />
679
+ <filter class="solr.CzechStemFilterFactory"/>
680
+ </analyzer>
681
+ </fieldType>
682
+
683
+ <!-- Danish -->
684
+ <dynamicField name="*_txt_da" type="text_da" indexed="true" stored="true"/>
685
+ <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
686
+ <analyzer>
687
+ <tokenizer class="solr.StandardTokenizerFactory"/>
688
+ <filter class="solr.LowerCaseFilterFactory"/>
689
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
690
+ <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
691
+ </analyzer>
692
+ </fieldType>
693
+
694
+ <!-- German -->
695
+ <dynamicField name="*_txt_de" type="text_de" indexed="true" stored="true"/>
696
+ <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
697
+ <analyzer>
698
+ <tokenizer class="solr.StandardTokenizerFactory"/>
699
+ <filter class="solr.LowerCaseFilterFactory"/>
700
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
701
+ <filter class="solr.GermanNormalizationFilterFactory"/>
702
+ <filter class="solr.GermanLightStemFilterFactory"/>
703
+ <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
704
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
705
+ </analyzer>
706
+ </fieldType>
707
+
708
+ <!-- Greek -->
709
+ <dynamicField name="*_txt_el" type="text_el" indexed="true" stored="true"/>
710
+ <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
711
+ <analyzer>
712
+ <tokenizer class="solr.StandardTokenizerFactory"/>
713
+ <!-- greek specific lowercase for sigma -->
714
+ <filter class="solr.GreekLowerCaseFilterFactory"/>
715
+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" />
716
+ <filter class="solr.GreekStemFilterFactory"/>
717
+ </analyzer>
718
+ </fieldType>
719
+
720
+ <!-- Spanish -->
721
+ <dynamicField name="*_txt_es" type="text_es" indexed="true" stored="true"/>
722
+ <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
723
+ <analyzer>
724
+ <tokenizer class="solr.StandardTokenizerFactory"/>
725
+ <filter class="solr.LowerCaseFilterFactory"/>
726
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
727
+ <filter class="solr.SpanishLightStemFilterFactory"/>
728
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
729
+ </analyzer>
730
+ </fieldType>
731
+
732
+ <!-- Basque -->
733
+ <dynamicField name="*_txt_eu" type="text_eu" indexed="true" stored="true"/>
734
+ <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
735
+ <analyzer>
736
+ <tokenizer class="solr.StandardTokenizerFactory"/>
737
+ <filter class="solr.LowerCaseFilterFactory"/>
738
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" />
739
+ <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
740
+ </analyzer>
741
+ </fieldType>
742
+
743
+ <!-- Persian -->
744
+ <dynamicField name="*_txt_fa" type="text_fa" indexed="true" stored="true"/>
745
+ <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
746
+ <analyzer>
747
+ <!-- for ZWNJ -->
748
+ <charFilter class="solr.PersianCharFilterFactory"/>
749
+ <tokenizer class="solr.StandardTokenizerFactory"/>
750
+ <filter class="solr.LowerCaseFilterFactory"/>
751
+ <filter class="solr.ArabicNormalizationFilterFactory"/>
752
+ <filter class="solr.PersianNormalizationFilterFactory"/>
753
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" />
754
+ </analyzer>
755
+ </fieldType>
756
+
757
+ <!-- Finnish -->
758
+ <dynamicField name="*_txt_fi" type="text_fi" indexed="true" stored="true"/>
759
+ <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
760
+ <analyzer>
761
+ <tokenizer class="solr.StandardTokenizerFactory"/>
762
+ <filter class="solr.LowerCaseFilterFactory"/>
763
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
764
+ <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
765
+ <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
766
+ </analyzer>
767
+ </fieldType>
768
+
769
+ <!-- French -->
770
+ <dynamicField name="*_txt_fr" type="text_fr" indexed="true" stored="true"/>
771
+ <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
772
+ <analyzer>
773
+ <tokenizer class="solr.StandardTokenizerFactory"/>
774
+ <!-- removes l', etc -->
775
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
776
+ <filter class="solr.LowerCaseFilterFactory"/>
777
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
778
+ <filter class="solr.FrenchLightStemFilterFactory"/>
779
+ <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
780
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
781
+ </analyzer>
782
+ </fieldType>
783
+
784
+ <!-- Irish -->
785
+ <dynamicField name="*_txt_ga" type="text_ga" indexed="true" stored="true"/>
786
+ <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
787
+ <analyzer>
788
+ <tokenizer class="solr.StandardTokenizerFactory"/>
789
+ <!-- removes d', etc -->
790
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
791
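+ <!-- removes n-, etc. NOTE: this comment says position increments are intentionally false, but the filter below sets no enablePositionIncrements attribute; verify the intended behavior. -->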
792
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
793
+ <filter class="solr.IrishLowerCaseFilterFactory"/>
794
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/>
795
+ <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
796
+ </analyzer>
797
+ </fieldType>
798
+
799
+ <!-- Galician -->
800
+ <dynamicField name="*_txt_gl" type="text_gl" indexed="true" stored="true"/>
801
+ <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
802
+ <analyzer>
803
+ <tokenizer class="solr.StandardTokenizerFactory"/>
804
+ <filter class="solr.LowerCaseFilterFactory"/>
805
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" />
806
+ <filter class="solr.GalicianStemFilterFactory"/>
807
+ <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
808
+ </analyzer>
809
+ </fieldType>
810
+
811
+ <!-- Hindi -->
812
+ <dynamicField name="*_txt_hi" type="text_hi" indexed="true" stored="true"/>
813
+ <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
814
+ <analyzer>
815
+ <tokenizer class="solr.StandardTokenizerFactory"/>
816
+ <filter class="solr.LowerCaseFilterFactory"/>
817
+ <!-- normalizes unicode representation -->
818
+ <filter class="solr.IndicNormalizationFilterFactory"/>
819
+ <!-- normalizes variation in spelling -->
820
+ <filter class="solr.HindiNormalizationFilterFactory"/>
821
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" />
822
+ <filter class="solr.HindiStemFilterFactory"/>
823
+ </analyzer>
824
+ </fieldType>
825
+
826
+ <!-- Hungarian -->
827
+ <dynamicField name="*_txt_hu" type="text_hu" indexed="true" stored="true"/>
828
+ <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
829
+ <analyzer>
830
+ <tokenizer class="solr.StandardTokenizerFactory"/>
831
+ <filter class="solr.LowerCaseFilterFactory"/>
832
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
833
+ <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
834
+ <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->
835
+ </analyzer>
836
+ </fieldType>
837
+
838
+ <!-- Armenian -->
839
+ <dynamicField name="*_txt_hy" type="text_hy" indexed="true" stored="true"/>
840
+ <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
841
+ <analyzer>
842
+ <tokenizer class="solr.StandardTokenizerFactory"/>
843
+ <filter class="solr.LowerCaseFilterFactory"/>
844
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" />
845
+ <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
846
+ </analyzer>
847
+ </fieldType>
848
+
849
+ <!-- Indonesian -->
850
+ <dynamicField name="*_txt_id" type="text_id" indexed="true" stored="true"/>
851
+ <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
852
+ <analyzer>
853
+ <tokenizer class="solr.StandardTokenizerFactory"/>
854
+ <filter class="solr.LowerCaseFilterFactory"/>
855
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" />
856
+ <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
857
+ <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
858
+ </analyzer>
859
+ </fieldType>
860
+
861
+ <!-- Italian -->
862
+ <dynamicField name="*_txt_it" type="text_it" indexed="true" stored="true"/>
863
+ <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
864
+ <analyzer>
865
+ <tokenizer class="solr.StandardTokenizerFactory"/>
866
+ <!-- removes l', etc -->
867
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
868
+ <filter class="solr.LowerCaseFilterFactory"/>
869
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
870
+ <filter class="solr.ItalianLightStemFilterFactory"/>
871
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
872
+ </analyzer>
873
+ </fieldType>
874
+
875
+ <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)
876
+
877
+ NOTE: If you want to optimize search for precision, use default operator AND in your request
878
+ handler config (q.op). Use OR if you would like to optimize for recall (default).
879
+ -->
880
+ <dynamicField name="*_txt_ja" type="text_ja" indexed="true" stored="true"/>
881
+ <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
882
+ <analyzer>
883
+ <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
884
+
885
+ Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic
886
+ is used to segment compounds into their parts, and the compound itself is kept as a synonym.
887
+
888
+ Valid values for attribute mode are:
889
+ normal: regular segmentation
890
+ search: segmentation useful for search, with compounds kept as synonyms (default)
891
+ extended: same as search mode, but unigrams unknown words (experimental)
892
+
893
+ For some applications it might be good to use search mode for indexing and normal mode for
894
+ queries to reduce recall and prevent parts of compounds from being matched and highlighted.
895
+ Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.
896
+
897
+ Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
898
+ model with your own entries for segmentation, part-of-speech tags and readings without a need
899
+ to specify weights. Notice that user dictionaries have not been subject to extensive testing.
900
+
901
+ User dictionary attributes are:
902
+ userDictionary: user dictionary filename
903
+ userDictionaryEncoding: user dictionary encoding (default is UTF-8)
904
+
905
+ See lang/userdict_ja.txt for a sample user dictionary file.
906
+
907
+ Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
908
+
909
+ See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
910
+ -->
911
+ <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
912
+ <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
913
+ <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
914
+ <filter class="solr.JapaneseBaseFormFilterFactory"/>
915
+ <!-- Removes tokens with certain part-of-speech tags -->
916
+ <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" />
917
+ <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
918
+ <filter class="solr.CJKWidthFilterFactory"/>
919
+ <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
920
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" />
921
+ <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
922
+ <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
923
+ <!-- Lower-cases romaji characters -->
924
+ <filter class="solr.LowerCaseFilterFactory"/>
925
+ </analyzer>
926
+ </fieldType>
927
+
928
+ <!-- Latvian -->
929
+ <dynamicField name="*_txt_lv" type="text_lv" indexed="true" stored="true"/>
930
+ <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
931
+ <analyzer>
932
+ <tokenizer class="solr.StandardTokenizerFactory"/>
933
+ <filter class="solr.LowerCaseFilterFactory"/>
934
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" />
935
+ <filter class="solr.LatvianStemFilterFactory"/>
936
+ </analyzer>
937
+ </fieldType>
938
+
939
+ <!-- Dutch -->
940
+ <dynamicField name="*_txt_nl" type="text_nl" indexed="true" stored="true"/>
941
+ <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
942
+ <analyzer>
943
+ <tokenizer class="solr.StandardTokenizerFactory"/>
944
+ <filter class="solr.LowerCaseFilterFactory"/>
945
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
946
+ <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
947
+ <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
948
+ </analyzer>
949
+ </fieldType>
950
+
951
+ <!-- Norwegian -->
952
+ <dynamicField name="*_txt_no" type="text_no" indexed="true" stored="true"/>
953
+ <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
954
+ <analyzer>
955
+ <tokenizer class="solr.StandardTokenizerFactory"/>
956
+ <filter class="solr.LowerCaseFilterFactory"/>
957
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
958
+ <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
959
+ <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> -->
960
+ <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
961
+ </analyzer>
962
+ </fieldType>
963
+
964
+ <!-- Portuguese -->
965
+ <dynamicField name="*_txt_pt" type="text_pt" indexed="true" stored="true"/>
966
+ <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
967
+ <analyzer>
968
+ <tokenizer class="solr.StandardTokenizerFactory"/>
969
+ <filter class="solr.LowerCaseFilterFactory"/>
970
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
971
+ <filter class="solr.PortugueseLightStemFilterFactory"/>
972
+ <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
973
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
974
+ <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
975
+ </analyzer>
976
+ </fieldType>
977
+
978
+ <!-- Romanian -->
979
+ <dynamicField name="*_txt_ro" type="text_ro" indexed="true" stored="true"/>
980
+ <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
981
+ <analyzer>
982
+ <tokenizer class="solr.StandardTokenizerFactory"/>
983
+ <filter class="solr.LowerCaseFilterFactory"/>
984
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" />
985
+ <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
986
+ </analyzer>
987
+ </fieldType>
988
+
989
+ <!-- Russian -->
990
+ <dynamicField name="*_txt_ru" type="text_ru" indexed="true" stored="true"/>
991
+ <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
992
+ <analyzer>
993
+ <tokenizer class="solr.StandardTokenizerFactory"/>
994
+ <filter class="solr.LowerCaseFilterFactory"/>
995
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
996
+ <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
997
+ <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
998
+ </analyzer>
999
+ </fieldType>
1000
+
1001
+ <!-- Swedish -->
1002
+ <dynamicField name="*_txt_sv" type="text_sv" indexed="true" stored="true"/>
1003
+ <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
1004
+ <analyzer>
1005
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1006
+ <filter class="solr.LowerCaseFilterFactory"/>
1007
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
1008
+ <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
1009
+ <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
1010
+ </analyzer>
1011
+ </fieldType>
1012
+
1013
+ <!-- Thai -->
1014
+ <dynamicField name="*_txt_th" type="text_th" indexed="true" stored="true"/>
1015
+ <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
1016
+ <analyzer>
1017
+ <tokenizer class="solr.ThaiTokenizerFactory"/>
1018
+ <filter class="solr.LowerCaseFilterFactory"/>
1019
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" />
1020
+ </analyzer>
1021
+ </fieldType>
1022
+
1023
+ <!-- Turkish -->
1024
+ <dynamicField name="*_txt_tr" type="text_tr" indexed="true" stored="true"/>
1025
+ <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
1026
+ <analyzer>
1027
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1028
+ <filter class="solr.TurkishLowerCaseFilterFactory"/>
1029
+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" />
1030
+ <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
1031
+ </analyzer>
1032
+ </fieldType>
1033
+
1034
+ <!-- Similarity is the scoring routine for each document vs. a query.
1035
+ A custom Similarity or SimilarityFactory may be specified here, but
1036
+ the default is fine for most applications.
1037
+ For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
1038
+ -->
1039
+ <!--
1040
+ <similarity class="com.example.solr.CustomSimilarityFactory">
1041
+ <str name="paramkey">param value</str>
1042
+ </similarity>
1043
+ -->
1044
+
1045
+ </schema>