blacklight-access_controls 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/Gemfile +32 -0
  4. data/README.textile +74 -0
  5. data/Rakefile +47 -0
  6. data/VERSION +1 -0
  7. data/blacklight-access_controls.gemspec +29 -0
  8. data/lib/blacklight-access_controls.rb +23 -0
  9. data/lib/blacklight/access_controls.rb +14 -0
  10. data/lib/blacklight/access_controls/ability.rb +148 -0
  11. data/lib/blacklight/access_controls/catalog.rb +27 -0
  12. data/lib/blacklight/access_controls/config.rb +39 -0
  13. data/lib/blacklight/access_controls/enforcement.rb +103 -0
  14. data/lib/blacklight/access_controls/permissions_cache.rb +19 -0
  15. data/lib/blacklight/access_controls/permissions_query.rb +53 -0
  16. data/lib/blacklight/access_controls/permissions_solr_document.rb +2 -0
  17. data/lib/blacklight/access_controls/user.rb +23 -0
  18. data/lib/generators/blacklight/ability.rb +4 -0
  19. data/lib/generators/blacklight/access_controls_generator.rb +49 -0
  20. data/solr_conf/conf/abc123 +0 -0
  21. data/solr_conf/conf/admin-extra.html +24 -0
  22. data/solr_conf/conf/admin-extra.menu-bottom.html +25 -0
  23. data/solr_conf/conf/admin-extra.menu-top.html +25 -0
  24. data/solr_conf/conf/clustering/carrot2/kmeans-attributes.xml +19 -0
  25. data/solr_conf/conf/clustering/carrot2/lingo-attributes.xml +24 -0
  26. data/solr_conf/conf/clustering/carrot2/stc-attributes.xml +19 -0
  27. data/solr_conf/conf/currency.xml +67 -0
  28. data/solr_conf/conf/dataimport.properties +3 -0
  29. data/solr_conf/conf/db-data-config.xml +93 -0
  30. data/solr_conf/conf/elevate.xml +38 -0
  31. data/solr_conf/conf/lang/contractions_ca.txt +8 -0
  32. data/solr_conf/conf/lang/contractions_fr.txt +15 -0
  33. data/solr_conf/conf/lang/contractions_ga.txt +5 -0
  34. data/solr_conf/conf/lang/contractions_it.txt +23 -0
  35. data/solr_conf/conf/lang/hyphenations_ga.txt +5 -0
  36. data/solr_conf/conf/lang/stemdict_nl.txt +6 -0
  37. data/solr_conf/conf/lang/stoptags_ja.txt +420 -0
  38. data/solr_conf/conf/lang/stopwords_ar.txt +125 -0
  39. data/solr_conf/conf/lang/stopwords_bg.txt +193 -0
  40. data/solr_conf/conf/lang/stopwords_ca.txt +220 -0
  41. data/solr_conf/conf/lang/stopwords_ckb.txt +136 -0
  42. data/solr_conf/conf/lang/stopwords_cz.txt +172 -0
  43. data/solr_conf/conf/lang/stopwords_da.txt +110 -0
  44. data/solr_conf/conf/lang/stopwords_de.txt +294 -0
  45. data/solr_conf/conf/lang/stopwords_el.txt +78 -0
  46. data/solr_conf/conf/lang/stopwords_en.txt +54 -0
  47. data/solr_conf/conf/lang/stopwords_es.txt +356 -0
  48. data/solr_conf/conf/lang/stopwords_eu.txt +99 -0
  49. data/solr_conf/conf/lang/stopwords_fa.txt +313 -0
  50. data/solr_conf/conf/lang/stopwords_fi.txt +97 -0
  51. data/solr_conf/conf/lang/stopwords_fr.txt +186 -0
  52. data/solr_conf/conf/lang/stopwords_ga.txt +110 -0
  53. data/solr_conf/conf/lang/stopwords_gl.txt +161 -0
  54. data/solr_conf/conf/lang/stopwords_hi.txt +235 -0
  55. data/solr_conf/conf/lang/stopwords_hu.txt +211 -0
  56. data/solr_conf/conf/lang/stopwords_hy.txt +46 -0
  57. data/solr_conf/conf/lang/stopwords_id.txt +359 -0
  58. data/solr_conf/conf/lang/stopwords_it.txt +303 -0
  59. data/solr_conf/conf/lang/stopwords_ja.txt +127 -0
  60. data/solr_conf/conf/lang/stopwords_lv.txt +172 -0
  61. data/solr_conf/conf/lang/stopwords_nl.txt +119 -0
  62. data/solr_conf/conf/lang/stopwords_no.txt +194 -0
  63. data/solr_conf/conf/lang/stopwords_pt.txt +253 -0
  64. data/solr_conf/conf/lang/stopwords_ro.txt +233 -0
  65. data/solr_conf/conf/lang/stopwords_ru.txt +243 -0
  66. data/solr_conf/conf/lang/stopwords_sv.txt +133 -0
  67. data/solr_conf/conf/lang/stopwords_th.txt +119 -0
  68. data/solr_conf/conf/lang/stopwords_tr.txt +212 -0
  69. data/solr_conf/conf/lang/userdict_ja.txt +29 -0
  70. data/solr_conf/conf/mapping-FoldToASCII.txt +3813 -0
  71. data/solr_conf/conf/mapping-ISOLatin1Accent.txt +246 -0
  72. data/solr_conf/conf/protwords.txt +21 -0
  73. data/solr_conf/conf/schema.blacklight.xml +724 -0
  74. data/solr_conf/conf/schema.xml +1268 -0
  75. data/solr_conf/conf/schema.xml.orig +1524 -0
  76. data/solr_conf/conf/solrconfig.adams.xml +1903 -0
  77. data/solr_conf/conf/solrconfig.blacklight.xml +411 -0
  78. data/solr_conf/conf/solrconfig.old.xml +1634 -0
  79. data/solr_conf/conf/solrconfig.xml +332 -0
  80. data/solr_conf/conf/solrconfig.xml.orig +3531 -0
  81. data/solr_conf/conf/spellings.txt +2 -0
  82. data/solr_conf/conf/stopwords.txt +14 -0
  83. data/solr_conf/conf/synonyms.txt +29 -0
  84. data/solr_conf/conf/update-script.js +53 -0
  85. data/solr_conf/conf/xslt/example.xsl +132 -0
  86. data/solr_conf/conf/xslt/example_atom.xsl +67 -0
  87. data/solr_conf/conf/xslt/example_rss.xsl +66 -0
  88. data/solr_conf/conf/xslt/luke.xsl +337 -0
  89. data/solr_conf/conf/xslt/updateXml.xsl +70 -0
  90. data/spec/factories/user.rb +6 -0
  91. data/spec/spec_helper.rb +29 -0
  92. data/spec/support/solr_support.rb +11 -0
  93. data/spec/test_app_templates/blacklight.yml +18 -0
  94. data/spec/test_app_templates/lib/generators/test_app_generator.rb +25 -0
  95. data/spec/unit/ability_spec.rb +202 -0
  96. data/spec/unit/catalog_spec.rb +41 -0
  97. data/spec/unit/config_spec.rb +69 -0
  98. data/spec/unit/enforcement_spec.rb +147 -0
  99. metadata +265 -0
@@ -0,0 +1,1524 @@
1
+ <<<<<<< HEAD
2
+ <?xml version="1.0" encoding="UTF-8"?>
3
+ <schema name="Hydra" version="1.5">
4
+
5
+ <uniqueKey>id</uniqueKey>
6
+
7
+ <fields>
8
+ <!-- _version_ and update log are required for SolrCloud -->
9
+ <field name="_version_" type="long" indexed="true" stored="true"/>
10
+ <field name="id" type="string" stored="true" indexed="true" multiValued="false" required="true"/>
11
+ <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
12
+ <field name="lat" type="tdouble" stored="true" indexed="true" multiValued="false"/>
13
+ <field name="lng" type="tdouble" stored="true" indexed="true" multiValued="false"/>
14
+
15
+ <!--these fields are hard coded in places in hydra-head -->
16
+ <field name="active_fedora_model_s" type="string" stored="true" indexed="true"/>
17
+ <field name="object_profile_display" type="string" stored="true" indexed="true"/>
18
+ <field name="has_model_s" type="string" stored="true" indexed="true"/>
19
+ <field name="is_governed_by_s" type="string" stored="true" indexed="true"/>
20
+
21
+ <!-- NOTE: not all possible Solr field types are represented in the dynamic fields -->
22
+
23
+ <!-- text (_t...) -->
24
+ <dynamicField name="*_ti" type="text" stored="false" indexed="true" multiValued="false"/>
25
+ <dynamicField name="*_tim" type="text" stored="false" indexed="true" multiValued="true"/>
26
+ <dynamicField name="*_ts" type="text" stored="true" indexed="false" multiValued="false"/>
27
+ <dynamicField name="*_tsm" type="text" stored="true" indexed="false" multiValued="true"/>
28
+ <dynamicField name="*_tsi" type="text" stored="true" indexed="true" multiValued="false"/>
29
+ <dynamicField name="*_tsim" type="text" stored="true" indexed="true" multiValued="true"/>
30
+ <dynamicField name="*_tiv" type="text" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
31
+ <dynamicField name="*_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
32
+ <dynamicField name="*_tsiv" type="text" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
33
+ <dynamicField name="*_tsimv" type="text" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
34
+
35
+ <!-- English text (_te...) -->
36
+ <dynamicField name="*_tei" type="text_en" stored="false" indexed="true" multiValued="false"/>
37
+ <dynamicField name="*_teim" type="text_en" stored="false" indexed="true" multiValued="true"/>
38
+ <dynamicField name="*_tes" type="text_en" stored="true" indexed="false" multiValued="false"/>
39
+ <dynamicField name="*_tesm" type="text_en" stored="true" indexed="false" multiValued="true"/>
40
+ <dynamicField name="*_tesi" type="text_en" stored="true" indexed="true" multiValued="false"/>
41
+ <dynamicField name="*_tesim" type="text_en" stored="true" indexed="true" multiValued="true"/>
42
+ <dynamicField name="*_teiv" type="text_en" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
43
+ <dynamicField name="*_teimv" type="text_en" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
44
+ <dynamicField name="*_tesiv" type="text_en" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
45
+ <dynamicField name="*_tesimv" type="text_en" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
46
+
47
+ <!-- string (_s...) -->
48
+ <dynamicField name="*_si" type="string" stored="false" indexed="true" multiValued="false"/>
49
+ <dynamicField name="*_sim" type="string" stored="false" indexed="true" multiValued="true"/>
50
+ <dynamicField name="*_ss" type="string" stored="true" indexed="false" multiValued="false"/>
51
+ <dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true"/>
52
+ <dynamicField name="*_ssi" type="string" stored="true" indexed="true" multiValued="false"/>
53
+ <dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true"/>
54
+ <dynamicField name="*_ssort" type="alphaSort" stored="false" indexed="true" multiValued="false"/>
55
+
56
+ <!-- integer (_i...) -->
57
+ <dynamicField name="*_ii" type="int" stored="false" indexed="true" multiValued="false"/>
58
+ <dynamicField name="*_iim" type="int" stored="false" indexed="true" multiValued="true"/>
59
+ <dynamicField name="*_is" type="int" stored="true" indexed="false" multiValued="false"/>
60
+ <dynamicField name="*_ism" type="int" stored="true" indexed="false" multiValued="true"/>
61
+ <dynamicField name="*_isi" type="int" stored="true" indexed="true" multiValued="false"/>
62
+ <dynamicField name="*_isim" type="int" stored="true" indexed="true" multiValued="true"/>
63
+
64
+ <!-- trie integer (_it...) (for faster range queries) -->
65
+ <dynamicField name="*_iti" type="tint" stored="false" indexed="true" multiValued="false"/>
66
+ <dynamicField name="*_itim" type="tint" stored="false" indexed="true" multiValued="true"/>
67
+ <dynamicField name="*_its" type="tint" stored="true" indexed="false" multiValued="false"/>
68
+ <dynamicField name="*_itsm" type="tint" stored="true" indexed="false" multiValued="true"/>
69
+ <dynamicField name="*_itsi" type="tint" stored="true" indexed="true" multiValued="false"/>
70
+ <dynamicField name="*_itsim" type="tint" stored="true" indexed="true" multiValued="true"/>
71
+
72
+ <!-- date (_dt...) -->
73
+ <dynamicField name="*_dti" type="date" stored="false" indexed="true" multiValued="false"/>
74
+ <dynamicField name="*_dtim" type="date" stored="false" indexed="true" multiValued="true"/>
75
+ <dynamicField name="*_dts" type="date" stored="true" indexed="false" multiValued="false"/>
76
+ <dynamicField name="*_dtsm" type="date" stored="true" indexed="false" multiValued="true"/>
77
+ <dynamicField name="*_dtsi" type="date" stored="true" indexed="true" multiValued="false"/>
78
+ <dynamicField name="*_dtsim" type="date" stored="true" indexed="true" multiValued="true"/>
79
+
80
+ <!-- trie date (_dtt...) (for faster range queries) -->
81
+ <dynamicField name="*_dtti" type="tdate" stored="false" indexed="true" multiValued="false"/>
82
+ <dynamicField name="*_dttim" type="tdate" stored="false" indexed="true" multiValued="true"/>
83
+ <dynamicField name="*_dtts" type="tdate" stored="true" indexed="false" multiValued="false"/>
84
+ <dynamicField name="*_dttsm" type="tdate" stored="true" indexed="false" multiValued="true"/>
85
+ <dynamicField name="*_dttsi" type="tdate" stored="true" indexed="true" multiValued="false"/>
86
+ <dynamicField name="*_dttsim" type="tdate" stored="true" indexed="true" multiValued="true"/>
87
+
88
+ <!-- long (_l...) -->
89
+ <dynamicField name="*_li" type="long" stored="false" indexed="true" multiValued="false"/>
90
+ <dynamicField name="*_lim" type="long" stored="false" indexed="true" multiValued="true"/>
91
+ <dynamicField name="*_ls" type="long" stored="true" indexed="false" multiValued="false"/>
92
+ <dynamicField name="*_lsm" type="long" stored="true" indexed="false" multiValued="true"/>
93
+ <dynamicField name="*_lsi" type="long" stored="true" indexed="true" multiValued="false"/>
94
+ <dynamicField name="*_lsim" type="long" stored="true" indexed="true" multiValued="true"/>
95
+
96
+ <!-- trie long (_lt...) (for faster range queries) -->
97
+ <dynamicField name="*_lti" type="tlong" stored="false" indexed="true" multiValued="false"/>
98
+ <dynamicField name="*_ltim" type="tlong" stored="false" indexed="true" multiValued="true"/>
99
+ <dynamicField name="*_lts" type="tlong" stored="true" indexed="false" multiValued="false"/>
100
+ <dynamicField name="*_ltsm" type="tlong" stored="true" indexed="false" multiValued="true"/>
101
+ <dynamicField name="*_ltsi" type="tlong" stored="true" indexed="true" multiValued="false"/>
102
+ <dynamicField name="*_ltsim" type="tlong" stored="true" indexed="true" multiValued="true"/>
103
+
104
+ <!-- double (_db...) -->
105
+ <dynamicField name="*_dbi" type="double" stored="false" indexed="true" multiValued="false"/>
106
+ <dynamicField name="*_dbim" type="double" stored="false" indexed="true" multiValued="true"/>
107
+ <dynamicField name="*_dbs" type="double" stored="true" indexed="false" multiValued="false"/>
108
+ <dynamicField name="*_dbsm" type="double" stored="true" indexed="false" multiValued="true"/>
109
+ <dynamicField name="*_dbsi" type="double" stored="true" indexed="true" multiValued="false"/>
110
+ <dynamicField name="*_dbsim" type="double" stored="true" indexed="true" multiValued="true"/>
111
+
112
+ <!-- trie double (_dbt...) (for faster range queries) -->
113
+ <dynamicField name="*_dbti" type="tdouble" stored="false" indexed="true" multiValued="false"/>
114
+ <dynamicField name="*_dbtim" type="tdouble" stored="false" indexed="true" multiValued="true"/>
115
+ <dynamicField name="*_dbts" type="tdouble" stored="true" indexed="false" multiValued="false"/>
116
+ <dynamicField name="*_dbtsm" type="tdouble" stored="true" indexed="false" multiValued="true"/>
117
+ <dynamicField name="*_dbtsi" type="tdouble" stored="true" indexed="true" multiValued="false"/>
118
+ <dynamicField name="*_dbtsim" type="tdouble" stored="true" indexed="true" multiValued="true"/>
119
+
120
+ <!-- float (_f...) -->
121
+ <dynamicField name="*_fi" type="float" stored="false" indexed="true" multiValued="false"/>
122
+ <dynamicField name="*_fim" type="float" stored="false" indexed="true" multiValued="true"/>
123
+ <dynamicField name="*_fs" type="float" stored="true" indexed="false" multiValued="false"/>
124
+ <dynamicField name="*_fsm" type="float" stored="true" indexed="false" multiValued="true"/>
125
+ <dynamicField name="*_fsi" type="float" stored="true" indexed="true" multiValued="false"/>
126
+ <dynamicField name="*_fsim" type="float" stored="true" indexed="true" multiValued="true"/>
127
+
128
+ <!-- trie float (_ft...) (for faster range queries) -->
129
+ <dynamicField name="*_fti" type="tfloat" stored="false" indexed="true" multiValued="false"/>
130
+ <dynamicField name="*_ftim" type="tfloat" stored="false" indexed="true" multiValued="true"/>
131
+ <dynamicField name="*_fts" type="tfloat" stored="true" indexed="false" multiValued="false"/>
132
+ <dynamicField name="*_ftsm" type="tfloat" stored="true" indexed="false" multiValued="true"/>
133
+ <dynamicField name="*_ftsi" type="tfloat" stored="true" indexed="true" multiValued="false"/>
134
+ <dynamicField name="*_ftsim" type="tfloat" stored="true" indexed="true" multiValued="true"/>
135
+
136
+ <!-- boolean (_b...) -->
137
+ <dynamicField name="*_bi" type="boolean" stored="false" indexed="true" multiValued="false"/>
138
+ <dynamicField name="*_bs" type="boolean" stored="true" indexed="false" multiValued="false"/>
139
+ <dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="false"/>
140
+
141
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
142
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
143
+
144
+ <!-- location (_ll...) -->
145
+ <dynamicField name="*_lli" type="location" stored="false" indexed="true" multiValued="false"/>
146
+ <dynamicField name="*_llim" type="location" stored="false" indexed="true" multiValued="true"/>
147
+ <dynamicField name="*_lls" type="location" stored="true" indexed="false" multiValued="false"/>
148
+ <dynamicField name="*_llsm" type="location" stored="true" indexed="false" multiValued="true"/>
149
+ <dynamicField name="*_llsi" type="location" stored="true" indexed="true" multiValued="false"/>
150
+ <dynamicField name="*_llsim" type="location" stored="true" indexed="true" multiValued="true"/>
151
+
152
+ <!-- you must define copyField source and dest fields explicitly or schemaBrowser doesn't work -->
153
+ <field name="all_text_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
154
+
155
+ <!-- alternative method for full-text indexing -->
156
+ <!-- <copyField source="*_tesim" dest="all_text_timv" maxChars="3000"/> -->
157
+ </fields>
158
+
159
+ <types>
160
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
161
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
162
+ <fieldType name="rand" class="solr.RandomSortField" omitNorms="true"/>
163
+
164
+ <!-- Default numeric field types. -->
165
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
166
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
167
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
168
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
169
+
170
+ <!-- trie numeric field types for faster range queries -->
171
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
172
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
173
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
174
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
175
+
176
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
177
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z -->
178
+ <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
179
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
180
+ <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
181
+
182
+
183
+ <!-- This point type indexes the coordinates as separate fields (subFields)
184
+ If subFieldType is defined, it references a type, and a dynamic field
185
+ definition is created matching *___<typename>. Alternately, if
186
+ subFieldSuffix is defined, that is used to create the subFields.
187
+ Example: if subFieldType="double", then the coordinates would be
188
+ indexed in fields myloc_0___double,myloc_1___double.
189
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
190
+ in fields myloc_0_d,myloc_1_d
191
+ The subFields are an implementation detail of the fieldType, and end
192
+ users normally should not need to know about them. -->
193
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
194
+
195
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
196
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
197
+
198
+ <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
199
+ For more information about this and other Spatial fields new to Solr 4, see:
200
+ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4 -->
201
+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
202
+
203
+ <fieldType name="text" class="solr.TextField" omitNorms="false">
204
+ <analyzer>
205
+ <tokenizer class="solr.ICUTokenizerFactory"/>
206
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
207
+ <filter class="solr.TrimFilterFactory"/>
208
+ </analyzer>
209
+ </fieldType>
210
+
211
+ <!-- A text field that only splits on whitespace for exact matching of words -->
212
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
213
+ <analyzer>
214
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
215
+ <filter class="solr.TrimFilterFactory"/>
216
+ </analyzer>
217
+ </fieldType>
218
+
219
+ <!-- single token analyzed text, for sorting. Punctuation is significant. -->
220
+ <fieldtype name="alphaSort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
221
+ <analyzer>
222
+ <tokenizer class="solr.KeywordTokenizerFactory" />
223
+ <filter class="solr.ICUFoldingFilterFactory"/>
224
+ <filter class="solr.TrimFilterFactory" />
225
+ </analyzer>
226
+ </fieldtype>
227
+
228
+ <!-- A text field with defaults appropriate for English -->
229
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
230
+ <analyzer>
231
+ <tokenizer class="solr.ICUTokenizerFactory"/>
232
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
233
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
234
+ <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
235
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
236
+ <!-- <filter class="solr.PorterStemFilterFactory"/> -->
237
+ <filter class="solr.TrimFilterFactory"/>
238
+ </analyzer>
239
+ </fieldType>
240
+
241
+ <!-- queries for paths match documents at that path, or in descendent paths -->
242
+ <fieldType name="descendent_path" class="solr.TextField">
243
+ <analyzer type="index">
244
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
245
+ </analyzer>
246
+ <analyzer type="query">
247
+ <tokenizer class="solr.KeywordTokenizerFactory" />
248
+ </analyzer>
249
+ </fieldType>
250
+
251
+ <!-- queries for paths match documents at that path, or in ancestor paths -->
252
+ <fieldType name="ancestor_path" class="solr.TextField">
253
+ <analyzer type="index">
254
+ <tokenizer class="solr.KeywordTokenizerFactory" />
255
+ </analyzer>
256
+ <analyzer type="query">
257
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
258
+ </analyzer>
259
+ </fieldType>
260
+ </types>
261
+
262
+ </schema>
263
+ =======
264
+ <?xml version="1.0" encoding="UTF-8" ?>
265
+ <!--
266
+ Licensed to the Apache Software Foundation (ASF) under one or more
267
+ contributor license agreements. See the NOTICE file distributed with
268
+ this work for additional information regarding copyright ownership.
269
+ The ASF licenses this file to You under the Apache License, Version 2.0
270
+ (the "License"); you may not use this file except in compliance with
271
+ the License. You may obtain a copy of the License at
272
+
273
+ http://www.apache.org/licenses/LICENSE-2.0
274
+
275
+ Unless required by applicable law or agreed to in writing, software
276
+ distributed under the License is distributed on an "AS IS" BASIS,
277
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
278
+ See the License for the specific language governing permissions and
279
+ limitations under the License.
280
+ -->
281
+
282
+ <!--
283
+ This is the Solr schema file. This file should be named "schema.xml" and
284
+ should be in the conf directory under the solr home
285
+ (i.e. ./solr/conf/schema.xml by default)
286
+ or located where the classloader for the Solr webapp can find it.
287
+
288
+ This example schema is the recommended starting point for users.
289
+ It should be kept correct and concise, usable out-of-the-box.
290
+
291
+ For more information, on how to customize this file, please see
292
+ http://wiki.apache.org/solr/SchemaXml
293
+
294
+ PERFORMANCE NOTE: this schema includes many optional features and should not
295
+ be used for benchmarking. To improve performance one could
296
+ - set stored="false" for all fields possible (esp large fields) when you
297
+ only need to search on the field but don't need to return the original
298
+ value.
299
+ - set indexed="false" if you don't need to search on the field, but only
300
+ return the field as a result of searching on other indexed fields.
301
+ - remove all unneeded copyField statements
302
+ - for best index size and searching performance, set "indexed" to false
303
+ for all general text fields, use copyField to copy them to the
304
+ catchall "text" field, and use that for searching.
305
+ - For maximum indexing performance, use the ConcurrentUpdateSolrServer
306
+ java client.
307
+ - Remember to run the JVM in server mode, and use a higher logging level
308
+ that avoids logging every request
309
+ -->
310
+
311
+ <schema name="example-DIH-db" version="1.5">
312
+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
313
+ version="x.y" is Solr's version number for the schema syntax and
314
+ semantics. It should not normally be changed by applications.
315
+
316
+ 1.0: multiValued attribute did not exist, all fields are multiValued
317
+ by nature
318
+ 1.1: multiValued attribute introduced, false by default
319
+ 1.2: omitTermFreqAndPositions attribute introduced, true by default
320
+ except for text fields.
321
+ 1.3: removed optional field compress feature
322
+ 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser
323
+ behavior when a single string produces multiple tokens. Defaults
324
+ to off for version >= 1.4
325
+ 1.5: omitNorms defaults to true for primitive field types
326
+ (int, float, boolean, string...)
327
+ -->
328
+
329
+
330
+ <!-- Valid attributes for fields:
331
+ name: mandatory - the name for the field
332
+ type: mandatory - the name of a field type from the
333
+ <types> fieldType section
334
+ indexed: true if this field should be indexed (searchable or sortable)
335
+ stored: true if this field should be retrievable
336
+ docValues: true if this field should have doc values. Doc values are
337
+ useful for faceting, grouping, sorting and function queries. Although not
338
+ required, doc values will make the index faster to load, more
339
+ NRT-friendly and more memory-efficient. They however come with some
340
+ limitations: they are currently only supported by StrField, UUIDField
341
+ and all Trie*Fields, and depending on the field type, they might
342
+ require the field to be single-valued, be required or have a default
343
+ value (check the documentation of the field type you're interested in
344
+ for more information)
345
+ multiValued: true if this field may contain multiple values per document
346
+ omitNorms: (expert) set to true to omit the norms associated with
347
+ this field (this disables length normalization and index-time
348
+ boosting for the field, and saves some memory). Only full-text
349
+ fields or fields that need an index-time boost need norms.
350
+ Norms are omitted for primitive (non-analyzed) types by default.
351
+ termVectors: [false] set to true to store the term vector for a
352
+ given field.
353
+ When using MoreLikeThis, fields used for similarity should be
354
+ stored for best performance.
355
+ termPositions: Store position information with the term vector.
356
+ This will increase storage costs.
357
+ termOffsets: Store offset information with the term vector. This
358
+ will increase storage costs.
359
+ required: The field is required. It will throw an error if the
360
+ value does not exist
361
+ default: a value that should be used if no value is specified
362
+ when adding a document.
363
+ -->
364
+
365
+ <!-- field names should consist of alphanumeric or underscore characters only and
366
+ not start with a digit. This is not currently strictly enforced,
367
+ but other field names will not have first class support from all components
368
+ and back compatibility is not guaranteed. Names with both leading and
369
+ trailing underscores (e.g. _version_) are reserved.
370
+ -->
371
+
372
+ <!-- If you remove this field, you must _also_ disable the update log in solrconfig.xml
373
+ or Solr won't start. _version_ and update log are required for SolrCloud
374
+ -->
375
+ <field name="_version_" type="long" indexed="true" stored="true"/>
376
+
377
+ <!-- points to the root document of a block of nested documents. Required for nested
378
+ document support, may be removed otherwise
379
+ -->
380
+ <field name="_root_" type="string" indexed="true" stored="false"/>
381
+
382
+ <!-- Only remove the "id" field if you have a very good reason to. While not strictly
383
+ required, it is highly recommended. A <uniqueKey> is present in almost all Solr
384
+ installations. See the <uniqueKey> declaration below where <uniqueKey> is set to "id".
385
+ -->
386
+ <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
387
+
388
+ <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
389
+ <field name="name" type="text_general" indexed="true" stored="true"/>
390
+ <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
391
+ <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
392
+ <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
393
+ <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
394
+
395
+ <field name="weight" type="float" indexed="true" stored="true"/>
396
+ <field name="price" type="float" indexed="true" stored="true"/>
397
+ <field name="popularity" type="int" indexed="true" stored="true" />
398
+ <field name="inStock" type="boolean" indexed="true" stored="true" />
399
+
400
+ <field name="store" type="location" indexed="true" stored="true"/>
401
+
402
+ <!-- Common metadata fields, named specifically to match up with
403
+ SolrCell metadata when parsing rich documents such as Word, PDF.
404
+ Some fields are multiValued only because Tika currently may return
405
+ multiple values for them. Some metadata is parsed from the documents,
406
+ but there are some which come from the client context:
407
+ "content_type": From the HTTP headers of incoming stream
408
+ "resourcename": From SolrCell request param resource.name
409
+ -->
410
+ <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
411
+ <field name="subject" type="text_general" indexed="true" stored="true"/>
412
+ <field name="description" type="text_general" indexed="true" stored="true"/>
413
+ <field name="comments" type="text_general" indexed="true" stored="true"/>
414
+ <field name="author" type="text_general" indexed="true" stored="true"/>
415
+ <field name="keywords" type="text_general" indexed="true" stored="true"/>
416
+ <field name="category" type="text_general" indexed="true" stored="true"/>
417
+ <field name="resourcename" type="text_general" indexed="true" stored="true"/>
418
+ <field name="url" type="text_general" indexed="true" stored="true"/>
419
+ <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
420
+ <field name="last_modified" type="date" indexed="true" stored="true"/>
421
+ <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
422
+
423
+ <!-- Main body of document extracted by SolrCell.
424
+ NOTE: This field is not indexed by default, since it is also copied to "text"
425
+ using copyField below. This is to save space. Use this field for returning and
426
+ highlighting document content. Use the "text" field to search the content. -->
427
+ <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>
428
+
429
+
430
+ <!-- catchall field, containing all other searchable text fields (implemented
431
+ via copyField further on in this schema) -->
432
+ <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
433
+
434
+ <!-- catchall text field that indexes tokens both normally and in reverse for efficient
435
+ leading wildcard queries. -->
436
+ <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
437
+
438
+ <!-- non-tokenized version of manufacturer to make it easier to sort or group
439
+ results by manufacturer. copied from "manu" via copyField -->
440
+ <field name="manu_exact" type="string" indexed="true" stored="false"/>
441
+
442
+ <field name="payloads" type="payloads" indexed="true" stored="true"/>
443
+
444
+
445
+ <!--
446
+ Some fields such as popularity and manu_exact could be modified to
447
+ leverage doc values:
448
+ <field name="popularity" type="int" indexed="true" stored="true" docValues="true" />
449
+ <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" />
450
+ <field name="cat" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
451
+
452
+
453
+ Although it would make indexing slightly slower and the index bigger, it
454
+ would also make the index faster to load, more memory-efficient and more
455
+ NRT-friendly.
456
+ -->
457
+
458
+ <!-- Dynamic field definitions allow using convention over configuration
459
+ for fields via the specification of patterns to match field names.
460
+ EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
461
+ RESTRICTION: the glob-like pattern in the name attribute must have
462
+ a "*" only at the start or the end. -->
463
+
464
+ <!-- text (_t...) -->
465
+ <dynamicField name="*_ti" type="text_general" stored="false" indexed="true" multiValued="false"/>
466
+ <dynamicField name="*_tim" type="text_general" stored="false" indexed="true" multiValued="true"/>
467
+ <dynamicField name="*_ts" type="text_general" stored="true" indexed="false" multiValued="false"/>
468
+ <dynamicField name="*_tsm" type="text_general" stored="true" indexed="false" multiValued="true"/>
469
+ <dynamicField name="*_tsi" type="text_general" stored="true" indexed="true" multiValued="false"/>
470
+ <dynamicField name="*_tsim" type="text_general" stored="true" indexed="true" multiValued="true"/>
471
+ <dynamicField name="*_tiv" type="text_general" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
472
+ <dynamicField name="*_timv" type="text_general" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
473
+ <dynamicField name="*_tsiv" type="text_general" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
474
+ <dynamicField name="*_tsimv" type="text_general" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
475
+
476
+ <!-- English text (_te...) -->
477
+ <dynamicField name="*_tei" type="text_en" stored="false" indexed="true" multiValued="false"/>
478
+ <dynamicField name="*_teim" type="text_en" stored="false" indexed="true" multiValued="true"/>
479
+ <dynamicField name="*_tes" type="text_en" stored="true" indexed="false" multiValued="false"/>
480
+ <dynamicField name="*_tesm" type="text_en" stored="true" indexed="false" multiValued="true"/>
481
+ <dynamicField name="*_tesi" type="text_en" stored="true" indexed="true" multiValued="false"/>
482
+ <dynamicField name="*_tesim" type="text_en" stored="true" indexed="true" multiValued="true"/>
483
+ <dynamicField name="*_teiv" type="text_en" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
484
+ <dynamicField name="*_teimv" type="text_en" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
485
+ <dynamicField name="*_tesiv" type="text_en" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
486
+ <dynamicField name="*_tesimv" type="text_en" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
487
+
488
+ <!-- string (_s...) -->
489
+ <dynamicField name="*_si" type="string" stored="false" indexed="true" multiValued="false"/>
490
+ <dynamicField name="*_sim" type="string" stored="false" indexed="true" multiValued="true"/>
491
+ <dynamicField name="*_ss" type="string" stored="true" indexed="false" multiValued="false"/>
492
+ <dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true"/>
493
+ <dynamicField name="*_ssi" type="string" stored="true" indexed="true" multiValued="false"/>
494
+ <dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true"/>
495
+ <dynamicField name="*_ssort" type="alphaOnlySort" stored="false" indexed="true" multiValued="false"/>
496
+
497
+ <!-- integer (_i...) -->
498
+ <dynamicField name="*_ii" type="int" stored="false" indexed="true" multiValued="false"/>
499
+ <dynamicField name="*_iim" type="int" stored="false" indexed="true" multiValued="true"/>
500
+ <dynamicField name="*_is" type="int" stored="true" indexed="false" multiValued="false"/>
501
+ <dynamicField name="*_ism" type="int" stored="true" indexed="false" multiValued="true"/>
502
+ <dynamicField name="*_isi" type="int" stored="true" indexed="true" multiValued="false"/>
503
+ <dynamicField name="*_isim" type="int" stored="true" indexed="true" multiValued="true"/>
504
+
505
+ <!-- trie integer (_it...) (for faster range queries) -->
506
+ <dynamicField name="*_iti" type="tint" stored="false" indexed="true" multiValued="false"/>
507
+ <dynamicField name="*_itim" type="tint" stored="false" indexed="true" multiValued="true"/>
508
+ <dynamicField name="*_its" type="tint" stored="true" indexed="false" multiValued="false"/>
509
+ <dynamicField name="*_itsm" type="tint" stored="true" indexed="false" multiValued="true"/>
510
+ <dynamicField name="*_itsi" type="tint" stored="true" indexed="true" multiValued="false"/>
511
+ <dynamicField name="*_itsim" type="tint" stored="true" indexed="true" multiValued="true"/>
512
+
513
+ <!-- date (_dt...) -->
514
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
515
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z -->
516
+ <dynamicField name="*_dti" type="date" stored="false" indexed="true" multiValued="false"/>
517
+ <dynamicField name="*_dtim" type="date" stored="false" indexed="true" multiValued="true"/>
518
+ <dynamicField name="*_dts" type="date" stored="true" indexed="false" multiValued="false"/>
519
+ <dynamicField name="*_dtsm" type="date" stored="true" indexed="false" multiValued="true"/>
520
+ <dynamicField name="*_dtsi" type="date" stored="true" indexed="true" multiValued="false"/>
521
+ <dynamicField name="*_dtsim" type="date" stored="true" indexed="true" multiValued="true"/>
522
+
523
+ <!-- trie date (_dtt...) (for faster range queries) -->
524
+ <dynamicField name="*_dtti" type="tdate" stored="false" indexed="true" multiValued="false"/>
525
+ <dynamicField name="*_dttim" type="tdate" stored="false" indexed="true" multiValued="true"/>
526
+ <dynamicField name="*_dtts" type="tdate" stored="true" indexed="false" multiValued="false"/>
527
+ <dynamicField name="*_dttsm" type="tdate" stored="true" indexed="false" multiValued="true"/>
528
+ <dynamicField name="*_dttsi" type="tdate" stored="true" indexed="true" multiValued="false"/>
529
+ <dynamicField name="*_dttsim" type="tdate" stored="true" indexed="true" multiValued="true"/>
530
+
531
+ <!-- long (_l...) -->
532
+ <dynamicField name="*_li" type="long" stored="false" indexed="true" multiValued="false"/>
533
+ <dynamicField name="*_lim" type="long" stored="false" indexed="true" multiValued="true"/>
534
+ <dynamicField name="*_ls" type="long" stored="true" indexed="false" multiValued="false"/>
535
+ <dynamicField name="*_lsm" type="long" stored="true" indexed="false" multiValued="true"/>
536
+ <dynamicField name="*_lsi" type="long" stored="true" indexed="true" multiValued="false"/>
537
+ <dynamicField name="*_lsim" type="long" stored="true" indexed="true" multiValued="true"/>
538
+
539
+ <!-- trie long (_lt...) (for faster range queries) -->
540
+ <dynamicField name="*_lti" type="tlong" stored="false" indexed="true" multiValued="false"/>
541
+ <dynamicField name="*_ltim" type="tlong" stored="false" indexed="true" multiValued="true"/>
542
+ <dynamicField name="*_lts" type="tlong" stored="true" indexed="false" multiValued="false"/>
543
+ <dynamicField name="*_ltsm" type="tlong" stored="true" indexed="false" multiValued="true"/>
544
+ <dynamicField name="*_ltsi" type="tlong" stored="true" indexed="true" multiValued="false"/>
545
+ <dynamicField name="*_ltsim" type="tlong" stored="true" indexed="true" multiValued="true"/>
546
+
547
+ <!-- double (_db...) -->
548
+ <dynamicField name="*_dbi" type="double" stored="false" indexed="true" multiValued="false"/>
549
+ <dynamicField name="*_dbim" type="double" stored="false" indexed="true" multiValued="true"/>
550
+ <dynamicField name="*_dbs" type="double" stored="true" indexed="false" multiValued="false"/>
551
+ <dynamicField name="*_dbsm" type="double" stored="true" indexed="false" multiValued="true"/>
552
+ <dynamicField name="*_dbsi" type="double" stored="true" indexed="true" multiValued="false"/>
553
+ <dynamicField name="*_dbsim" type="double" stored="true" indexed="true" multiValued="true"/>
554
+
555
+ <!-- trie double (_dbt...) (for faster range queries) -->
556
+ <dynamicField name="*_dbti" type="tdouble" stored="false" indexed="true" multiValued="false"/>
557
+ <dynamicField name="*_dbtim" type="tdouble" stored="false" indexed="true" multiValued="true"/>
558
+ <dynamicField name="*_dbts" type="tdouble" stored="true" indexed="false" multiValued="false"/>
559
+ <dynamicField name="*_dbtsm" type="tdouble" stored="true" indexed="false" multiValued="true"/>
560
+ <dynamicField name="*_dbtsi" type="tdouble" stored="true" indexed="true" multiValued="false"/>
561
+ <dynamicField name="*_dbtsim" type="tdouble" stored="true" indexed="true" multiValued="true"/>
562
+
563
+ <!-- float (_f...) -->
564
+ <dynamicField name="*_fi" type="float" stored="false" indexed="true" multiValued="false"/>
565
+ <dynamicField name="*_fim" type="float" stored="false" indexed="true" multiValued="true"/>
566
+ <dynamicField name="*_fs" type="float" stored="true" indexed="false" multiValued="false"/>
567
+ <dynamicField name="*_fsm" type="float" stored="true" indexed="false" multiValued="true"/>
568
+ <dynamicField name="*_fsi" type="float" stored="true" indexed="true" multiValued="false"/>
569
+ <dynamicField name="*_fsim" type="float" stored="true" indexed="true" multiValued="true"/>
570
+
571
+ <!-- trie float (_ft...) (for faster range queries) -->
572
+ <dynamicField name="*_fti" type="tfloat" stored="false" indexed="true" multiValued="false"/>
573
+ <dynamicField name="*_ftim" type="tfloat" stored="false" indexed="true" multiValued="true"/>
574
+ <dynamicField name="*_fts" type="tfloat" stored="true" indexed="false" multiValued="false"/>
575
+ <dynamicField name="*_ftsm" type="tfloat" stored="true" indexed="false" multiValued="true"/>
576
+ <dynamicField name="*_ftsi" type="tfloat" stored="true" indexed="true" multiValued="false"/>
577
+ <dynamicField name="*_ftsim" type="tfloat" stored="true" indexed="true" multiValued="true"/>
578
+
579
+ <!-- boolean (_b...) -->
580
+ <dynamicField name="*_bi" type="boolean" stored="false" indexed="true" multiValued="false"/>
581
+ <dynamicField name="*_bs" type="boolean" stored="true" indexed="false" multiValued="false"/>
582
+ <dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="false"/>
583
+
584
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
585
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
586
+
587
+ <!-- location (_ll...) -->
588
+ <dynamicField name="*_lli" type="location" stored="false" indexed="true" multiValued="false"/>
589
+ <dynamicField name="*_llim" type="location" stored="false" indexed="true" multiValued="true"/>
590
+ <dynamicField name="*_lls" type="location" stored="true" indexed="false" multiValued="false"/>
591
+ <dynamicField name="*_llsm" type="location" stored="true" indexed="false" multiValued="true"/>
592
+ <dynamicField name="*_llsi" type="location" stored="true" indexed="true" multiValued="false"/>
593
+ <dynamicField name="*_llsim" type="location" stored="true" indexed="true" multiValued="true"/>
594
+
595
+ <!--<dynamicField name="*_i" type="int" indexed="true" stored="true"/>-->
596
+ <!--<dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/>-->
597
+ <!--<dynamicField name="*_s" type="string" indexed="true" stored="true" />-->
598
+ <!--<dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>-->
599
+ <!--<dynamicField name="*_l" type="long" indexed="true" stored="true"/>-->
600
+ <!--<dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/>-->
601
+ <!--<dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>-->
602
+ <!--<dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>-->
603
+ <!--<dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/>-->
604
+ <!--<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>-->
605
+ <!--<dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>-->
606
+ <!--<dynamicField name="*_f" type="float" indexed="true" stored="true"/>-->
607
+ <!--<dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/>-->
608
+ <!--<dynamicField name="*_d" type="double" indexed="true" stored="true"/>-->
609
+ <!--<dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/>-->
610
+
611
+ <!--&lt;!&ndash; Type used to index the lat and lon components for the "location" FieldType &ndash;&gt;-->
612
+ <!--<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />-->
613
+
614
+ <!--<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>-->
615
+ <!--<dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>-->
616
+ <!--<dynamicField name="*_p" type="location" indexed="true" stored="true"/>-->
617
+
618
+ <!--&lt;!&ndash; some trie-coded dynamic fields for faster range queries &ndash;&gt;-->
619
+ <!--<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>-->
620
+ <!--<dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>-->
621
+ <!--<dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>-->
622
+ <!--<dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>-->
623
+ <!--<dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>-->
624
+
625
+ <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
626
+
627
+ <!--<dynamicField name="ignored_*" type="ignored" multiValued="true"/>-->
628
+ <!--<dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>-->
629
+
630
+ <!--<dynamicField name="random_*" type="random" />-->
631
+
632
+ <!-- uncomment the following to ignore any fields that don't already match an existing
633
+ field name or dynamic field, rather than reporting them as an error.
634
+ alternately, change the type="ignored" to some other type e.g. "text" if you want
635
+ unknown fields indexed and/or stored by default -->
636
+ <!--dynamicField name="*" type="ignored" multiValued="true" /-->
637
+
638
+
639
+
640
+
641
+ <!-- Field to use to determine and enforce document uniqueness.
642
+ Unless this field is marked with required="false", it will be a required field
643
+ -->
644
+ <uniqueKey>id</uniqueKey>
645
+
646
+ <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
647
+ parsing a query string that isn't explicit about the field. Machine (non-user)
648
+ generated queries are best made explicit, or they can use the "df" request parameter
649
+ which takes precedence over this.
650
+ Note: Un-commenting defaultSearchField will be insufficient if your request handler
651
+ in solrconfig.xml defines "df", which takes precedence. That would need to be removed.
652
+ <defaultSearchField>text</defaultSearchField> -->
653
+
654
+ <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query parsers
655
+ when parsing a query string to determine if a clause of the query should be marked as
656
+ required or optional, assuming the clause isn't already marked by some operator.
657
+ The default is OR, which is generally assumed so it is not a good idea to change it
658
+ globally here. The "q.op" request parameter takes precedence over this.
659
+ <solrQueryParser defaultOperator="OR"/> -->
660
+
661
+ <!-- copyField commands copy one field to another at the time a document
662
+ is added to the index. It's used either to index the same field differently,
663
+ or to add multiple fields to the same field for easier/faster searching. -->
664
+
665
+ <copyField source="cat" dest="text"/>
666
+ <copyField source="name" dest="text"/>
667
+ <copyField source="manu" dest="text"/>
668
+ <copyField source="features" dest="text"/>
669
+ <copyField source="includes" dest="text"/>
670
+ <copyField source="manu" dest="manu_exact"/>
671
+
672
+ <!-- Copy the price into a currency enabled field (default USD) -->
673
+ <copyField source="price" dest="price_c"/>
674
+
675
+ <!-- Text fields from SolrCell to search by default in our catch-all field -->
676
+ <copyField source="title" dest="text"/>
677
+ <copyField source="author" dest="text"/>
678
+ <copyField source="description" dest="text"/>
679
+ <copyField source="keywords" dest="text"/>
680
+ <copyField source="content" dest="text"/>
681
+ <copyField source="content_type" dest="text"/>
682
+ <copyField source="resourcename" dest="text"/>
683
+ <copyField source="url" dest="text"/>
684
+
685
+ <!-- Create a string version of author for faceting -->
686
+ <copyField source="author" dest="author_ssi"/>
687
+
688
+ <!-- Above, multiple source fields are copied to the [text] field.
689
+ Another way to map multiple source fields to the same
690
+ destination field is to use the dynamic field syntax.
691
+ copyField also supports a maxChars to copy setting. -->
692
+
693
+ <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
694
+
695
+ <!-- copy name to alphaNameSort, a field designed for sorting by name -->
696
+ <!-- <copyField source="name" dest="alphaNameSort"/> -->
697
+
698
+
699
+ <!-- field type definitions. The "name" attribute is
700
+ just a label to be used by field definitions. The "class"
701
+ attribute and any other attributes determine the real
702
+ behavior of the fieldType.
703
+ Class names starting with "solr" refer to java classes in a
704
+ standard package such as org.apache.solr.analysis
705
+ -->
706
+
707
+ <!-- The StrField type is not analyzed, but indexed/stored verbatim.
708
+ It supports doc values but in that case the field needs to be
709
+ single-valued and either required or have a default value.
710
+ -->
711
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
712
+
713
+ <!-- boolean type: "true" or "false" -->
714
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
715
+
716
+ <!-- The sortMissingLast and sortMissingFirst attributes are optional and are
717
+ currently supported on types that are sorted internally as strings
718
+ and on numeric types.
719
+ This includes "string","boolean", and, as of 3.5 (and 4.x),
720
+ int, float, long, date, double, including the "Trie" variants.
721
+ - If sortMissingLast="true", then a sort on this field will cause documents
722
+ without the field to come after documents with the field,
723
+ regardless of the requested sort order (asc or desc).
724
+ - If sortMissingFirst="true", then a sort on this field will cause documents
725
+ without the field to come before documents with the field,
726
+ regardless of the requested sort order.
727
+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
728
+ then default lucene sorting will be used which places docs without the
729
+ field first in an ascending sort and last in a descending sort.
730
+ -->
731
+
732
+ <!--
733
+ Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
734
+
735
+ These fields support doc values, but they require the field to be
736
+ single-valued and either be required or have a default value.
737
+ -->
738
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
739
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
740
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
741
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
742
+
743
+ <!--
744
+ Numeric field types that index each value at various levels of precision
745
+ to accelerate range queries when the number of values between the range
746
+ endpoints is large. See the javadoc for NumericRangeQuery for internal
747
+ implementation details.
748
+
749
+ Smaller precisionStep values (specified in bits) will lead to more tokens
750
+ indexed per value, slightly larger index size, and faster range queries.
751
+ A precisionStep of 0 disables indexing at different precision levels.
752
+ -->
753
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
754
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
755
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
756
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
757
+
758
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
759
+ is a more restricted form of the canonical representation of dateTime
760
+ http://www.w3.org/TR/xmlschema-2/#dateTime
761
+ The trailing "Z" designates UTC time and is mandatory.
762
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
763
+ All other components are mandatory.
764
+
765
+ Expressions can also be used to denote calculations that should be
766
+ performed relative to "NOW" to determine the value, e.g.:
767
+
768
+ NOW/HOUR
769
+ ... Round to the start of the current hour
770
+ NOW-1DAY
771
+ ... Exactly 1 day prior to now
772
+ NOW/DAY+6MONTHS+3DAYS
773
+ ... 6 months and 3 days in the future from the start of
774
+ the current day
775
+
776
+ Consult the TrieDateField javadocs for more information.
777
+
778
+ Note: For faster range queries, consider the tdate type
779
+ -->
780
+ <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
781
+
782
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
783
+ <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
784
+
785
+
786
+ <!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings -->
787
+ <fieldType name="binary" class="solr.BinaryField"/>
788
+
789
+ <!-- The "RandomSortField" is not used to store or search any
790
+ data. You can declare fields of this type in your schema
791
+ to generate pseudo-random orderings of your docs for sorting
792
+ or function purposes. The ordering is generated based on the field
793
+ name and the version of the index. As long as the index version
794
+ remains unchanged, and the same field name is reused,
795
+ the ordering of the docs will be consistent.
796
+ If you want different pseudo-random orderings of documents,
797
+ for the same version of the index, use a dynamicField and
798
+ change the field name in the request.
799
+ -->
800
+ <fieldType name="random" class="solr.RandomSortField" indexed="true" />
801
+
802
+ <!-- solr.TextField allows the specification of custom text analyzers
803
+ specified as a tokenizer and a list of token filters. Different
804
+ analyzers may be specified for indexing and querying.
805
+
806
+ The optional positionIncrementGap puts space between multiple fields of
807
+ this type on the same document, with the purpose of preventing false phrase
808
+ matching across fields.
809
+
810
+ For more info on customizing your analyzer chain, please see
811
+ http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
812
+ -->
813
+
814
+ <!-- One can also specify an existing Analyzer class that has a
815
+ default constructor via the class attribute on the analyzer element.
816
+ Example:
817
+ <fieldType name="text_greek" class="solr.TextField">
818
+ <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
819
+ </fieldType>
820
+ -->
821
+
822
+ <!-- A text field that only splits on whitespace for exact matching of words -->
823
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
824
+ <analyzer>
825
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
826
+ </analyzer>
827
+ </fieldType>
828
+
829
+ <!-- A general text field that has reasonable, generic
830
+ cross-language defaults: it tokenizes with StandardTokenizer,
831
+ removes stop words from case-insensitive "stopwords.txt"
832
+ (empty by default), and down cases. At query time only, it
833
+ also applies synonyms. -->
834
+ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
835
+ <analyzer type="index">
836
+ <tokenizer class="solr.StandardTokenizerFactory"/>
837
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
838
+ <!-- in this example, we will only use synonyms at query time
839
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
840
+ -->
841
+ <filter class="solr.LowerCaseFilterFactory"/>
842
+ </analyzer>
843
+ <analyzer type="query">
844
+ <tokenizer class="solr.StandardTokenizerFactory"/>
845
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
846
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
847
+ <filter class="solr.LowerCaseFilterFactory"/>
848
+ </analyzer>
849
+ </fieldType>
850
+
851
+ <!-- A text field with defaults appropriate for English: it
852
+ tokenizes with StandardTokenizer, removes English stop words
853
+ (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
854
+ finally applies Porter's stemming. The query time analyzer
855
+ also applies synonyms from synonyms.txt. -->
856
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
857
+ <analyzer type="index">
858
+ <tokenizer class="solr.StandardTokenizerFactory"/>
859
+ <!-- in this example, we will only use synonyms at query time
860
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
861
+ -->
862
+ <!-- Case insensitive stop word removal.
863
+ -->
864
+ <filter class="solr.StopFilterFactory"
865
+ ignoreCase="true"
866
+ words="lang/stopwords_en.txt"
867
+ />
868
+ <filter class="solr.LowerCaseFilterFactory"/>
869
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
870
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
871
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
872
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
873
+ -->
874
+ <filter class="solr.PorterStemFilterFactory"/>
875
+ </analyzer>
876
+ <analyzer type="query">
877
+ <tokenizer class="solr.StandardTokenizerFactory"/>
878
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
879
+ <filter class="solr.StopFilterFactory"
880
+ ignoreCase="true"
881
+ words="lang/stopwords_en.txt"
882
+ />
883
+ <filter class="solr.LowerCaseFilterFactory"/>
884
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
885
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
886
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
887
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
888
+ -->
889
+ <filter class="solr.PorterStemFilterFactory"/>
890
+ </analyzer>
891
+ </fieldType>
892
+
893
+ <!-- A text field with defaults appropriate for English, plus
894
+ aggressive word-splitting and autophrase features enabled.
895
+ This field is just like text_en, except it adds
896
+ WordDelimiterFilter to enable splitting and matching of
897
+ words on case-change, alpha numeric boundaries, and
898
+ non-alphanumeric chars. This means certain compound word
899
+ cases will work, for example query "wi fi" will match
900
+ document "WiFi" or "wi-fi".
901
+ -->
902
+ <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
903
+ <analyzer type="index">
904
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
905
+ <!-- in this example, we will only use synonyms at query time
906
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
907
+ -->
908
+ <!-- Case insensitive stop word removal.
909
+ -->
910
+ <filter class="solr.StopFilterFactory"
911
+ ignoreCase="true"
912
+ words="lang/stopwords_en.txt"
913
+ />
914
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
915
+ <filter class="solr.LowerCaseFilterFactory"/>
916
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
917
+ <filter class="solr.PorterStemFilterFactory"/>
918
+ </analyzer>
919
+ <analyzer type="query">
920
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
921
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
922
+ <filter class="solr.StopFilterFactory"
923
+ ignoreCase="true"
924
+ words="lang/stopwords_en.txt"
925
+ />
926
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
927
+ <filter class="solr.LowerCaseFilterFactory"/>
928
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
929
+ <filter class="solr.PorterStemFilterFactory"/>
930
+ </analyzer>
931
+ </fieldType>
932
+
933
+ <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
934
+ but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
935
+ <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
936
+ <analyzer>
937
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
938
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
939
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
940
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
941
+ <filter class="solr.LowerCaseFilterFactory"/>
942
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
943
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
944
+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
945
+ possible with WordDelimiterFilter in conjunction with stemming. -->
946
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
947
+ </analyzer>
948
+ </fieldType>
949
+
950
+ <!-- Just like text_general except it reverses the characters of
951
+ each token, to enable more efficient leading wildcard queries. -->
952
+ <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
953
+ <analyzer type="index">
954
+ <tokenizer class="solr.StandardTokenizerFactory"/>
955
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
956
+ <filter class="solr.LowerCaseFilterFactory"/>
957
+ <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
958
+ maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
959
+ </analyzer>
960
+ <analyzer type="query">
961
+ <tokenizer class="solr.StandardTokenizerFactory"/>
962
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
963
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
964
+ <filter class="solr.LowerCaseFilterFactory"/>
965
+ </analyzer>
966
+ </fieldType>
967
+
968
+ <!-- charFilter + WhitespaceTokenizer -->
969
+ <!--
970
+ <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
971
+ <analyzer>
972
+ <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
973
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
974
+ </analyzer>
975
+ </fieldType>
976
+ -->
977
+
978
+ <!-- This is an example of using the KeywordTokenizer along
979
+ with various TokenFilterFactories to produce a sortable field
980
+ that does not include some properties of the source text
981
+ -->
982
+ <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
983
+ <analyzer>
984
+ <!-- KeywordTokenizer does no actual tokenizing, so the entire
985
+ input string is preserved as a single token
986
+ -->
987
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
988
+ <!-- The LowerCase TokenFilter does what you expect, which can be useful
989
+ when you want your sorting to be case insensitive
990
+ -->
991
+ <filter class="solr.LowerCaseFilterFactory" />
992
+ <!-- The TrimFilter removes any leading or trailing whitespace -->
993
+ <filter class="solr.TrimFilterFactory" />
994
+ <!-- The PatternReplaceFilter gives you the flexibility to use
995
+ Java Regular expression to replace any sequence of characters
996
+ matching a pattern with an arbitrary replacement string,
997
+ which may include back references to portions of the original
998
+ string matched by the pattern.
999
+
1000
+ See the Java Regular Expression documentation for more
1001
+ information on pattern and replacement string syntax.
1002
+
1003
+ http://docs.oracle.com/javase/7/docs/api/java/util/regex/package-summary.html
1004
+ -->
1005
+ <filter class="solr.PatternReplaceFilterFactory"
1006
+ pattern="([^a-z])" replacement="" replace="all"
1007
+ />
1008
+ </analyzer>
1009
+ </fieldType>
1010
+
1011
+ <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
1012
+ <analyzer>
1013
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1014
+ <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
1015
+ </analyzer>
1016
+ </fieldType>
1017
+
1018
+ <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
1019
+ <analyzer>
1020
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
1021
+ <!--
1022
+ The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
1023
+ a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
1024
+ Attributes of the DelimitedPayloadTokenFilterFactory :
1025
+ "delimiter" - a one character delimiter. Default is | (pipe)
1026
+ "encoder" - how to encode the following value into a payload
1027
+ float -> org.apache.lucene.analysis.payloads.FloatEncoder,
1028
+ integer -> o.a.l.a.p.IntegerEncoder
1029
+ identity -> o.a.l.a.p.IdentityEncoder
1030
+ Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
1031
+ -->
1032
+ <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
1033
+ </analyzer>
1034
+ </fieldType>
1035
+
1036
+ <!-- lowercases the entire field value, keeping it as a single token. -->
1037
+ <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
1038
+ <analyzer>
1039
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
1040
+ <filter class="solr.LowerCaseFilterFactory" />
1041
+ </analyzer>
1042
+ </fieldType>
1043
+
1044
+ <!--
1045
+ Example of using PathHierarchyTokenizerFactory at index time, so
1046
+ queries for paths match documents at that path, or in descendent paths
1047
+ -->
1048
+ <fieldType name="descendent_path" class="solr.TextField">
1049
+ <analyzer type="index">
1050
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
1051
+ </analyzer>
1052
+ <analyzer type="query">
1053
+ <tokenizer class="solr.KeywordTokenizerFactory" />
1054
+ </analyzer>
1055
+ </fieldType>
1056
+ <!--
1057
+ Example of using PathHierarchyTokenizerFactory at query time, so
1058
+ queries for paths match documents at that path, or in ancestor paths
1059
+ -->
1060
+ <fieldType name="ancestor_path" class="solr.TextField">
1061
+ <analyzer type="index">
1062
+ <tokenizer class="solr.KeywordTokenizerFactory" />
1063
+ </analyzer>
1064
+ <analyzer type="query">
1065
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
1066
+ </analyzer>
1067
+ </fieldType>
1068
+
1069
+ <!-- since fields of this type are by default not stored or indexed,
1070
+ any data added to them will be ignored outright. -->
1071
+ <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
1072
+
1073
+ <!-- This point type indexes the coordinates as separate fields (subFields)
1074
+ If subFieldType is defined, it references a type, and a dynamic field
1075
+ definition is created matching *___<typename>. Alternately, if
1076
+ subFieldSuffix is defined, that is used to create the subFields.
1077
+ Example: if subFieldType="double", then the coordinates would be
1078
+ indexed in fields myloc_0___double,myloc_1___double.
1079
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
1080
+ in fields myloc_0_d,myloc_1_d
1081
+ The subFields are an implementation detail of the fieldType, and end
1082
+ users normally should not need to know about them.
1083
+ -->
1084
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
1085
+
1086
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
1087
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
1088
+
1089
+ <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
1090
+ For more information about this and other Spatial fields new to Solr 4, see:
1091
+ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
1092
+ -->
1093
+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
1094
+ geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" />
1095
+
1096
+ <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
1097
+ Parameters:
1098
+ defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
1099
+ precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
1100
+ providerClass: Lets you plug in other exchange provider backend:
1101
+ solr.FileExchangeRateProvider is the default and takes one parameter:
1102
+ currencyConfig: name of an xml file holding exchange rates
1103
+ solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
1104
+ ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
1105
+ refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
1106
+ -->
1107
+ <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
1108
+
1109
+
1110
+
1111
+ <!-- some examples for different languages (generally ordered by ISO code) -->
1112
+
1113
+ <!-- Arabic -->
1114
+ <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
1115
+ <analyzer>
1116
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1117
+ <!-- for any non-arabic -->
1118
+ <filter class="solr.LowerCaseFilterFactory"/>
1119
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" />
1120
+ <!-- normalizes ﻯ to ﻱ, etc -->
1121
+ <filter class="solr.ArabicNormalizationFilterFactory"/>
1122
+ <filter class="solr.ArabicStemFilterFactory"/>
1123
+ </analyzer>
1124
+ </fieldType>
1125
+
1126
+ <!-- Bulgarian -->
1127
+ <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
1128
+ <analyzer>
1129
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1130
+ <filter class="solr.LowerCaseFilterFactory"/>
1131
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" />
1132
+ <filter class="solr.BulgarianStemFilterFactory"/>
1133
+ </analyzer>
1134
+ </fieldType>
1135
+
1136
+ <!-- Catalan -->
1137
+ <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
1138
+ <analyzer>
1139
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1140
+ <!-- removes l', etc -->
1141
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
1142
+ <filter class="solr.LowerCaseFilterFactory"/>
1143
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" />
1144
+ <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
1145
+ </analyzer>
1146
+ </fieldType>
1147
+
1148
+ <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
1149
+ <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
1150
+ <analyzer>
1151
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1152
+ <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
1153
+ <filter class="solr.CJKWidthFilterFactory"/>
1154
+ <!-- for any non-CJK -->
1155
+ <filter class="solr.LowerCaseFilterFactory"/>
1156
+ <filter class="solr.CJKBigramFilterFactory"/>
1157
+ </analyzer>
1158
+ </fieldType>
1159
+
1160
+ <!-- Kurdish -->
1161
+ <fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100">
1162
+ <analyzer>
1163
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1164
+ <filter class="solr.SoraniNormalizationFilterFactory"/>
1165
+ <!-- for any latin text -->
1166
+ <filter class="solr.LowerCaseFilterFactory"/>
1167
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ckb.txt"/>
1168
+ <filter class="solr.SoraniStemFilterFactory"/>
1169
+ </analyzer>
1170
+ </fieldType>
1171
+
1172
+ <!-- Czech -->
1173
+ <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
1174
+ <analyzer>
1175
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1176
+ <filter class="solr.LowerCaseFilterFactory"/>
1177
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" />
1178
+ <filter class="solr.CzechStemFilterFactory"/>
1179
+ </analyzer>
1180
+ </fieldType>
1181
+
1182
+ <!-- Danish -->
1183
+ <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
1184
+ <analyzer>
1185
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1186
+ <filter class="solr.LowerCaseFilterFactory"/>
1187
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
1188
+ <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
1189
+ </analyzer>
1190
+ </fieldType>
1191
+
1192
+ <!-- German -->
1193
+ <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
1194
+ <analyzer>
1195
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1196
+ <filter class="solr.LowerCaseFilterFactory"/>
1197
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
1198
+ <filter class="solr.GermanNormalizationFilterFactory"/>
1199
+ <filter class="solr.GermanLightStemFilterFactory"/>
1200
+ <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
1201
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
1202
+ </analyzer>
1203
+ </fieldType>
1204
+
1205
+ <!-- Greek -->
1206
+ <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
1207
+ <analyzer>
1208
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1209
+ <!-- greek specific lowercase for sigma -->
1210
+ <filter class="solr.GreekLowerCaseFilterFactory"/>
1211
+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" />
1212
+ <filter class="solr.GreekStemFilterFactory"/>
1213
+ </analyzer>
1214
+ </fieldType>
1215
+
1216
+ <!-- Spanish -->
1217
+ <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
1218
+ <analyzer>
1219
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1220
+ <filter class="solr.LowerCaseFilterFactory"/>
1221
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
1222
+ <filter class="solr.SpanishLightStemFilterFactory"/>
1223
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
1224
+ </analyzer>
1225
+ </fieldType>
1226
+
1227
+ <!-- Basque -->
1228
+ <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
1229
+ <analyzer>
1230
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1231
+ <filter class="solr.LowerCaseFilterFactory"/>
1232
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" />
1233
+ <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
1234
+ </analyzer>
1235
+ </fieldType>
1236
+
1237
+ <!-- Persian -->
1238
+ <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
1239
+ <analyzer>
1240
+ <!-- for ZWNJ -->
1241
+ <charFilter class="solr.PersianCharFilterFactory"/>
1242
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1243
+ <filter class="solr.LowerCaseFilterFactory"/>
1244
+ <filter class="solr.ArabicNormalizationFilterFactory"/>
1245
+ <filter class="solr.PersianNormalizationFilterFactory"/>
1246
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" />
1247
+ </analyzer>
1248
+ </fieldType>
1249
+
1250
+ <!-- Finnish -->
1251
+ <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
1252
+ <analyzer>
1253
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1254
+ <filter class="solr.LowerCaseFilterFactory"/>
1255
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
1256
+ <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
1257
+ <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
1258
+ </analyzer>
1259
+ </fieldType>
1260
+
1261
+ <!-- French -->
1262
+ <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
1263
+ <analyzer>
1264
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1265
+ <!-- removes l', etc -->
1266
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
1267
+ <filter class="solr.LowerCaseFilterFactory"/>
1268
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
1269
+ <filter class="solr.FrenchLightStemFilterFactory"/>
1270
+ <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
1271
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
1272
+ </analyzer>
1273
+ </fieldType>
1274
+
1275
+ <!-- Irish -->
1276
+ <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
1277
+ <analyzer>
1278
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1279
+ <!-- removes d', etc -->
1280
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
1281
+ <!-- removes n-, etc. position increments is intentionally false! -->
1282
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
1283
+ <filter class="solr.IrishLowerCaseFilterFactory"/>
1284
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/>
1285
+ <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
1286
+ </analyzer>
1287
+ </fieldType>
1288
+
1289
+ <!-- Galician -->
1290
+ <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
1291
+ <analyzer>
1292
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1293
+ <filter class="solr.LowerCaseFilterFactory"/>
1294
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" />
1295
+ <filter class="solr.GalicianStemFilterFactory"/>
1296
+ <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
1297
+ </analyzer>
1298
+ </fieldType>
1299
+
1300
+ <!-- Hindi -->
1301
+ <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
1302
+ <analyzer>
1303
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1304
+ <filter class="solr.LowerCaseFilterFactory"/>
1305
+ <!-- normalizes unicode representation -->
1306
+ <filter class="solr.IndicNormalizationFilterFactory"/>
1307
+ <!-- normalizes variation in spelling -->
1308
+ <filter class="solr.HindiNormalizationFilterFactory"/>
1309
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" />
1310
+ <filter class="solr.HindiStemFilterFactory"/>
1311
+ </analyzer>
1312
+ </fieldType>
1313
+
1314
+ <!-- Hungarian -->
1315
+ <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
1316
+ <analyzer>
1317
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1318
+ <filter class="solr.LowerCaseFilterFactory"/>
1319
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
1320
+ <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
1321
+ <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->
1322
+ </analyzer>
1323
+ </fieldType>
1324
+
1325
+ <!-- Armenian -->
1326
+ <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
1327
+ <analyzer>
1328
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1329
+ <filter class="solr.LowerCaseFilterFactory"/>
1330
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" />
1331
+ <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
1332
+ </analyzer>
1333
+ </fieldType>
1334
+
1335
+ <!-- Indonesian -->
1336
+ <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
1337
+ <analyzer>
1338
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1339
+ <filter class="solr.LowerCaseFilterFactory"/>
1340
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" />
1341
+ <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
1342
+ <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
1343
+ </analyzer>
1344
+ </fieldType>
1345
+
1346
+ <!-- Italian -->
1347
+ <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
1348
+ <analyzer>
1349
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1350
+ <!-- removes l', etc -->
1351
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
1352
+ <filter class="solr.LowerCaseFilterFactory"/>
1353
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
1354
+ <filter class="solr.ItalianLightStemFilterFactory"/>
1355
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
1356
+ </analyzer>
1357
+ </fieldType>
1358
+
1359
+ <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)
1360
+
1361
+ NOTE: If you want to optimize search for precision, use default operator AND in your query
1362
+ parser config with <solrQueryParser defaultOperator="AND"/> further down in this file. Use
1363
+ OR if you would like to optimize for recall (default).
1364
+ -->
1365
+ <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
1366
+ <analyzer>
1367
+ <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
1368
+
1369
+ Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic
1370
+ is used to segment compounds into their parts and the compound itself is kept as a synonym.
1371
+
1372
+ Valid values for attribute mode are:
1373
+ normal: regular segmentation
1374
+ search: segmentation useful for search with synonyms compounds (default)
1375
+ extended: same as search mode, but unigrams unknown words (experimental)
1376
+
1377
+ For some applications it might be good to use search mode for indexing and normal mode for
1378
+ queries to reduce recall and prevent parts of compounds from being matched and highlighted.
1379
+ Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.
1380
+
1381
+ Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
1382
+ model with your own entries for segmentation, part-of-speech tags and readings without a need
1383
+ to specify weights. Notice that user dictionaries have not been subject to extensive testing.
1384
+
1385
+ User dictionary attributes are:
1386
+ userDictionary: user dictionary filename
1387
+ userDictionaryEncoding: user dictionary encoding (default is UTF-8)
1388
+
1389
+ See lang/userdict_ja.txt for a sample user dictionary file.
1390
+
1391
+ Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
1392
+
1393
+ See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
1394
+ -->
1395
+ <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
1396
+ <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
1397
+ <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
1398
+ <filter class="solr.JapaneseBaseFormFilterFactory"/>
1399
+ <!-- Removes tokens with certain part-of-speech tags -->
1400
+ <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" />
1401
+ <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
1402
+ <filter class="solr.CJKWidthFilterFactory"/>
1403
+ <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
1404
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" />
1405
+ <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
1406
+ <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
1407
+ <!-- Lower-cases romaji characters -->
1408
+ <filter class="solr.LowerCaseFilterFactory"/>
1409
+ </analyzer>
1410
+ </fieldType>
1411
+
1412
+ <!-- Latvian -->
1413
+ <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
1414
+ <analyzer>
1415
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1416
+ <filter class="solr.LowerCaseFilterFactory"/>
1417
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" />
1418
+ <filter class="solr.LatvianStemFilterFactory"/>
1419
+ </analyzer>
1420
+ </fieldType>
1421
+
1422
+ <!-- Dutch -->
1423
+ <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
1424
+ <analyzer>
1425
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1426
+ <filter class="solr.LowerCaseFilterFactory"/>
1427
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
1428
+ <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
1429
+ <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
1430
+ </analyzer>
1431
+ </fieldType>
1432
+
1433
+ <!-- Norwegian -->
1434
+ <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
1435
+ <analyzer>
1436
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1437
+ <filter class="solr.LowerCaseFilterFactory"/>
1438
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
1439
+ <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
1440
+ <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/> -->
1441
+ <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory" variant="nb"/> -->
1442
+ <!-- The "light" and "minimal" stemmers support variants: nb=Bokmål, nn=Nynorsk, no=Both -->
1443
+ </analyzer>
1444
+ </fieldType>
1445
+
1446
+ <!-- Portuguese -->
1447
+ <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
1448
+ <analyzer>
1449
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1450
+ <filter class="solr.LowerCaseFilterFactory"/>
1451
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
1452
+ <filter class="solr.PortugueseLightStemFilterFactory"/>
1453
+ <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
1454
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
1455
+ <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
1456
+ </analyzer>
1457
+ </fieldType>
1458
+
1459
+ <!-- Romanian -->
1460
+ <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
1461
+ <analyzer>
1462
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1463
+ <filter class="solr.LowerCaseFilterFactory"/>
1464
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" />
1465
+ <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
1466
+ </analyzer>
1467
+ </fieldType>
1468
+
1469
+ <!-- Russian -->
1470
+ <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
1471
+ <analyzer>
1472
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1473
+ <filter class="solr.LowerCaseFilterFactory"/>
1474
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
1475
+ <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
1476
+ <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
1477
+ </analyzer>
1478
+ </fieldType>
1479
+
1480
+ <!-- Swedish -->
1481
+ <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
1482
+ <analyzer>
1483
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1484
+ <filter class="solr.LowerCaseFilterFactory"/>
1485
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
1486
+ <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
1487
+ <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
1488
+ </analyzer>
1489
+ </fieldType>
1490
+
1491
+ <!-- Thai -->
1492
+ <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
1493
+ <analyzer>
1494
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1495
+ <filter class="solr.LowerCaseFilterFactory"/>
1496
+ <filter class="solr.ThaiWordFilterFactory"/>
1497
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" />
1498
+ </analyzer>
1499
+ </fieldType>
1500
+
1501
+ <!-- Turkish -->
1502
+ <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
1503
+ <analyzer>
1504
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1505
+ <filter class="solr.ApostropheFilterFactory"/>
1506
+ <filter class="solr.TurkishLowerCaseFilterFactory"/>
1507
+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" />
1508
+ <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
1509
+ </analyzer>
1510
+ </fieldType>
1511
+
1512
+ <!-- Similarity is the scoring routine for each document vs. a query.
1513
+ A custom Similarity or SimilarityFactory may be specified here, but
1514
+ the default is fine for most applications.
1515
+ For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
1516
+ -->
1517
+ <!--
1518
+ <similarity class="com.example.solr.CustomSimilarityFactory">
1519
+ <str name="paramkey">param value</str>
1520
+ </similarity>
1521
+ -->
1522
+
1523
+ </schema>
1524
+ >>>>>>> Adding Data import Handler configurations