warclight 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +7 -0
  2. data/.eslintrc +12 -0
  3. data/.gitignore +19 -0
  4. data/.rspec +2 -0
  5. data/.rubocop.yml +66 -0
  6. data/.solr_wrapper +5 -0
  7. data/.travis.yml +31 -0
  8. data/CONTRIBUTING.md +53 -0
  9. data/Gemfile +42 -0
  10. data/LICENSE.txt +11 -0
  11. data/README.md +79 -0
  12. data/Rakefile +16 -0
  13. data/app/assets/images/blacklight/compact.svg +25 -0
  14. data/app/assets/images/blacklight/logo.png +0 -0
  15. data/app/assets/javascripts/warclight/oembed_viewer.js +39 -0
  16. data/app/assets/javascripts/warclight/warclight.js +4 -0
  17. data/app/assets/stylesheets/warclight/application.scss +1 -0
  18. data/app/assets/stylesheets/warclight/warclight.scss +1 -0
  19. data/app/controllers/concerns/warclight/field_config_helpers.rb +11 -0
  20. data/app/helpers/warclight/application_helper.rb +8 -0
  21. data/app/jobs/warclight/application_job.rb +6 -0
  22. data/app/models/concerns/warclight/catalog.rb +8 -0
  23. data/app/models/concerns/warclight/search_behavior.rb +9 -0
  24. data/app/models/concerns/warclight/solr_document.rb +9 -0
  25. data/app/views/layouts/warclight/application.html.erb +14 -0
  26. data/bin/rails +14 -0
  27. data/config/routes.rb +4 -0
  28. data/lib/generators/warclight/install_generator.rb +55 -0
  29. data/lib/generators/warclight/templates/catalog_controller.rb +129 -0
  30. data/lib/generators/warclight/templates/warclight.js +2 -0
  31. data/lib/generators/warclight/templates/warclight.scss +3 -0
  32. data/lib/generators/warclight/update_generator.rb +22 -0
  33. data/lib/warclight.rb +7 -0
  34. data/lib/warclight/engine.rb +105 -0
  35. data/lib/warclight/version.rb +5 -0
  36. data/package.json +24 -0
  37. data/solr/conf/elevate.xml +42 -0
  38. data/solr/conf/lang/contractions_ca.txt +8 -0
  39. data/solr/conf/lang/contractions_fr.txt +15 -0
  40. data/solr/conf/lang/contractions_ga.txt +5 -0
  41. data/solr/conf/lang/contractions_it.txt +23 -0
  42. data/solr/conf/lang/hyphenations_ga.txt +5 -0
  43. data/solr/conf/lang/stemdict_nl.txt +6 -0
  44. data/solr/conf/lang/stoptags_ja.txt +420 -0
  45. data/solr/conf/lang/stopwords_ar.txt +125 -0
  46. data/solr/conf/lang/stopwords_bg.txt +193 -0
  47. data/solr/conf/lang/stopwords_ca.txt +220 -0
  48. data/solr/conf/lang/stopwords_cz.txt +172 -0
  49. data/solr/conf/lang/stopwords_da.txt +110 -0
  50. data/solr/conf/lang/stopwords_de.txt +294 -0
  51. data/solr/conf/lang/stopwords_el.txt +78 -0
  52. data/solr/conf/lang/stopwords_en.txt +54 -0
  53. data/solr/conf/lang/stopwords_es.txt +356 -0
  54. data/solr/conf/lang/stopwords_eu.txt +99 -0
  55. data/solr/conf/lang/stopwords_fa.txt +313 -0
  56. data/solr/conf/lang/stopwords_fi.txt +97 -0
  57. data/solr/conf/lang/stopwords_fr.txt +186 -0
  58. data/solr/conf/lang/stopwords_ga.txt +110 -0
  59. data/solr/conf/lang/stopwords_gl.txt +161 -0
  60. data/solr/conf/lang/stopwords_hi.txt +235 -0
  61. data/solr/conf/lang/stopwords_hu.txt +211 -0
  62. data/solr/conf/lang/stopwords_hy.txt +46 -0
  63. data/solr/conf/lang/stopwords_id.txt +359 -0
  64. data/solr/conf/lang/stopwords_it.txt +303 -0
  65. data/solr/conf/lang/stopwords_ja.txt +127 -0
  66. data/solr/conf/lang/stopwords_lv.txt +172 -0
  67. data/solr/conf/lang/stopwords_nl.txt +119 -0
  68. data/solr/conf/lang/stopwords_no.txt +194 -0
  69. data/solr/conf/lang/stopwords_pt.txt +253 -0
  70. data/solr/conf/lang/stopwords_ro.txt +233 -0
  71. data/solr/conf/lang/stopwords_ru.txt +243 -0
  72. data/solr/conf/lang/stopwords_sv.txt +133 -0
  73. data/solr/conf/lang/stopwords_th.txt +119 -0
  74. data/solr/conf/lang/stopwords_tr.txt +212 -0
  75. data/solr/conf/lang/userdict_ja.txt +29 -0
  76. data/solr/conf/managed-schema +1045 -0
  77. data/solr/conf/params.json +20 -0
  78. data/solr/conf/protwords.txt +21 -0
  79. data/solr/conf/schema.xml +350 -0
  80. data/solr/conf/solrconfig.xml +1361 -0
  81. data/solr/conf/stopwords.txt +14 -0
  82. data/solr/conf/synonyms.txt +29 -0
  83. data/tasks/warclight.rake +61 -0
  84. data/template.rb +15 -0
  85. data/vendor/assets/javascripts/responsiveTruncator.js +69 -0
  86. data/vendor/assets/javascripts/stickyfill.js +480 -0
  87. data/warclight.gemspec +38 -0
  88. metadata +312 -0
@@ -0,0 +1,20 @@
1
+ {"params":{
2
+ "query":{
3
+ "defType":"edismax",
4
+ "q.alt":"*:*",
5
+ "rows":"10",
6
+ "fl":"*,score",
7
+ "":{"v":0}
8
+ },
9
+ "facets":{
10
+ "facet":"on",
11
+ "facet.mincount": "1",
12
+ "":{"v":0}
13
+ },
14
+ "velocity":{
15
+ "wt": "velocity",
16
+ "v.template":"browse",
17
+ "v.layout": "layout",
18
+ "":{"v":0}
19
+ }
20
+ }}
@@ -0,0 +1,21 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ #-----------------------------------------------------------------------
14
+ # Use a protected word file to protect against the stemmer reducing two
15
+ # unrelated words to the same base word.
16
+
17
+ # Some non-words that normally won't be encountered,
18
+ # just to test that they won't be stemmed.
19
+ dontstems
20
+ zwhacky
21
+
@@ -0,0 +1,350 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <schema name="ukwa" version="1.6">
20
+ <fields>
21
+ <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" docValues="true"/>
22
+ <field name="_version_" type="long" indexed="true" docValues="true"/>
23
+ <field name="_root_" type="string" indexed="true" docValues="true" />
24
+ <field name="_text_" type="text_general" indexed="true" stored="false" multiValued="true"/>
25
+
26
+ <!-- BL UKWA: additional -->
27
+ <field name="access_terms" type="string" indexed="true" docValues="true" multiValued="true"/>
28
+ <field name="author" type="string" indexed="true" docValues="true"/>
29
+ <field name="category" type="text_general" indexed="true" stored="true"/>
30
+ <field name="collection" type="string" indexed="true" docValues="true" multiValued="true"/>
31
+ <field name="collections" type="string" indexed="true" docValues="true" multiValued="true"/>
32
+ <field name="comments" type="text_general" indexed="true" stored="true"/>
33
+ <field name="content_encoding" type="string" indexed="true" docValues="true" multiValued="false"/>
34
+ <field name="content_ffb" type="string" indexed="true" docValues="true" multiValued="false"/>
35
+ <field name="content_first_bytes" type="hex_text_shingle" indexed="true" stored="true" multiValued="false"/>
36
+ <field name="content_language" type="string" indexed="true" docValues="true" multiValued="false"/>
37
+ <field name="content_length" type="tint" indexed="true" stored="false" multiValued="false" docValues="true"/>
38
+ <field name="content_metadata_ss" type="string" indexed="true" docValues="true" multiValued="true"/>
39
+ <field name="content_metadata" type="text_general" indexed="true" stored="true" multiValued="false"/>
40
+ <field name="content_text_length" type="tint" indexed="true" stored="false" multiValued="false" docValues="true"/>
41
+ <field name="content_type_droid" type="string" indexed="true" docValues="true" multiValued="false"/>
42
+ <field name="content_type_ext" type="string" indexed="true" docValues="true" multiValued="false"/>
43
+ <field name="content_type_full" type="string" indexed="true" docValues="true" multiValued="false"/>
44
+ <field name="content_type_norm" type="string" indexed="true" docValues="true" multiValued="false" default="other"/>
45
+ <field name="content_type_served" type="string" indexed="true" docValues="true" multiValued="false"/>
46
+ <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>
47
+ <field name="content_type_tika" type="string" indexed="true" docValues="true" multiValued="false"/>
48
+ <field name="content_type" type="string" indexed="true" docValues="true" multiValued="true"/>
49
+ <field name="content_type_version" type="string" indexed="true" docValues="true" multiValued="false"/>
50
+ <field name="crawl_dates" type="tdate" indexed="true" stored="true" multiValued="true"/>
51
+ <field name="crawl_date" type="tdate" indexed="true" stored="false" multiValued="false" docValues="true"/>
52
+ <field name="crawl_year_month_day" type="int" indexed="true" docValues="true" multiValued="false"/>
53
+ <field name="crawl_year_month" type="int" indexed="true" docValues="true" multiValued="false"/>
54
+ <field name="crawl_years" type="int" indexed="true" docValues="true" multiValued="true"/>
55
+ <field name="crawl_year" type="int" indexed="true" docValues="true" multiValued="false"/>
56
+ <field name="description" type="text_general" indexed="true" stored="true"/>
57
+ <field name="domain" type="string" indexed="true" docValues="true" multiValued="false"/>
58
+ <field name="elements_used" type="string" indexed="true" docValues="true" multiValued="true"/>
59
+ <field name="generator" type="string" indexed="true" docValues="true" multiValued="true"/>
60
+ <field name="hash" type="string" indexed="true" docValues="true" multiValued="false"/>
61
+ <field name="hashes" type="string" indexed="true" docValues="true" multiValued="true"/>
62
+ <field name="host" type="string" indexed="true" docValues="true" multiValued="false"/>
63
+ <field name="host_surt" type="string" indexed="true" docValues="true" multiValued="true"/>
64
+ <field name="id_long" type="long" indexed="true" stored="true" multiValued="false"/>
65
+ <field name="image_colours" type="string" indexed="true" stored="true" multiValued="true"/>
66
+ <field name="image_dominant_colour" type="string" indexed="true" stored="true" multiValued="false"/>
67
+ <field name="image_faces_count" type="tint" indexed="true" stored="true" multiValued="false"/>
68
+ <field name="image_faces" type="string" indexed="false" stored="true" multiValued="true"/>
69
+ <field name="image_height" type="tlong" indexed="true" stored="true" multiValued="false"/>
70
+ <field name="image_size" type="tlong" indexed="true" stored="true" multiValued="false"/>
71
+ <field name="image_width" type="tlong" indexed="true" stored="true" multiValued="false"/>
72
+ <field name="keywords" type="text_general" indexed="true" stored="true"/>
73
+ <field name="last_modified" type="tdate" indexed="true" stored="true" docValues="true"/>
74
+ <field name="last_modified_year" type="string" indexed="true" docValues="true"/>
75
+ <field name="license_url" type="string" indexed="true" docValues="true" multiValued="true"/>
76
+ <field name="links_domains" type="string" indexed="true" docValues="true" multiValued="true"/>
77
+ <field name="links_hosts" type="string" indexed="true" docValues="true" multiValued="true"/>
78
+ <field name="links_hosts_surts" type="string" indexed="true" docValues="true" multiValued="true"/>
79
+ <field name="links_norm" type="string" indexed="true" docValues="true" multiValued="true"/>
80
+ <field name="links_public_suffixes" type="string" indexed="true" docValues="true" multiValued="true"/>
81
+ <field name="links" type="string" indexed="true" docValues="true" multiValued="true"/>
82
+ <field name="locations" type="location" indexed="true" stored="true" multiValued="true"/>
83
+ <field name="parse_error" type="string" indexed="true" docValues="true" multiValued="true"/>
84
+ <field name="pdf_pdfa_errors" type="string" indexed="true" docValues="true" multiValued="true"/>
85
+ <field name="pdf_pdfa_is_valid" type="string" indexed="true" docValues="true" multiValued="false"/>
86
+ <field name="postcode_district" type="string" indexed="true" docValues="true" multiValued="true"/>
87
+ <field name="postcode" type="string" indexed="true" docValues="true" multiValued="true"/>
88
+ <field name="publication_date" type="tdate" indexed="true" stored="true" multiValued="false"/>
89
+ <field name="publication_year" type="string" indexed="true" docValues="true" multiValued="false"/>
90
+ <field name="public_suffix" type="string" indexed="true" docValues="true" multiValued="false"/>
91
+ <field name="record_type" type="string" indexed="true" stored="true" multiValued="false" docValues="true"/>
92
+ <field name="referrer_url" type="string" indexed="true" stored="true" multiValued="false" docValues="true"/>
93
+ <field name="resourcename" type="text_general" indexed="true" stored="true"/>
94
+ <field name="sentiment_score" type="float" indexed="true" stored="true" multiValued="false"/>
95
+ <field name="sentiment" type="string" indexed="true" docValues="true" multiValued="false"/>
96
+ <field name="server" type="string" indexed="true" docValues="true" multiValued="true"/>
97
+ <field name="source_file_offset" type="tlong" indexed="true" stored="true" />
98
+ <field name="source_file" type="string" indexed="true" docValues="true" />
99
+ <field name="status_code" type="int" indexed="true" stored="true" docValues="true" />
100
+ <field name="subject" type="text_general" indexed="true" stored="true" multiValued="true"/>
101
+ <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
102
+ <field name="title" type="text_general" indexed="true" stored="true" multiValued="false"/>
103
+ <field name="type" type="string" indexed="true" docValues="true" multiValued="false"/>
104
+ <field name="url_norm" type="string" indexed="true" stored="false" docValues="true" multiValued="false"/>
105
+ <field name="url_path" type="string" indexed="true" stored="false" docValues="true" multiValued="false"/>
106
+ <field name="url" type="string" indexed="true" stored="false" docValues="true" multiValued="false"/>
107
+ <field name="url_type" type="text_general" indexed="true" stored="true"/>
108
+ <field name="wayback_date" type="long" indexed="false" stored="true" docValues="false" multiValued="false"/>
109
+ <field name="wct_agency" type="string" indexed="true" docValues="true" multiValued="false"/>
110
+ <field name="wct_collections" type="string" indexed="true" docValues="true" multiValued="true"/>
111
+ <field name="wct_description" type="text_general" indexed="true" stored="true"/>
112
+ <field name="wct_instance_id" type="int" indexed="true" stored="true" multiValued="false"/>
113
+ <field name="wct_subjects" type="string" indexed="true" docValues="true" multiValued="true"/>
114
+ <field name="wct_target_id" type="string" indexed="true" docValues="true" multiValued="false"/>
115
+ <field name="wct_title" type="string" indexed="true" docValues="true"/>
116
+ <field name="xml_root_ns" type="string" indexed="true" docValues="true" multiValued="false"/>
117
+ <!--:BL UKWA -->
118
+
119
+ <!-- User Supplied Values - Web Archives for Historical Research -->
120
+ <field name="institution" type="string" indexed="true" multiValued="false" docValues="true"/>
121
+ <field name="collection_name" type="string" indexed="true" multiValued="false" docValues="true"/>
122
+ <field name="collection_number" type="string" indexed="true" multiValued="false" docValues="true"/>
123
+
124
+ <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
125
+ <dynamicField name="*_is" type="ints" indexed="true" stored="true"/>
126
+ <dynamicField name="*_s" type="string" indexed="true" stored="true" />
127
+ <dynamicField name="*_ss" type="strings" indexed="true" stored="true"/>
128
+ <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
129
+ <dynamicField name="*_ls" type="longs" indexed="true" stored="true"/>
130
+ <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
131
+ <dynamicField name="*_txt" type="text_general" indexed="true" stored="true"/>
132
+ <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
133
+ <dynamicField name="*_bs" type="booleans" indexed="true" stored="true"/>
134
+ <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
135
+ <dynamicField name="*_fs" type="floats" indexed="true" stored="true"/>
136
+ <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
137
+ <dynamicField name="*_ds" type="doubles" indexed="true" stored="true"/>
138
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
139
+ <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
140
+ <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
141
+ <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
142
+ <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
143
+ <dynamicField name="*_tis" type="tints" indexed="true" stored="true"/>
144
+ <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
145
+ <dynamicField name="*_tls" type="tlongs" indexed="true" stored="true"/>
146
+ <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
147
+ <dynamicField name="*_tfs" type="tfloats" indexed="true" stored="true"/>
148
+ <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
149
+ <dynamicField name="*_tds" type="tdoubles" indexed="true" stored="true"/>
150
+ <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
151
+ <dynamicField name="*_tdts" type="tdates" indexed="true" stored="true"/>
152
+ <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
153
+ <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
154
+ <dynamicField name="random_*" type="random" />
155
+
156
+ <!-- BL UKWA: additional -->
157
+ <dynamicField name="ssdeep_hash_bs_*" type="string" indexed="true" stored="true" multiValued="false"/>
158
+ <dynamicField name="ssdeep_hash_ngram_bs_*" type="literal_ngram" indexed="true" stored="true" multiValued="false"/>
159
+ <!--:BL UKWA -->
160
+
161
+ <dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/>
162
+ <dynamicField name="*_txt_en" type="text_en" indexed="true" stored="true"/>
163
+ <dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/>
164
+ <dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/>
165
+ <dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/>
166
+ <dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/>
167
+ <dynamicField name="*_s_lower" type="lowercase" indexed="true" stored="true"/>
168
+ <dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/>
169
+ <dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/>
170
+ <dynamicField name="*_point" type="point" indexed="true" stored="true"/>
171
+ <dynamicField name="*_txt_ga" type="text_ga" indexed="true" stored="true"/>
172
+ </fields>
173
+
174
+ <uniqueKey>id</uniqueKey>
175
+
176
+ <copyField source="title" dest="text"/>
177
+ <copyField source="author" dest="text"/>
178
+ <copyField source="keywords" dest="text"/>
179
+ <copyField source="description" dest="text"/>
180
+ <copyField source="wct_title" dest="text"/>
181
+ <copyField source="wct_description" dest="text"/>
182
+ <copyField source="url" dest="text"/>
183
+ <copyField source="content" dest="text"/>
184
+
185
+ <types>
186
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
187
+ <fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" />
188
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
189
+ <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
190
+ <fieldType name="int" class="solr.TrieIntField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
191
+ <fieldType name="float" class="solr.TrieFloatField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
192
+ <fieldType name="long" class="solr.TrieLongField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
193
+ <fieldType name="double" class="solr.TrieDoubleField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
194
+ <fieldType name="ints" class="solr.TrieIntField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
195
+ <fieldType name="floats" class="solr.TrieFloatField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
196
+ <fieldType name="longs" class="solr.TrieLongField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
197
+ <fieldType name="doubles" class="solr.TrieDoubleField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
198
+ <fieldType name="tint" class="solr.TrieIntField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
199
+ <fieldType name="tfloat" class="solr.TrieFloatField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
200
+ <fieldType name="tlong" class="solr.TrieLongField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
201
+ <fieldType name="tdouble" class="solr.TrieDoubleField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
202
+ <fieldType name="tints" class="solr.TrieIntField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
203
+ <fieldType name="tfloats" class="solr.TrieFloatField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
204
+ <fieldType name="tlongs" class="solr.TrieLongField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
205
+ <fieldType name="tdoubles" class="solr.TrieDoubleField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
206
+ <fieldType name="date" class="solr.TrieDateField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
207
+ <fieldType name="dates" class="solr.TrieDateField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
208
+ <fieldType name="tdate" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0"/>
209
+ <fieldType name="tdates" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0" multiValued="true"/>
210
+ <fieldType name="binary" class="solr.BinaryField"/>
211
+ <fieldType name="random" class="solr.RandomSortField" indexed="true" />
212
+
213
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
214
+ <analyzer>
215
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
216
+ </analyzer>
217
+ </fieldType>
218
+
219
+ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
220
+ <analyzer type="index">
221
+ <tokenizer class="solr.StandardTokenizerFactory"/>
222
+ <filter class="solr.LowerCaseFilterFactory"/>
223
+ </analyzer>
224
+ <analyzer type="query">
225
+ <tokenizer class="solr.StandardTokenizerFactory"/>
226
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
227
+ <filter class="solr.LowerCaseFilterFactory"/>
228
+ </analyzer>
229
+ </fieldType>
230
+
231
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
232
+ <analyzer type="index">
233
+ <tokenizer class="solr.StandardTokenizerFactory"/>
234
+ <filter class="solr.LowerCaseFilterFactory"/>
235
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
236
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
237
+ </analyzer>
238
+ <analyzer type="query">
239
+ <tokenizer class="solr.StandardTokenizerFactory"/>
240
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
241
+ <filter class="solr.LowerCaseFilterFactory"/>
242
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
243
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
244
+ </analyzer>
245
+ </fieldType>
246
+
247
+ <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
248
+ <analyzer type="index">
249
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
250
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
251
+ <filter class="solr.LowerCaseFilterFactory"/>
252
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
253
+ </analyzer>
254
+ <analyzer type="query">
255
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
256
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
257
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
258
+ <filter class="solr.LowerCaseFilterFactory"/>
259
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
260
+ </analyzer>
261
+ </fieldType>
262
+
263
+ <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
264
+ <analyzer>
265
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
266
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
267
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
268
+ <filter class="solr.LowerCaseFilterFactory"/>
269
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
270
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
271
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
272
+ </analyzer>
273
+ </fieldType>
274
+
275
+ <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
276
+ <analyzer type="index">
277
+ <tokenizer class="solr.StandardTokenizerFactory"/>
278
+ <filter class="solr.LowerCaseFilterFactory"/>
279
+ <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
280
+ maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
281
+ </analyzer>
282
+ <analyzer type="query">
283
+ <tokenizer class="solr.StandardTokenizerFactory"/>
284
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
285
+ <filter class="solr.LowerCaseFilterFactory"/>
286
+ </analyzer>
287
+ </fieldType>
288
+
289
+ <fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" >
290
+ <analyzer>
291
+ <tokenizer class="solr.StandardTokenizerFactory"/>
292
+ <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
293
+ </analyzer>
294
+ </fieldType>
295
+
296
+ <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
297
+ <analyzer>
298
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
299
+ <filter class="solr.LowerCaseFilterFactory" />
300
+ </analyzer>
301
+ </fieldType>
302
+
303
+ <fieldType name="descendent_path" class="solr.TextField">
304
+ <analyzer type="index">
305
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
306
+ </analyzer>
307
+ <analyzer type="query">
308
+ <tokenizer class="solr.KeywordTokenizerFactory" />
309
+ </analyzer>
310
+ </fieldType>
311
+
312
+ <fieldType name="ancestor_path" class="solr.TextField">
313
+ <analyzer type="index">
314
+ <tokenizer class="solr.KeywordTokenizerFactory" />
315
+ </analyzer>
316
+ <analyzer type="query">
317
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
318
+ </analyzer>
319
+ </fieldType>
320
+
321
+ <fieldType name="ignored" stored="false" indexed="false" docValues="false" multiValued="true" class="solr.StrField" />
322
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
323
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
324
+
325
+ <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
326
+ <analyzer>
327
+ <tokenizer class="solr.StandardTokenizerFactory"/>
328
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
329
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
330
+ <filter class="solr.IrishLowerCaseFilterFactory"/>
331
+ </analyzer>
332
+ </fieldType>
333
+
334
+ <!-- BL UKWA: additional -->
335
+ <fieldType name="literal_ngram" stored="false" indexed="true" class="solr.TextField">
336
+ <analyzer>
337
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
338
+ <filter class="solr.NGramFilterFactory" minGramSize="2" maxGramSize="5"/>
339
+ </analyzer>
340
+ </fieldType>
341
+
342
+ <fieldType name="hex_text_shingle" class="solr.TextField" positionIncrementGap="100">
343
+ <analyzer>
344
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
345
+ <filter class="solr.ShingleFilterFactory" minShingleSize="4" maxShingleSize="8" outputUnigrams="false" outputUnigramsIfNoShingles="false" tokenSeparator=" "/>
346
+ </analyzer>
347
+ </fieldType>
348
+ <!--:BL UKWA -->
349
+ </types>
350
+ </schema>
@@ -0,0 +1,1361 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ For more details about configurations options that may appear in
21
+ this file, see http://wiki.apache.org/solr/SolrConfigXml.
22
+ -->
23
+ <config>
24
+ <!-- In all configuration below, a prefix of "solr." for class names
25
+ is an alias that causes solr to search appropriate packages,
26
+ including org.apache.solr.(search|update|request|core|analysis)
27
+
28
+ You may also specify a fully qualified Java classname if you
29
+ have your own custom plugins.
30
+ -->
31
+
32
+ <!-- Controls what version of Lucene various components of Solr
33
+ adhere to. Generally, you want to use the latest version to
34
+ get all bug fixes and improvements. It is highly recommended
35
+ that you fully re-index after changing this setting as it can
36
+ affect both how text is indexed and queried.
37
+ -->
38
+ <luceneMatchVersion>6.6.0</luceneMatchVersion>
39
+
40
+ <!-- <lib/> directives can be used to instruct Solr to load any Jars
41
+ identified and use them to resolve any "plugins" specified in
42
+ your solrconfig.xml or schema.xml (ie: Analyzers, Request
43
+ Handlers, etc...).
44
+
45
+ All directories and paths are resolved relative to the
46
+ instanceDir.
47
+
48
+ Please note that <lib/> directives are processed in the order
49
+ that they appear in your solrconfig.xml file, and are "stacked"
50
+ on top of each other when building a ClassLoader - so if you have
51
+ plugin jars with dependencies on other jars, the "lower level"
52
+ dependency jars should be loaded first.
53
+
54
+ If a "./lib" directory exists in your instanceDir, all files
55
+ found in it are included as if you had used the following
56
+ syntax...
57
+
58
+ <lib dir="./lib" />
59
+ -->
60
+
61
+ <!-- A 'dir' option by itself adds any files found in the directory
62
+ to the classpath, this is useful for including all jars in a
63
+ directory.
64
+
65
+ When a 'regex' is specified in addition to a 'dir', only the
66
+ files in that directory which completely match the regex
67
+ (anchored on both ends) will be included.
68
+
69
+ If a 'dir' option (with or without a regex) is used and nothing
70
+ is found that matches, a warning will be logged.
71
+
72
+ The examples below can be used to load some solr-contribs along
73
+ with their external dependencies.
74
+ -->
75
+ <lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar" />
76
+ <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar" />
77
+
78
+ <lib dir="${solr.install.dir:../../../..}/contrib/clustering/lib/" regex=".*\.jar" />
79
+ <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-clustering-\d.*\.jar" />
80
+
81
+ <lib dir="${solr.install.dir:../../../..}/contrib/langid/lib/" regex=".*\.jar" />
82
+ <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-langid-\d.*\.jar" />
83
+
84
+ <lib dir="${solr.install.dir:../../../..}/contrib/velocity/lib" regex=".*\.jar" />
85
+ <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-velocity-\d.*\.jar" />
86
+ <!-- an exact 'path' can be used instead of a 'dir' to specify a
87
+ specific jar file. This will cause a serious error to be logged
88
+ if it can't be loaded.
89
+ -->
90
+ <!--
91
+ <lib path="../a-jar-that-does-not-exist.jar" />
92
+ -->
93
+
94
+ <!-- Data Directory
95
+
96
+ Used to specify an alternate directory to hold all index data
97
+ other than the default ./data under the Solr home. If
98
+ replication is in use, this should match the replication
99
+ configuration.
100
+ -->
101
+ <dataDir>${solr.data.dir:}</dataDir>
102
+
103
+
104
+ <!-- The DirectoryFactory to use for indexes.
105
+
106
+ solr.StandardDirectoryFactory is filesystem
107
+ based and tries to pick the best implementation for the current
108
+ JVM and platform. solr.NRTCachingDirectoryFactory, the default,
109
+ wraps solr.StandardDirectoryFactory and caches small files in memory
110
+ for better NRT performance.
111
+
112
+ One can force a particular implementation via solr.MMapDirectoryFactory,
113
+ solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
114
+
115
+ solr.RAMDirectoryFactory is memory based, not
116
+ persistent, and doesn't work with replication.
117
+ -->
118
+ <directoryFactory name="DirectoryFactory"
119
+ class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
120
+
121
+ <!-- The CodecFactory for defining the format of the inverted index.
122
+ The default implementation is SchemaCodecFactory, which is the official Lucene
123
+ index format, but hooks into the schema to provide per-field customization of
124
+ the postings lists and per-document values in the fieldType element
125
+ (postingsFormat/docValuesFormat). Note that most of the alternative implementations
126
+ are experimental, so if you choose to customize the index format, it's a good
127
+ idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
128
+ before upgrading to a newer version to avoid unnecessary reindexing.
129
+ A "compressionMode" string element can be added to <codecFactory> to choose
130
+ between the existing compression modes in the default codec: "BEST_SPEED" (default)
131
+ or "BEST_COMPRESSION".
132
+ -->
133
+ <codecFactory class="solr.SchemaCodecFactory"/>
134
+
135
+ <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
136
+ Index Config - These settings control low-level behavior of indexing
137
+ Most example settings here show the default value, but are commented
138
+ out, to more easily see where customizations have been made.
139
+
140
+ Note: This replaces <indexDefaults> and <mainIndex> from older versions
141
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
142
+ <indexConfig>
143
+ <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a
144
+ LimitTokenCountFilterFactory in your fieldType definition. E.g.
145
+ <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
146
+ -->
147
+ <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
148
+ <!-- <writeLockTimeout>1000</writeLockTimeout> -->
149
+
150
+ <!-- Expert: Enabling compound file will use fewer files for the index,
150
+ and therefore fewer file descriptors, at the expense of performance.
152
+ Default in Lucene is "true". Default in Solr is "false" (since 3.6) -->
153
+ <!-- <useCompoundFile>false</useCompoundFile> -->
154
+
155
+ <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
156
+ indexing for buffering added documents and deletions before they are
157
+ flushed to the Directory.
158
+ maxBufferedDocs sets a limit on the number of documents buffered
159
+ before flushing.
160
+ If both ramBufferSizeMB and maxBufferedDocs are set, then
161
+ Lucene will flush based on whichever limit is hit first. -->
162
+ <!-- <ramBufferSizeMB>100</ramBufferSizeMB> -->
163
+ <!-- <maxBufferedDocs>1000</maxBufferedDocs> -->
164
+
165
+ <!-- Expert: Merge Policy
166
+ The Merge Policy in Lucene controls how merging of segments is done.
167
+ The default since Solr/Lucene 3.3 is TieredMergePolicy.
168
+ The default since Lucene 2.3 was the LogByteSizeMergePolicy,
169
+ Even older versions of Lucene used LogDocMergePolicy.
170
+ -->
171
+ <!--
172
+ <mergePolicyFactory class="org.apache.solr.index.TieredMergePolicyFactory">
173
+ <int name="maxMergeAtOnce">10</int>
174
+ <int name="segmentsPerTier">10</int>
175
+ <double name="noCFSRatio">0.1</double>
176
+ </mergePolicyFactory>
177
+ -->
178
+
179
+ <!-- Expert: Merge Scheduler
180
+ The Merge Scheduler in Lucene controls how merges are
181
+ performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
182
+ can perform merges in the background using separate threads.
183
+ The SerialMergeScheduler (Lucene 2.2 default) does not.
184
+ -->
185
+ <!--
186
+ <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
187
+ -->
188
+
189
+ <!-- LockFactory
190
+
191
+ This option specifies which Lucene LockFactory implementation
192
+ to use.
193
+
194
+ single = SingleInstanceLockFactory - suggested for a
195
+ read-only index or when there is no possibility of
196
+ another process trying to modify the index.
197
+ native = NativeFSLockFactory - uses OS native file locking.
198
+ Do not use when multiple solr webapps in the same
199
+ JVM are attempting to share a single index.
200
+ simple = SimpleFSLockFactory - uses a plain file for locking
201
+
202
+ Defaults: 'native' is default for Solr3.6 and later, otherwise
203
+ 'simple' is the default
204
+
205
+ More details on the nuances of each LockFactory...
206
+ http://wiki.apache.org/lucene-java/AvailableLockFactories
207
+ -->
208
+ <lockType>${solr.lock.type:native}</lockType>
209
+
210
+ <!-- Commit Deletion Policy
211
+ Custom deletion policies can be specified here. The class must
212
+ implement org.apache.lucene.index.IndexDeletionPolicy.
213
+
214
+ The default Solr IndexDeletionPolicy implementation supports
215
+ deleting index commit points on number of commits, age of
216
+ commit point and optimized status.
217
+
218
+ The latest commit point should always be preserved regardless
219
+ of the criteria.
220
+ -->
221
+ <!--
222
+ <deletionPolicy class="solr.SolrDeletionPolicy">
223
+ -->
224
+ <!-- The number of commit points to be kept -->
225
+ <!-- <str name="maxCommitsToKeep">1</str> -->
226
+ <!-- The number of optimized commit points to be kept -->
227
+ <!-- <str name="maxOptimizedCommitsToKeep">0</str> -->
228
+ <!--
229
+ Delete all commit points once they have reached the given age.
230
+ Supports DateMathParser syntax e.g.
231
+ -->
232
+ <!--
233
+ <str name="maxCommitAge">30MINUTES</str>
234
+ <str name="maxCommitAge">1DAY</str>
235
+ -->
236
+ <!--
237
+ </deletionPolicy>
238
+ -->
239
+
240
+ <!-- Lucene Infostream
241
+
242
+ To aid in advanced debugging, Lucene provides an "InfoStream"
243
+ of detailed information when indexing.
244
+
245
+ Setting the value to true will instruct the underlying Lucene
245
+ IndexWriter to write its debugging info to the specified file
247
+ -->
248
+ <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> -->
249
+ </indexConfig>
250
+
251
+
252
+ <!-- JMX
253
+
254
+ This example enables JMX if and only if an existing MBeanServer
255
+ is found, use this if you want to configure JMX through JVM
256
+ parameters. Remove this to disable exposing Solr configuration
257
+ and statistics to JMX.
258
+
259
+ For more details see http://wiki.apache.org/solr/SolrJmx
260
+ -->
261
+ <jmx />
262
+ <!-- If you want to connect to a particular server, specify the
263
+ agentId
264
+ -->
265
+ <!-- <jmx agentId="myAgent" /> -->
266
+ <!-- If you want to start a new MBeanServer, specify the serviceUrl -->
267
+ <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
268
+ -->
269
+
270
+ <!-- The default high-performance update handler -->
271
+ <updateHandler class="solr.DirectUpdateHandler2">
272
+
273
+ <!-- Enables a transaction log, used for real-time get, durability, and
274
+ solr cloud replica recovery. The log can grow as big as
275
+ uncommitted changes to the index, so use of a hard autoCommit
276
+ is recommended (see below).
277
+ "dir" - the target directory for transaction logs, defaults to the
278
+ solr data directory.
279
+ "numVersionBuckets" - sets the number of buckets used to keep
280
+ track of max version values when checking for re-ordered
281
+ updates; increase this value to reduce the cost of
282
+ synchronizing access to version buckets during high-volume
283
+ indexing, this requires 8 bytes (long) * numVersionBuckets
284
+ of heap space per Solr core.
285
+ -->
286
+ <updateLog>
287
+ <str name="dir">${solr.ulog.dir:}</str>
288
+ <int name="numVersionBuckets">${solr.ulog.numVersionBuckets:65536}</int>
289
+ </updateLog>
290
+
291
+ <!-- AutoCommit
292
+
293
+ Perform a hard commit automatically under certain conditions.
294
+ Instead of enabling autoCommit, consider using "commitWithin"
295
+ when adding documents.
296
+
297
+ http://wiki.apache.org/solr/UpdateXmlMessages
298
+
299
+ maxDocs - Maximum number of documents to add since the last
300
+ commit before automatically triggering a new commit.
301
+
302
+ maxTime - Maximum amount of time in ms that is allowed to pass
303
+ since a document was added before automatically
304
+ triggering a new commit.
305
+ openSearcher - if false, the commit causes recent index changes
306
+ to be flushed to stable storage, but does not cause a new
307
+ searcher to be opened to make those changes visible.
308
+
309
+ If the updateLog is enabled, then it's highly recommended to
310
+ have some sort of hard autoCommit to limit the log size.
311
+ -->
312
+ <autoCommit>
313
+ <maxTime>${solr.autoCommit.maxTime:15000}</maxTime>
314
+ <openSearcher>false</openSearcher>
315
+ </autoCommit>
316
+
317
+ <!-- softAutoCommit is like autoCommit except it causes a
318
+ 'soft' commit which only ensures that changes are visible
319
+ but does not ensure that data is synced to disk. This is
320
+ faster and more near-realtime friendly than a hard commit.
321
+ -->
322
+
323
+ <autoSoftCommit>
324
+ <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
325
+ </autoSoftCommit>
326
+
327
+ <!-- Update Related Event Listeners
328
+
329
+ Various IndexWriter related events can trigger Listeners to
330
+ take actions.
331
+
332
+ postCommit - fired after every commit or optimize command
333
+ postOptimize - fired after every optimize command
334
+ -->
335
+ <!-- The RunExecutableListener executes an external command from a
336
+ hook such as postCommit or postOptimize.
337
+
338
+ exe - the name of the executable to run
339
+ dir - dir to use as the current working directory. (default=".")
340
+ wait - the calling thread waits until the executable returns.
341
+ (default="true")
342
+ args - the arguments to pass to the program. (default is none)
343
+ env - environment variables to set. (default is none)
344
+ -->
345
+ <!-- This example shows how RunExecutableListener could be used
346
+ with the script based replication...
347
+ http://wiki.apache.org/solr/CollectionDistribution
348
+ -->
349
+ <!--
350
+ <listener event="postCommit" class="solr.RunExecutableListener">
351
+ <str name="exe">solr/bin/snapshooter</str>
352
+ <str name="dir">.</str>
353
+ <bool name="wait">true</bool>
354
+ <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
355
+ <arr name="env"> <str>MYVAR=val1</str> </arr>
356
+ </listener>
357
+ -->
358
+
359
+ </updateHandler>
360
+
361
+ <!-- IndexReaderFactory
362
+
363
+ Use the following format to specify a custom IndexReaderFactory,
364
+ which allows for alternate IndexReader implementations.
365
+
366
+ ** Experimental Feature **
367
+
368
+ Please note - Using a custom IndexReaderFactory may prevent
369
+ certain other features from working. The API to
370
+ IndexReaderFactory may change without warning or may even be
371
+ removed from future releases if the problems cannot be
372
+ resolved.
373
+
374
+
375
+ ** Features that may not work with custom IndexReaderFactory **
376
+
377
+ The ReplicationHandler assumes a disk-resident index. Using a
378
+ custom IndexReader implementation may cause incompatibility
379
+ with ReplicationHandler and may cause replication to not work
380
+ correctly. See SOLR-1366 for details.
381
+
382
+ -->
383
+ <!--
384
+ <indexReaderFactory name="IndexReaderFactory" class="package.class">
385
+ <str name="someArg">Some Value</str>
386
+ </indexReaderFactory >
387
+ -->
388
+
389
+ <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
390
+ Query section - these settings control query time things like caches
391
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
392
+ <query>
393
+ <!-- Max Boolean Clauses
394
+
395
+ Maximum number of clauses in each BooleanQuery, an exception
396
+ is thrown if exceeded.
397
+
398
+ ** WARNING **
399
+
400
+ This option actually modifies a global Lucene property that
401
+ will affect all SolrCores. If multiple solrconfig.xml files
402
+ disagree on this property, the value at any given moment will
403
+ be based on the last SolrCore to be initialized.
404
+
405
+ -->
406
+ <maxBooleanClauses>1024</maxBooleanClauses>
407
+
408
+
409
+ <!-- Solr Internal Query Caches
410
+
411
+ There are two implementations of cache available for Solr,
412
+ LRUCache, based on a synchronized LinkedHashMap, and
413
+ FastLRUCache, based on a ConcurrentHashMap.
414
+
415
+ FastLRUCache has faster gets and slower puts in single
416
+ threaded operation and thus is generally faster than LRUCache
417
+ when the hit ratio of the cache is high (> 75%), and may be
418
+ faster under other scenarios on multi-cpu systems.
419
+ -->
420
+
421
+ <!-- Filter Cache
422
+
423
+ Cache used by SolrIndexSearcher for filters (DocSets),
424
+ unordered sets of *all* documents that match a query. When a
425
+ new searcher is opened, its caches may be prepopulated or
426
+ "autowarmed" using data from caches in the old searcher.
427
+ autowarmCount is the number of items to prepopulate. For
428
+ LRUCache, the autowarmed items will be the most recently
429
+ accessed items.
430
+
431
+ Parameters:
432
+ class - the SolrCache implementation to use
433
+ (LRUCache or FastLRUCache)
434
+ size - the maximum number of entries in the cache
435
+ initialSize - the initial capacity (number of entries) of
436
+ the cache. (see java.util.HashMap)
437
+ autowarmCount - the number of entries to prepopulate from
438
+ an old cache.
439
+ maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed
440
+ to occupy. Note that when this option is specified, the size
441
+ and initialSize parameters are ignored.
442
+ -->
443
+ <filterCache class="solr.FastLRUCache"
444
+ size="512"
445
+ initialSize="512"
446
+ autowarmCount="0"/>
447
+
448
+ <!-- Query Result Cache
449
+
450
+ Caches results of searches - ordered lists of document ids
451
+ (DocList) based on a query, a sort, and the range of documents requested.
452
+ Additional supported parameter by LRUCache:
453
+ maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed
454
+ to occupy
455
+ -->
456
+ <queryResultCache class="solr.LRUCache"
457
+ size="512"
458
+ initialSize="512"
459
+ autowarmCount="0"/>
460
+
461
+ <!-- Document Cache
462
+
463
+ Caches Lucene Document objects (the stored fields for each
464
+ document). Since Lucene internal document ids are transient,
465
+ this cache will not be autowarmed.
466
+ -->
467
+ <documentCache class="solr.LRUCache"
468
+ size="512"
469
+ initialSize="512"
470
+ autowarmCount="0"/>
471
+
472
+ <!-- custom cache currently used by block join -->
473
+ <cache name="perSegFilter"
474
+ class="solr.search.LRUCache"
475
+ size="10"
476
+ initialSize="0"
477
+ autowarmCount="10"
478
+ regenerator="solr.NoOpRegenerator" />
479
+
480
+ <!-- Field Value Cache
481
+
482
+ Cache used to hold field values that are quickly accessible
483
+ by document id. The fieldValueCache is created by default
484
+ even if not configured here.
485
+ -->
486
+ <!--
487
+ <fieldValueCache class="solr.FastLRUCache"
488
+ size="512"
489
+ autowarmCount="128"
490
+ showItems="32" />
491
+ -->
492
+
493
+ <!-- Custom Cache
494
+
495
+ Example of a generic cache. These caches may be accessed by
496
+ name through SolrIndexSearcher.getCache(),cacheLookup(), and
497
+ cacheInsert(). The purpose is to enable easy caching of
498
+ user/application level data. The regenerator argument should
499
+ be specified as an implementation of solr.CacheRegenerator
500
+ if autowarming is desired.
501
+ -->
502
+ <!--
503
+ <cache name="myUserCache"
504
+ class="solr.LRUCache"
505
+ size="4096"
506
+ initialSize="1024"
507
+ autowarmCount="1024"
508
+ regenerator="com.mycompany.MyRegenerator"
509
+ />
510
+ -->
511
+
512
+
513
+ <!-- Lazy Field Loading
514
+
515
+ If true, stored fields that are not requested will be loaded
516
+ lazily. This can result in a significant speed improvement
517
+ if the usual case is to not load all stored fields,
518
+ especially if the skipped fields are large compressed text
519
+ fields.
520
+ -->
521
+ <enableLazyFieldLoading>true</enableLazyFieldLoading>
522
+
523
+ <!-- Use Filter For Sorted Query
524
+
525
+ A possible optimization that attempts to use a filter to
526
+ satisfy a search. If the requested sort does not include
527
+ score, then the filterCache will be checked for a filter
528
+ matching the query. If found, the filter will be used as the
529
+ source of document ids, and then the sort will be applied to
530
+ that.
531
+
532
+ For most situations, this will not be useful unless you
533
+ frequently get the same search repeatedly with different sort
534
+ options, and none of them ever use "score"
535
+ -->
536
+ <!--
537
+ <useFilterForSortedQuery>true</useFilterForSortedQuery>
538
+ -->
539
+
540
+ <!-- Result Window Size
541
+
542
+ An optimization for use with the queryResultCache. When a search
543
+ is requested, a superset of the requested number of document ids
544
+ are collected. For example, if a search for a particular query
545
+ requests matching documents 10 through 19, and queryWindowSize is 50,
546
+ then documents 0 through 49 will be collected and cached. Any further
547
+ requests in that range can be satisfied via the cache.
548
+ -->
549
+ <queryResultWindowSize>20</queryResultWindowSize>
550
+
551
+ <!-- Maximum number of documents to cache for any entry in the
552
+ queryResultCache.
553
+ -->
554
+ <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
555
+
556
+ <!-- Query Related Event Listeners
557
+
558
+ Various IndexSearcher related events can trigger Listeners to
559
+ take actions.
560
+
561
+ newSearcher - fired whenever a new searcher is being prepared
562
+ and there is a current searcher handling requests (aka
563
+ registered). It can be used to prime certain caches to
564
+ prevent long request times for certain requests.
565
+
566
+ firstSearcher - fired whenever a new searcher is being
567
+ prepared but there is no current registered searcher to handle
568
+ requests or to gain autowarming data from.
569
+
570
+
571
+ -->
572
+ <!-- QuerySenderListener takes an array of NamedList and executes a
573
+ local query request for each NamedList in sequence.
574
+ -->
575
+ <listener event="newSearcher" class="solr.QuerySenderListener">
576
+ <arr name="queries">
577
+ <!--
578
+ <lst><str name="q">solr</str><str name="sort">price asc</str></lst>
579
+ <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
580
+ -->
581
+ </arr>
582
+ </listener>
583
+ <listener event="firstSearcher" class="solr.QuerySenderListener">
584
+ <arr name="queries">
585
+ <!--
586
+ <lst>
587
+ <str name="q">static firstSearcher warming in solrconfig.xml</str>
588
+ </lst>
589
+ -->
590
+ </arr>
591
+ </listener>
592
+
593
+ <!-- Use Cold Searcher
594
+
595
+ If a search request comes in and there is no current
596
+ registered searcher, then immediately register the still
597
+ warming searcher and use it. If "false" then all requests
598
+ will block until the first searcher is done warming.
599
+ -->
600
+ <useColdSearcher>false</useColdSearcher>
601
+
602
+ </query>
603
+
604
+
605
+ <!-- Request Dispatcher
606
+
607
+ This section contains instructions for how the SolrDispatchFilter
608
+ should behave when processing requests for this SolrCore.
609
+
610
+ handleSelect is a legacy option that affects the behavior of requests
611
+ such as /select?qt=XXX
612
+
613
+ handleSelect="true" will cause the SolrDispatchFilter to process
614
+ the request and dispatch the query to a handler specified by the
615
+ "qt" param, assuming "/select" isn't already registered.
616
+
617
+ handleSelect="false" will cause the SolrDispatchFilter to
618
+ ignore "/select" requests, resulting in a 404 unless a handler
619
+ is explicitly registered with the name "/select"
620
+
621
+ handleSelect="true" is not recommended for new users, but is the default
622
+ for backwards compatibility
623
+ -->
624
+ <requestDispatcher handleSelect="false" >
625
+ <!-- Request Parsing
626
+
627
+ These settings indicate how Solr Requests may be parsed, and
628
+ what restrictions may be placed on the ContentStreams from
629
+ those requests
630
+
631
+ enableRemoteStreaming - enables use of the stream.file
632
+ and stream.url parameters for specifying remote streams.
633
+
634
+ multipartUploadLimitInKB - specifies the max size (in KiB) of
635
+ Multipart File Uploads that Solr will allow in a Request.
636
+
637
+ formdataUploadLimitInKB - specifies the max size (in KiB) of
638
+ form data (application/x-www-form-urlencoded) sent via
639
+ POST. You can use POST to pass request parameters not
640
+ fitting into the URL.
641
+
642
+ addHttpRequestToContext - if set to true, it will instruct
643
+ the requestParsers to include the original HttpServletRequest
644
+ object in the context map of the SolrQueryRequest under the
645
+ key "httpRequest". It will not be used by any of the existing
646
+ Solr components, but may be useful when developing custom
647
+ plugins.
648
+
649
+ *** WARNING ***
650
+ The settings below authorize Solr to fetch remote files. You
651
+ should make sure your system has some authentication before
652
+ using enableRemoteStreaming="true"
653
+
654
+ -->
655
+ <requestParsers enableRemoteStreaming="true"
656
+ multipartUploadLimitInKB="2048000"
657
+ formdataUploadLimitInKB="2048"
658
+ addHttpRequestToContext="false"/>
659
+
660
+ <!-- HTTP Caching
661
+
662
+ Set HTTP caching related parameters (for proxy caches and clients).
663
+
664
+ The options below instruct Solr not to output any HTTP Caching
665
+ related headers
666
+ -->
667
+ <httpCaching never304="true" />
668
+ <!-- If you include a <cacheControl> directive, it will be used to
669
+ generate a Cache-Control header (as well as an Expires header
670
+ if the value contains "max-age=")
671
+
672
+ By default, no Cache-Control header is generated.
673
+
674
+ You can use the <cacheControl> option even if you have set
675
+ never304="true"
676
+ -->
677
+ <!--
678
+ <httpCaching never304="true" >
679
+ <cacheControl>max-age=30, public</cacheControl>
680
+ </httpCaching>
681
+ -->
682
+ <!-- To enable Solr to respond with automatically generated HTTP
683
+ Caching headers, and to respond to Cache Validation requests
684
+ correctly, set the value of never304="false"
685
+
686
+ This will cause Solr to generate Last-Modified and ETag
687
+ headers based on the properties of the Index.
688
+
689
+ The following options can also be specified to affect the
690
+ values of these headers...
691
+
692
+ lastModifiedFrom - the default value is "openTime" which means the
692
+ Last-Modified value (and validation against If-Modified-Since
693
+ requests) will all be relative to when the current Searcher
694
+ was opened. You can change it to lastModifiedFrom="dirLastMod" if
696
+ you want the value to exactly correspond to when the physical
697
+ index was last modified.
698
+
699
+ etagSeed="..." is an option you can change to force the ETag
700
+ header (and validation against If-None-Match requests) to be
701
+ different even if the index has not changed (ie: when making
702
+ significant changes to your config file)
703
+
704
+ (lastModifiedFrom and etagSeed are both ignored if you use
705
+ the never304="true" option)
706
+ -->
707
+ <!--
708
+ <httpCaching lastModifiedFrom="openTime"
709
+ etagSeed="Solr">
710
+ <cacheControl>max-age=30, public</cacheControl>
711
+ </httpCaching>
712
+ -->
713
+ </requestDispatcher>
714
+
715
+ <!-- Request Handlers
716
+
717
+ http://wiki.apache.org/solr/SolrRequestHandler
718
+
719
+ Incoming queries will be dispatched to a specific handler by name
720
+ based on the path specified in the request.
721
+
722
+ Legacy behavior: If the request path uses "/select" but no Request
723
+ Handler has that name, and if handleSelect="true" has been specified in
724
+ the requestDispatcher, then the Request Handler is dispatched based on
725
+ the qt parameter. Handlers without a leading '/' are accessed this way
726
+ like so: http://host/app/[core/]select?qt=name If no qt is
727
+ given, then the requestHandler that declares default="true" will be
728
+ used or the one named "standard".
729
+
730
+ If a Request Handler is declared with startup="lazy", then it will
731
+ not be initialized until the first request that uses it.
732
+
733
+ -->
734
+ <!-- SearchHandler
735
+
736
+ http://wiki.apache.org/solr/SearchHandler
737
+
738
+ For processing Search Queries, the primary Request Handler
739
+ provided with Solr is "SearchHandler". It delegates to a sequence
740
+ of SearchComponents (see below) and supports distributed
741
+ queries across multiple shards
742
+ -->
743
+ <requestHandler name="/select" class="solr.SearchHandler">
744
+ <!-- default values for query parameters can be specified, these
745
+ will be overridden by parameters in the request
746
+ -->
747
+ <lst name="defaults">
748
+ <str name="echoParams">explicit</str>
749
+ <int name="rows">10</int>
750
+ <str name="fl">*</str>
751
+ <str name="q.alt">*:*</str>
752
+ <str name="defType">dismax</str>
753
+ <str name="facet">on</str>
754
+ <str name="facet.mincount">1</str>
755
+ <str name="facet.limit">10</str>
756
+ <str name="qf">access_terms author category collection collection_name collection_number collections comments content content_encoding content_ffb content_first_bytes content_language content_length content_metadata content_metadata_ss content_text_length content_type content_type_droid content_type_ext content_type_full content_type_norm content_type_served content_type_tika content_type_version crawl_date crawl_dates crawl_year crawl_year_month crawl_year_month_day crawl_years description domain elements_used generator hash hashes host host_surt id_long image_colours image_dominant_colour image_faces image_faces_count image_height image_size image_width institution keywords last_modified last_modified_year license_url links links_domains links_hosts links_hosts_surts links_norm links_public_suffixes locations parse_error pdf_pdfa_errors pdf_pdfa_is_valid postcode postcode_district public_suffix publication_date publication_year record_type referrer_url resourcename sentiment sentiment_score server source_file source_file_offset status_code subject text title type url url_norm url_path url_type wayback_date wct_agency wct_collections wct_description wct_instance_id wct_subjects wct_target_id wct_title xml_root_ns</str>
757
+
758
+ <!-- <str name="df">text</str> -->
759
+ </lst>
760
+ <!-- In addition to defaults, "appends" params can be specified
761
+ to identify values which should be appended to the list of
762
+ multi-val params from the query (or the existing "defaults").
763
+ -->
764
+ <!-- In this example, the param "fq=instock:true" would be appended to
765
+ any query time fq params the user may specify, as a mechanism for
766
+ partitioning the index, independent of any user selected filtering
767
+ that may also be desired (perhaps as a result of faceted searching).
768
+
769
+ NOTE: there is *absolutely* nothing a client can do to prevent these
770
+ "appends" values from being used, so don't use this mechanism
771
+ unless you are sure you always want it.
772
+ -->
773
+ <!--
774
+ <lst name="appends">
775
+ <str name="fq">inStock:true</str>
776
+ </lst>
777
+ -->
778
+ <!-- "invariants" are a way of letting the Solr maintainer lock down
779
+ the options available to Solr clients. Any params values
780
+ specified here are used regardless of what values may be specified
781
+ in either the query, the "defaults", or the "appends" params.
782
+
783
+ In this example, the facet.field and facet.query params would
784
+ be fixed, limiting the facets clients can use. Faceting is
785
+ not turned on by default - but if the client does specify
786
+ facet=true in the request, these are the only facets they
787
+ will be able to see counts for; regardless of what other
788
+ facet.field or facet.query params they may specify.
789
+
790
+ NOTE: there is *absolutely* nothing a client can do to prevent these
791
+ "invariants" values from being used, so don't use this mechanism
792
+ unless you are sure you always want it.
793
+ -->
794
+ <!--
795
+ <lst name="invariants">
796
+ <str name="facet.field">cat</str>
797
+ <str name="facet.field">manu_exact</str>
798
+ <str name="facet.query">price:[* TO 500]</str>
799
+ <str name="facet.query">price:[500 TO *]</str>
800
+ </lst>
801
+ -->
802
+ <!-- If the default list of SearchComponents is not desired, that
803
+ list can either be overridden completely, or components can be
804
+ prepended or appended to the default list. (see below)
805
+ -->
806
+ <!--
807
+ <arr name="components">
808
+ <str>nameOfCustomComponent1</str>
809
+ <str>nameOfCustomComponent2</str>
810
+ </arr>
811
+ -->
812
+ </requestHandler>
813
+
814
+ <!-- A request handler that returns indented JSON by default -->
815
+ <requestHandler name="/query" class="solr.SearchHandler">
816
+ <lst name="defaults">
817
+ <str name="echoParams">explicit</str>
818
+ <str name="wt">json</str>
819
+ <str name="indent">true</str>
820
+ </lst>
821
+ </requestHandler>
822
+
823
+ <!-- A Robust Example
824
+
825
+ This example SearchHandler declaration shows off usage of the
826
+ SearchHandler with many defaults declared
827
+
828
+ Note that multiple instances of the same Request Handler
829
+ (SearchHandler) can be registered multiple times with different
830
+ names (and different init parameters)
831
+ -->
832
+ <requestHandler name="/browse" class="solr.SearchHandler" useParams="query,facets,velocity,browse">
833
+ <lst name="defaults">
834
+ <str name="echoParams">explicit</str>
835
+ </lst>
836
+ </requestHandler>
837
+
838
+ <initParams path="/update/**,/query,/select,/tvrh,/elevate,/spell,/browse">
839
+ <lst name="defaults">
840
+ <str name="df">_text_</str>
841
+ </lst>
842
+ </initParams>
843
+
844
+ <!-- This enables schemaless mode
845
+ <initParams path="/update/**">
846
+ <lst name="defaults">
847
+ <str name="update.chain">add-unknown-fields-to-the-schema</str>
848
+ </lst>
849
+ </initParams>
850
+ -->
851
+
852
+ <!-- Solr Cell Update Request Handler
853
+
854
+ http://wiki.apache.org/solr/ExtractingRequestHandler
855
+
856
+ -->
857
+ <requestHandler name="/update/extract"
858
+ startup="lazy"
859
+ class="solr.extraction.ExtractingRequestHandler" >
860
+ <lst name="defaults">
861
+ <str name="lowernames">true</str>
862
+ <str name="fmap.meta">ignored_</str>
863
+ <str name="fmap.content">_text_</str>
864
+ </lst>
865
+ </requestHandler>
866
+ <!-- Search Components
867
+
868
+ Search components are registered to SolrCore and used by
869
+ instances of SearchHandler (which can access them by name)
870
+
871
+ By default, the following components are available:
872
+
873
+ <searchComponent name="query" class="solr.QueryComponent" />
874
+ <searchComponent name="facet" class="solr.FacetComponent" />
875
+ <searchComponent name="mlt" class="solr.MoreLikeThisComponent" />
876
+ <searchComponent name="highlight" class="solr.HighlightComponent" />
877
+ <searchComponent name="stats" class="solr.StatsComponent" />
878
+ <searchComponent name="debug" class="solr.DebugComponent" />
879
+
880
+ Default configuration in a requestHandler would look like:
881
+
882
+ <arr name="components">
883
+ <str>query</str>
884
+ <str>facet</str>
885
+ <str>mlt</str>
886
+ <str>highlight</str>
887
+ <str>stats</str>
888
+ <str>debug</str>
889
+ </arr>
890
+
891
+ If you register a searchComponent to one of the standard names,
892
+ that will be used instead of the default.
893
+
894
+ To insert components before or after the 'standard' components, use:
895
+
896
+ <arr name="first-components">
897
+ <str>myFirstComponentName</str>
898
+ </arr>
899
+
900
+ <arr name="last-components">
901
+ <str>myLastComponentName</str>
902
+ </arr>
903
+
904
+ NOTE: The component registered with the name "debug" will
905
+ always be executed after the "last-components"
906
+
907
+ -->
908
+
909
+ <!-- Spell Check
910
+
911
+ The spell check component can return a list of alternative spelling
912
+ suggestions.
913
+
914
+ http://wiki.apache.org/solr/SpellCheckComponent
915
+ -->
916
+ <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
917
+
918
+ <str name="queryAnalyzerFieldType">text_general</str>
919
+
920
+ <!-- Multiple "Spell Checkers" can be declared and used by this
921
+ component
922
+ -->
923
+
924
+ <!-- a spellchecker built from a field of the main index -->
925
+ <lst name="spellchecker">
926
+ <str name="name">default</str>
927
+ <str name="field">_text_</str>
928
+ <str name="classname">solr.DirectSolrSpellChecker</str>
929
+ <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
930
+ <str name="distanceMeasure">internal</str>
931
+ <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
932
+ <float name="accuracy">0.5</float>
933
+ <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
934
+ <int name="maxEdits">2</int>
935
+ <!-- the minimum shared prefix when enumerating terms -->
936
+ <int name="minPrefix">1</int>
937
+ <!-- maximum number of inspections per result. -->
938
+ <int name="maxInspections">5</int>
939
+ <!-- minimum length of a query term to be considered for correction -->
940
+ <int name="minQueryLength">4</int>
941
+ <!-- maximum fraction of documents a query term can appear in and still be considered for correction -->
942
+ <float name="maxQueryFrequency">0.01</float>
943
+ <!-- uncomment this to require suggestions to occur in 1% of the documents
944
+ <float name="thresholdTokenFrequency">.01</float>
945
+ -->
946
+ </lst>
947
+
948
+ <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
949
+ <!--
950
+ <lst name="spellchecker">
951
+ <str name="name">wordbreak</str>
952
+ <str name="classname">solr.WordBreakSolrSpellChecker</str>
953
+ <str name="field">name</str>
954
+ <str name="combineWords">true</str>
955
+ <str name="breakWords">true</str>
956
+ <int name="maxChanges">10</int>
957
+ </lst>
958
+ -->
959
+ </searchComponent>
960
+
961
+ <!-- A request handler for demonstrating the spellcheck component.
962
+
963
+ NOTE: This is purely as an example. The whole purpose of the
964
+ SpellCheckComponent is to hook it into the request handler that
965
+ handles your normal user queries so that a separate request is
966
+ not needed to get suggestions.
967
+
968
+ IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS
969
+ NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
970
+
971
+ See http://wiki.apache.org/solr/SpellCheckComponent for details
972
+ on the request parameters.
973
+ -->
974
+ <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
975
+ <lst name="defaults">
976
+ <!-- Only the 'default' spellchecker is enabled here. If the 'wordbreak'
977
+ spellchecker above is uncommented and added as a second
978
+ spellcheck.dictionary, Solr combines suggestions from both, and
979
+ collations (re-written queries) can include corrections from both. -->
980
+ <str name="spellcheck.dictionary">default</str>
981
+ <str name="spellcheck">on</str>
982
+ <str name="spellcheck.extendedResults">true</str>
983
+ <str name="spellcheck.count">10</str>
984
+ <str name="spellcheck.alternativeTermCount">5</str>
985
+ <str name="spellcheck.maxResultsForSuggest">5</str>
986
+ <str name="spellcheck.collate">true</str>
987
+ <str name="spellcheck.collateExtendedResults">true</str>
988
+ <str name="spellcheck.maxCollationTries">10</str>
989
+ <str name="spellcheck.maxCollations">5</str>
990
+ </lst>
991
+ <arr name="last-components">
992
+ <str>spellcheck</str>
993
+ </arr>
994
+ </requestHandler>
995
+
996
+ <!-- Term Vector Component
997
+
998
+ http://wiki.apache.org/solr/TermVectorComponent
999
+ -->
1000
+ <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
1001
+
1002
+ <!-- A request handler for demonstrating the term vector component
1003
+
1004
+ This is purely as an example.
1005
+
1006
+ In reality you will likely want to add the component to your
1007
+ already specified request handlers.
1008
+ -->
1009
+ <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
1010
+ <lst name="defaults">
1011
+ <bool name="tv">true</bool>
1012
+ </lst>
1013
+ <arr name="last-components">
1014
+ <str>tvComponent</str>
1015
+ </arr>
1016
+ </requestHandler>
1017
+
1018
+ <!-- Clustering Component. (Omitted here. See the default Solr example for a typical configuration.) -->
1019
+
1020
+ <!-- Terms Component
1021
+
1022
+ http://wiki.apache.org/solr/TermsComponent
1023
+
1024
+ A component to return terms and document frequency of those
1025
+ terms
1026
+ -->
1027
+ <searchComponent name="terms" class="solr.TermsComponent"/>
1028
+
1029
+ <!-- A request handler for demonstrating the terms component -->
1030
+ <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
1031
+ <lst name="defaults">
1032
+ <bool name="terms">true</bool>
1033
+ <bool name="distrib">false</bool>
1034
+ </lst>
1035
+ <arr name="components">
1036
+ <str>terms</str>
1037
+ </arr>
1038
+ </requestHandler>
1039
+
1040
+
1041
+ <!-- Query Elevation Component
1042
+
1043
+ http://wiki.apache.org/solr/QueryElevationComponent
1044
+
1045
+ a search component that enables you to configure the top
1046
+ results for a given query regardless of the normal lucene
1047
+ scoring.
1048
+ -->
1049
+ <searchComponent name="elevator" class="solr.QueryElevationComponent" >
1050
+ <!-- pick a fieldType to analyze queries -->
1051
+ <str name="queryFieldType">string</str>
1052
+ <str name="config-file">elevate.xml</str>
1053
+ </searchComponent>
1054
+
1055
+ <!-- A request handler for demonstrating the elevator component -->
1056
+ <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
1057
+ <lst name="defaults">
1058
+ <str name="echoParams">explicit</str>
1059
+ </lst>
1060
+ <arr name="last-components">
1061
+ <str>elevator</str>
1062
+ </arr>
1063
+ </requestHandler>
1064
+
1065
+ <!-- Highlighting Component
1066
+
1067
+ http://wiki.apache.org/solr/HighlightingParameters
1068
+ -->
1069
+ <searchComponent class="solr.HighlightComponent" name="highlight">
1070
+ <highlighting>
1071
+ <!-- Configure the standard fragmenter -->
1072
+ <!-- This could most likely be commented out in the "default" case -->
1073
+ <fragmenter name="gap"
1074
+ default="true"
1075
+ class="solr.highlight.GapFragmenter">
1076
+ <lst name="defaults">
1077
+ <int name="hl.fragsize">100</int>
1078
+ </lst>
1079
+ </fragmenter>
1080
+
1081
+ <!-- A regular-expression-based fragmenter
1082
+ (for sentence extraction)
1083
+ -->
1084
+ <fragmenter name="regex"
1085
+ class="solr.highlight.RegexFragmenter">
1086
+ <lst name="defaults">
1087
+ <!-- slightly smaller fragsizes work better because of slop -->
1088
+ <int name="hl.fragsize">70</int>
1089
+ <!-- allow 50% slop on fragment sizes -->
1090
+ <float name="hl.regex.slop">0.5</float>
1091
+ <!-- a basic sentence pattern -->
1092
+ <str name="hl.regex.pattern">[-\w ,/\n\&quot;&apos;]{20,200}</str>
1093
+ </lst>
1094
+ </fragmenter>
1095
+
1096
+ <!-- Configure the standard formatter -->
1097
+ <formatter name="html"
1098
+ default="true"
1099
+ class="solr.highlight.HtmlFormatter">
1100
+ <lst name="defaults">
1101
+ <str name="hl.simple.pre"><![CDATA[<em>]]></str>
1102
+ <str name="hl.simple.post"><![CDATA[</em>]]></str>
1103
+ </lst>
1104
+ </formatter>
1105
+
1106
+ <!-- Configure the standard encoder -->
1107
+ <encoder name="html"
1108
+ class="solr.highlight.HtmlEncoder" />
1109
+
1110
+ <!-- Configure the standard fragListBuilder -->
1111
+ <fragListBuilder name="simple"
1112
+ class="solr.highlight.SimpleFragListBuilder"/>
1113
+
1114
+ <!-- Configure the single fragListBuilder -->
1115
+ <fragListBuilder name="single"
1116
+ class="solr.highlight.SingleFragListBuilder"/>
1117
+
1118
+ <!-- Configure the weighted fragListBuilder -->
1119
+ <fragListBuilder name="weighted"
1120
+ default="true"
1121
+ class="solr.highlight.WeightedFragListBuilder"/>
1122
+
1123
+ <!-- default tag FragmentsBuilder -->
1124
+ <fragmentsBuilder name="default"
1125
+ default="true"
1126
+ class="solr.highlight.ScoreOrderFragmentsBuilder">
1127
+ <!--
1128
+ <lst name="defaults">
1129
+ <str name="hl.multiValuedSeparatorChar">/</str>
1130
+ </lst>
1131
+ -->
1132
+ </fragmentsBuilder>
1133
+
1134
+ <!-- multi-colored tag FragmentsBuilder -->
1135
+ <fragmentsBuilder name="colored"
1136
+ class="solr.highlight.ScoreOrderFragmentsBuilder">
1137
+ <lst name="defaults">
1138
+ <str name="hl.tag.pre"><![CDATA[
1139
+ <b style="background:yellow">,<b style="background:lawngreen">,
1140
+ <b style="background:aquamarine">,<b style="background:magenta">,
1141
+ <b style="background:palegreen">,<b style="background:coral">,
1142
+ <b style="background:wheat">,<b style="background:khaki">,
1143
+ <b style="background:lime">,<b style="background:deepskyblue">]]></str>
1144
+ <str name="hl.tag.post"><![CDATA[</b>]]></str>
1145
+ </lst>
1146
+ </fragmentsBuilder>
1147
+
1148
+ <boundaryScanner name="default"
1149
+ default="true"
1150
+ class="solr.highlight.SimpleBoundaryScanner">
1151
+ <lst name="defaults">
1152
+ <str name="hl.bs.maxScan">10</str>
1153
+ <str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
1154
+ </lst>
1155
+ </boundaryScanner>
1156
+
1157
+ <boundaryScanner name="breakIterator"
1158
+ class="solr.highlight.BreakIteratorBoundaryScanner">
1159
+ <lst name="defaults">
1160
+ <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
1161
+ <str name="hl.bs.type">WORD</str>
1162
+ <!-- language and country are used when constructing Locale object. -->
1163
+ <!-- And the Locale object will be used when getting instance of BreakIterator -->
1164
+ <str name="hl.bs.language">en</str>
1165
+ <str name="hl.bs.country">US</str>
1166
+ </lst>
1167
+ </boundaryScanner>
1168
+ </highlighting>
1169
+ </searchComponent>
1170
+
1171
+ <!-- Update Processors
1172
+
1173
+ Chains of Update Processor Factories for dealing with Update
1174
+ Requests can be declared, and then used by name in Update
1175
+ Request Processors
1176
+
1177
+ http://wiki.apache.org/solr/UpdateRequestProcessor
1178
+
1179
+ -->
1180
+
1181
+ <!-- Add unknown fields to the schema
1182
+
1183
+ An example field type guessing update processor that will
1184
+ attempt to parse string-typed field values as Booleans, Longs,
1185
+ Doubles, or Dates, and then add schema fields with the guessed
1186
+ field types.
1187
+
1188
+ This requires that the schema is both managed and mutable, by
1189
+ declaring schemaFactory as ManagedIndexSchemaFactory, with
1190
+ mutable specified as true. (Not active here: this config declares ClassicIndexSchemaFactory.)
1191
+
1192
+ See http://wiki.apache.org/solr/GuessingFieldTypes
1193
+ -->
1194
+ <schemaFactory class="ClassicIndexSchemaFactory"/>
1195
+
1196
+ <!-- Deduplication
1197
+
1198
+ An example dedup update processor that creates the "id" field
1199
+ on the fly based on the hash code of some other fields. This
1200
+ example has overwriteDupes set to false since we are using the
1201
+ id field as the signatureField and Solr will maintain
1202
+ uniqueness based on that anyway.
1203
+
1204
+ -->
1205
+ <!--
1206
+ <updateRequestProcessorChain name="dedupe">
1207
+ <processor class="solr.processor.SignatureUpdateProcessorFactory">
1208
+ <bool name="enabled">true</bool>
1209
+ <str name="signatureField">id</str>
1210
+ <bool name="overwriteDupes">false</bool>
1211
+ <str name="fields">name,features,cat</str>
1212
+ <str name="signatureClass">solr.processor.Lookup3Signature</str>
1213
+ </processor>
1214
+ <processor class="solr.LogUpdateProcessorFactory" />
1215
+ <processor class="solr.RunUpdateProcessorFactory" />
1216
+ </updateRequestProcessorChain>
1217
+ -->
1218
+
1219
+ <!-- Language identification
1220
+
1221
+ This example update chain identifies the language of the incoming
1222
+ documents using the langid contrib. The detected language is
1223
+ written to field language_s. No field name mapping is done.
1224
+ The fields used for detection are text, title, subject and description,
1225
+ making this example suitable for detecting languages from full-text
1226
+ rich documents injected via ExtractingRequestHandler.
1227
+ See more about langId at http://wiki.apache.org/solr/LanguageDetection
1228
+ -->
1229
+ <!--
1230
+ <updateRequestProcessorChain name="langid">
1231
+ <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
1232
+ <str name="langid.fl">text,title,subject,description</str>
1233
+ <str name="langid.langField">language_s</str>
1234
+ <str name="langid.fallback">en</str>
1235
+ </processor>
1236
+ <processor class="solr.LogUpdateProcessorFactory" />
1237
+ <processor class="solr.RunUpdateProcessorFactory" />
1238
+ </updateRequestProcessorChain>
1239
+ -->
1240
+
1241
+ <!-- Script update processor
1242
+
1243
+ This example hooks in an update processor implemented using JavaScript.
1244
+
1245
+ See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor
1246
+ -->
1247
+ <!--
1248
+ <updateRequestProcessorChain name="script">
1249
+ <processor class="solr.StatelessScriptUpdateProcessorFactory">
1250
+ <str name="script">update-script.js</str>
1251
+ <lst name="params">
1252
+ <str name="config_param">example config parameter</str>
1253
+ </lst>
1254
+ </processor>
1255
+ <processor class="solr.RunUpdateProcessorFactory" />
1256
+ </updateRequestProcessorChain>
1257
+ -->
1258
+
1259
+ <!-- Response Writers
1260
+
1261
+ http://wiki.apache.org/solr/QueryResponseWriter
1262
+
1263
+ Request responses will be written using the writer specified by
1264
+ the 'wt' request parameter matching the name of a registered
1265
+ writer.
1266
+
1267
+ The "default" writer is the default and will be used if 'wt' is
1268
+ not specified in the request.
1269
+ -->
1270
+ <!-- The following response writers are implicitly configured unless
1271
+ overridden...
1272
+ -->
1273
+ <!--
1274
+ <queryResponseWriter name="xml"
1275
+ default="true"
1276
+ class="solr.XMLResponseWriter" />
1277
+ <queryResponseWriter name="json" class="solr.JSONResponseWriter"/>
1278
+ <queryResponseWriter name="python" class="solr.PythonResponseWriter"/>
1279
+ <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/>
1280
+ <queryResponseWriter name="php" class="solr.PHPResponseWriter"/>
1281
+ <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/>
1282
+ <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/>
1283
+ <queryResponseWriter name="schema.xml" class="solr.SchemaXmlResponseWriter"/>
1284
+ -->
1285
+
1286
+ <queryResponseWriter name="json" class="solr.JSONResponseWriter">
1287
+ <!-- For the purposes of the tutorial, JSON responses are written as
1288
+ plain text so that they are easy to read in *any* browser.
1289
+ If you expect a MIME type of "application/json" just remove this override.
1290
+ -->
1291
+ <str name="content-type">text/plain; charset=UTF-8</str>
1292
+ </queryResponseWriter>
1293
+
1294
+ <!--
1295
+ Custom response writers can be declared as needed...
1296
+ -->
1297
+ <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy">
1298
+ <str name="template.base.dir">${velocity.template.base.dir:}</str>
1299
+ <str name="solr.resource.loader.enabled">${velocity.solr.resource.loader.enabled:true}</str>
1300
+ <str name="params.resource.loader.enabled">${velocity.params.resource.loader.enabled:false}</str>
1301
+ </queryResponseWriter>
1302
+
1303
+ <!-- XSLT response writer transforms the XML output by any xslt file found
1304
+ in Solr's conf/xslt directory. Changes to xslt files are checked for
1305
+ every xsltCacheLifetimeSeconds.
1306
+ -->
1307
+ <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
1308
+ <int name="xsltCacheLifetimeSeconds">5</int>
1309
+ </queryResponseWriter>
1310
+
1311
+ <!-- Query Parsers
1312
+
1313
+ https://cwiki.apache.org/confluence/display/solr/Query+Syntax+and+Parsing
1314
+
1315
+ Multiple QParserPlugins can be registered by name, and then
1316
+ used in either the "defType" param for the QueryComponent (used
1317
+ by SearchHandler) or in LocalParams
1318
+ -->
1319
+ <!-- example of registering a query parser -->
1320
+ <!--
1321
+ <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/>
1322
+ -->
1323
+
1324
+ <!-- Function Parsers
1325
+
1326
+ http://wiki.apache.org/solr/FunctionQuery
1327
+
1328
+ Multiple ValueSourceParsers can be registered by name, and then
1329
+ used as function names when using the "func" QParser.
1330
+ -->
1331
+ <!-- example of registering a custom function parser -->
1332
+ <!--
1333
+ <valueSourceParser name="myfunc"
1334
+ class="com.mycompany.MyValueSourceParser" />
1335
+ -->
1336
+
1337
+
1338
+ <!-- Document Transformers
1339
+ http://wiki.apache.org/solr/DocTransformers
1340
+ -->
1341
+ <!--
1342
+ Could be something like:
1343
+ <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" >
1344
+ <int name="connection">jdbc://....</int>
1345
+ </transformer>
1346
+
1347
+ To add a constant value to all docs, use:
1348
+ <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
1349
+ <int name="value">5</int>
1350
+ </transformer>
1351
+
1352
+ If you want the user to still be able to change it with _value:something_ use this:
1353
+ <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
1354
+ <double name="defaultValue">5</double>
1355
+ </transformer>
1356
+
1357
+ If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The
1358
+ EditorialMarkerFactory will do exactly that:
1359
+ <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" />
1360
+ -->
1361
+ </config>