exhibits_solr_conf 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,529 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <schema name="Stanford Searchworks" version="1.5">
3
+ <uniqueKey>id</uniqueKey>
4
+
5
+ <fields>
6
+ <!-- needed by some Solr 4.0 functionality, such as the transaction log and partial document updates -->
7
+ <field name="_version_" type="long" indexed="true" stored="true"/>
8
+ <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" />
9
+
10
+ <field name="id" type="string_punct_stop" indexed="true" stored="true" required="true" />
11
+ <field name="created" type="date" indexed="true" stored="true" default="NOW/SECOND" />
12
+ <field name="last_updated" type="date" indexed="true" stored="true" default="NOW/SECOND" />
13
+ <!-- entire marc bib record -->
14
+ <field name="marcxml" type="string" indexed="false" stored="true" />
15
+ <!-- all_search: catch-all field for searchable text; stored for hit highlighting -->
16
+ <field name="all_search" type="text" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" multiValued="true" />
17
+ <field name="all_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
18
+ <field name="vern_all_search" type="text" indexed="true" stored="true" multiValued="true" />
19
+
20
+ <!-- Format Field: facet and display -->
21
+ <field name="format" type="string" indexed="true" stored="true" multiValued="true" />
22
+
23
+ <!-- Language Field: facet and display -->
24
+ <field name="language" type="string" indexed="true" stored="true" multiValued="true" />
25
+
26
+ <!-- Standard Number Fields -->
27
+ <!-- allow end users to search our index by ISBN and ISSN; more values than display isbn/issn.-->
28
+ <!-- isbn_search is type text to accommodate hyphens that could be entered by end users -->
29
+ <field name="isbn_search" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true" />
30
+ <!-- text for issn to accommodate hyphen present or not -->
31
+ <field name="issn_search" type="text" indexed="true" stored="true" multiValued="true" omitNorms="true"/>
32
+ <!-- display fields to allow external lookups (e.g. Google book search, xISBN, xISSN ...) -->
33
+ <field name="isbn_display" type="string" indexed="false" stored="true" multiValued="true" />
34
+ <field name="issn_display" type="string" indexed="false" stored="true" multiValued="true" />
35
+ <field name="lccn" type="string" indexed="false" stored="true" />
36
+ <field name="oclc" type="string_punct_stop" indexed="true" stored="true" multiValued="true" />
37
+
38
+ <!-- Title Search Fields -->
39
+ <field name="title_245a_exact_search" type="text_anchored" indexed="true" stored="true" />
40
+ <field name="title_245a_search" type="text" indexed="true" stored="true" />
41
+ <field name="vern_title_245a_search" type="textNoStem" indexed="true" stored="true" />
42
+ <field name="title_245a_unstem_search" type="textNoStem" indexed="true" stored="true" />
43
+ <field name="title_245_search" type="text" indexed="true" stored="true" />
44
+ <field name="vern_title_245_search" type="textNoStem" indexed="true" stored="true" />
45
+ <field name="title_245_unstem_search" type="textNoStem" indexed="true" stored="true" />
46
+ <field name="title_uniform_search" type="text" indexed="true" stored="true" />
47
+ <field name="vern_title_uniform_search" type="textNoStem" indexed="true" stored="true" />
48
+ <field name="title_uniform_unstem_search" type="textNoStem" indexed="true" stored="true" />
49
+ <field name="title_variant_search" type="text" indexed="true" stored="true" multiValued="true" />
50
+ <field name="vern_title_variant_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
51
+ <field name="title_variant_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
52
+ <field name="title_related_search" type="text" indexed="true" stored="true" multiValued="true" />
53
+ <field name="vern_title_related_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
54
+ <field name="title_related_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
55
+
56
+ <!-- Title Display fields -->
57
+ <field name="title_display" type="string" indexed="false" stored="true" />
58
+ <field name="vern_title_display" type="string" indexed="false" stored="true" />
59
+ <field name="title_245a_display" type="string" indexed="false" stored="true" />
60
+ <field name="vern_title_245a_display" type="string" indexed="false" stored="true" />
61
+ <field name="title_245c_display" type="string" indexed="false" stored="true" />
62
+ <field name="vern_title_245c_display" type="string" indexed="false" stored="true" />
63
+ <field name="title_full_display" type="string" indexed="false" stored="true" />
64
+ <field name="vern_title_full_display" type="string" indexed="false" stored="true" />
65
+ <field name="title_uniform_display" type="string" indexed="false" stored="true" />
66
+ <field name="vern_title_uniform_display" type="string" indexed="false" stored="true" />
67
+ <field name="title_variant_display" type="string" indexed="false" stored="true" multiValued="true" />
68
+
69
+ <field name="title_sort" type="alphaSort" indexed="true" stored="true" />
70
+
71
+ <!-- Series Search Fields -->
72
+ <field name="series_exact_search" type="text_anchored" indexed="true" stored="true" multiValued="true" />
73
+ <field name="series_search" type="text" indexed="true" stored="true" multiValued="true" />
74
+ <field name="vern_series_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
75
+ <field name="series_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
76
+
77
+ <!-- Author Title Search Fields -->
78
+ <field name="author_title_search" type="text" indexed="true" stored="true" multiValued="true" />
79
+
80
+ <!-- Author Search Fields -->
81
+ <field name="author_1xx_search" type="text" indexed="true" stored="true" />
82
+ <field name="vern_author_1xx_search" type="textNoStem" indexed="true" stored="true" />
83
+ <field name="author_1xx_unstem_search" type="textNoStem" indexed="true" stored="true" />
84
+ <field name="author_7xx_search" type="text" indexed="true" stored="true" multiValued="true" />
85
+ <field name="vern_author_7xx_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
86
+ <field name="author_7xx_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
87
+ <field name="author_8xx_search" type="text" indexed="true" stored="true" multiValued="true" />
88
+ <field name="vern_author_8xx_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
89
+ <field name="author_8xx_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
90
+
91
+ <!-- Author Facet Fields -->
92
+ <field name="author_person_facet" type="string" indexed="true" stored="true" multiValued="true" />
93
+ <field name="author_other_facet" type="string" indexed="true" stored="true" multiValued="true" />
94
+
95
+ <field name="author_sort" type="alphaSort" indexed="true" stored="true" />
96
+
97
+ <!-- Author Display Fields -->
98
+ <field name="author_person_display" type="string" indexed="false" stored="true" multiValued="true" />
99
+ <field name="vern_author_person_display" type="string" indexed="false" stored="true" />
100
+ <field name="author_person_full_display" type="string" indexed="false" stored="true" multiValued="true" />
101
+ <field name="vern_author_person_full_display" type="string" indexed="false" stored="true" />
102
+ <field name="author_corp_display" type="string" indexed="false" stored="true" multiValued="true"/>
103
+ <field name="vern_author_corp_display" type="string" indexed="false" stored="true" />
104
+ <field name="author_meeting_display" type="string" indexed="false" stored="true" multiValued="true"/>
105
+ <field name="vern_author_meeting_display" type="string" indexed="false" stored="true" />
106
+
107
+ <!-- Subject Search Fields -->
108
+ <field name="topic_search" type="text" indexed="true" stored="true" multiValued="true" />
109
+ <field name="vern_topic_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
110
+ <field name="topic_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
111
+ <field name="topic_subx_search" type="text" indexed="true" stored="true" multiValued="true" />
112
+ <field name="vern_topic_subx_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
113
+ <field name="topic_subx_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
114
+ <field name="geographic_search" type="text" indexed="true" stored="true" multiValued="true" />
115
+ <field name="vern_geographic_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
116
+ <field name="geographic_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
117
+ <field name="geographic_subz_search" type="text" indexed="true" stored="true" multiValued="true" />
118
+ <field name="vern_geographic_subz_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
119
+ <field name="geographic_subz_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
120
+ <field name="subject_other_search" type="text" indexed="true" stored="true" multiValued="true" />
121
+ <field name="vern_subject_other_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
122
+ <field name="subject_other_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
123
+ <field name="subject_other_subvy_search" type="text" indexed="true" stored="true" multiValued="true" />
124
+ <field name="vern_subject_other_subvy_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
125
+ <field name="subject_other_subvy_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
126
+ <field name="subject_all_search" type="text" indexed="true" stored="true" multiValued="true" />
127
+ <field name="vern_subject_all_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
128
+ <field name="subject_all_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
129
+
130
+ <field name="db_az_subject" type="string" indexed="true" stored="true" multiValued="true" />
131
+ <field name="db_az_subject_search" type="text" indexed="true" stored="true" multiValued="true" />
132
+
133
+ <!-- Subject Facet Fields -->
134
+ <field name="topic_facet" type="string" indexed="true" stored="true" multiValued="true" />
135
+ <field name="geographic_facet" type="string" indexed="true" stored="true" multiValued="true" />
136
+ <field name="era_facet" type="string" indexed="true" stored="true" multiValued="true" />
137
+
138
+ <!-- Subject Display Fields -->
139
+ <field name="topic_display" type="string" indexed="false" stored="true" multiValued="true" />
140
+ <field name="subject_other_display" type="string" indexed="false" stored="true" multiValued="true" />
141
+
142
+ <!-- Publishing Fields -->
143
+ <field name="pub_search" type="text" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
144
+ <field name="vern_pub_search" type="textNoStem" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
145
+ <field name="pub_country" type="text" indexed="true" stored="true" omitNorms="true"/>
146
+ <!-- TODO: should pub_date_search be a date or a text field? -->
147
+ <field name="pub_date_search" type="text" indexed="true" stored="true" omitNorms="true"/>
148
+ <field name="pub_date_sort" type="alphaSort" indexed="true" stored="true" />
149
+ <!-- Pub Date Facet Fields -->
150
+ <!-- pub_date was facet and display, now deprecated -->
151
+ <field name="pub_date" type="string" indexed="true" stored="true" />
152
+ <field name="pub_year_tisim" type="tint" indexed="true" stored="true" multiValued="true" />
153
+ <field name="pub_display" type="string" indexed="false" stored="true" multiValued="true"/>
154
+ <field name="pub_date_display" type="string" indexed="false" stored="true"/>
155
+ <field name="imprint_display" type="string" indexed="false" stored="true" multiValued="true"/>
156
+
157
+ <!-- URL Fields -->
158
+ <field name="url_fulltext" type="string" indexed="false" stored="true" multiValued="true"/>
159
+ <field name="url_suppl" type="string" indexed="false" stored="true" multiValued="true"/>
160
+ <!-- sfx urls should rarely occur more than once in a marc bib record -->
161
+ <field name="url_sfx" type="string" indexed="false" stored="true" multiValued="true" />
162
+ <field name="url_restricted" type="string" indexed="false" stored="true" multiValued="true" />
163
+
164
+ <!-- Physical Fields -->
165
+ <field name="physical" type="text" indexed="true" stored="true" multiValued="true" />
166
+ <field name="vern_physical" type="textNoStem" indexed="true" stored="true" multiValued="true" />
167
+
168
+ <!-- Table of Contents -->
169
+ <field name="toc_search" type="text" indexed="true" stored="true" multiValued="true" />
170
+ <field name="vern_toc_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
171
+ <field name="toc_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
172
+ <!-- Context -->
173
+ <field name="context_search" type="text" indexed="true" stored="true" multiValued="true" />
174
+ <field name="vern_context_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
175
+ <field name="context_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
176
+ <!-- Summary -->
177
+ <field name="summary_search" type="text" indexed="true" stored="true" multiValued="true" />
178
+ <field name="vern_summary_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
179
+ <field name="summary_unstem_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
180
+ <field name="summary_display" type="string" indexed="false" stored="true" multiValued="true" />
181
+ <!-- Award -->
182
+ <field name="award_search" type="text" indexed="true" stored="true" multiValued="true" />
183
+
184
+ <!-- Item Info Fields (derived from 999) -->
185
+ <!-- Call Number Fields -->
186
+ <field name="callnum_search" type="callnum_ws" indexed="true" stored="true" multiValued="true"/>
187
+
188
+ <!-- for nearby on shelf: term lookups to get next X alpha sorted terms -->
189
+ <field name="shelfkey" type="alphaSort" indexed="true" stored="true" multiValued="true"/>
190
+ <field name="reverse_shelfkey" type="alphaSort" indexed="true" stored="true" multiValued="true"/>
191
+
192
+ <field name="barcode_search" type="string_punct_stop" indexed="true" stored="true" multiValued="true" />
193
+ <field name="preferred_barcode" type="string" indexed="false" stored="true" />
194
+ <field name="access_facet" type="string" indexed="true" stored="true" multiValued="true" />
195
+ <field name="building_facet" type="string" indexed="true" stored="true" multiValued="true" />
196
+ <!-- barcode -|- lib -|- location -|- lopped_callnum -|- shelfkey -|- reverse_shelfkey -|- full_callnum -|- callnum_show_sort -->
197
+ <field name="item_display" type="string" indexed="false" stored="true" multiValued="true" />
198
+
199
+ <!-- lib -|- location -|- note -|- holdings summary -|- last received -->
200
+ <field name="mhld_display" type="string" indexed="false" stored="true" multiValued="true" />
201
+
202
+ <!-- Course Reserve Fields -->
203
+ <field name="crez_course_id_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
204
+ <field name="crez_course_name_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
205
+ <field name="crez_instructor_search" type="textNoStem" indexed="true" stored="true" multiValued="true" />
206
+ <!-- instructor, course facet field names are exposed to end users in SW -->
207
+ <field name="instructor" type="string" indexed="true" stored="true" multiValued="true" />
208
+ <field name="course" type="string" indexed="true" stored="true" multiValued="true" />
209
+ <!-- crez_course_info is a facet and display field at this time (2012-03-21) -->
210
+ <field name="crez_course_info" type="string" indexed="true" stored="true" multiValued="true" />
211
+ <!-- the next two facets are not used in SW at this time -->
212
+ <field name="crez_dept_facet" type="string" indexed="true" stored="true" multiValued="true" />
213
+ <field name="crez_desk_facet" type="string" indexed="true" stored="true" multiValued="true" />
214
+
215
+ <!-- *************** additional fields for DOR objects ****************** -->
216
+ <field name="druid" type="string_punct_stop" indexed="true" stored="true" />
217
+ <field name="modsxml" type="string" indexed="false" stored="true" />
218
+ <!-- collection (facet and display): "sirsi" or, for DOR items, the id of their parent coll -->
219
+ <field name="collection" type="string" indexed="true" stored="true" multiValued="true" omitNorms="true" />
220
+ <!-- collection_with_title: easy way to indicate item's parent coll title in UI (may be deprecated in future) -->
221
+ <field name="collection_with_title" type="string" indexed="false" stored="true" multiValued="true" />
222
+ <!-- collection_search: allow searching within collections for aggr. colls -->
223
+ <field name="collection_search" type="string_punct_stop" indexed="true" stored="true" multiValued="true" omitNorms="true" />
224
+ <!-- display_type: used by UI code, e.g. 'file' or 'image' -->
225
+ <field name="display_type" type="string" indexed="true" stored="true" multiValued="true" omitNorms="true" />
226
+ <!-- used to determine when something is a digital collection -->
227
+ <field name="collection_type" type="string" indexed="true" stored="true" multiValued="true" />
228
+ <!-- file_id: ids of files (including images) in the digital stacks -->
229
+ <field name="file_id" type="string" indexed="false" stored="true" multiValued="true"/>
230
+
231
+ <!-- *************** dynamic fields ****************** -->
232
+ <!--
233
+ <dynamicField name="*_unstem_search" type="textNoStem" stored="true" indexed="true" multiValued="true" />
234
+ <dynamicField name="*_search" type="text" stored="true" indexed="true" multiValued="true" />
235
+ <dynamicField name="*_facet" type="string" stored="true" indexed="true" multiValued="true" />
236
+ <dynamicField name="*_display" type="string" stored="true" indexed="false" multiValued="true"/>
237
+ -->
238
+ <dynamicField name="*_si" type="string" stored="true" indexed="true" omitNorms="true" /> <!-- suffix letters appear to encode the type first, then s=stored, i=indexed, m=multiValued. NOTE(review): *_si is declared stored although its suffix has no stored letter; confirm intent -->
239
+ <dynamicField name="*_sim" type="string" stored="true" indexed="true" multiValued="true" omitNorms="true" />
240
+ <dynamicField name="*_ss" type="string" stored="true" indexed="false" omitNorms="true" /> <!-- stored only: retrievable but not searchable -->
241
+ <dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true" omitNorms="true" />
242
+ <dynamicField name="*_ssi" type="string" stored="true" indexed="true" omitNorms="true" />
243
+ <dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true" omitNorms="true" />
244
+ <dynamicField name="*_isi" type="tint" stored="true" indexed="true" omitNorms="true" /> <!-- integer values, using the range-accelerated tint type -->
245
+ <dynamicField name="*_isim" type="tint" stored="true" indexed="true" multiValued="true" omitNorms="true" />
246
+ <dynamicField name="*_sort" type="alphaSort" stored="true" indexed="true"/> <!-- single-valued, as needed for sorting -->
247
+ <dynamicField name="*_xml" type="string" stored="true" indexed="false" omitNorms="true" /> <!-- stored only -->
248
+ <dynamicField name="cjk_*" type="text_cjk" stored="true" indexed="true" multiValued="true" /> <!-- prefix pattern: destination of the CJK copyField rules in this schema -->
249
+ <dynamicField name="*_hsim" type="string_hierarch" stored="true" indexed="true" multiValued="true" />
250
+ <dynamicField name="*_tesim" type="text" stored="true" indexed="true" multiValued="true" omitNorms="true" /> <!-- stemmed text; also the source of the wildcard copyFields into all_search and all_unstem_search -->
251
+ <dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="true" omitNorms="true" /> <!-- NOTE(review): multiValued="true" although the suffix has no m; confirm whether a single-valued boolean was intended before changing, since clients may rely on the current response shape -->
252
+ <dynamicField name="*_ng" type="text_en_ng" stored="false" indexed="true" multiValued="true"/> <!-- indexed only; filled by the id_ng and full_title_ng copyFields -->
253
+ <dynamicField name="*_pt" type="location" stored="true" indexed="true"/>
254
+ <dynamicField name="*_bbox" type="location_rpt" stored="true" indexed="true" multiValued="true"/>
255
+ </fields>
256
+
257
+ <!-- copy fields -->
258
+ <copyField source="collection" dest="collection_search" />
259
+ <copyField source="pub_date" dest="pub_date_search" />
260
+ <copyField source="db_az_subject" dest="db_az_subject_search" />
261
+
262
+ <!-- unstemmed and anchored search fields: title -->
263
+ <copyField source="title_245a_search" dest="title_245a_exact_search" />
264
+ <copyField source="title_245a_search" dest="title_245a_unstem_search" />
265
+ <copyField source="title_245_search" dest="title_245_unstem_search" />
266
+ <copyField source="title_uniform_search" dest="title_uniform_unstem_search" />
267
+ <copyField source="title_variant_search" dest="title_variant_unstem_search" />
268
+ <copyField source="title_related_search" dest="title_related_unstem_search" />
269
+ <!-- unstemmed search fields: author -->
270
+ <copyField source="author_1xx_search" dest="author_1xx_unstem_search" />
271
+ <copyField source="author_7xx_search" dest="author_7xx_unstem_search" />
272
+ <copyField source="author_8xx_search" dest="author_8xx_unstem_search" />
273
+ <!-- unstemmed search fields: subject -->
274
+ <copyField source="topic_search" dest="topic_unstem_search" />
275
+ <copyField source="topic_subx_search" dest="topic_subx_unstem_search" />
276
+ <copyField source="geographic_search" dest="geographic_unstem_search" />
277
+ <copyField source="geographic_subz_search" dest="geographic_subz_unstem_search" />
278
+ <copyField source="subject_other_search" dest="subject_other_unstem_search" />
279
+ <copyField source="subject_other_subvy_search" dest="subject_other_subvy_unstem_search" />
280
+ <copyField source="subject_all_search" dest="subject_all_unstem_search" />
281
+ <!-- unstemmed search fields: toc/summary -->
282
+ <copyField source="toc_search" dest="toc_unstem_search" />
283
+ <copyField source="context_search" dest="context_unstem_search" />
284
+ <copyField source="summary_search" dest="summary_unstem_search" />
285
+ <!-- other unstemmed search fields -->
286
+ <copyField source="series_search" dest="series_unstem_search" />
287
+ <copyField source="all_search" dest="all_unstem_search" />
288
+
289
+ <!-- course reserve fields: fill the string facet fields (instructor, course) from the values sent to the analyzed crez search fields -->
290
+ <copyField source="crez_instructor_search" dest="instructor" />
291
+ <copyField source="crez_course_id_search" dest="course" />
292
+
293
+ <!-- image fields -->
294
+ <copyField source="topic_search" dest="topic_display" />
295
+ <copyField source="subject_other_search" dest="subject_other_display" />
296
+ <copyField source="title_variant_search" dest="title_variant_display" />
297
+ <copyField source="summary_search" dest="summary_display" />
298
+ <copyField source="pub_search" dest="pub_display" />
299
+
300
+ <!-- CJK fields -->
301
+ <!-- both bigrams and unigrams in same field -->
302
+ <copyField source="vern_title_245a_search" dest="cjk_title_245a_search" />
303
+ <copyField source="vern_title_245_search" dest="cjk_title_245_search" />
304
+ <copyField source="vern_title_uniform_search" dest="cjk_title_uniform_search" />
305
+ <copyField source="vern_title_variant_search" dest="cjk_title_variant_search" />
306
+ <copyField source="vern_title_related_search" dest="cjk_title_related_search" />
307
+ <copyField source="vern_series_search" dest="cjk_series_search" />
308
+ <copyField source="vern_author_1xx_search" dest="cjk_author_1xx_search" />
309
+ <copyField source="vern_author_7xx_search" dest="cjk_author_7xx_search" />
310
+ <copyField source="vern_author_8xx_search" dest="cjk_author_8xx_search" />
311
+ <copyField source="vern_topic_search" dest="cjk_topic_search" />
312
+ <copyField source="vern_topic_subx_search" dest="cjk_topic_subx_search" />
313
+ <copyField source="vern_geographic_search" dest="cjk_geographic_search" />
314
+ <copyField source="vern_geographic_subz_search" dest="cjk_geographic_subz_search" />
315
+ <copyField source="vern_subject_other_search" dest="cjk_subject_other_search" />
316
+ <copyField source="vern_subject_other_subvy_search" dest="cjk_subject_other_subvy_search" />
317
+ <copyField source="vern_subject_all_search" dest="cjk_subject_all_search" />
318
+ <copyField source="vern_pub_search" dest="cjk_pub_search" />
319
+ <copyField source="vern_physical" dest="cjk_physical_search" />
320
+ <copyField source="toc_search" dest="cjk_toc_search" /> <!-- we find CJK text in regular 505s -->
321
+ <copyField source="vern_toc_search" dest="cjk_toc_search" />
322
+ <copyField source="vern_context_search" dest="cjk_context_search" />
323
+ <copyField source="summary_search" dest="cjk_summary_search" /> <!-- we find CJK text in regular 520s -->
324
+ <copyField source="vern_summary_search" dest="cjk_summary_search" />
325
+ <copyField source="vern_all_search" dest="cjk_all_search" />
326
+
327
+ <!-- Spotlight fields -->
328
+ <copyField source="id" dest="id_ng" maxChars="3000"/> <!-- id_ng and full_title_ng are matched by the *_ng dynamic field (type text_en_ng, indexed but not stored) -->
329
+ <copyField source="title_full_display" dest="full_title_ng" maxChars="3000"/> <!-- maxChars caps how many characters of the source value are copied -->
330
+ <copyField source="*_tesim" dest="all_search" /> <!-- every *_tesim dynamic field feeds the catch-all search field -->
331
+ <copyField source="*_tesim" dest="all_unstem_search" /> <!-- copied explicitly as well: copyField rules do not chain, so the all_search to all_unstem_search rule would not carry these values -->
332
+
333
+ <types>
334
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" /> <!-- value is indexed verbatim as one un-analyzed token; documents without a value sort last -->
335
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" /> <!-- element name spelled fieldType, consistent with the rest of this group -->
336
+ <fieldType name="binary" class="solr.BinaryField"/>
337
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/> <!-- precisionStep="0": one term per value, no extra range-acceleration terms -->
338
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
339
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
340
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
341
+ <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
342
+ <!-- t fields are for accelerating range queries: a non-zero precisionStep indexes additional lower-precision terms per value, trading index size for range-query speed -->
343
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="4" positionIncrementGap="0"/>
344
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
345
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
346
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
347
+ <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
348
+ <fieldType name="random" class="solr.RandomSortField" indexed="true" /> <!-- for returning results in pseudo-random order -->
349
+
350
+ <!-- Analyzed Text, general case: whitespace-separated tokens are ICU-folded, synonym-mapped, split and catenated on word delimiters, then English-stemmed -->
351
+ <fieldType name="text" class="solr.TextField" positionIncrementGap="10000" autoGeneratePhraseQueries="true"> <!-- the 10000-position gap between values of a multiValued field keeps phrase and proximity matches from spanning two values -->
352
+ <analyzer> <!-- no type attribute: the same chain is used at index time and at query time -->
353
+ <tokenizer class="solr.WhitespaceTokenizerFactory" />
354
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
355
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <!-- expand="false": each synonym group is reduced to a single term rather than expanded to all members -->
356
+ <filter class="solr.WordDelimiterFilterFactory"
357
+ splitOnCaseChange="1" generateWordParts="1" catenateWords="1"
358
+ splitOnNumerics="0" generateNumberParts="1" catenateNumbers="1"
359
+ catenateAll="0" preserveOriginal="0" stemEnglishPossessive="1" /> <!-- emits the parts of a delimited token and their joined form; splitOnNumerics="0" keeps letter/digit runs together; the original token is not kept -->
360
+ <filter class="solr.SnowballPorterFilterFactory" language="English"/> <!-- English stemming; the only step that textNoStem omits besides duplicate removal -->
361
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory" /> <!-- drops a token when an identical token already exists at the same position -->
362
+ </analyzer>
363
+ </fieldType>
364
+
365
+ <!-- Analyzed Text, no Stemming: same chain as the text type up to the word delimiter filter, with no stemmer and no duplicate removal -->
366
+ <fieldType name="textNoStem" class="solr.TextField" positionIncrementGap="10000" autoGeneratePhraseQueries="true"> <!-- the 10000-position gap between values of a multiValued field keeps phrase and proximity matches from spanning two values -->
367
+ <analyzer> <!-- no type attribute: the same chain is used at index time and at query time -->
368
+ <tokenizer class="solr.WhitespaceTokenizerFactory" />
369
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
370
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <!-- expand="false": each synonym group is reduced to a single term rather than expanded to all members -->
371
+ <filter class="solr.WordDelimiterFilterFactory"
372
+ splitOnCaseChange="1" generateWordParts="1" catenateWords="1"
373
+ splitOnNumerics="0" generateNumberParts="1" catenateNumbers="1"
374
+ catenateAll="0" preserveOriginal="0" stemEnglishPossessive="0" /> <!-- stemEnglishPossessive="0": a trailing possessive 's is left in place, unlike in the text type -->
375
+ </analyzer>
376
+ </fieldType>
377
+
378
+ <!-- Left and Right Anchored Analyzed Text, no Stemming: the whole field value is wrapped in literal start and end markers so a query can require a match at the beginning and/or end of the value -->
379
+ <fieldType name="text_anchored" class="solr.TextField" positionIncrementGap="10000" autoGeneratePhraseQueries="true">
380
+ <analyzer> <!-- no type attribute: the same chain is used at index time and at query time -->
381
+ <!-- put beginning and ending anchors on field value, removing trailing chars: leading whitespace and trailing whitespace or punctuation are stripped, then aaaaaa is glued to the front and zzzzzz to the end -->
382
+ <!-- watch out for query time whitespace separated chars that will be processed as their own token stream, e.g. in 'felines : warm and fuzzy' -->
383
+ <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^\s*(.*[\S&amp;&amp;[^\.\,:;/=&lt;&gt;\(\)\[\]\&amp;\|]])[\s\.\,:;/=&lt;&gt;\(\)\[\]\&amp;\|]*$" replacement="aaaaaa$1zzzzzz"/> <!-- group 1 must end on a non-space, non-punctuation character; a value with no such character does not match and is left without anchors -->
384
+ <tokenizer class="solr.WhitespaceTokenizerFactory" />
385
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
386
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
387
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms_both_anchors.txt" ignoreCase="true" expand="true"/> <!-- NOTE(review): the three anchor synonym files are not visible here; presumably they repeat the general synonyms for tokens that carry an anchor prefix and/or suffix; confirm against the files -->
388
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms_left_anchor.txt" ignoreCase="true" expand="true"/>
389
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms_right_anchor.txt" ignoreCase="true" expand="true"/>
390
+ <filter class="solr.WordDelimiterFilterFactory"
391
+ splitOnCaseChange="1" generateWordParts="1" catenateWords="1"
392
+ splitOnNumerics="0" generateNumberParts="1" catenateNumbers="1"
393
+ catenateAll="0" preserveOriginal="0" stemEnglishPossessive="1" /> <!-- same settings as the text type; no stemmer follows -->
394
+ </analyzer>
395
+ </fieldType>
396
+
397
+ <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="10000" autoGeneratePhraseQueries="false"> <!-- CJK text: ICU tokenization, width and script normalization, then overlapping bigrams plus unigrams; phrase queries are not auto-generated, unlike in the other text types -->
398
+ <analyzer> <!-- no type attribute: the same chain is used at index time and at query time -->
399
+ <!-- remove spaces among hangul and han chars if there is at least one hangul char -->
400
+ <!-- a korean char guaranteed at the start of the pattern: pattern="(\p{Hangul}\p{Han}*)\s+(?=[\p{Hangul}\p{Han}])" -->
401
+ <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="([\p{InHangul_Jamo}\p{InHangul_Compatibility_Jamo}\p{InHangul_Syllables}][\p{InBopomofo}\p{InBopomofo_Extended}\p{InCJK_Compatibility}\p{InCJK_Compatibility_Forms}\p{InCJK_Compatibility_Ideographs}\p{InCJK_Compatibility_Ideographs_Supplement}\p{InCJK_Radicals_Supplement}\p{InCJK_Symbols_And_Punctuation}\p{InCJK_Unified_Ideographs}\p{InCJK_Unified_Ideographs_Extension_A}\p{InCJK_Unified_Ideographs_Extension_B}\p{InKangxi_Radicals}\p{InHalfwidth_And_Fullwidth_Forms}\p{InIdeographic_Description_Characters}]*)\s+(?=[\p{InHangul_Jamo}\p{InHangul_Compatibility_Jamo}\p{InHangul_Syllables}\p{InBopomofo}\p{InBopomofo_Extended}\p{InCJK_Compatibility}\p{InCJK_Compatibility_Forms}\p{InCJK_Compatibility_Ideographs}\p{InCJK_Compatibility_Ideographs_Supplement}\p{InCJK_Radicals_Supplement}\p{InCJK_Symbols_And_Punctuation}\p{InCJK_Unified_Ideographs}\p{InCJK_Unified_Ideographs_Extension_A}\p{InCJK_Unified_Ideographs_Extension_B}\p{InKangxi_Radicals}\p{InHalfwidth_And_Fullwidth_Forms}\p{InIdeographic_Description_Characters}])" replacement="$1"/>
402
+ <!-- a korean char guaranteed at the end of the pattern: pattern="([\p{Hangul}\p{Han}])\s+(?=[\p{Han}\s]*\p{Hangul})" -->
403
+ <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="([\p{InHangul_Jamo}\p{InHangul_Compatibility_Jamo}\p{InHangul_Syllables}\p{InBopomofo}\p{InBopomofo_Extended}\p{InCJK_Compatibility}\p{InCJK_Compatibility_Forms}\p{InCJK_Compatibility_Ideographs}\p{InCJK_Compatibility_Ideographs_Supplement}\p{InCJK_Radicals_Supplement}\p{InCJK_Symbols_And_Punctuation}\p{InCJK_Unified_Ideographs}\p{InCJK_Unified_Ideographs_Extension_A}\p{InCJK_Unified_Ideographs_Extension_B}\p{InKangxi_Radicals}\p{InHalfwidth_And_Fullwidth_Forms}\p{InIdeographic_Description_Characters}])\s+(?=[\p{InBopomofo}\p{InBopomofo_Extended}\p{InCJK_Compatibility}\p{InCJK_Compatibility_Forms}\p{InCJK_Compatibility_Ideographs}\p{InCJK_Compatibility_Ideographs_Supplement}\p{InCJK_Radicals_Supplement}\p{InCJK_Symbols_And_Punctuation}\p{InCJK_Unified_Ideographs}\p{InCJK_Unified_Ideographs_Extension_A}\p{InCJK_Unified_Ideographs_Extension_B}\p{InKangxi_Radicals}\p{InHalfwidth_And_Fullwidth_Forms}\p{InIdeographic_Description_Characters}\s]*[\p{InHangul_Jamo}\p{InHangul_Compatibility_Jamo}\p{InHangul_Syllables}])" replacement="$1"/>
404
+ <tokenizer class="solr.ICUTokenizerFactory" />
405
+ <filter class="solr.CJKWidthFilterFactory"/> <!-- folds fullwidth ASCII variants to basic Latin and halfwidth Katakana variants to their standard forms -->
406
+ <!--<filter class="edu.stanford.lucene.analysis.CJKFoldingFilterFactory"/>-->
407
+ <filter class="solr.ICUTransformFilterFactory" id="Traditional-Simplified"/> <!-- Traditional Han is converted to Simplified so either form matches -->
408
+ <filter class="solr.ICUTransformFilterFactory" id="Katakana-Hiragana"/> <!-- Katakana is converted to Hiragana so either form matches -->
409
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
410
+ <filter class="solr.CJKBigramFilterFactory" han="true" hiragana="true" katakana="true" hangul="true" outputUnigrams="true" /> <!-- bigrams for all four scripts; outputUnigrams="true" also keeps the single-character tokens -->
411
+ </analyzer>
412
+ </fieldType>
413
+
414
+ <!-- sort key type: the entire value is kept as one token (punctuation included), ICU-folded, then trimmed -->
415
+ <fieldtype class="solr.TextField" name="alphaSort" omitNorms="true" sortMissingLast="true">
416
+ <analyzer>
417
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
418
+ <filter class="solr.ICUFoldingFilterFactory"/>
419
+ <filter class="solr.TrimFilterFactory"/>
420
+ </analyzer>
421
+ </fieldtype>
422
+
423
+ <!-- whole value is a single token; case is preserved (no lowercasing), only NFKC composition is applied -->
423
+ <!-- the Lucene query parser splits the query on whitespace *before* per-field analysis for dismax, -->
424
+ <!-- so a punctuation term typed with spaces around it (& : ;) would be searched for in this field; -->
425
+ <!-- such terms are stopwords at query time so that other fields can still produce results -->
426
+ <fieldType class="solr.TextField" name="string_punct_stop" omitNorms="true">
427
+ <analyzer type="index">
428
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
429
+ <filter class="solr.ICUNormalizer2FilterFactory" mode="compose" name="nfkc"/>
430
+ </analyzer>
431
+ <analyzer type="query">
432
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
433
+ <filter class="solr.ICUNormalizer2FilterFactory" mode="compose" name="nfkc"/>
434
+ <!-- query side only: drop punctuation tokens (works around the Lucene query parser behaviour described above) -->
435
+ <filter class="solr.StopFilterFactory" words="stopwords_punctuation.txt" ignoreCase="true"/>
436
+ </analyzer>
437
+ </fieldType>
439
+
440
+ <!-- field designed for LC call number searching -->
+ <!-- The char filters run in the order listed and each one works on the output of the previous one -->
+ <!-- (e.g. the second index pattern expects the " ." that the first one inserts), so do not reorder them. -->
+ <!-- Example, index side: 'QA 76.73 .J38' is rewritten to 'yyyyQA76.73 .J 38', then split on whitespace and lowercased. -->
441
+ <fieldType name="callnum_ws" class="solr.TextField" omitNorms="true" positionIncrementGap="100" autoGeneratePhraseQueries="true">
442
+ <analyzer type="index">
443
+ <!-- LC: no space between class letters and digits; normalize to " ." before first cutter, no leading space -->
444
+ <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^ *([A-Za-z]{1,3}) ?(\d{1,4}(\.\d+)?) ?\.?([A-Za-z]\d+)" replacement="$1$2 .$4"/>
445
+ <!-- LC: add space between first cutter letter and its digits to allow matching on first cutter letter only -->
446
+ <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^([A-Za-z]{1,3}\d{1,4}(\.\d+)? \.([A-Za-z]))(\d+)" replacement="$1 $4"/>
447
+ <!-- prepend yyyy to string so searches can be left anchored: only the first token of a value carries the yyyy prefix -->
448
+ <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^\s*(\S{1})" replacement="yyyy$1"/>
449
+ <tokenizer class="solr.WhitespaceTokenizerFactory" />
450
+ <filter class="solr.LowerCaseFilterFactory" />
451
+ </analyzer>
452
+ <!-- Note that the query string could be a partial call number, so we can't combine all patterns: -->
+ <!-- the query side splits the normalization into smaller steps, each of which also matches an incomplete call number -->
453
+ <analyzer type="query">
454
+ <!-- LC: no space between class letters and digits, no leading space -->
455
+ <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^ *([A-Za-z]{1,3}) *(\d{1,4})" replacement="$1$2" />
456
+ <!-- LC: normalize to " ." before first cutter or first letter of cutter (could be preceded by " ." "." " " or nothing) -->
457
+ <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^([A-Za-z]{1,3}\d{1,4}(\.\d+)?) *\.?([A-Za-z](\d+)?)" replacement="$1 .$3" />
458
+ <!-- LC: add space between first cutter letter and its digits to allow matching on first cutter letter only -->
+ <!-- (matches any single non-space character after the cutter letter, so a partially typed cutter is split too) -->
459
+ <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^([A-Za-z]{1,3}\d{1,4}(\.\d+)? \.([A-Za-z]))([^ ])" replacement="$1 $4"/>
460
+ <!-- prepend yyyy to string so searches can be left anchored (same prefix as on the index side) -->
461
+ <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^\s*(\S{1})" replacement="yyyy$1"/>
462
+ <tokenizer class="solr.WhitespaceTokenizerFactory" />
463
+ <filter class="solr.LowerCaseFilterFactory" />
464
+ </analyzer>
465
+ </fieldType>
466
+
467
+ <!-- for hierarchical facets; levels are separated by "|" (the tokenizer delimiter below).
467
+ The index side emits one token per ancestor path and the query side keeps the value whole, so a query for
468
+ Books|NonFic will match documents indexed with values like Books|NonFic, Books|NonFic|Law, Books|NonFic|Science|Physics, etc.
469
+ But it will not match documents indexed with values like Books, or Books|Fic...
470
+ -->
471
+ <fieldType class="solr.TextField" name="string_hierarch" positionIncrementGap="100">
472
+ <analyzer type="index">
473
+ <tokenizer delimiter="|" class="solr.PathHierarchyTokenizerFactory"/>
474
+ </analyzer>
475
+ <analyzer type="query">
476
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
477
+ </analyzer>
478
+ </fieldType>
480
+
481
+
482
+ <!-- A text field with defaults appropriate for English and NGrams: -->
+ <!-- edge n-grams (3 to 15 characters) are generated at index time only, so a query term can match the start of an indexed word -->
482
+ <fieldType name="text_en_ng" class="solr.TextField" positionIncrementGap="100">
483
+ <analyzer type="index">
484
+ <tokenizer class="solr.ICUTokenizerFactory"/>
485
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
486
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
487
+ <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
488
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
489
+ <filter class="solr.TrimFilterFactory"/>
490
+ <filter class="solr.EdgeNGramFilterFactory" minGramSize="3" maxGramSize="15" />
491
+ </analyzer>
492
+
493
+ <!-- query side: same chain as the index side but WITHOUT EdgeNGram, so the query term is compared whole against the indexed grams. -->
+ <!-- This analyzer must be type="query"; a second type="index" analyzer leaves the type with no query analyzer. -->
494
+ <analyzer type="query">
495
+ <tokenizer class="solr.ICUTokenizerFactory"/>
496
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
497
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
498
+ <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
499
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
500
+ <filter class="solr.TrimFilterFactory"/>
501
+ </analyzer>
502
+ </fieldType>
503
+
504
+ <!-- This point type indexes the coordinates as separate fields (subFields)
505
+ If subFieldType is defined, it references a type, and a dynamic field
506
+ definition is created matching *___<typename>. Alternately, if
507
+ subFieldSuffix is defined, that is used to create the subFields.
508
+ Example: if subFieldType="double", then the coordinates would be
509
+ indexed in fields myloc_0___double,myloc_1___double.
510
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
511
+ in fields myloc_0_d,myloc_1_d
512
+ The subFields are an implementation detail of the fieldType, and end
513
+ users normally should not need to know about them.
514
+ -->
515
+ <fieldType class="solr.PointType" name="point" subFieldSuffix="_d" dimension="2"/>
516
+
517
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
518
+ <fieldType class="solr.LatLonType" name="location" subFieldSuffix="_coordinate"/>
519
+
520
+ <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
521
+ For more information about this and other Spatial fields new to Solr 4, see:
522
+ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
523
+ -->
524
+ <fieldType class="solr.SpatialRecursivePrefixTreeFieldType" name="location_rpt" geo="true" units="degrees" distErrPct="0.025" maxDistErr="0.000009"/>
526
+
527
+ </types>
528
+
529
+ </schema>