blacklight_oai_provider 0.1.0 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.rubocop.yml +39 -0
  4. data/.rubocop_todo.yml +130 -0
  5. data/.solr_wrapper +8 -0
  6. data/.travis.yml +16 -0
  7. data/Gemfile +34 -2
  8. data/README.md +131 -0
  9. data/Rakefile +25 -0
  10. data/VERSION +1 -1
  11. data/app/assets/{xsl → stylesheets/blacklight_oai_provider}/oai2.xsl +12 -5
  12. data/app/controllers/concerns/blacklight_oai_provider/controller.rb +38 -0
  13. data/app/models/concerns/blacklight_oai_provider/solr_document.rb +23 -0
  14. data/blacklight_oai_provider.gemspec +17 -10
  15. data/config/initializers/oai_patches.rb +20 -0
  16. data/lib/blacklight_oai_provider/engine.rb +15 -2
  17. data/lib/blacklight_oai_provider/exceptions.rb +9 -0
  18. data/lib/blacklight_oai_provider/resumption_token.rb +51 -0
  19. data/lib/blacklight_oai_provider/routes.rb +15 -0
  20. data/lib/blacklight_oai_provider/solr_document_provider.rb +2 -2
  21. data/lib/blacklight_oai_provider/solr_document_wrapper.rb +55 -27
  22. data/lib/blacklight_oai_provider/version.rb +1 -2
  23. data/lib/blacklight_oai_provider.rb +16 -14
  24. data/lib/generators/blacklight_oai_provider/install_generator.rb +27 -0
  25. data/lib/railties/blacklight_oai_provider.rake +14 -0
  26. data/solr/conf/_rest_managed.json +3 -0
  27. data/solr/conf/admin-extra.html +31 -0
  28. data/solr/conf/elevate.xml +36 -0
  29. data/solr/conf/mapping-ISOLatin1Accent.txt +246 -0
  30. data/solr/conf/protwords.txt +21 -0
  31. data/solr/conf/schema.xml +629 -0
  32. data/solr/conf/scripts.conf +24 -0
  33. data/solr/conf/solrconfig.xml +401 -0
  34. data/solr/conf/spellings.txt +2 -0
  35. data/solr/conf/stopwords.txt +58 -0
  36. data/solr/conf/stopwords_en.txt +58 -0
  37. data/solr/conf/synonyms.txt +31 -0
  38. data/solr/conf/xslt/example.xsl +132 -0
  39. data/solr/conf/xslt/example_atom.xsl +67 -0
  40. data/solr/conf/xslt/example_rss.xsl +66 -0
  41. data/solr/conf/xslt/luke.xsl +337 -0
  42. data/solr/sample_solr_documents.yml +2722 -0
  43. data/spec/controllers/catalog_controller_spec.rb +39 -0
  44. data/spec/features/html_rendering_spec.rb +24 -0
  45. data/spec/models/solr_document_spec.rb +43 -0
  46. data/spec/requests/get_record_spec.rb +47 -0
  47. data/spec/requests/identify_spec.rb +53 -0
  48. data/spec/requests/list_identifiers_spec.rb +80 -0
  49. data/spec/requests/list_metadata_formats_spec.rb +43 -0
  50. data/spec/requests/list_records_spec.rb +126 -0
  51. data/spec/spec_helper.rb +15 -38
  52. data/spec/test_app_templates/config/solr.yml +9 -0
  53. data/spec/test_app_templates/lib/generators/test_app_generator.rb +86 -0
  54. metadata +146 -87
  55. data/Gemfile.lock +0 -187
  56. data/README.rdoc +0 -74
  57. data/config/routes.rb +0 -5
  58. data/lib/blacklight_oai_provider/README.rdoc +0 -0
  59. data/lib/blacklight_oai_provider/controller_extension.rb +0 -29
  60. data/lib/blacklight_oai_provider/route_sets.rb +0 -13
  61. data/lib/blacklight_oai_provider/solr_document_extension.rb +0 -10
  62. data/lib/generators/blacklight_oai_provider/blacklight_oai_provider_generator.rb +0 -27
  63. data/spec/acceptance/blacklight_oai_provider_spec.rb +0 -49
  64. data/spec/integration/blacklight_stub_spec.rb +0 -10
  65. data/spec/internal/app/controllers/application_controller.rb +0 -4
  66. data/spec/internal/app/models/solr_document.rb +0 -3
  67. data/spec/internal/config/database.yml +0 -3
  68. data/spec/internal/config/routes.rb +0 -6
  69. data/spec/internal/config/solr.yml +0 -18
  70. data/spec/internal/db/combustion_test.sqlite +0 -0
  71. data/spec/internal/db/schema.rb +0 -53
  72. data/spec/internal/log/.gitignore +0 -1
  73. data/spec/internal/public/favicon.ico +0 -0
  74. data/spec/lib/solr_document_extension_spec.rb +0 -6
  75. data/spec/vcr_cassettes/solr.yml +0 -113
@@ -0,0 +1,629 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ This is the Solr schema file. This file should be named "schema.xml" and
21
+ should be in the conf directory under the solr home
22
+ (i.e. ./solr/conf/schema.xml by default)
23
+ or located where the classloader for the Solr webapp can find it.
24
+
25
+ This example schema is the recommended starting point for users.
26
+ It should be kept correct and concise, usable out-of-the-box.
27
+
28
+ For more information on how to customize this file, please see
29
+ http://wiki.apache.org/solr/SchemaXml
30
+
31
+ PERFORMANCE NOTE: this schema includes many optional features and should not
32
+ be used for benchmarking. To improve performance one could
33
+ - set stored="false" for all fields possible (esp large fields) when you
34
+ only need to search on the field but don't need to return the original
35
+ value.
36
+ - set indexed="false" if you don't need to search on the field, but only
37
+ return the field as a result of searching on other indexed fields.
38
+ - remove all unneeded copyField statements
39
+ - for best index size and searching performance, set "indexed" to false
40
+ for all general text fields, use copyField to copy them to the
41
+ catchall "text" field, and use that for searching.
42
+ - For maximum indexing performance, use the StreamingUpdateSolrServer
43
+ java client.
44
+ - Remember to run the JVM in server mode, and use a higher logging level
45
+ that avoids logging every request
46
+ -->
47
+
48
+ <schema name="Blacklight Demo Index" version="1.5">
49
+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
50
+ Applications should change this to reflect the nature of the search collection.
51
+ version="1.5" is Solr's version number for the schema syntax and semantics. It should
52
+ not normally be changed by applications.
53
+ 1.0: multiValued attribute did not exist, all fields are multiValued by nature
54
+ 1.1: multiValued attribute introduced, false by default
55
+ 1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
56
+ 1.3: removed optional field compress feature
57
+ 1.4: default auto-phrase (QueryParser feature) to off
58
+ -->
59
+
60
+ <types>
61
+ <!-- field type definitions. The "name" attribute is
62
+ just a label to be used by field definitions. The "class"
63
+ attribute and any other attributes determine the real
64
+ behavior of the fieldType.
65
+ Class names starting with "solr" refer to java classes in the
66
+ org.apache.solr.analysis package.
67
+ -->
68
+
69
+ <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
70
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
71
+
72
+ <!-- boolean type: "true" or "false" -->
73
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
74
+ <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
75
+ <fieldtype name="binary" class="solr.BinaryField"/>
76
+
77
+ <!-- The optional sortMissingLast and sortMissingFirst attributes are
78
+ currently supported on types that are sorted internally as strings
79
+ and on numeric types.
80
+ This includes "string","boolean", and, as of 3.5 (and 4.x),
81
+ int, float, long, date, double, including the "Trie" variants.
82
+ - If sortMissingLast="true", then a sort on this field will cause documents
83
+ without the field to come after documents with the field,
84
+ regardless of the requested sort order (asc or desc).
85
+ - If sortMissingFirst="true", then a sort on this field will cause documents
86
+ without the field to come before documents with the field,
87
+ regardless of the requested sort order.
88
+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
89
+ then default lucene sorting will be used which places docs without the
90
+ field first in an ascending sort and last in a descending sort.
91
+ -->
92
+
93
+ <!--
94
+ Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
95
+ -->
96
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
97
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
98
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
99
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
100
+
101
+ <!--
102
+ Numeric field types that index each value at various levels of precision
103
+ to accelerate range queries when the number of values between the range
104
+ endpoints is large. See the javadoc for NumericRangeQuery for internal
105
+ implementation details.
106
+
107
+ Smaller precisionStep values (specified in bits) will lead to more tokens
108
+ indexed per value, slightly larger index size, and faster range queries.
109
+ A precisionStep of 0 disables indexing at different precision levels.
110
+ -->
111
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
112
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
113
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
114
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
115
+
116
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
117
+ is a more restricted form of the canonical representation of dateTime
118
+ http://www.w3.org/TR/xmlschema-2/#dateTime
119
+ The trailing "Z" designates UTC time and is mandatory.
120
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
121
+ All other components are mandatory.
122
+
123
+ Expressions can also be used to denote calculations that should be
124
+ performed relative to "NOW" to determine the value, ie...
125
+
126
+ NOW/HOUR
127
+ ... Round to the start of the current hour
128
+ NOW-1DAY
129
+ ... Exactly 1 day prior to now
130
+ NOW/DAY+6MONTHS+3DAYS
131
+ ... 6 months and 3 days in the future from the start of
132
+ the current day
133
+
134
+ Consult the DateField javadocs for more information.
135
+
136
+ Note: For faster range queries, consider the tdate type
137
+ -->
138
+ <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
139
+
140
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
141
+ <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
142
+
143
+ <!-- The "RandomSortField" is not used to store or search any
144
+ data. You can declare fields of this type in your schema
145
+ to generate pseudo-random orderings of your docs for sorting
146
+ purposes. The ordering is generated based on the field name
147
+ and the version of the index. As long as the index version
148
+ remains unchanged, and the same field name is reused,
149
+ the ordering of the docs will be consistent.
150
+ If you want different pseudo-random orderings of documents,
151
+ for the same version of the index, use a dynamicField and
152
+ change the name
153
+ -->
154
+ <fieldType name="random" class="solr.RandomSortField" indexed="true" />
155
+
156
+ <!-- solr.TextField allows the specification of custom text analyzers
157
+ specified as a tokenizer and a list of token filters. Different
158
+ analyzers may be specified for indexing and querying.
159
+
160
+ The optional positionIncrementGap puts space between multiple fields of
161
+ this type on the same document, with the purpose of preventing false phrase
162
+ matching across fields.
163
+
164
+ For more info on customizing your analyzer chain, please see
165
+ http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
166
+ -->
167
+ <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
168
+ <analyzer>
169
+ <tokenizer class="solr.StandardTokenizerFactory"/>
170
+ <filter class="solr.ICUFoldingFilterFactory" />
171
+ <filter class="solr.SnowballPorterFilterFactory" language="English" />
172
+ </analyzer>
173
+ </fieldType>
174
+
175
+ <!-- One can also specify an existing Analyzer class that has a
176
+ default constructor via the class attribute on the analyzer element
177
+ <fieldType name="text_greek" class="solr.TextField">
178
+ <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
179
+ </fieldType>
180
+ -->
181
+
182
+ <!-- A text field that only splits on whitespace for exact matching of words -->
183
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
184
+ <analyzer>
185
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
186
+ </analyzer>
187
+ </fieldType>
188
+
189
+ <!-- A general text field that has reasonable, generic
190
+ cross-language defaults: it tokenizes with StandardTokenizer,
191
+ and down cases (no stop-word filter for "stopwords.txt" is
192
+ configured in this schema). At query time only, it
193
+ also applies synonyms. -->
194
+ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
195
+ <analyzer type="index">
196
+ <tokenizer class="solr.StandardTokenizerFactory"/>
197
+ <!-- in this example, we will only use synonyms at query time
198
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
199
+ -->
200
+ <filter class="solr.LowerCaseFilterFactory"/>
201
+ </analyzer>
202
+ <analyzer type="query">
203
+ <tokenizer class="solr.StandardTokenizerFactory"/>
204
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
205
+ <filter class="solr.LowerCaseFilterFactory"/>
206
+ </analyzer>
207
+ </fieldType>
208
+
209
+ <!-- A text field with defaults appropriate for English: it
210
+ tokenizes with StandardTokenizer, down cases, strips English possessives
211
+ (no stop-word filter is configured here), protects words from protwords.txt, and
212
+ finally applies Porter's stemming. The query time analyzer
213
+ also applies synonyms from synonyms.txt. -->
214
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
215
+ <analyzer type="index">
216
+ <tokenizer class="solr.StandardTokenizerFactory"/>
217
+ <!-- in this example, we will only use synonyms at query time
218
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
219
+ -->
220
+ <filter class="solr.LowerCaseFilterFactory"/>
221
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
222
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
223
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
224
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
225
+ -->
226
+ <filter class="solr.PorterStemFilterFactory"/>
227
+ </analyzer>
228
+ <analyzer type="query">
229
+ <tokenizer class="solr.StandardTokenizerFactory"/>
230
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
231
+ <filter class="solr.LowerCaseFilterFactory"/>
232
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
233
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
234
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
235
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
236
+ -->
237
+ <filter class="solr.PorterStemFilterFactory"/>
238
+ </analyzer>
239
+ </fieldType>
240
+
241
+ <!-- A text field with defaults appropriate for English, plus
242
+ aggressive word-splitting and autophrase features enabled.
243
+ This field is just like text_en, except it adds
244
+ WordDelimiterFilter to enable splitting and matching of
245
+ words on case-change, alpha numeric boundaries, and
246
+ non-alphanumeric chars. This means certain compound word
247
+ cases will work, for example query "wi fi" will match
248
+ document "WiFi" or "wi-fi". However, other cases will still
249
+ not match, for example if the query is "wifi" and the
250
+ document is "wi fi" or if the query is "wi-fi" and the
251
+ document is "wifi".
252
+ -->
253
+ <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
254
+ <analyzer type="index">
255
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
256
+ <!-- in this example, we will only use synonyms at query time
257
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
258
+ -->
259
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
260
+ <filter class="solr.LowerCaseFilterFactory"/>
261
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
262
+ <filter class="solr.PorterStemFilterFactory"/>
263
+ </analyzer>
264
+ <analyzer type="query">
265
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
266
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
267
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
268
+ <filter class="solr.LowerCaseFilterFactory"/>
269
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
270
+ <filter class="solr.PorterStemFilterFactory"/>
271
+ </analyzer>
272
+ </fieldType>
273
+
274
+ <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
275
+ but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
276
+ <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
277
+ <analyzer>
278
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
279
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
280
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
281
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
282
+ <filter class="solr.LowerCaseFilterFactory"/>
283
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
284
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
285
+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
286
+ possible with WordDelimiterFilter in conjunction with stemming. -->
287
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
288
+ </analyzer>
289
+ </fieldType>
290
+
291
+ <!-- Just like text_general except it reverses the characters of
292
+ each token, to enable more efficient leading wildcard queries. -->
293
+ <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
294
+ <analyzer type="index">
295
+ <tokenizer class="solr.StandardTokenizerFactory"/>
296
+ <filter class="solr.LowerCaseFilterFactory"/>
297
+ <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
298
+ maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
299
+ </analyzer>
300
+ <analyzer type="query">
301
+ <tokenizer class="solr.StandardTokenizerFactory"/>
302
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
303
+ <filter class="solr.LowerCaseFilterFactory"/>
304
+ </analyzer>
305
+ </fieldType>
306
+
307
+ <fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100" >
308
+ <analyzer>
309
+ <tokenizer class="solr.StandardTokenizerFactory"/>
310
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
311
+ <filter class="solr.StandardFilterFactory"/>
312
+ <filter class="solr.LowerCaseFilterFactory"/>
313
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
314
+ </analyzer>
315
+ </fieldType>
316
+
317
+ <fieldType class="solr.TextField" name="textSuggest" positionIncrementGap="100">
318
+ <analyzer>
319
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
320
+ <filter class="solr.StandardFilterFactory"/>
321
+ <filter class="solr.LowerCaseFilterFactory"/>
322
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
323
+ </analyzer>
324
+ </fieldType>
325
+
326
+ <!-- charFilter + WhitespaceTokenizer -->
327
+ <!--
328
+ <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
329
+ <analyzer>
330
+ <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
331
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
332
+ </analyzer>
333
+ </fieldType>
334
+ -->
335
+
336
+ <!-- This is an example of using the KeywordTokenizer along
337
+ with various TokenFilterFactories to produce a sortable field
338
+ that does not include some properties of the source text
339
+ -->
340
+ <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
341
+ <analyzer>
342
+ <!-- KeywordTokenizer does no actual tokenizing, so the entire
343
+ input string is preserved as a single token
344
+ -->
345
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
346
+ <!-- The LowerCase TokenFilter does what you expect, which can be
347
+ useful when you want your sorting to be case insensitive
348
+ -->
349
+ <filter class="solr.LowerCaseFilterFactory" />
350
+ <!-- The TrimFilter removes any leading or trailing whitespace -->
351
+ <filter class="solr.TrimFilterFactory" />
352
+ <!-- The PatternReplaceFilter gives you the flexibility to use
353
+ Java Regular expression to replace any sequence of characters
354
+ matching a pattern with an arbitrary replacement string,
355
+ which may include back references to portions of the original
356
+ string matched by the pattern.
357
+
358
+ See the Java Regular Expression documentation for more
359
+ information on pattern and replacement string syntax.
360
+
361
+ http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
362
+ -->
363
+ <filter class="solr.PatternReplaceFilterFactory"
364
+ pattern="([^a-z])" replacement="" replace="all"
365
+ />
366
+ </analyzer>
367
+ </fieldType>
368
+
369
+ <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
370
+ <analyzer>
371
+ <tokenizer class="solr.StandardTokenizerFactory"/>
372
+ <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
373
+ </analyzer>
374
+ </fieldtype>
375
+
376
+ <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
377
+ <analyzer>
378
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
379
+ <!--
380
+ The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
381
+ a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
382
+ Attributes of the DelimitedPayloadTokenFilterFactory :
383
+ "delimiter" - a one character delimiter. Default is | (pipe)
384
+ "encoder" - how to encode the following value into a payload
385
+ float -> org.apache.lucene.analysis.payloads.FloatEncoder,
386
+ integer -> o.a.l.a.p.IntegerEncoder
387
+ identity -> o.a.l.a.p.IdentityEncoder
388
+ Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
389
+ -->
390
+ <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
391
+ </analyzer>
392
+ </fieldtype>
393
+
394
+ <!-- lowercases the entire field value, keeping it as a single token. -->
395
+ <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
396
+ <analyzer>
397
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
398
+ <filter class="solr.LowerCaseFilterFactory" />
399
+ </analyzer>
400
+ </fieldType>
401
+
402
+ <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
403
+ <analyzer>
404
+ <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
405
+ </analyzer>
406
+ </fieldType>
407
+
408
+ <!-- since fields of this type are by default not stored or indexed,
409
+ any data added to them will be ignored outright. -->
410
+ <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
411
+
412
+ <!-- This point type indexes the coordinates as separate fields (subFields)
413
+ If subFieldType is defined, it references a type, and a dynamic field
414
+ definition is created matching *___<typename>. Alternately, if
415
+ subFieldSuffix is defined, that is used to create the subFields.
416
+ Example: if subFieldType="double", then the coordinates would be
417
+ indexed in fields myloc_0___double,myloc_1___double.
418
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
419
+ in fields myloc_0_d,myloc_1_d
420
+ The subFields are an implementation detail of the fieldType, and end
421
+ users normally should not need to know about them.
422
+ -->
423
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
424
+
425
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
426
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
427
+
428
+ <!--
429
+ A Geohash is a compact representation of a latitude longitude pair in a single field.
430
+ See http://wiki.apache.org/solr/SpatialSearch
431
+ -->
432
+ <fieldtype name="geohash" class="solr.GeoHashField"/>
433
+ </types>
434
+
435
+
436
+ <fields>
437
+ <!-- Valid attributes for fields:
438
+ name: mandatory - the name for the field
439
+ type: mandatory - the name of a previously defined type from the
440
+ <types> section
441
+ indexed: true if this field should be indexed (searchable or sortable)
442
+ stored: true if this field should be retrievable
443
+ multiValued: true if this field may contain multiple values per document
444
+ omitNorms: (expert) set to true to omit the norms associated with
445
+ this field (this disables length normalization and index-time
446
+ boosting for the field, and saves some memory). Only full-text
447
+ fields or fields that need an index-time boost need norms.
448
+ termVectors: [false] set to true to store the term vector for a
449
+ given field.
450
+ When using MoreLikeThis, fields used for similarity should be
451
+ stored for best performance.
452
+ termPositions: Store position information with the term vector.
453
+ This will increase storage costs.
454
+ termOffsets: Store offset information with the term vector. This
455
+ will increase storage costs.
456
+ default: a value that should be used if no value is specified
457
+ when adding a document.
458
+ -->
459
+
460
+ <!-- NOTE: this is not a full list of fields in the index; dynamic fields are also used -->
461
+ <field name="id" type="string" indexed="true" stored="true" required="true" />
462
+ <field name="_version_" type="long" indexed="true" stored="true" multiValued="false" />
463
+ <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
464
+ <!-- default, catch all search field -->
465
+ <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
466
+
467
+ <!-- these display fields are NOT multi-valued -->
468
+ <field name="marc_display" type="string" indexed="false" stored="true" multiValued="false"/>
469
+ <field name="title_display" type="string" indexed="false" stored="true" multiValued="false"/>
470
+ <field name="title_vern_display" type="string" indexed="false" stored="true" multiValued="false"/>
471
+ <field name="subtitle_display" type="string" indexed="false" stored="true" multiValued="false"/>
472
+ <field name="subtitle_vern_display" type="string" indexed="false" stored="true" multiValued="false"/>
473
+ <field name="author_display" type="string" indexed="false" stored="true" multiValued="false"/>
474
+ <field name="author_vern_display" type="string" indexed="false" stored="true" multiValued="false"/>
475
+
476
+ <!-- these fields are also used for display, so they must be stored -->
477
+ <field name="isbn_t" type="text" indexed="true" stored="true" multiValued="true"/>
478
+ <field name="language_facet" type="string" indexed="true" stored="true" multiValued="true" />
479
+ <field name="subject_topic_facet" type="string" indexed="true" stored="true" multiValued="true" />
480
+ <field name="subject_era_facet" type="string" indexed="true" stored="true" multiValued="true" />
481
+ <field name="subject_geo_facet" type="string" indexed="true" stored="true" multiValued="true" />
482
+ <!-- pub_date is used for facet and display so it must be indexed and stored -->
483
+ <field name="pub_date" type="string" indexed="true" stored="true" multiValued="true"/>
484
+ <!-- pub_date sort uses new trie-based int fields, which are recommended for any int and are displayable, sortable, and range-queryable;
485
+ we use 'tint' for faster range-queries. -->
486
+ <field name="pub_date_sort" type="tint" indexed="true" stored="true" multiValued="false"/>
487
+
488
+ <!-- format is used for facet, display, and choosing which partial to use for the show view, so it must be stored and indexed -->
489
+ <field name="format" type="string" indexed="true" stored="true"/>
490
+
491
+
492
+
493
+ <!-- Dynamic field definitions. If a field name is not found, dynamicFields
494
+ will be used if the name matches any of the patterns.
495
+ RESTRICTION: the glob-like pattern in the name attribute must have
496
+ a "*" only at the start or the end.
497
+ EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
498
+ Longer patterns will be matched first. If equal size patterns
499
+ both match, the first appearing in the schema will be used. -->
500
+ <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
501
+ <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
502
+ <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
503
+ <dynamicField name="*_t" type="text" indexed="true" stored="true" multiValued="true"/>
504
+ <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
505
+ <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
506
+ <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
507
+ <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
508
+
509
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
510
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
511
+
512
+ <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
513
+ <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
514
+
515
+ <!-- some trie-coded dynamic fields for faster range queries -->
516
+ <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
517
+ <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
518
+ <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
519
+ <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
520
+ <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
521
+
522
+ <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
523
+ <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
524
+
525
+ <dynamicField name="random_*" type="random" />
526
+
527
+ <dynamicField name="*_display" type="string" indexed="false" stored="true" multiValued="true" />
528
+ <dynamicField name="*_facet" type="string" indexed="true" stored="false" multiValued="true" />
529
+ <dynamicField name="*_sort" type="alphaOnlySort" indexed="true" stored="false" multiValued="false" />
530
+ <dynamicField name="*_unstem_search" type="text_general" indexed="true" stored="false" multiValued="true" />
531
+ <dynamicField name="*spell" type="textSpell" indexed="true" stored="false" multiValued="true" />
532
+ <dynamicField name="*suggest" type="textSuggest" indexed="true" stored="false" multiValued="true" />
533
+
534
+ <!-- uncomment the following to ignore any fields that don't already match an existing
535
+ field name or dynamic field, rather than reporting them as an error.
536
+ Alternately, change the type="ignored" to some other type e.g. "text" if you want
537
+ unknown fields indexed and/or stored by default -->
538
+ <!--dynamicField name="*" type="ignored" multiValued="true" /-->
539
+
540
+ </fields>
541
+
542
+ <!-- Field to use to determine and enforce document uniqueness.
543
+ Unless this field is marked with required="false", it will be a required field
544
+ -->
545
+ <uniqueKey>id</uniqueKey>
546
+
547
+ <!-- copyField commands copy one field to another at the time a document
548
+ is added to the index. It's used either to index the same field differently,
549
+ or to add multiple fields to the same field for easier/faster searching. -->
550
+ <!-- Copy Fields -->
551
+
552
+ <!-- unstemmed fields -->
553
+ <copyField source="title_t" dest="title_unstem_search"/>
554
+ <copyField source="subtitle_t" dest="subtitle_unstem_search"/>
555
+ <copyField source="title_addl_t" dest="title_addl_unstem_search"/>
556
+ <copyField source="title_added_entry_t" dest="title_added_entry_unstem_search"/>
557
+ <copyField source="title_series_t" dest="title_series_unstem_search"/>
558
+ <copyField source="author_t" dest="author_unstem_search"/>
559
+ <copyField source="author_addl_t" dest="author_addl_unstem_search"/>
560
+ <copyField source="subject_t" dest="subject_unstem_search"/>
561
+ <copyField source="subject_addl_t" dest="subject_addl_unstem_search"/>
562
+ <copyField source="subject_topic_facet" dest="subject_topic_unstem_search"/>
563
+
564
+ <!-- sort fields -->
565
+ <copyField source="pub_date" dest="pub_date_sort"/>
566
+
567
+
568
+ <!-- spellcheck fields -->
569
+ <!-- default spell check; should match fields for default request handler -->
570
+ <!-- it won't work with a copy of a copy field -->
571
+ <copyField source="*_t" dest="spell"/>
572
+ <copyField source="*_facet" dest="spell"/>
573
+ <!-- title spell check; should match fields for title request handler -->
574
+ <copyField source="title_t" dest="title_spell"/>
575
+ <copyField source="subtitle_t" dest="title_spell"/>
576
+ <copyField source="addl_titles_t" dest="title_spell"/>
577
+ <copyField source="title_added_entry_t" dest="title_spell"/>
578
+ <copyField source="title_series_t" dest="title_spell"/>
579
+ <!-- author spell check; should match fields for author request handler -->
580
+ <copyField source="author_t" dest="author_spell"/>
581
+ <copyField source="author_addl_t" dest="author_spell"/>
582
+ <!-- subject spell check; should match fields for subject request handler -->
583
+ <copyField source="subject_topic_facet" dest="subject_spell"/>
584
+ <copyField source="subject_t" dest="subject_spell"/>
585
+ <copyField source="subject_addl_t" dest="subject_spell"/>
586
+
587
+ <!-- OpenSearch query field should match request handler search fields -->
588
+ <copyField source="title_t" dest="opensearch_display"/>
589
+ <copyField source="subtitle_t" dest="opensearch_display"/>
590
+ <copyField source="addl_titles_t" dest="opensearch_display"/>
591
+ <copyField source="title_added_entry_t" dest="opensearch_display"/>
592
+ <copyField source="title_series_t" dest="opensearch_display"/>
593
+ <copyField source="author_t" dest="opensearch_display"/>
594
+ <copyField source="author_addl_t" dest="opensearch_display"/>
595
+ <copyField source="subject_topic_facet" dest="opensearch_display"/>
596
+ <copyField source="subject_t" dest="opensearch_display"/>
597
+ <copyField source="subject_addl_t" dest="opensearch_display"/>
598
+
599
+ <!-- for suggestions -->
600
+ <copyField source="*_t" dest="suggest"/>
601
+ <copyField source="*_facet" dest="suggest"/>
602
+
603
+ <!-- Above, multiple source fields are copied to a single destination field (e.g. title_spell).
604
+ Another way to map multiple source fields to the same
605
+ destination field is to use the dynamic field syntax.
606
+ copyField also supports a maxChars to copy setting. -->
607
+
608
+ <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
609
+
610
+ <!-- copy name to alphaNameSort, a field designed for sorting by name -->
611
+ <!-- <copyField source="name" dest="alphaNameSort"/> -->
612
+
613
+
614
+ <!-- Similarity is the scoring routine for each document vs. a query.
615
+ A custom similarity may be specified here, but the default is fine
616
+ for most applications. -->
617
+ <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
618
+ <!-- ... OR ...
619
+ Specify a SimilarityFactory class name implementation
620
+ allowing parameters to be used.
621
+ -->
622
+ <!--
623
+ <similarity class="com.example.solr.CustomSimilarityFactory">
624
+ <str name="paramkey">param value</str>
625
+ </similarity>
626
+ -->
627
+
628
+
629
+ </schema>
@@ -0,0 +1,24 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ user=
17
+ solr_hostname=localhost
18
+ solr_port=8983
19
+ rsyncd_port=18983
20
+ data_dir=
21
+ webapp_name=solr
22
+ master_host=
23
+ master_data_dir=
24
+ master_status_dir=