active-fedora 5.0.0 → 5.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. data/History.txt +14 -1
  2. data/README.textile +6 -0
  3. data/active-fedora.gemspec +1 -1
  4. data/lib/active_fedora.rb +2 -0
  5. data/lib/active_fedora/associations.rb +22 -2
  6. data/lib/active_fedora/associations/association_collection.rb +37 -0
  7. data/lib/active_fedora/associations/belongs_to_association.rb +8 -0
  8. data/lib/active_fedora/associations/has_many_association.rb +2 -0
  9. data/lib/active_fedora/base.rb +43 -6
  10. data/lib/active_fedora/datastream.rb +13 -37
  11. data/lib/active_fedora/datastreams.rb +2 -6
  12. data/lib/active_fedora/digital_object.rb +8 -1
  13. data/lib/active_fedora/metadata_datastream_helper.rb +2 -2
  14. data/lib/active_fedora/nokogiri_datastream.rb +55 -16
  15. data/lib/active_fedora/persistence.rb +14 -9
  16. data/lib/active_fedora/railtie.rb +15 -0
  17. data/lib/active_fedora/rdf_datastream.rb +4 -0
  18. data/lib/active_fedora/rdfxml_rdf_datastream.rb +2 -6
  19. data/lib/active_fedora/reflection.rb +11 -0
  20. data/lib/active_fedora/relationships.rb +4 -4
  21. data/lib/active_fedora/rels_ext_datastream.rb +21 -6
  22. data/lib/active_fedora/semantic_node.rb +3 -3
  23. data/lib/active_fedora/test_support.rb +38 -0
  24. data/lib/active_fedora/version.rb +1 -1
  25. data/lib/generators/active_fedora/config/USAGE +9 -0
  26. data/lib/generators/active_fedora/config/config_generator.rb +10 -0
  27. data/lib/generators/active_fedora/config/fedora/fedora_generator.rb +12 -0
  28. data/lib/generators/active_fedora/config/fedora/templates/fedora.yml +14 -0
  29. data/lib/generators/active_fedora/config/fedora/templates/fedora_conf/conf/development/fedora.fcfg +953 -0
  30. data/lib/generators/active_fedora/config/fedora/templates/fedora_conf/conf/test/fedora.fcfg +953 -0
  31. data/lib/generators/active_fedora/config/solr/solr_generator.rb +12 -0
  32. data/lib/generators/active_fedora/config/solr/templates/solr.yml +10 -0
  33. data/lib/generators/active_fedora/config/solr/templates/solr_conf/conf/schema.xml +692 -0
  34. data/lib/generators/active_fedora/config/solr/templates/solr_conf/conf/solrconfig.xml +299 -0
  35. data/lib/generators/active_fedora/config/solr/templates/solr_conf/solr.xml +35 -0
  36. data/lib/generators/active_fedora/model/USAGE +9 -0
  37. data/lib/generators/active_fedora/model/model_generator.rb +21 -0
  38. data/lib/generators/active_fedora/model/templates/model.rb.erb +6 -0
  39. data/lib/generators/active_fedora/model/templates/model_spec.rb.erb +21 -0
  40. data/lib/tasks/active_fedora_dev.rake +8 -0
  41. data/spec/fixtures/hydrangea_fixture_mods_article2.foxml.xml +234 -0
  42. data/spec/integration/associations_spec.rb +76 -15
  43. data/spec/integration/base_spec.rb +38 -10
  44. data/spec/integration/datastreams_spec.rb +24 -2
  45. data/spec/integration/nokogiri_datastream_spec.rb +23 -5
  46. data/spec/unit/base_extra_spec.rb +0 -1
  47. data/spec/unit/base_spec.rb +7 -47
  48. data/spec/unit/datastream_collections_spec.rb +0 -7
  49. data/spec/unit/datastream_spec.rb +7 -16
  50. data/spec/unit/datastreams_spec.rb +2 -2
  51. data/spec/unit/nokogiri_datastream_spec.rb +31 -20
  52. data/spec/unit/ntriples_datastream_spec.rb +7 -10
  53. data/spec/unit/persistence_spec.rb +0 -11
  54. data/spec/unit/qualified_dublin_core_datastream_spec.rb +1 -2
  55. data/spec/unit/relationships_spec.rb +5 -5
  56. data/spec/unit/rels_ext_datastream_spec.rb +14 -9
  57. data/spec/unit/semantic_node_spec.rb +4 -4
  58. metadata +25 -6
@@ -0,0 +1,12 @@
1
+ require 'rails/generators'
2
+
3
+ module ActiveFedora
4
+ class Config::SolrGenerator < Rails::Generators::Base
5
+ source_root File.expand_path('../templates', __FILE__)
6
+
7
+ def generate
8
+ copy_file('solr.yml', 'config/solr.yml')
9
+ directory('solr_conf', 'solr_conf')
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,10 @@
1
+ # This is a sample config file that does not have multiple solr instances. You will also need to be sure to
2
+ # edit the fedora.yml file to match the solr URL for active-fedora.
3
+ development:
4
+ url: http://localhost:8983/solr/development
5
+ test: &TEST
6
+ url: <%= "http://127.0.0.1:#{ENV['TEST_JETTY_PORT'] || 8983}/solr/test" %>
7
+ cucumber:
8
+ <<: *TEST
9
+ production:
10
+ url: http://your.production.server:8080/bl_solr/core0
@@ -0,0 +1,692 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ This is the Solr schema file. This file should be named "schema.xml" and
21
+ should be in the conf directory under the solr home
22
+ (i.e. ./solr/conf/schema.xml by default)
23
+ or located where the classloader for the Solr webapp can find it.
24
+
25
+ This example schema is the recommended starting point for users.
26
+ It should be kept correct and concise, usable out-of-the-box.
27
+
28
+ For more information on how to customize this file, please see
29
+ http://wiki.apache.org/solr/SchemaXml
30
+
31
+ PERFORMANCE NOTE: this schema includes many optional features and should not
32
+ be used for benchmarking. To improve performance one could
33
+ - set stored="false" for all fields possible (esp large fields) when you
34
+ only need to search on the field but don't need to return the original
35
+ value.
36
+ - set indexed="false" if you don't need to search on the field, but only
37
+ return the field as a result of searching on other indexed fields.
38
+ - remove all unneeded copyField statements
39
+ - for best index size and searching performance, set "indexed" to false
40
+ for all general text fields, use copyField to copy them to the
41
+ catchall "text" field, and use that for searching.
42
+ - For maximum indexing performance, use the StreamingUpdateSolrServer
43
+ java client.
44
+ - Remember to run the JVM in server mode, and use a higher logging level
45
+ that avoids logging every request
46
+ -->
47
+
48
+ <schema name="Hydra" version="1.4">
49
+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
50
+ Applications should change this to reflect the nature of the search collection.
51
+ version="1.4" is Solr's version number for the schema syntax and semantics. It should
52
+ not normally be changed by applications.
53
+ 1.0: multiValued attribute did not exist, all fields are multiValued by nature
54
+ 1.1: multiValued attribute introduced, false by default
55
+ 1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
56
+ 1.3: removed optional field compress feature
57
+ 1.4: default auto-phrase (QueryParser feature) to off
58
+ -->
59
+
60
+ <types>
61
+ <!-- field type definitions. The "name" attribute is
62
+ just a label to be used by field definitions. The "class"
63
+ attribute and any other attributes determine the real
64
+ behavior of the fieldType.
65
+ Class names starting with "solr" refer to java classes in the
66
+ org.apache.solr.analysis package.
67
+ -->
68
+
69
+ <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
70
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
71
+
72
+ <!-- boolean type: "true" or "false" -->
73
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
74
+ <!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings -->
75
+ <fieldtype name="binary" class="solr.BinaryField"/>
76
+
77
+ <!-- The optional sortMissingLast and sortMissingFirst attributes are
78
+ currently supported on types that are sorted internally as strings
79
+ and on numeric types.
80
+ This includes "string","boolean", and, as of 3.5 (and 4.x),
81
+ int, float, long, date, double, including the "Trie" variants.
82
+ - If sortMissingLast="true", then a sort on this field will cause documents
83
+ without the field to come after documents with the field,
84
+ regardless of the requested sort order (asc or desc).
85
+ - If sortMissingFirst="true", then a sort on this field will cause documents
86
+ without the field to come before documents with the field,
87
+ regardless of the requested sort order.
88
+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
89
+ then default lucene sorting will be used which places docs without the
90
+ field first in an ascending sort and last in a descending sort.
91
+ -->
92
+
93
+ <!--
94
+ Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
95
+ -->
96
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
97
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
98
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
99
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
100
+
101
+ <!--
102
+ Numeric field types that index each value at various levels of precision
103
+ to accelerate range queries when the number of values between the range
104
+ endpoints is large. See the javadoc for NumericRangeQuery for internal
105
+ implementation details.
106
+
107
+ Smaller precisionStep values (specified in bits) will lead to more tokens
108
+ indexed per value, slightly larger index size, and faster range queries.
109
+ A precisionStep of 0 disables indexing at different precision levels.
110
+ -->
111
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
112
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
113
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
114
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
115
+
116
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
117
+ is a more restricted form of the canonical representation of dateTime
118
+ http://www.w3.org/TR/xmlschema-2/#dateTime
119
+ The trailing "Z" designates UTC time and is mandatory.
120
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
121
+ All other components are mandatory.
122
+
123
+ Expressions can also be used to denote calculations that should be
124
+ performed relative to "NOW" to determine the value, ie...
125
+
126
+ NOW/HOUR
127
+ ... Round to the start of the current hour
128
+ NOW-1DAY
129
+ ... Exactly 1 day prior to now
130
+ NOW/DAY+6MONTHS+3DAYS
131
+ ... 6 months and 3 days in the future from the start of
132
+ the current day
133
+
134
+ Consult the DateField javadocs for more information.
135
+
136
+ Note: For faster range queries, consider the tdate type
137
+ -->
138
+ <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
139
+
140
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
141
+ <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
142
+
143
+
144
+ <!--
145
+ Note:
146
+ These should only be used for compatibility with existing indexes (created with older Solr versions)
147
+ or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
148
+
149
+ Plain numeric field types that store and index the text
150
+ value verbatim (and hence don't support range queries, since the
151
+ lexicographic ordering isn't equal to the numeric ordering)
152
+ -->
153
+ <fieldType name="pint" class="solr.IntField" omitNorms="true"/>
154
+ <fieldType name="plong" class="solr.LongField" omitNorms="true"/>
155
+ <fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
156
+ <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/>
157
+ <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
158
+
159
+
160
+ <!--
161
+ Note:
162
+ These should only be used for compatibility with existing indexes (created with older Solr versions).
163
+ Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last
164
+
165
+ Numeric field types that manipulate the value into
166
+ a string value that isn't human-readable in its internal form,
167
+ but with a lexicographic ordering the same as the numeric ordering,
168
+ so that range queries work correctly.
169
+ -->
170
+ <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
171
+ <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
172
+ <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
173
+ <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
174
+
175
+
176
+ <!-- The "RandomSortField" is not used to store or search any
177
+ data. You can declare fields of this type in your schema
178
+ to generate pseudo-random orderings of your docs for sorting
179
+ purposes. The ordering is generated based on the field name
180
+ and the version of the index. As long as the index version
181
+ remains unchanged, and the same field name is reused,
182
+ the ordering of the docs will be consistent.
183
+ If you want different pseudo-random orderings of documents,
184
+ for the same version of the index, use a dynamicField and
185
+ change the name
186
+ -->
187
+ <fieldType name="random" class="solr.RandomSortField" indexed="true" />
188
+
189
+ <!-- solr.TextField allows the specification of custom text analyzers
190
+ specified as a tokenizer and a list of token filters. Different
191
+ analyzers may be specified for indexing and querying.
192
+
193
+ The optional positionIncrementGap puts space between multiple fields of
194
+ this type on the same document, with the purpose of preventing false phrase
195
+ matching across fields.
196
+
197
+ For more info on customizing your analyzer chain, please see
198
+ http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
199
+ -->
200
+ <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
201
+ <analyzer>
202
+ <tokenizer class="solr.StandardTokenizerFactory"/>
203
+ <filter class="solr.ICUFoldingFilterFactory" />
204
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
205
+ <filter class="solr.SnowballPorterFilterFactory" language="English" />
206
+ </analyzer>
207
+ </fieldType>
208
+
209
+ <!-- One can also specify an existing Analyzer class that has a
210
+ default constructor via the class attribute on the analyzer element
211
+ <fieldType name="text_greek" class="solr.TextField">
212
+ <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
213
+ </fieldType>
214
+ -->
215
+
216
+ <!-- A text field that only splits on whitespace for exact matching of words -->
217
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
218
+ <analyzer>
219
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
220
+ </analyzer>
221
+ </fieldType>
222
+
223
+ <!-- A general text field that has reasonable, generic
224
+ cross-language defaults: it tokenizes with StandardTokenizer,
225
+ removes stop words from case-insensitive "stopwords.txt"
226
+ (empty by default), and down cases. At query time only, it
227
+ also applies synonyms. -->
228
+ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
229
+ <analyzer type="index">
230
+ <tokenizer class="solr.StandardTokenizerFactory"/>
231
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
232
+ <!-- in this example, we will only use synonyms at query time
233
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
234
+ -->
235
+ <filter class="solr.LowerCaseFilterFactory"/>
236
+ </analyzer>
237
+ <analyzer type="query">
238
+ <tokenizer class="solr.StandardTokenizerFactory"/>
239
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
240
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
241
+ <filter class="solr.LowerCaseFilterFactory"/>
242
+ </analyzer>
243
+ </fieldType>
244
+
245
+ <!-- A text field with defaults appropriate for English: it
246
+ tokenizes with StandardTokenizer, removes English stop words
247
+ (stopwords_en.txt), down cases, protects words from protwords.txt, and
248
+ finally applies Porter's stemming. The query time analyzer
249
+ also applies synonyms from synonyms.txt. -->
250
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
251
+ <analyzer type="index">
252
+ <tokenizer class="solr.StandardTokenizerFactory"/>
253
+ <!-- in this example, we will only use synonyms at query time
254
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
255
+ -->
256
+ <!-- Case insensitive stop word removal.
257
+ add enablePositionIncrements=true in both the index and query
258
+ analyzers to leave a 'gap' for more accurate phrase queries.
259
+ -->
260
+ <filter class="solr.StopFilterFactory"
261
+ ignoreCase="true"
262
+ words="stopwords_en.txt"
263
+ enablePositionIncrements="true"
264
+ />
265
+ <filter class="solr.LowerCaseFilterFactory"/>
266
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
267
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
268
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
269
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
270
+ -->
271
+ <filter class="solr.PorterStemFilterFactory"/>
272
+ </analyzer>
273
+ <analyzer type="query">
274
+ <tokenizer class="solr.StandardTokenizerFactory"/>
275
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
276
+ <filter class="solr.StopFilterFactory"
277
+ ignoreCase="true"
278
+ words="stopwords_en.txt"
279
+ enablePositionIncrements="true"
280
+ />
281
+ <filter class="solr.LowerCaseFilterFactory"/>
282
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
283
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
284
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
285
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
286
+ -->
287
+ <filter class="solr.PorterStemFilterFactory"/>
288
+ </analyzer>
289
+ </fieldType>
290
+
291
+ <!-- A text field with defaults appropriate for English, plus
292
+ aggressive word-splitting and autophrase features enabled.
293
+ This field is just like text_en, except it adds
294
+ WordDelimiterFilter to enable splitting and matching of
295
+ words on case-change, alpha numeric boundaries, and
296
+ non-alphanumeric chars. This means certain compound word
297
+ cases will work, for example query "wi fi" will match
298
+ document "WiFi" or "wi-fi". However, other cases will still
299
+ not match, for example if the query is "wifi" and the
300
+ document is "wi fi" or if the query is "wi-fi" and the
301
+ document is "wifi".
302
+ -->
303
+ <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
304
+ <analyzer type="index">
305
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
306
+ <!-- in this example, we will only use synonyms at query time
307
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
308
+ -->
309
+ <!-- Case insensitive stop word removal.
310
+ add enablePositionIncrements=true in both the index and query
311
+ analyzers to leave a 'gap' for more accurate phrase queries.
312
+ -->
313
+ <filter class="solr.StopFilterFactory"
314
+ ignoreCase="true"
315
+ words="stopwords_en.txt"
316
+ enablePositionIncrements="true"
317
+ />
318
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
319
+ <filter class="solr.LowerCaseFilterFactory"/>
320
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
321
+ <filter class="solr.PorterStemFilterFactory"/>
322
+ </analyzer>
323
+ <analyzer type="query">
324
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
325
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
326
+ <filter class="solr.StopFilterFactory"
327
+ ignoreCase="true"
328
+ words="stopwords_en.txt"
329
+ enablePositionIncrements="true"
330
+ />
331
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
332
+ <filter class="solr.LowerCaseFilterFactory"/>
333
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
334
+ <filter class="solr.PorterStemFilterFactory"/>
335
+ </analyzer>
336
+ </fieldType>
337
+
338
+ <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
339
+ but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
340
+ <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
341
+ <analyzer>
342
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
343
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
344
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
345
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
346
+ <filter class="solr.LowerCaseFilterFactory"/>
347
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
348
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
349
+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
350
+ possible with WordDelimiterFilter in conjunction with stemming. -->
351
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
352
+ </analyzer>
353
+ </fieldType>
354
+
355
+ <!-- Just like text_general except it reverses the characters of
356
+ each token, to enable more efficient leading wildcard queries. -->
357
+ <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
358
+ <analyzer type="index">
359
+ <tokenizer class="solr.StandardTokenizerFactory"/>
360
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
361
+ <filter class="solr.LowerCaseFilterFactory"/>
362
+ <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
363
+ maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
364
+ </analyzer>
365
+ <analyzer type="query">
366
+ <tokenizer class="solr.StandardTokenizerFactory"/>
367
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
368
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
369
+ <filter class="solr.LowerCaseFilterFactory"/>
370
+ </analyzer>
371
+ </fieldType>
372
+
373
+ <fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100" >
374
+ <analyzer>
375
+ <tokenizer class="solr.StandardTokenizerFactory"/>
376
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
377
+ <filter class="solr.StandardFilterFactory"/>
378
+ <filter class="solr.LowerCaseFilterFactory"/>
379
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
380
+ </analyzer>
381
+ </fieldType>
382
+
383
+ <!-- charFilter + WhitespaceTokenizer -->
384
+ <!--
385
+ <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
386
+ <analyzer>
387
+ <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
388
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
389
+ </analyzer>
390
+ </fieldType>
391
+ -->
392
+
393
+ <!-- This is an example of using the KeywordTokenizer along
394
+ with various TokenFilterFactories to produce a sortable field
395
+ that does not include some properties of the source text
396
+ -->
397
+ <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
398
+ <analyzer>
399
+ <!-- KeywordTokenizer does no actual tokenizing, so the entire
400
+ input string is preserved as a single token
401
+ -->
402
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
403
+ <!-- The LowerCase TokenFilter does what you expect, which can be useful
404
+ when you want your sorting to be case insensitive
405
+ -->
406
+ <filter class="solr.LowerCaseFilterFactory" />
407
+ <!-- The TrimFilter removes any leading or trailing whitespace -->
408
+ <filter class="solr.TrimFilterFactory" />
409
+ <!-- The PatternReplaceFilter gives you the flexibility to use
410
+ Java Regular expression to replace any sequence of characters
411
+ matching a pattern with an arbitrary replacement string,
412
+ which may include back references to portions of the original
413
+ string matched by the pattern.
414
+
415
+ See the Java Regular Expression documentation for more
416
+ information on pattern and replacement string syntax.
417
+
418
+ http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
419
+ -->
420
+ <filter class="solr.PatternReplaceFilterFactory"
421
+ pattern="([^a-z])" replacement="" replace="all"
422
+ />
423
+ </analyzer>
424
+ </fieldType>
425
+
426
+ <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
427
+ <analyzer>
428
+ <tokenizer class="solr.StandardTokenizerFactory"/>
429
+ <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
430
+ </analyzer>
431
+ </fieldtype>
432
+
433
+ <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
434
+ <analyzer>
435
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
436
+ <!--
437
+ The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
438
+ a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
439
+ Attributes of the DelimitedPayloadTokenFilterFactory :
440
+ "delimiter" - a one character delimiter. Default is | (pipe)
441
+ "encoder" - how to encode the following value into a payload
442
+ float -> org.apache.lucene.analysis.payloads.FloatEncoder,
443
+ integer -> o.a.l.a.p.IntegerEncoder
444
+ identity -> o.a.l.a.p.IdentityEncoder
445
+ Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
446
+ -->
447
+ <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
448
+ </analyzer>
449
+ </fieldtype>
450
+
451
+ <!-- lowercases the entire field value, keeping it as a single token. -->
452
+ <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
453
+ <analyzer>
454
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
455
+ <filter class="solr.LowerCaseFilterFactory" />
456
+ </analyzer>
457
+ </fieldType>
458
+
459
+ <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
460
+ <analyzer>
461
+ <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
462
+ </analyzer>
463
+ </fieldType>
464
+
465
+ <!-- since fields of this type are by default not stored or indexed,
466
+ any data added to them will be ignored outright. -->
467
+ <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
468
+
469
+ <!-- This point type indexes the coordinates as separate fields (subFields)
470
+ If subFieldType is defined, it references a type, and a dynamic field
471
+ definition is created matching *___<typename>. Alternately, if
472
+ subFieldSuffix is defined, that is used to create the subFields.
473
+ Example: if subFieldType="double", then the coordinates would be
474
+ indexed in fields myloc_0___double,myloc_1___double.
475
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
476
+ in fields myloc_0_d,myloc_1_d
477
+ The subFields are an implementation detail of the fieldType, and end
478
+ users normally should not need to know about them.
479
+ -->
480
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
481
+
482
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
483
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
484
+
485
+ <!--
486
+ A Geohash is a compact representation of a latitude longitude pair in a single field.
487
+ See http://wiki.apache.org/solr/SpatialSearch
488
+ -->
489
+ <fieldtype name="geohash" class="solr.GeoHashField"/>
490
+ </types>
491
+
492
+
493
+ <fields>
494
+ <!-- Valid attributes for fields:
495
+ name: mandatory - the name for the field
496
+ type: mandatory - the name of a previously defined type from the
497
+ <types> section
498
+ indexed: true if this field should be indexed (searchable or sortable)
499
+ stored: true if this field should be retrievable
500
+ multiValued: true if this field may contain multiple values per document
501
+ omitNorms: (expert) set to true to omit the norms associated with
502
+ this field (this disables length normalization and index-time
503
+ boosting for the field, and saves some memory). Only full-text
504
+ fields or fields that need an index-time boost need norms.
505
+ termVectors: [false] set to true to store the term vector for a
506
+ given field.
507
+ When using MoreLikeThis, fields used for similarity should be
508
+ stored for best performance.
509
+ termPositions: Store position information with the term vector.
510
+ This will increase storage costs.
511
+ termOffsets: Store offset information with the term vector. This
512
+ will increase storage costs.
513
+ default: a value that should be used if no value is specified
514
+ when adding a document.
515
+ -->
516
+
517
+ <!-- NOTE: this is not a full list of fields in the index; dynamic fields are also used -->
518
+ <field name="id" type="string" indexed="true" stored="true" required="true" />
519
+ <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
520
+ <!-- default, catch all search field -->
521
+ <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
522
+
523
+ <!-- these display fields are NOT multi-valued -->
524
+ <field name="marc_display" type="string" indexed="false" stored="true" multiValued="false"/>
525
+ <field name="title_display" type="string" indexed="false" stored="true" multiValued="false"/>
526
+ <field name="title_vern_display" type="string" indexed="false" stored="true" multiValued="false"/>
527
+ <field name="subtitle_display" type="string" indexed="false" stored="true" multiValued="false"/>
528
+ <field name="subtitle_vern_display" type="string" indexed="false" stored="true" multiValued="false"/>
529
+ <field name="author_display" type="string" indexed="false" stored="true" multiValued="false"/>
530
+ <field name="author_vern_display" type="string" indexed="false" stored="true" multiValued="false"/>
531
+
532
+ <!-- these fields are also used for display, so they must be stored -->
533
+ <field name="isbn_t" type="text" indexed="true" stored="true" multiValued="true"/>
534
+ <field name="language_facet" type="string" indexed="true" stored="true" multiValued="true" />
535
+ <field name="subject_topic_facet" type="string" indexed="true" stored="true" multiValued="true" />
536
+ <field name="subject_era_facet" type="string" indexed="true" stored="true" multiValued="true" />
537
+ <field name="subject_geo_facet" type="string" indexed="true" stored="true" multiValued="true" />
538
+ <!-- pub_date is used for facet and display so it must be indexed and stored -->
539
+ <field name="pub_date" type="string" indexed="true" stored="true" multiValued="true"/>
540
+ <!-- pub_date sort uses new trie-based int fields, which are recommended for any int and are displayable, sortable, and range-queryable.
541
+ we use 'tint' for faster range-queries. -->
542
+ <field name="pub_date_sort" type="tint" indexed="true" stored="true" multiValued="false"/>
543
+
544
+ <!-- format is used for facet, display, and choosing which partial to use for the show view, so it must be stored and indexed -->
545
+ <field name="format" type="string" indexed="true" stored="true"/>
546
+
547
+
548
+
549
+ <!-- Dynamic field definitions. If a field name is not found, dynamicFields
550
+ will be used if the name matches any of the patterns.
551
+ RESTRICTION: the glob-like pattern in the name attribute must have
552
+ a "*" only at the start or the end.
553
+ EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
554
+ Longer patterns will be matched first. If equal-size patterns
555
+ both match, the first appearing in the schema will be used. -->
556
+ <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
557
+ <dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/>
558
+ <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
559
+ <dynamicField name="*_t" type="text" indexed="true" stored="true" multiValued="true"/>
560
+ <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
561
+ <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
562
+ <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
563
+ <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
564
+
565
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
566
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
567
+
568
+ <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
569
+ <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
570
+
571
+ <!-- some trie-coded dynamic fields for faster range queries -->
572
+ <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
573
+ <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
574
+ <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
575
+ <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
576
+ <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
577
+
578
+ <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
579
+
580
+ <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
581
+ <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
582
+
583
+ <dynamicField name="random_*" type="random" />
584
+
585
+ <dynamicField name="*_display" type="string" indexed="false" stored="true" multiValued="true" />
586
+ <dynamicField name="*_facet" type="string" indexed="true" stored="true" multiValued="true" />
587
+ <dynamicField name="*_sort" type="string" indexed="true" stored="false" multiValued="false" />
588
+ <dynamicField name="*_unstem_search" type="text_general" indexed="true" stored="false" multiValued="true" />
589
+ <dynamicField name="*spell" type="textSpell" indexed="true" stored="false" multiValued="true" />
590
+
591
+ <!-- Uncomment the following to ignore any fields that don't already match an existing
592
+ field name or dynamic field, rather than reporting them as an error.
593
+ Alternatively, change the type="ignored" to some other type, e.g. "text", if you want
594
+ unknown fields indexed and/or stored by default. -->
595
+ <!--dynamicField name="*" type="ignored" multiValued="true" /-->
596
+
597
+ </fields>
598
+
599
+ <!-- Field to use to determine and enforce document uniqueness.
600
+ Unless this field is marked with required="false", it will be a required field
601
+ -->
602
+ <uniqueKey>id</uniqueKey>
603
+
604
+ <!-- field for the QueryParser to use when an explicit fieldname is absent -->
605
+ <defaultSearchField>text</defaultSearchField>
606
+
607
+ <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
608
+ <solrQueryParser defaultOperator="AND"/>
609
+
610
+ <!-- copyField commands copy one field to another at the time a document
611
+ is added to the index. It's used either to index the same field differently,
612
+ or to add multiple fields to the same field for easier/faster searching. -->
613
+ <!-- Copy Fields -->
614
+
615
+ <!-- unstemmed fields -->
616
+ <copyField source="title_t" dest="title_unstem_search"/>
617
+ <copyField source="subtitle_t" dest="subtitle_unstem_search"/>
618
+ <copyField source="title_addl_t" dest="title_addl_unstem_search"/>
619
+ <copyField source="title_added_entry_t" dest="title_added_entry_unstem_search"/>
620
+ <copyField source="title_series_t" dest="title_series_unstem_search"/>
621
+ <copyField source="author_t" dest="author_unstem_search"/>
622
+ <copyField source="author_addl_t" dest="author_addl_unstem_search"/>
623
+ <copyField source="subject_t" dest="subject_unstem_search"/>
624
+ <copyField source="subject_addl_t" dest="subject_addl_unstem_search"/>
625
+ <copyField source="subject_topic_facet" dest="subject_topic_unstem_search"/>
626
+
627
+ <!-- sort fields -->
628
+ <copyField source="pub_date" dest="pub_date_sort"/>
629
+
630
+
631
+ <!-- spellcheck fields -->
632
+ <!-- default spell check; should match fields for default request handler -->
633
+ <!-- it won't work with a copy of a copy field -->
634
+ <copyField source="*_t" dest="spell"/>
635
+ <copyField source="*_facet" dest="spell"/>
636
+ <!-- title spell check; should match fields for title request handler -->
637
+ <copyField source="title_t" dest="title_spell"/>
638
+ <copyField source="subtitle_t" dest="title_spell"/>
639
+ <copyField source="addl_titles_t" dest="title_spell"/>
640
+ <copyField source="title_added_entry_t" dest="title_spell"/>
641
+ <copyField source="title_series_t" dest="title_spell"/>
642
+ <!-- author spell check; should match fields for author request handler -->
643
+ <copyField source="author_t" dest="author_spell"/>
644
+ <copyField source="author_addl_t" dest="author_spell"/>
645
+ <!-- subject spell check; should match fields for subject request handler -->
646
+ <copyField source="subject_topic_facet" dest="subject_spell"/>
647
+ <copyField source="subject_t" dest="subject_spell"/>
648
+ <copyField source="subject_addl_t" dest="subject_spell"/>
649
+
650
+ <!-- OpenSearch query field should match request handler search fields -->
651
+ <copyField source="title_t" dest="opensearch_display"/>
652
+ <copyField source="subtitle_t" dest="opensearch_display"/>
653
+ <copyField source="addl_titles_t" dest="opensearch_display"/>
654
+ <copyField source="title_added_entry_t" dest="opensearch_display"/>
655
+ <copyField source="title_series_t" dest="opensearch_display"/>
656
+ <copyField source="author_t" dest="opensearch_display"/>
657
+ <copyField source="author_addl_t" dest="opensearch_display"/>
658
+ <copyField source="subject_topic_facet" dest="opensearch_display"/>
659
+ <copyField source="subject_t" dest="opensearch_display"/>
660
+ <copyField source="subject_addl_t" dest="opensearch_display"/>
661
+
662
+
663
+ <!-- Below, multiple source fields are copied to the [text] field.
664
+ They are mapped to the same destination field by using the
665
+ dynamic field (wildcard) syntax in the source attribute.
666
+ copyField also supports a maxChars to copy setting. -->
667
+
668
+ <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
669
+ <copyField source="*_s" dest="text"/>
670
+ <copyField source="*_t" dest="text"/>
671
+ <copyField source="*_facet" dest="text"/>
672
+
673
+ <!-- copy name to alphaNameSort, a field designed for sorting by name -->
674
+ <!-- <copyField source="name" dest="alphaNameSort"/> -->
675
+
676
+
677
+ <!-- Similarity is the scoring routine for each document vs. a query.
678
+ A custom similarity may be specified here, but the default is fine
679
+ for most applications. -->
680
+ <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
681
+ <!-- ... OR ...
682
+ Specify a SimilarityFactory class name implementation
683
+ allowing parameters to be used.
684
+ -->
685
+ <!--
686
+ <similarity class="com.example.solr.CustomSimilarityFactory">
687
+ <str name="paramkey">param value</str>
688
+ </similarity>
689
+ -->
690
+
691
+
692
+ </schema>